Migrate to OpenAI 1.0 #19

Merged
merged 1 commit on Nov 22, 2023
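For context, a minimal sketch of the call-pattern change this migration adopts: module-level openai.ChatCompletion / openai.Completion calls become methods on OpenAI / AsyncOpenAI client instances, and responses are typed objects whose fields are attributes rather than dict keys. The API key and model name below are placeholders.

from openai import OpenAI

client = OpenAI(api_key="sk-...")  # replaces setting openai.api_key at module level

# pre-1.0:  openai.ChatCompletion.create(model=..., messages=...)
# 1.0+:     client.chat.completions.create(model=..., messages=...)
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": "Hello"}],
)

# usage and choices are attributes now, not dict keys
print(response.usage.prompt_tokens, response.usage.completion_tokens)
print(response.choices[0].message.content)

Streamed chunks likewise expose chunk.choices[0].delta.content as an attribute, and it can be None on some chunks, which is why the diff below adds None checks before yielding.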
110 changes: 61 additions & 49 deletions llms/providers/openai.py
@@ -1,9 +1,7 @@
from typing import AsyncGenerator, Dict, Generator, List, Optional, Union

import aiohttp
import tiktoken

import openai
from openai import AsyncOpenAI, OpenAI
import json

from ..results.result import AsyncStreamResult, Result, StreamResult
@@ -13,19 +11,18 @@
class OpenAIProvider(BaseProvider):
# cost is per million tokens
MODEL_INFO = {

"gpt-3.5-turbo": {"prompt": 2.0, "completion": 2.0, "token_limit": 4097, "is_chat": True},
"gpt-3.5-turbo-instruct": {"prompt": 2.0, "completion": 2.0, "token_limit": 4097, "is_chat": False},
"gpt-4": {"prompt": 30.0, "completion": 60.0, "token_limit": 8192, "is_chat": True},
"gpt-4-1106-preview": {"prompt": 10.0, "completion": 20.0, "token_limit": 128000, "is_chat": True},
}

def __init__(self, api_key, model=None):
openai.api_key = api_key
def __init__(self, api_key=None, model=None):
if model is None:
model = list(self.MODEL_INFO.keys())[0]
self.model = model
self.client = openai.ChatCompletion if self.is_chat_model else openai.Completion
self.client = OpenAI(api_key=api_key)
self.async_client = AsyncOpenAI(api_key=api_key)

@property
def is_chat_model(self) -> bool:
@@ -58,7 +55,7 @@ def _prepare_model_inputs(
self,
prompt: str,
history: Optional[List[dict]] = None,
system_message: str = None,
system_message: Union[str, List[dict], None] = None,
temperature: float = 0,
max_tokens: int = 300,
stream: bool = False,
@@ -70,9 +67,13 @@ def _prepare_model_inputs(
if history:
messages = [*history, *messages]

if system_message:
if isinstance(system_message, str):
messages = [{"role": "system", "content": system_message}, *messages]

# users can provide multiple full system messages in dict form
elif isinstance(system_message, list):
messages = [*system_message, *messages]

model_inputs = {
"messages": messages,
"temperature": temperature,
@@ -115,7 +116,7 @@ def complete(
system_message: system messages in OpenAI format; each must have role and content keys.
A name key can also be included for few-shot examples.
"""

model_inputs = self._prepare_model_inputs(
prompt=prompt,
history=history,
@@ -124,35 +125,40 @@ def complete(
max_tokens=max_tokens,
**kwargs,
)

with self.track_latency():
response = self.client.create(model=self.model, **model_inputs)

if self.is_chat_model:
response = self.client.chat.completions.create(model=self.model, **model_inputs)
else:
response = self.client.completions.create(model=self.model, **model_inputs)

is_func_call = response.choices[0].finish_reason == "function_call"
function_call = {}
completion = ""
if self.is_chat_model:
if is_func_call:
completion = {
function_call = {
"name": response.choices[0].message.function_call.name,
"arguments": json.loads(response.choices[0].message.function_call.arguments)
}
else:
completion = response.choices[0].message.content.strip()
else:
completion = response.choices[0].text.strip()

usage = response.usage

meta = {
"tokens_prompt": usage["prompt_tokens"],
"tokens_completion": usage["completion_tokens"],
"tokens_prompt": usage.prompt_tokens,
"tokens_completion": usage.completion_tokens,
"latency": self.latency,
}
return Result(
text=completion if not is_func_call else '',
text=completion,
model_inputs=model_inputs,
provider=self,
meta=meta,
function_call=completion if is_func_call else {}
function_call=function_call,
)

async def acomplete(
@@ -162,7 +168,6 @@ async def acomplete(
system_message: Optional[List[dict]] = None,
temperature: float = 0,
max_tokens: int = 300,
aiosession: Optional[aiohttp.ClientSession] = None,
**kwargs,
) -> Result:
"""
@@ -171,9 +176,6 @@ async def acomplete(
system_message: system messages in OpenAI format; each must have role and content keys.
A name key can also be included for few-shot examples.
"""
if aiosession is not None:
openai.aiosession.set(aiosession)

model_inputs = self._prepare_model_inputs(
prompt=prompt,
history=history,
@@ -184,7 +186,10 @@ async def acomplete(
)

with self.track_latency():
response = await self.client.acreate(model=self.model, **model_inputs)
if self.is_chat_model:
response = await self.async_client.chat.completions.create(model=self.model, **model_inputs)
else:
response = await self.async_client.completions.create(model=self.model, **model_inputs)

if self.is_chat_model:
completion = response.choices[0].message.content.strip()
@@ -194,8 +199,8 @@ async def acomplete(
usage = response.usage

meta = {
"tokens_prompt": usage["prompt_tokens"],
"tokens_completion": usage["completion_tokens"],
"tokens_prompt": usage.prompt_tokens,
"tokens_completion": usage.completion_tokens,
"latency": self.latency,
}
return Result(
@@ -208,8 +213,8 @@ async def acomplete(
def complete_stream(
self,
prompt: str,
history: Optional[List[tuple]] = None,
system_message: str = None,
history: Optional[List[dict]] = None,
system_message: Union[str, List[dict], None] = None,
temperature: float = 0,
max_tokens: int = 300,
**kwargs,
@@ -229,35 +234,39 @@ def complete_stream(
stream=True,
**kwargs,
)
response = self.client.create(model=self.model, **model_inputs)

if self.is_chat_model:
response = self.client.chat.completions.create(model=self.model, **model_inputs)
else:
response = self.client.completions.create(model=self.model, **model_inputs)
stream = self._process_stream(response)

return StreamResult(stream=stream, model_inputs=model_inputs, provider=self)

def _process_stream(self, response: Generator) -> Generator:
if self.is_chat_model:
chunk_generator = (
chunk["choices"][0].get("delta", {}).get("content", "")
for chunk in response
chunk.choices[0].delta.content for chunk in response
)
else:
chunk_generator = (
chunk["choices"][0].get("text", "") for chunk in response
chunk.choices[0].text for chunk in response
)

while not (first_text := next(chunk_generator)):
continue
yield first_text.lstrip()
yield from chunk_generator
for chunk in chunk_generator:
if chunk is not None:
yield chunk

async def acomplete_stream(
self,
prompt: str,
history: Optional[List[tuple]] = None,
system_message: str = None,
history: Optional[List[dict]] = None,
system_message: Union[str, List[dict], None] = None,
temperature: float = 0,
max_tokens: int = 300,
aiosession: Optional[aiohttp.ClientSession] = None,
**kwargs,
) -> AsyncStreamResult:
"""
@@ -266,9 +275,6 @@ async def acomplete_stream(
system_message: system messages in OpenAI format; each must have role and content keys.
A name key can also be included for few-shot examples.
"""
if aiosession is not None:
openai.aiosession.set(aiosession)

model_inputs = self._prepare_model_inputs(
prompt=prompt,
history=history,
@@ -279,7 +285,11 @@ async def acomplete_stream(
**kwargs,
)

response = await self.client.acreate(model=self.model, **model_inputs)
with self.track_latency():
if self.is_chat_model:
response = await self.async_client.chat.completions.create(model=self.model, **model_inputs)
else:
response = await self.async_client.completions.create(model=self.model, **model_inputs)
stream = self._aprocess_stream(response)
return AsyncStreamResult(
stream=stream, model_inputs=model_inputs, provider=self
@@ -288,21 +298,23 @@ async def acomplete_stream(
async def _aprocess_stream(self, response: AsyncGenerator) -> AsyncGenerator:
if self.is_chat_model:
while True:
first_completion = (await response.__anext__())["choices"][0].get("delta", {}).get("content", "")
first_completion = (await response.__anext__()).choices[0].delta.content
if first_completion:
yield first_completion.lstrip()
break

async for chunk in response:
completion = chunk["choices"][0].get("delta", {}).get("content", "")
yield completion
completion = chunk.choices[0].delta.content
if completion is not None:
yield completion
else:
while True:
first_completion = (await response.__anext__())["choices"][0].get("text", "")
first_completion = (await response.__anext__()).choices[0].text
if first_completion:
yield first_completion.lstrip()
break

async for chunk in response:
completion = chunk["choices"][0].get("text", "")
yield completion
completion = chunk.choices[0].text
if completion is not None:
yield completion
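As an illustration of the broadened system_message parameter above, here is a small standalone sketch that mirrors the string versus list-of-dicts branching added to _prepare_model_inputs; the helper name and the prompts are made up for the example.

def build_messages(prompt, history=None, system_message=None):
    # mirrors the handling added in _prepare_model_inputs
    messages = [{"role": "user", "content": prompt}]
    if history:
        messages = [*history, *messages]
    if isinstance(system_message, str):
        messages = [{"role": "system", "content": system_message}, *messages]
    elif isinstance(system_message, list):
        # full system messages in dict form, e.g. few-shot examples using the optional "name" key
        messages = [*system_message, *messages]
    return messages

# plain string form
build_messages("What is 2+2?", system_message="You are a terse assistant.")

# list-of-dicts form; each dict needs "role" and "content", and "name" is optional
build_messages(
    "What is 2+2?",
    system_message=[
        {"role": "system", "content": "You are a terse assistant."},
        {"role": "system", "name": "example_user", "content": "What is 1+1?"},
        {"role": "system", "name": "example_assistant", "content": "2"},
    ],
)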
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,6 +1,6 @@
openai
tiktoken
anthropic==0.3
anthropic>=0.3
anthropic_bedrock
ai21
cohere
7 changes: 4 additions & 3 deletions setup.py
@@ -9,15 +9,15 @@

setup(
name="pyllms",
version="0.3.8.1",
version="0.3.9",
description="Minimal Python library to connect to LLMs (OpenAI, Anthropic, Google Palm2/Vertex, AI21, Cohere, Aleph-Alpha, HuggingfaceHub), with a built-in model performance benchmark.",
long_description=long_description,
long_description_content_type="text/markdown",
author="Vladimir Prelovac",
author_email="[email protected]",
packages=find_packages(),
install_requires=[
"openai",
"openai>=1",
"tiktoken",
"anthropic>=0.3",
"anthropic_bedrock",
@@ -28,7 +28,8 @@
"google-cloud-aiplatform",
"prettytable",
"protobuf~=3.20.3",
"grpcio~=1.54.2"
"grpcio~=1.54.2",
"google-generativeai",
],
extras_require={
"local": ["einops", "accelerate"]