-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(model): Integrate GPT-4 text generation in Rago (#13)
- Loading branch information
Showing
13 changed files
with
503 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
HF_TOKEN=${HF_TOKEN} | ||
OPENAI_API_KEY=${OPENAI_API_KEY} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
"""OpenAIAug class for query augmentation using OpenAI API.""" | ||
|
||
from __future__ import annotations | ||
|
||
import openai | ||
|
||
from typeguard import typechecked | ||
|
||
from rago.augmented.base import AugmentedBase | ||
|
||
|
||
@typechecked
class OpenAIAug(AugmentedBase):
    """Query augmentation backed by an OpenAI chat model.

    The model is asked to respond to a prompt built from the query and the
    supplied documents; the stripped reply is returned as the augmented
    query.
    """

    def __init__(self, model_name: str = 'gpt-4', k: int = 1) -> None:
        """Initialize the OpenAIAug class.

        Parameters
        ----------
        model_name : str
            Name of the OpenAI model sent with every request.
        k : int
            Number of entries in the list returned by ``search``.
        """
        self.model_name = model_name
        self.k = k

    def search(
        self, query: str, documents: list[str], k: int = 1
    ) -> list[str]:
        """Augment the query by expanding or rephrasing it using OpenAI.

        Parameters
        ----------
        query : str
            The user query to augment.
        documents : list[str]
            Context documents; they are joined with single spaces and
            appended to the prompt.
        k : int
            Currently unused: the length of the result is taken from the
            ``k`` given to the constructor.

        Returns
        -------
        list[str]
            The augmented query repeated ``self.k`` times.
        """
        prompt = f"Retrieval: '{query}'\nContext: {' '.join(documents)}"

        # A `messages=` payload belongs to the chat completions endpoint;
        # the legacy `Completion` endpoint does not accept it. Use the
        # openai>=1.0 entry point when present, else the 0.x chat class.
        if hasattr(openai, 'chat'):
            create = openai.chat.completions.create
        else:
            create = openai.ChatCompletion.create  # type: ignore[attr-defined]

        response = create(
            model=self.model_name,
            messages=[{'role': 'user', 'content': prompt}],
            max_tokens=50,
            temperature=0.5,
        )

        # Attribute access is used because openai>=1.0 response models
        # cannot be subscripted; the content may be None, so fall back
        # to an empty string before stripping.
        augmented_query = (response.choices[0].message.content or '').strip()
        return [augmented_query] * self.k
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
"""OpenAI Generation Model class for flexible GPT-based text generation.""" | ||
|
||
from __future__ import annotations | ||
|
||
from typing import cast | ||
|
||
import openai | ||
|
||
from typeguard import typechecked | ||
|
||
from rago.generation.base import GenerationBase | ||
|
||
|
||
@typechecked
class OpenAIGPTGen(GenerationBase):
    """OpenAI generation model for text generation."""

    def __init__(
        self,
        model_name: str = 'gpt-4',
        output_max_tokens: int = 500,
        api_key: str = '',
    ) -> None:
        """Initialize OpenAIGPTGen with OpenAI's model.

        Parameters
        ----------
        model_name : str
            Name of the OpenAI model, forwarded to the base class.
        output_max_tokens : int
            Upper bound on generated tokens, forwarded to the base class
            as ``output_max_length``.
        api_key : str
            OpenAI API key. When empty, the key already configured on the
            ``openai`` module (if any) is left untouched.
        """
        super().__init__(
            model_name=model_name, output_max_length=output_max_tokens
        )
        # Only override the module-level key when one was actually given,
        # so the empty default does not wipe a key configured elsewhere.
        if api_key:
            openai.api_key = api_key

    def generate(
        self,
        query: str,
        context: list[str],
        language: str = 'en',
    ) -> str:
        """Generate text using OpenAI's API with dynamic model support.

        Parameters
        ----------
        query : str
            The question to answer.
        context : list[str]
            Context passages; they are joined with single spaces.
        language : str
            Language requested for the answer, inserted into the prompt.

        Returns
        -------
        str
            The model reply with surrounding whitespace removed.
        """
        input_text = (
            f"Question: {query}\nContext: {' '.join(context)}\n"
            f"Answer in {language}:"
        )

        # A `messages=` payload belongs to the chat completions endpoint;
        # the legacy `Completion` endpoint does not accept it. Use the
        # openai>=1.0 entry point when present, else the 0.x chat class.
        if hasattr(openai, 'chat'):
            create = openai.chat.completions.create
        else:
            create = openai.ChatCompletion.create  # type: ignore[attr-defined]

        response = create(
            model=self.model_name,
            messages=[{'role': 'user', 'content': input_text}],
            max_tokens=self.output_max_length,
            temperature=0.7,
            top_p=0.9,
            frequency_penalty=0.5,
            presence_penalty=0.3,
        )

        # Attribute access is used because openai>=1.0 response models
        # cannot be subscripted; the content may be None, so fall back
        # to an empty string before stripping.
        content = response.choices[0].message.content or ''
        return cast(str, content.strip())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
"""Tests for Rago package using OpenAI GPT-4.""" | ||
|
||
import os | ||
|
||
from pathlib import Path | ||
|
||
import pytest | ||
|
||
from rago import Rago | ||
from rago.augmented import OpenAIAug | ||
from rago.generation.openai_gpt import ( | ||
OpenAIGPTGen, | ||
) | ||
from rago.retrieval import StringRet | ||
|
||
|
||
@pytest.fixture
def animals_data() -> list[str]:
    """Provide the non-blank, stripped lines of ``data/animals.txt``."""
    dataset_file = Path(__file__).parent / 'data' / 'animals.txt'
    with open(dataset_file) as handle:
        # Strip each line once, then drop the ones that end up empty.
        stripped = (raw.strip() for raw in handle)
        return [entry for entry in stripped if entry]
|
||
|
||
@pytest.fixture
def openai_api_key() -> str:
    """Provide the OpenAI API key read from ``OPENAI_API_KEY``.

    Raises ``EnvironmentError`` when the variable is unset or empty.
    """
    key = os.environ.get('OPENAI_API_KEY')
    if key:
        return key
    raise EnvironmentError(
        'Please set the OPENAI_API_KEY environment variable.'
    )
|
||
|
||
@pytest.mark.skip_on_ci
def test_openai_gpt4(animals_data: list[str], openai_api_key: str) -> None:
    """Run the full RAG pipeline against GPT-4 and check the answer."""
    # Build the three pipeline stages in the order they are wired in.
    retriever = StringRet(animals_data)
    augmenter = OpenAIAug(k=3)
    generator = OpenAIGPTGen(api_key=openai_api_key, model_name='gpt-4')
    pipeline = Rago(
        retrieval=retriever,
        augmented=augmenter,
        generation=generator,
    )

    answer = pipeline.prompt('Is there any animal larger than a dinosaur?')

    expected = 'Blue Whale'
    assert (
        expected in answer
    ), 'Expected response to mention Blue Whale as a larger animal.'