From f37dfd4bf377751d693cc51a7b2415c1ee1a3f1e Mon Sep 17 00:00:00 2001 From: Peter Jung Date: Sat, 16 Nov 2024 11:18:43 +0700 Subject: [PATCH] Unify variable name and use gpt-4o for cheaper runs --- prediction_market_agent_tooling/deploy/agent.py | 12 ++++++------ .../tools/image_gen/market_thumbnail_gen.py | 2 +- prediction_market_agent_tooling/tools/is_invalid.py | 2 +- .../tools/is_predictable.py | 11 ++++++++--- pyproject.toml | 2 +- tests/tools/test_is_predictable.py | 2 +- 6 files changed, 18 insertions(+), 13 deletions(-) diff --git a/prediction_market_agent_tooling/deploy/agent.py b/prediction_market_agent_tooling/deploy/agent.py index e8132fbc..68acb995 100644 --- a/prediction_market_agent_tooling/deploy/agent.py +++ b/prediction_market_agent_tooling/deploy/agent.py @@ -297,10 +297,10 @@ class DeployablePredictionAgent(DeployableAgent): def __init__( self, enable_langfuse: bool = APIKeys().default_enable_langfuse, - store_prediction: bool = True, + store_predictions: bool = True, ) -> None: super().__init__(enable_langfuse=enable_langfuse) - self.store_prediction = store_prediction + self.store_predictions = store_predictions def initialize_langfuse(self) -> None: super().initialize_langfuse() @@ -443,13 +443,13 @@ def after_process_market( processed_market: ProcessedMarket | None, ) -> None: keys = APIKeys() - if self.store_prediction: + if self.store_predictions: market.store_prediction( processed_market=processed_market, keys=keys, agent_name=self.agent_name ) else: logger.info( - f"Prediction {processed_market} not stored because {self.store_prediction=}." + f"Prediction {processed_market} not stored because {self.store_predictions=}." ) def before_process_markets(self, market_type: MarketType) -> None: @@ -508,12 +508,12 @@ class DeployableTraderAgent(DeployablePredictionAgent): def __init__( self, enable_langfuse: bool = APIKeys().default_enable_langfuse, - store_prediction: bool = True, + store_predictions: bool = True, store_trades: bool = True, place_trades: bool = True, ) -> None: super().__init__( - enable_langfuse=enable_langfuse, store_prediction=store_prediction + enable_langfuse=enable_langfuse, store_predictions=store_predictions ) self.store_trades = store_trades self.place_trades = place_trades diff --git a/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py b/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py index 60b2aa5f..79ab64d9 100644 --- a/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py +++ b/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py @@ -17,7 +17,7 @@ def rewrite_question_into_image_generation_prompt(question: str) -> str: "openai not installed, please install extras `langchain` to use this function." ) llm = ChatOpenAI( - model="gpt-4-turbo", + model="gpt-4o-2024-08-06", temperature=0.0, api_key=APIKeys().openai_api_key_secretstr_v1, ) diff --git a/prediction_market_agent_tooling/tools/is_invalid.py b/prediction_market_agent_tooling/tools/is_invalid.py index dc1a59a6..3a04047c 100644 --- a/prediction_market_agent_tooling/tools/is_invalid.py +++ b/prediction_market_agent_tooling/tools/is_invalid.py @@ -60,7 +60,7 @@ @db_cache def is_invalid( question: str, - engine: str = "gpt-4o", + engine: str = "gpt-4o-2024-08-06", temperature: float = LLM_SUPER_LOW_TEMPERATURE, seed: int = LLM_SEED, prompt_template: str = QUESTION_IS_INVALID_PROMPT, diff --git a/prediction_market_agent_tooling/tools/is_predictable.py b/prediction_market_agent_tooling/tools/is_predictable.py index 37895eb1..5d392dc8 100644 --- a/prediction_market_agent_tooling/tools/is_predictable.py +++ b/prediction_market_agent_tooling/tools/is_predictable.py @@ -7,7 +7,10 @@ get_langfuse_langchain_config, observe, ) -from prediction_market_agent_tooling.tools.utils import LLM_SUPER_LOW_TEMPERATURE +from prediction_market_agent_tooling.tools.utils import ( + LLM_SEED, + LLM_SUPER_LOW_TEMPERATURE, +) # I tried to make it return a JSON, but it didn't work well in combo with asking it to do chain of thought. QUESTION_IS_PREDICTABLE_BINARY_PROMPT = """Main signs about a fully qualified question (sometimes referred to as a "market"): @@ -81,7 +84,7 @@ @db_cache def is_predictable_binary( question: str, - engine: str = "gpt-4-1106-preview", + engine: str = "gpt-4o-2024-08-06", prompt_template: str = QUESTION_IS_PREDICTABLE_BINARY_PROMPT, max_tokens: int = 1024, ) -> bool: @@ -98,6 +101,7 @@ def is_predictable_binary( llm = ChatOpenAI( model=engine, temperature=LLM_SUPER_LOW_TEMPERATURE, + seed=LLM_SEED, api_key=APIKeys().openai_api_key_secretstr_v1, ) @@ -118,7 +122,7 @@ def is_predictable_binary( def is_predictable_without_description( question: str, description: str, - engine: str = "gpt-4-1106-preview", + engine: str = "gpt-4o-2024-08-06", prompt_template: str = QUESTION_IS_PREDICTABLE_WITHOUT_DESCRIPTION_PROMPT, max_tokens: int = 1024, ) -> bool: @@ -137,6 +141,7 @@ def is_predictable_without_description( llm = ChatOpenAI( model=engine, temperature=LLM_SUPER_LOW_TEMPERATURE, + seed=LLM_SEED, api_key=APIKeys().openai_api_key_secretstr_v1, ) diff --git a/pyproject.toml b/pyproject.toml index c3f96165..8f2d9241 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "prediction-market-agent-tooling" -version = "0.56.3" +version = "0.57.0" description = "Tools to benchmark, deploy and monitor prediction market agents." authors = ["Gnosis"] readme = "README.md" diff --git a/tests/tools/test_is_predictable.py b/tests/tools/test_is_predictable.py index c30b7254..dd9ba1bf 100644 --- a/tests/tools/test_is_predictable.py +++ b/tests/tools/test_is_predictable.py @@ -84,7 +84,7 @@ def test_is_predictable_binary(question: str, answerable: bool) -> None: ( "Will an AI get gold on any International Math Olympiad by 2025?", "Resolves to YES if either Eliezer or Paul acknowledge that an AI has succeeded at this task.", - True, # True, because description doesn't provide any extra information. + False, # False, because description says that either `Eliezer or Paul` needs to acknowledge it. ), ], )