From f37dfd4bf377751d693cc51a7b2415c1ee1a3f1e Mon Sep 17 00:00:00 2001
From: Peter Jung <peter@jung.ninja>
Date: Sat, 16 Nov 2024 11:18:43 +0700
Subject: [PATCH] Unify store_predictions naming and use gpt-4o-2024-08-06 for cheaper runs

---
 prediction_market_agent_tooling/deploy/agent.py      | 12 ++++++------
 .../tools/image_gen/market_thumbnail_gen.py          |  2 +-
 prediction_market_agent_tooling/tools/is_invalid.py  |  2 +-
 .../tools/is_predictable.py                          | 11 ++++++++---
 pyproject.toml                                       |  2 +-
 tests/tools/test_is_predictable.py                   |  2 +-
 6 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/prediction_market_agent_tooling/deploy/agent.py b/prediction_market_agent_tooling/deploy/agent.py
index e8132fbc..68acb995 100644
--- a/prediction_market_agent_tooling/deploy/agent.py
+++ b/prediction_market_agent_tooling/deploy/agent.py
@@ -297,10 +297,10 @@ class DeployablePredictionAgent(DeployableAgent):
     def __init__(
         self,
         enable_langfuse: bool = APIKeys().default_enable_langfuse,
-        store_prediction: bool = True,
+        store_predictions: bool = True,
     ) -> None:
         super().__init__(enable_langfuse=enable_langfuse)
-        self.store_prediction = store_prediction
+        self.store_predictions = store_predictions
 
     def initialize_langfuse(self) -> None:
         super().initialize_langfuse()
@@ -443,13 +443,13 @@ def after_process_market(
         processed_market: ProcessedMarket | None,
     ) -> None:
         keys = APIKeys()
-        if self.store_prediction:
+        if self.store_predictions:
             market.store_prediction(
                 processed_market=processed_market, keys=keys, agent_name=self.agent_name
             )
         else:
             logger.info(
-                f"Prediction {processed_market} not stored because {self.store_prediction=}."
+                f"Prediction {processed_market} not stored because {self.store_predictions=}."
             )
 
     def before_process_markets(self, market_type: MarketType) -> None:
@@ -508,12 +508,12 @@ class DeployableTraderAgent(DeployablePredictionAgent):
     def __init__(
         self,
         enable_langfuse: bool = APIKeys().default_enable_langfuse,
-        store_prediction: bool = True,
+        store_predictions: bool = True,
         store_trades: bool = True,
         place_trades: bool = True,
     ) -> None:
         super().__init__(
-            enable_langfuse=enable_langfuse, store_prediction=store_prediction
+            enable_langfuse=enable_langfuse, store_predictions=store_predictions
         )
         self.store_trades = store_trades
         self.place_trades = place_trades
diff --git a/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py b/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py
index 60b2aa5f..79ab64d9 100644
--- a/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py
+++ b/prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py
@@ -17,7 +17,7 @@ def rewrite_question_into_image_generation_prompt(question: str) -> str:
             "openai not installed, please install extras `langchain` to use this function."
         )
     llm = ChatOpenAI(
-        model="gpt-4-turbo",
+        model="gpt-4o-2024-08-06",
         temperature=0.0,
         api_key=APIKeys().openai_api_key_secretstr_v1,
     )
diff --git a/prediction_market_agent_tooling/tools/is_invalid.py b/prediction_market_agent_tooling/tools/is_invalid.py
index dc1a59a6..3a04047c 100644
--- a/prediction_market_agent_tooling/tools/is_invalid.py
+++ b/prediction_market_agent_tooling/tools/is_invalid.py
@@ -60,7 +60,7 @@
 @db_cache
 def is_invalid(
     question: str,
-    engine: str = "gpt-4o",
+    engine: str = "gpt-4o-2024-08-06",
     temperature: float = LLM_SUPER_LOW_TEMPERATURE,
     seed: int = LLM_SEED,
     prompt_template: str = QUESTION_IS_INVALID_PROMPT,
diff --git a/prediction_market_agent_tooling/tools/is_predictable.py b/prediction_market_agent_tooling/tools/is_predictable.py
index 37895eb1..5d392dc8 100644
--- a/prediction_market_agent_tooling/tools/is_predictable.py
+++ b/prediction_market_agent_tooling/tools/is_predictable.py
@@ -7,7 +7,10 @@
     get_langfuse_langchain_config,
     observe,
 )
-from prediction_market_agent_tooling.tools.utils import LLM_SUPER_LOW_TEMPERATURE
+from prediction_market_agent_tooling.tools.utils import (
+    LLM_SEED,
+    LLM_SUPER_LOW_TEMPERATURE,
+)
 
 # I tried to make it return a JSON, but it didn't work well in combo with asking it to do chain of thought.
 QUESTION_IS_PREDICTABLE_BINARY_PROMPT = """Main signs about a fully qualified question (sometimes referred to as a "market"):
@@ -81,7 +84,7 @@
 @db_cache
 def is_predictable_binary(
     question: str,
-    engine: str = "gpt-4-1106-preview",
+    engine: str = "gpt-4o-2024-08-06",
     prompt_template: str = QUESTION_IS_PREDICTABLE_BINARY_PROMPT,
     max_tokens: int = 1024,
 ) -> bool:
@@ -98,6 +101,7 @@ def is_predictable_binary(
     llm = ChatOpenAI(
         model=engine,
         temperature=LLM_SUPER_LOW_TEMPERATURE,
+        seed=LLM_SEED,
         api_key=APIKeys().openai_api_key_secretstr_v1,
     )
 
@@ -118,7 +122,7 @@ def is_predictable_binary(
 def is_predictable_without_description(
     question: str,
     description: str,
-    engine: str = "gpt-4-1106-preview",
+    engine: str = "gpt-4o-2024-08-06",
     prompt_template: str = QUESTION_IS_PREDICTABLE_WITHOUT_DESCRIPTION_PROMPT,
     max_tokens: int = 1024,
 ) -> bool:
@@ -137,6 +141,7 @@ def is_predictable_without_description(
     llm = ChatOpenAI(
         model=engine,
         temperature=LLM_SUPER_LOW_TEMPERATURE,
+        seed=LLM_SEED,
         api_key=APIKeys().openai_api_key_secretstr_v1,
     )
 
diff --git a/pyproject.toml b/pyproject.toml
index c3f96165..8f2d9241 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "prediction-market-agent-tooling"
-version = "0.56.3"
+version = "0.57.0"
 description = "Tools to benchmark, deploy and monitor prediction market agents."
 authors = ["Gnosis"]
 readme = "README.md"
diff --git a/tests/tools/test_is_predictable.py b/tests/tools/test_is_predictable.py
index c30b7254..dd9ba1bf 100644
--- a/tests/tools/test_is_predictable.py
+++ b/tests/tools/test_is_predictable.py
@@ -84,7 +84,7 @@ def test_is_predictable_binary(question: str, answerable: bool) -> None:
         (
             "Will an AI get gold on any International Math Olympiad by 2025?",
             "Resolves to YES if either Eliezer or Paul acknowledge that an AI has succeeded at this task.",
-            True,  # True, because description doesn't provide any extra information.
+            False,  # False, because description says that either `Eliezer or Paul` needs to acknowledge it.
         ),
     ],
 )