diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css new file mode 100644 index 00000000..8f4f9369 --- /dev/null +++ b/docs/source/_static/css/custom.css @@ -0,0 +1,94 @@ +/* custom.css */ + +:root { + --pst-color-info: 23, 162, 184; + --pst-color-logo: #2EB5EB; + --bs-gray-500:#adb5bd; + + } +p { + font-size: 0.9em; + margin-bottom: 1.15rem; + } + +html[data-theme=light] { + --pst-color-secondary: #3d3d3d; /*change the secondary color, header link to gray */ + --pst-color-link-hover: #25262; /*change the side bar link color to black */ + --pst-color-table-row-hover-bg: #f8f9fa; /*change the table row hover color to light gray */ + --sd-color-secondary: gray; /*change the secondary color to black */ + --sd-color-secondary-highlight: gray; /*change the secondary highlight color to black */ + } + +.bd-main .bd-content .bd-article-container { + max-width: 100%; /* default is 60em */ + } + +h1{ + font-size: 2rem; /* make the h1 in the code smaller */ +} +/* .bd-page-width { + max-width: 100%; + } */ + +.sig-name { + color: black; /* set the class name and attributes to black */ +} +.caption-text { + font-size: 14px; /* Sets the smallest font size */ + color: gray; /* Sets the color to gray */ +} +.bd-links__title { + /* remove the "section navigation" string*/ + display: none; +} + +table { + width: auto; /* Override fit-content which breaks Styler user guide ipynb */ +} + +/* Main index page overview cards */ + +.intro-card { + padding: 30px 10px 20px 10px; +} + +.intro-card .sd-card-img-top { + margin: 10px; + height: 52px; + background: none !important; +} + +.intro-card .sd-card-title { + color: var(--pst-color-primary); + font-size: var(--pst-font-size-h5); + padding: 1rem 0rem 0.5rem 0rem; +} + +.intro-card .sd-card-footer { + border: none !important; +} + +.intro-card .sd-card-footer p.sd-card-text { + max-width: 220px; + margin-left: auto; + margin-right: auto; +} + +.intro-card .sd-btn-secondary { + background-color: #6c757d 
!important; + border-color: #6c757d !important; +} + +.intro-card .sd-btn-secondary:hover { + background-color: #5a6268 !important; + border-color: #545b62 !important; +} + +.card, .card img { + background-color: var(--pst-color-background); +} + +/* Remove hover effect from table rows */ +table tr:hover { + background-color: transparent !important; +} diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css deleted file mode 100644 index c9e7a583..00000000 --- a/docs/source/_static/custom.css +++ /dev/null @@ -1,93 +0,0 @@ -/* custom.css */ - -:root { - --pst-color-info: 23, 162, 184; - --pst-color-logo: #2EB5EB; - --bs-gray-500:#adb5bd; - - } -p { - font-size: 0.9em; - margin-bottom: 1.15rem; - } - -html[data-theme=light] { - --pst-color-secondary: #3d3d3d; /*change the secondary color, header link to gray */ - --pst-color-link-hover: #25262; /*change the side bar link color to black */ - --pst-color-table-row-hover-bg: #f8f9fa; /*change the table row hover color to light gray */ - --sd-color-secondary: gray; /*change the secondary color to black */ - --sd-color-secondary-highlight: gray; /*change the secondary highlight color to black */ - } - -.bd-main .bd-content .bd-article-container { - max-width: 100%; /* default is 60em */ - } - -h1{ - font-size: 2rem; /* make the h1 in the code smaller */ -} -/* .bd-page-width { - max-width: 100%; - } */ - -.sig-name { - color: black; /* set the class name and attributes to black */ -} -.caption-text { - font-size: 14px; /* Sets the smallest font size */ - color: gray; /* Sets the color to gray */ -} -.bd-links__title { - /* remove the "section navigation" string*/ - display: none; -} - -table { - width: auto; /* Override fit-content which breaks Styler user guide ipynb */ - } - - /* Main index page overview cards */ - - .intro-card { - padding: 30px 10px 20px 10px; - } - - .intro-card .sd-card-img-top { - margin: 10px; - height: 52px; - background: none !important; - } - - .intro-card .sd-card-title { 
- color: var(--pst-color-primary); - font-size: var(--pst-font-size-h5); - padding: 1rem 0rem 0.5rem 0rem; - } - - .intro-card .sd-card-footer { - border: none !important; - } - - .intro-card .sd-card-footer p.sd-card-text { - max-width: 220px; - margin-left: auto; - margin-right: auto; - } - - .intro-card .sd-btn-secondary { - background-color: #6c757d !important; - border-color: #6c757d !important; - } - - .intro-card .sd-btn-secondary:hover { - background-color: #5a6268 !important; - border-color: #545b62 !important; - } - - .card, .card img { - background-color: var(--pst-color-background); - } - -table th, table td { - background-color: transparent; -} diff --git a/docs/source/_static/LLM_arch.png b/docs/source/_static/images/LLM_arch.png similarity index 100% rename from docs/source/_static/LLM_arch.png rename to docs/source/_static/images/LLM_arch.png diff --git a/docs/source/_static/images/LightRAG-logo-circle.png b/docs/source/_static/images/LightRAG-logo-circle.png new file mode 100644 index 00000000..899dba0c Binary files /dev/null and b/docs/source/_static/images/LightRAG-logo-circle.png differ diff --git a/docs/source/_static/images/LightRAG-logo-doc.jpeg b/docs/source/_static/images/LightRAG-logo-doc.jpeg new file mode 100644 index 00000000..f8c64904 Binary files /dev/null and b/docs/source/_static/images/LightRAG-logo-doc.jpeg differ diff --git a/docs/source/_static/images/LightRAG-logo.jpg b/docs/source/_static/images/LightRAG-logo.jpg new file mode 100644 index 00000000..13ec4b56 Binary files /dev/null and b/docs/source/_static/images/LightRAG-logo.jpg differ diff --git a/docs/source/_static/images/LightRAG_dataflow.png b/docs/source/_static/images/LightRAG_dataflow.png new file mode 100644 index 00000000..b385553c Binary files /dev/null and b/docs/source/_static/images/LightRAG_dataflow.png differ diff --git a/docs/source/_static/images/ReAct.jpg b/docs/source/_static/images/ReAct.jpg new file mode 100644 index 00000000..b062d258 Binary files 
/dev/null and b/docs/source/_static/images/ReAct.jpg differ diff --git a/docs/source/_static/images/lightrag_structure.png b/docs/source/_static/images/lightrag_structure.png new file mode 100644 index 00000000..cf40969b Binary files /dev/null and b/docs/source/_static/images/lightrag_structure.png differ diff --git a/docs/source/_static/model_client.png b/docs/source/_static/images/model_client.png similarity index 100% rename from docs/source/_static/model_client.png rename to docs/source/_static/images/model_client.png diff --git a/docs/source/_static/retriever.png b/docs/source/_static/images/retriever.png similarity index 100% rename from docs/source/_static/retriever.png rename to docs/source/_static/images/retriever.png diff --git a/docs/source/apis/components/agent_prompt.rst b/docs/source/apis/components/agent_prompt.rst new file mode 100644 index 00000000..5432c912 --- /dev/null +++ b/docs/source/apis/components/agent_prompt.rst @@ -0,0 +1,71 @@ +.. _agent_prompt: + +.. _DEFAULT_REACT_AGENT_SYSTEM_PROMPT: + +DEFAULT_REACT_AGENT_SYSTEM_PROMPT +---------------------------------- + +This is the default prompt used by the system to interact with the agents. It contains the following structure: + +.. code-block:: python + + DEFAULT_REACT_AGENT_SYSTEM_PROMPT = r""" + {# role/task description #} + You task is to answer user's query with minimum steps and maximum accuracy using the tools provided. + {# REACT instructions #} + Each step you will read the previous Thought, Action, and Observation(execution result of the action)steps and then provide the next Thought and Action. + + You only have access to the following tools: + {# tools #} + {% for tool in tools %} + {{ loop.index }}. 
ToolName: {{ tool.metadata.name }} + Tool Description: {{ tool.metadata.description }} + Tool Parameters: {{ tool.metadata.fn_schema_str }} {#tool args can be misleading, especially if we already have type hints and docstring in the function#} + {% endfor %} + {# output is always more robust to use json than string #} + --- + Your output must be in valid JSON format(raw Python string format) with two keys: + { + "thought": "", + "action": "ToolName(, )" + } + - Must double quote the JSON str. + - Inside of the JSON str, Must use escape double quote and escape backslash for string. + For example: + "action": "finish(\"John's.\")" + --- + {# Specifications TODO: preference between the usage of llm tool vs the other tool #} + Process: + - Step 1: Read the user query and potentially divide it into subqueries. And get started with the first subquery. + - Call one available tool at a time to solve each subquery/subquestion. \ + - At step 'finish', join all subqueries answers and finish the task. + Remember: + - Action must call one of the above tools with Took Name. It can not be empty. + - Read the Tool Description and ensure your args and kwarg follow what each tool expects in types. e.g. (a=1, b=2) if it is keyword argument or (1, 2) if it is positional. + - You will always end with 'finish' action to finish the task. The answer can be the final answer or failure message. + - When the initial query is simple, use minimum steps to answer the query. 
+ {#Examples can be here#} + {# Check if there are any examples #} + {% if examples %} + + {% for example in examples %} + {{ example }} + {% endfor %} + + {% endif %} + <> + ----------------- + {# History #} + {% for history in step_history %} + Step {{history.step}}: + { + "thought": "{{history.thought}}", + "action": "{{history.action}}", + } + "observation": "{{history.observation}}" + {% endfor %} + {% if input_str %} + User query: + {{ input_str }} + {% endif %} + """ \ No newline at end of file diff --git a/docs/source/apis/components/index.rst b/docs/source/apis/components/index.rst index f5003726..0fef9a70 100644 --- a/docs/source/apis/components/index.rst +++ b/docs/source/apis/components/index.rst @@ -31,7 +31,7 @@ Agents components.agent Model Clients ------------ +----------------- .. toctree:: :maxdepth: 1 diff --git a/docs/source/apis/core/index.rst b/docs/source/apis/core/index.rst index 5842224b..1010b29b 100644 --- a/docs/source/apis/core/index.rst +++ b/docs/source/apis/core/index.rst @@ -22,6 +22,7 @@ Overview core.prompt_builder core.retriever core.string_parser + core.text_splitter core.tokenizer core.tool_helper core.types @@ -66,7 +67,8 @@ Document Processing .. toctree:: :maxdepth: 1 - core.document_splitter + .. core.document_splitter + core.text_splitter Embedding and Retrieval ----------------------- diff --git a/docs/source/apis/index.rst b/docs/source/apis/index.rst index 12bca548..548a2e90 100644 --- a/docs/source/apis/index.rst +++ b/docs/source/apis/index.rst @@ -3,49 +3,127 @@ API Reference Welcome to the LightRAG API reference! This section provides detailed documentation of the internal APIs that make up the LightRAG framework. Explore the APIs to understand how to effectively utilize and integrate LightRAG components into your projects. + +Core +---------- + +The core section of the LightRAG API documentation provides detailed information about the foundational components of the LightRAG system. 
These components are essential for the basic operations and serve as the building blocks for higher-level functionalities. + +.. autosummary:: + + core.base_data_class + core.model_client + core.component + core.data_components + core.db + core.default_prompt_template + core.document_splitter + core.embedder + core.functional + core.generator + core.memory + core.parameter + core.prompt_builder + core.retriever + core.string_parser + core.text_splitter + core.tokenizer + core.tool_helper + core.types + + +Components +----------- + +The components section of the LightRAG API documentation outlines the detailed specifications and functionalities of various API components. Each component plays a crucial role in the LightRAG framework, providing specialized capabilities and interactions. + +.. autosummary:: + + components.agent + components.model_client + + .. components.reasoning + + components.retriever + components.output_parsers + + +Evaluation +---------- +.. autosummary:: + + eval.answer_match_acc + eval.retriever_recall + eval.retriever_relevance + eval.llm_as_judge + + +Optimizer +---------- +.. autosummary:: + :maxdepth: 2 + + optim.optimizer + optim.sampler + optim.few_shot_optimizer + optim.llm_augment + optim.llm_optimizer + + +Tracing +---------- +.. autosummary:: + + tracing.decorators + tracing.generator_state_logger + tracing.generator_call_logger + + +Utils +---------- +.. autosummary:: + + utils.logger + utils.serialization + utils.config + utils.registry + utils.setup_env + + .. toctree:: :maxdepth: 2 - :caption: Core + :hidden: core/index .. toctree:: :maxdepth: 2 - :caption: Components + :hidden: components/index - .. toctree:: - :maxdepth: 1 - :caption: Optimizer + :maxdepth: 2 + :hidden: optim/index - .. optim.optimizer - .. optim.sampler - .. optim.few_shot_optimizer - .. optim.llm_augment - .. optim.llm_optimizer - - .. Why cant this one layer work? - .. toctree:: :maxdepth: 2 - :caption: Tracing + :hidden: tracing/index .. 
toctree:: :maxdepth: 2 - :caption: Evaluation + :hidden: eval/index .. toctree:: :maxdepth: 2 - :caption: Utils + :hidden: utils/index \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 1ca4a0dc..986d5c25 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -55,6 +55,9 @@ "sphinx.ext.autosectionlabel", "sphinx_design", "sphinx_copybutton", + "nbsphinx", + "sphinx_search.extension" + # "myst_nb", # "sphinx.builders.changes", # 'recommonmark', # 'myst_parser' @@ -79,7 +82,7 @@ html_theme = "pydata_sphinx_theme" -html_logo = "../../images/LightRAG-logo-doc.jpeg" +html_logo = "./_static/images/LightRAG-logo-doc.jpeg" # These options are for the sphinx_rtd_theme html_theme_options = { @@ -106,7 +109,7 @@ # A shorter title for the navigation bar. Default is the same as html_title. html_short_title = "LightRAG" # this will be the logo shown on the browser header -html_favicon = "../../images/LightRAG-logo-circle.png" +html_favicon = "./_static/images/LightRAG-logo-circle.png" # In Sphinx documentation, the configuration option add_module_names in the conf.py file controls # whether module names are prefixed before object names in the documentation. This setting is particularly @@ -132,4 +135,4 @@ def setup(app): - app.add_css_file("custom.css") # Add custom CSS file to the Sphinx configuration + app.add_css_file("css/custom.css") # Add custom CSS file to the Sphinx configuration diff --git a/docs/source/contributor/contribute_to_document.rst b/docs/source/contributor/contribute_to_document.rst index c56bc1f6..2f33a24d 100644 --- a/docs/source/contributor/contribute_to_document.rst +++ b/docs/source/contributor/contribute_to_document.rst @@ -35,7 +35,7 @@ Setup & Build Documentation **2. Install Necessary Packages** -LightRAG's documentation style is `pydata_sphinx_theme `_ (version: 0.15.2). +LightRAG's documentation style is `pydata_sphinx_theme `_ (version: 0.15.3). 
Install by ``pip``: diff --git a/docs/source/developer_notes/generator.rst b/docs/source/developer_notes/generator.rst index 7f89f751..00625a47 100644 --- a/docs/source/developer_notes/generator.rst +++ b/docs/source/developer_notes/generator.rst @@ -1,3 +1,5 @@ +.. _generator: + Generator ========= *The Center of it All* diff --git a/docs/source/developer_notes/index.rst b/docs/source/developer_notes/index.rst index 8e92339a..69c8dcac 100644 --- a/docs/source/developer_notes/index.rst +++ b/docs/source/developer_notes/index.rst @@ -15,7 +15,7 @@ This is our tutorials before you move ahead to build use cases (LLM application -.. figure:: /_static/LLM_arch.png +.. figure:: /_static/images/LLM_arch.png :alt: LLM application is no different from a mode training/eval workflow :align: center :width: 600px diff --git a/docs/source/developer_notes/llm_intro.rst b/docs/source/developer_notes/llm_intro.rst index 18bea461..7612093b 100644 --- a/docs/source/developer_notes/llm_intro.rst +++ b/docs/source/developer_notes/llm_intro.rst @@ -68,7 +68,7 @@ But, you don't have to use their advanced features, our library in default will Data Flow & LightRAG structure ---------------------- +----------------------------------- TODO: Add our data flow diagram here. diff --git a/docs/source/developer_notes/model_client.rst b/docs/source/developer_notes/model_client.rst index c1e2d446..98b21bea 100644 --- a/docs/source/developer_notes/model_client.rst +++ b/docs/source/developer_notes/model_client.rst @@ -10,7 +10,7 @@ What you will learn? Because so, by switching off ``ModelClient`` in a ``Generator`` or ``Embedder`` component, you can make your prompt or ``Retriever`` model-agnostic. -.. figure:: /_static/model_client.png +.. 
figure:: /_static/images/model_client.png :align: center :alt: ModelClient :width: 400px @@ -22,7 +22,7 @@ Because so, by switching off ``ModelClient`` in a ``Generator`` or ``Embedder`` All users are encouraged to customize your own ``ModelClient`` whenever you need to do so. You can refer our code in ``components.model_client`` dir. Model Inference SDKs -------------------- +------------------------ With cloud API providers like OpenAI, Groq, Anthropic, it often comes with a `sync` and an `async` client via their SDKs. For example: diff --git a/docs/source/developer_notes/react_agent_xy.rst b/docs/source/developer_notes/react_agent_xy.rst new file mode 100644 index 00000000..f3c0599a --- /dev/null +++ b/docs/source/developer_notes/react_agent_xy.rst @@ -0,0 +1,231 @@ +ReAct Agent +================= + +Check out `react agent notebook <../notebooks/react_agent.ipynb>`_ + +The goal of this tutorial is to: + +1. Demonstrate how ``LightRAG`` implements the ReAct agent +2. Provide a Deep Dive on ReAct Agent(Reference) + +What is an agent and why you need it? +------------------------------------------------ + +An agent, is better defined as a system that strategically uses LLM models and various tools to plan and execute steps. +Although LLMs and RAGs can generate text response with conversation history and internal knowledge, +they are unable to plan sequentially and decide which resource to use. + +Introduction +----------------------- +Before explaining ``LightRAG Agent`` implementation, here is a quick introduction of ReAct Agent. + +To solve a query, the `ReAct Agent `_, like its name(``Re``- Reason; ``Act`` - Act), +first uses LLM to analyze the context and plan actions to answer the query(reasoning). +Then it takes actions to utilize external resources(action). For more details, please see the :ref:`deep-dive`. 
+ +LightRAG's Implementation +----------------------------------------------------- +Next, let's look at how ``LightRAG`` makes the implementation convenient. In ``LightRAG``, the ReAct agent is a type of :ref:`generator` that runs multiple sequential steps to generate the final response, with designed prompt, external functions(named as ``tools``) and ``JsonParser output_processors``. + +1. **Prompt:** We have an easy-to-customize prompt template designed for ReAct agent that takes in +``tools``, few shot ``examples``, ``history``, and ``user query``. +The ``history`` will be automatically managed by the agent. ``user query`` will be handled in each single turn. +Hence when initializing an agent, we only need to set up the ``tools`` and the ``examples`` in the ``preset_prompt_kwargs`` for the system prompt and +use ``user query`` in each agent call. :ref:`Prompt `. + +2. **Tools:** ReAct Agent needs to plan the tool to use, which means it needs to access the tools' descriptions. +``LightRAG`` provides dynamic tool handling, using ``FunctionTool`` to encapsulate tool functionalities. The metadata(function name, description, and parameters) will be extracted and passed to the prompt automatically. This process not only makes tool integration more seamless but also enhances developer efficiency by allowing straightforward definition and management of tools. + +Here is the example to illustrate the usage of ``FunctionTool``. It's easy to set up using ``from_defaults``. + +.. 
code-block:: python + + from lightrag.core.tool_helper import FunctionTool + + # define the tools + def multiply(a: int, b: int) -> int: + '''Multiply two numbers.''' + return a * b + def add(a: int, b: int) -> int: + '''Add two numbers.''' + return a + b + + tools = [ + FunctionTool.from_defaults(fn=multiply), + FunctionTool.from_defaults(fn=add), + ] + + for tool in tools: + name = tool.metadata.name + description = tool.metadata.description + parameter = tool.metadata.fn_schema_str + print(f"Function name: {name}") + print(f"Function description: {description}") + print(f"Function parameter: {parameter}") + + # Function name: multiply + # Function description: multiply(a: int, b: int) -> int + # Multiply two numbers. + # Function parameter: {"type": "object", "properties": {"a": {"type": "int"}, "b": {"type": "int"}}, "required": ["a", "b"]} + # Function name: add + # Function description: add(a: int, b: int) -> int + # Add two numbers. + # Function parameter: {"type": "object", "properties": {"a": {"type": "int"}, "b": {"type": "int"}}, "required": ["a", "b"]} + +The agent will then call these external functions based on the function descriptions. +In addition to user-defined tools, the :class:`ReActAgent ` built-in ``llm_tool`` +for leveraging LLM's internal knowledge, and ``finish`` for completing processes. ``llm_tool`` uses the same model with the agent. Developers have the flexibility to enable or disable these as needed. + +3. **Output Parser:** ``LightRAG`` requests the model to output intermediate Thought and Action as JSON, which facilitates better error handling and easier data manipulation than strings. For example, + +.. code-block:: json + + { + "thought": "", + "action": "ToolName(, )" + } + +This format allows the ``LightRAG`` JSON parser to efficiently decode the model's output and extract arguments. +The parsed data is then utilized by the ``StepOutput`` class to manage the flow of thought, action and observation. + +4. 
**Example:** Let's see a Q&A agent example: + +.. code-block:: python + + from lightrag.core.tool_helper import FunctionTool + from lightrag.components.agent.react_agent import ReActAgent + from lightrag.components.model_client import OpenAIClient + from lightrag.components.model_client import GroqAPIClient + + import dotenv + # load evironment + dotenv.load_dotenv(dotenv_path=".env", override=True) + + # define the tools + def multiply(a: int, b: int) -> int: + '''Multiply two numbers.''' + return a * b + def add(a: int, b: int) -> int: + '''Add two numbers.''' + return a + b + + tools = [ + FunctionTool.from_defaults(fn=multiply), + FunctionTool.from_defaults(fn=add), + ] + + # for tool in tools: + # name = tool.metadata.name + # description = tool.metadata.description + # parameter = tool.metadata.fn_schema_str + # print(f"Function name: {name}") + # print(f"Function description: {description}") + # print(f"Function parameter: {parameter}") + + + examples = [ + """ + User: What is 9 - 3? + You: { + "thought": "I need to subtract 3 from 9, but there is no subtraction tool, so I ask llm_tool to answer the query.", + "action": "llm_tool('What is 9 - 3?')" + } + """ + ] + + preset_prompt_kwargs = {"example": examples} + llm_model_kwargs = { + "model": "llama3-70b-8192", + "temperature": 0.0 + } + + agent = ReActAgent( + tools=tools, + model_client=GroqAPIClient(), + model_kwargs=llm_model_kwargs, + max_steps=3, + preset_prompt_kwargs=preset_prompt_kwargs + ) + + import time + queries = ["What is 3 add 4?", "3*9=?"] + average_time = 0 + for query in queries: + t0 = time.time() + answer = agent(query) + + # Answer: The answer is 7. + # Answer: The answer is 27. + +5. **Subquery and History:** Moreover, in our design, the agent will potentially divide a query into subqueries, join all subqueries answers and finish the task. Developers can customize the prompt depending on the use cases. +The intermediate step history is managed. 
The agent will visit its previous reasoning, action and observations before making decisions. + +.. _deep-dive: + +ReAct Agent Deep Dive +--------------------------- +Please read this section if you need more information on ReAct agent. + +`ReAct Agent `_, like its name(``Re``- Reason; ``Act`` - Act), is a framework generating reasoning and taking actions in an interleaved manner. The reasoning step guides the model to action plans and the action step allows the agent to interact with external sources such as knowledge bases. + +The paper shows: +1. ReAct with few-shot prompt and Wikipedia API interaction outperforms chain-of-thought on `HotpotQA `_ (Question and Answering) and `Fever `_ (Fact Verification). +2. ReAct performs well on two interactive decision making benchmarks. + +**1. Overall Workflow** + +Unlike the reasoning only and acting only approaches, given a query, the ReAct agent will go through a sequence of steps to solve the problem. (`Source `_) + +Here is an example from the paper that demonstrates the workflow. + +.. image:: ../_static/images/ReAct.jpg + +The environment contains user query, step histories, observations, and external sources. + +At each step, the agent: + +- **[Thought]** In response to the environment and user query, the agent uses its LLM to generate a strategic thought that outlines a plan or hypothesis guiding the subsequent action. + +- **[Action]** The agent executes the action. + +The environment will be updated: + +- **[Observation]** The observation is created after the action is done. + +Then the agent iteratively generates thoughts based on latest observation and context(previous steps), takes actions and gets new observations. + +The termination condition is: + +* The agent finds the answer and takes "finish" action. + +* The agent fails to get the answer when the defined max steps is reached. Return nothing. + +**2. Action Space** + +Now we understand the 3 different stages: Thought, Action, Observation. 
Let's focus on Action, one of agents' uniqueness. + +Actions refer to the tools the agent uses to interact with the environment and creates observations. +Note: the paper defines Thought(or reasoning trace) as a *language level action* but it is not included in the action space because it doesn't impact the environment. + +Use ``HotpotQA`` dataset as an example, what external source do we need to answer questions? + +`HotpotQA `_ contains Wikipedia-based questions that require multi-hop reasoning. Therefore, the agent will need to query the Wikipedia API. + +In the `ReAct paper `_, researchers include 3 actions in the "action space" (simplified version here): + +* search[entity], returns the first 5 sentences from the corresponding entity wiki page if it exists, or else suggests top-5 similar entities. + +* lookup[string], simulating Ctrl+F functionality on the browser. + +* finish[answer], which would finish the current task with answer. + +**3. Components** + +With the workflow and action space, next, let's focus on the components needed to implement the agent. + +* **prompt:** Besides the role and task-specific description, the key in ReAct prompting is to define the tools to use in the prompt. + +* **function call:** In the implementation, each action is essentially a function to call. Clear functionality definition is important for the agent to determine which action to take next. + +* **parser:** The agent is built on LLMs. It takes in the prompt with context, generates thought and determine the action to take in text response. +To really call functions, we need to parse the text response to get the parameters for the determined function. 
\ No newline at end of file diff --git a/docs/source/developer_notes/retriever.rst b/docs/source/developer_notes/retriever.rst index 2828f3ff..e1cd2354 100644 --- a/docs/source/developer_notes/retriever.rst +++ b/docs/source/developer_notes/retriever.rst @@ -49,7 +49,7 @@ If we want it to be more accurate, we move up to more expensive and more accurat Design pattern ------------------ -.. figure:: /_static/retriever.png +.. figure:: /_static/images/retriever.png :align: center :alt: Retriever design :width: 620px @@ -615,4 +615,4 @@ Additionally, ``LocalDB`` help us keep track of our initial documents and its tr - :class:`components.retriever.faiss_retriever.FAISSRetriever` - :class:`components.retriever.bm25_retriever.InMemoryBM25Retriever` - :class:`components.retriever.reranker_retriever.RerankerRetriever` - - :class:`components.retriever.llm_retriever.LLMRetriever` + - :class:`components.retriever.llm_retriever.LLMRetriever` \ No newline at end of file diff --git a/docs/source/developer_notes/text_splitter.rst b/docs/source/developer_notes/text_splitter.rst index a6794250..ff7afc9d 100644 --- a/docs/source/developer_notes/text_splitter.rst +++ b/docs/source/developer_notes/text_splitter.rst @@ -1,28 +1,69 @@ -TextSplitter -=================== +Text Splitter +----------------- .. admonition:: Author :class: highlight `Xiaoyi Gu `_ -The ``DocumentSplitter`` in LightRAG is designed to preprocess text by splitting long documents into smaller chunks. -This improves the performance of embedding models and ensures they operate within their maximum context length limits. +In this tutorial, we will learn: -``LightRAG's DocumentSplitter`` splits a list of documents (:obj:`core.base_data_class.Document`) into a list of shorter documents. -The document object to manage id, document content,optional meta data, document's embedding vectors, etc. 
-Instead of maintaining the complex relationship between parent, child, previous, and next documents, ``LightRAG`` mainly manages the related documents with ``parent_doc_id`` (id of the Document where the chunk is from) and ``order`` (order of the chunked document in the original document). +#. Why do we need the ``TextSplitter`` -**Key Arguments:** +#. How does ``LightRAG's TextSplitter`` work -* ``split_by`` is the unit by which the document should be split. We implemented a string split function inside to break the text into a ``list``. The splitted ``list`` will get concatenated based on the specified ``split_length`` later. +#. How to implement ``LightRAG's TextSplitter`` + +Why do we need the ``TextSplitter`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +LLMs’s context window is limited and the performance often drops with very long and nonsense input. +Shorter content is more manageable and fits memory constraints. +The goal of the text splitter is to chunk large data into smaller ones, potentially improving embedding and retrieving. + +The ``TextSplitter`` is designed to efficiently process and chunk **plain text**. +It leverages configurable separators to facilitate the splitting of :obj:`document object ` into smaller manageable document chunks. + +How does ``LightRAG's TextSplitter`` work +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +``TextSplitter`` supports 2 types of splitting. + +* Type 1: Specify the exact text splitting point such as space<" "> and periods<".">. It is intuitive: +"Hello, world!" -> ["Hello, " ,"world!"] + +* Type 2: Use :class:`tokenizer `. It works as: +"Hello, world!" -> ['Hello', ',', ' world', '!'] +This aligns with how models see text in the form of tokens. (`Reference `_) + +Simple text splitting can underestimate the number of tokens. Tokenizer reflects the real token numbers the models take in. +But the Tokenizer here only works on word level. 
+ +* **Overview**: +``TextSplitter`` first utilizes ``split_by`` to specify the text-splitting criterion and breaks the long text into smaller texts. +Then we create a sliding window with length= ``chunk_size``. It moves at step= ``chunk_size`` - ``chunk_overlap``. +The texts inside each window will get concatenated to a smaller chunk. The generated chunks from the splitted text will be returned. + +Here are some Definitions: + +* **Definitions** + +``split_by``: Specifies the text-splitting criterion using predefined keys like "word", "sentence", "page", "passage", and "token". The splitter utilizes the corresponding separator from the ``SEPARATORS`` dictionary. + +``SEPARATORS``: Maps ``split_by`` criterions to their exact text separators, e.g., spaces<" "> for "word" or periods<"."> for "sentence". + +Usage: **SEPARATORS[``split_by``]=separator** + +.. note:: + For option ``token``, its separator is "" because we directly split by a tokenizer, instead of text point. + +* ``split_by`` specifies the separator by which the document should be split, i.e. the smallest unit during splitting. +For Type 1 splitting, we apply ``Python str.split()`` to break the text. Check the following table for ``split_by`` options: .. list-table:: Text Splitting Options :widths: 10 15 75 :header-rows: 1 - * - Option - - Split by + * - ``split_by`` Option + - Actual Separator - Example * - **page** - ``\f`` @@ -37,67 +78,163 @@ Check the following table for ``split_by`` options: - ```` - ``Hello, world. This is LightRAG.`` to ``['Hello, ', 'world. ', 'This ', 'is ', 'LightRAG.']`` -We will use ``word`` in our example. +* ``chunk_size`` is the maximum number of units in each chunk. -* ``split_length`` is the the maximum number of units in each split. +* ``chunk_overlap`` is the number of units that each chunk should overlap. Including context at the borders prevents sudden meaning shift in text between sentences/context, especially in sentiment analysis. 
-* ``split_overlap`` is the number of units that each split should overlap. Including context at the borders prevents sudden meaning shift in text between sentences/context, especially in sentiment analysis. In ``LightRAG`` we use ``windowed`` function in ``more-itertools`` package to build a sliding window for the texts to keep the overlaps. The window step size = ``split_length - split_overlap``. +Here is an example of how ``chunk_size`` works with ``chunk_overlap``: -After splitting the long text into a list and using a sliding window to generate the text lists with specified overlap length, the text list will be concatenated into text pieces again. -Here is a quick example: +.. code-block:: python + from lightrag.core.text_splitter import TextSplitter + from lightrag.core.types import Document -``Review: The theater service is terrible. The movie is good.`` Set ``split_by: word``, ``split_length: 6``, ``split_overlap: 2``. + # configure the splitter setting + text_splitter_settings = { + "split_by": "word", + "chunk_size": 5, + "chunk_overlap": 2, + } + + # set up the document splitter + text_splitter = TextSplitter( + split_by=text_splitter_settings["split_by"], + chunk_size=text_splitter_settings["chunk_size"], + chunk_overlap=text_splitter_settings["chunk_overlap"], + ) + doc1 = Document( + text="Hello, this is lightrag. Please implement your splitter here.", + id="doc1", + ) -With our ``DocumentSplitter`` logic, the output will be: ``Review: The theater service is terrible.``, ``is terrible. The movie is good.`` -It prevents the model of misunderstand the context. If we don't have overlap, the second sentence will be ``The movie is good.`` and the embedding model might only consider this document is merely ``Positive``. + documents = [doc1] -Now let's see the code example. First, import the components. + splitted_docs = (text_splitter.call(documents=documents)) -.. 
code:: python + for doc in splitted_docs: + print(doc.text) + # Output: + # Hello, this is lightrag. Please + # lightrag. Please implement your splitter + # your splitter here. +In this case, when splitting by ``word`` with ``chunk_size``=5 and ``chunk_overlap``=2, +each chunk will repeat 2 words from the previous chunk. These 2 words are set by ``chunk_overlap``. +This means each chunk has ``5-2=3`` word(split unit) difference compared with its previous. - from core.document_splitter import DocumentSplitter - from core.base_data_class import Document +.. note:: + ``chunk_overlap`` should always be smaller than ``chunk_size``, otherwise the window won't move and the splitting stucks. -Then, configure the splitter settings. -.. code:: python +One more example on ``split_by=token``: +.. code-block:: python + # configure the splitter setting text_splitter_settings = { - "split_by": "word", - "split_length": 15, - "split_overlap": 2, - } + "split_by": "token", + "chunk_size": 5, + "chunk_overlap": 2, + } + + # set up the document splitter + text_splitter = TextSplitter( + ... + ) + + doc1 = Document( + text="Hello, this is lightrag. Please implement your splitter here.", + id="doc1", + ) + documents = [doc1] + splitted_docs = (text_splitter.call(documents=documents)) -Next, define the document splitter and set up the documents. + for doc in splitted_docs: + print(doc.text) + # Output: + # Hello, this is lightrag. Please + # lightrag. Please implement your splitter + # your splitter here. +In this case, when splitting by ``word`` with ``chunk_size``=5 and ``chunk_overlap``=2, +each chunk will repeat 2 words from the previous chunk. These 2 words are set by ``chunk_overlap``. +This means each chunk has ``5-2=3`` word(split unit) difference compared with its previous. -.. code:: python +.. note:: + ``chunk_overlap`` should always be smaller than ``chunk_size``, otherwise the window won't move and the splitting stucks. 
- text_splitter = DocumentSplitter( - split_by=text_splitter_settings["split_by"], - split_length=text_splitter_settings["split_length"], - split_overlap=text_splitter_settings["split_overlap"], - ) - example1 = Document( - text="Review: I absolutely loved the friendly staff and the welcoming atmosphere! Sentiment: Positive", - ) - example2 = Document( - text="Review: It was an awful experience, the food was bland and overpriced. Sentiment: Negative", - ) - example3 = Document( - text="Review: What a fantastic movie! Had a great time and would watch it again! Sentiment: Positive", - ) - example4 = Document( - text="Review: The store is not clean and smells bad. Sentiment: Negative", +One more example on ``split_by=token``: + +.. code-block:: python + # configure the splitter setting + text_splitter_settings = { + "split_by": "token", + "chunk_size": 5, + "chunk_overlap": 2, + } + + # set up the document splitter + text_splitter = TextSplitter( + ... + ) + + doc1 = Document( + text="Hello, this is lightrag. Please implement your splitter here.", + id="doc1", + ) + documents = [doc1] + splitted_docs = (text_splitter.call(documents=documents)) + for doc in splitted_docs: + print(doc.text) + # Output: + # Hello, this is l + # is lightrag. + # trag. Please implement your + # implement your splitter here. +When splitting using tokenizer, each chunk still keeps 5 tokens. +Since ``lightrag`` -> ['l', 'igh', 'trag'], the second chunk is actually ``is`` + ``l`` + ``igh`` + ``trag`` + ``.``. + +.. note:: + The punctuation is considered as a token. + +This splitting aligns with how models see text in the form of tokens. (`Reference `_) + +Simple text splitting(Type 1) can underestimate the number of tokens. Tokenizer reflects the real token numbers the models take in. +But the Tokenizer here only works at world level. 
+ +How to implement ``LightRAG's TextSplitter`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +What you need is to specify the arguments and input your documents this way: + +.. code-block:: python + + from lightrag.core.text_splitter import TextSplitter + from lightrag.core.types import Document + + # Configure the splitter settings + text_splitter = TextSplitter( + split_by="sentence", + chunk_size=5, + chunk_overlap=1 ) - documents = [example1, example2, example3, example4] + # Example document + doc = Document( + text="Example text. More example text. Even more text to illustrate.", + id="doc1" + ) -Now you can use the splitter to create document chunks. + # Execute the splitting + splitted_docs = text_splitter.call(documents=[doc]) -.. code:: python + for doc in splitted_docs: + print(doc) - splitted_docs = (text_splitter.call(documents=documents)) +Integration with Other Document Types +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +This functionality is ideal for segmenting texts into sentences, words, pages, or passages, which can then be processed further for NLP applications. +For **PDFs**, developers will need to extract the text before using the splitter. Libraries like ``PyPDF2`` or ``PDFMiner`` can be utilized for this purpose. +``LightRAG``'s future implementations will introduce splitters for ``JSON``, ``HTML``, ``markdown``, and ``code``. - # output: - # splitted_doc: [Document(id=15d838c4-abda-4c39-b81f-9cd745effb43, meta_data=None, text=Review: I absolutely loved the friendly staff and the welcoming atmosphere! Sentiment: Positive, estimated_num_tokens=17), Document(id=e4850140-8762-4972-9bae-1dfe96ccb65f, meta_data=None, text=Review: It was an awful experience, the food was bland and overpriced. Sentiment: Negative, estimated_num_tokens=21), Document(id=6bd772b9-88b4-4dfa-a595-922c0f8a4efb, meta_data=None, text=Review: What a fantastic movie! Had a great time and would watch it again! 
Sentiment: , estimated_num_tokens=21), Document(id=b0d98c1b-13ac-4c92-882e-2ed0196b0c81, meta_data=None, text=again! Sentiment: Positive, estimated_num_tokens=6), Document(id=fdc2429b-17e7-4c00-991f-f89e0955e3a3, meta_data=None, text=Review: The store is not clean and smells bad. Sentiment: Negative, estimated_num_tokens=15)] +Customization Tips +~~~~~~~~~~~~~~~~~~~~~ +You can also customize the ``SEPARATORS``. For example, by defining ``SEPARATORS`` = {"question": "?"} and setting ``split_by`` = "question", the document will be split at each ``?``, ideal for processing text structured +as a series of questions. If you need to customize :class:`tokenizer `, please check `Reference `_. + \ No newline at end of file diff --git a/docs/source/index.rst b/docs/source/index.rst index bd575994..b23ce133 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,8 +6,6 @@ LightRAG Home ======================= -.. .. image:: ../../images/lightrag_structure.png -.. :width: 60% LightRAG is the "PyTorch" library for building large langage model(LLM) applications. It is super light, modular and robust like "PyTorch", and offers essential components for `Retriever`-`Agent`-`Generator` (RAG). 
diff --git a/docs/source/remove_files.py b/docs/source/remove_files.py index 7f33d5b6..b89abdb2 100644 --- a/docs/source/remove_files.py +++ b/docs/source/remove_files.py @@ -44,6 +44,8 @@ def remove_file(directory: str): "components.agent.react_agent.rst", "components.model_client.anthropic_client.rst", "components.output_parsers.outputs.rst", + "components.model_client.cohere_client.rst", + "components.retriever.reranker_retriever.rst" ] try: for filename in os.listdir(directory): diff --git a/docs/source/tutorials/index.rst b/docs/source/tutorials/index.rst index 3bb7a96e..dc0edc48 100644 --- a/docs/source/tutorials/index.rst +++ b/docs/source/tutorials/index.rst @@ -11,5 +11,4 @@ Use Cases :maxdepth: 2 eval_a_rag - introduction_to_basedataclass - document_splitter \ No newline at end of file + introduction_to_basedataclass \ No newline at end of file diff --git a/lightrag/components/agent/react_agent.py b/lightrag/components/agent/react_agent.py index 21540441..fc25a1db 100644 --- a/lightrag/components/agent/react_agent.py +++ b/lightrag/components/agent/react_agent.py @@ -1,5 +1,7 @@ """ -https://arxiv.org/abs/2210.03629, published in Mar, 2023 +ReAct Agent leveraging LLM reasoning and function calling. + +Source: https://arxiv.org/abs/2210.03629, published in Mar, 2023 Agent is not a model or LLM model. Agent is better defined as a system that uses LLM models to plan and replan steps that each involves the usage of various tools, @@ -7,7 +9,7 @@ The future: the agent can write your prompts too. Check out dspy: https://github.com/stanfordnlp/dspy -REact agent can be useful for +ReAct agent can be useful for - Multi-hop reasoning [Q&A], including dividing the query into subqueries and answering them one by one. - Plan the usage of the given tools: highly flexible. Retriever, Generator modules or any other functions can all be wrapped as tools. 
@@ -20,14 +22,17 @@ from typing import List, Union, Callable, Optional, Any, Dict from dataclasses import dataclass from copy import deepcopy +import logging +from lightrag.core.generator import Generator +from lightrag.core.component import Component +from lightrag.core.tool_helper import FunctionTool, AsyncCallable +from lightrag.core.string_parser import JsonParser, parse_function_call +from lightrag.core.generator import GeneratorOutput +from lightrag.core.model_client import ModelClient +from lightrag.utils.logger import printc -from core.generator import Generator -from core.component import Component -from core.tool_helper import FunctionTool, AsyncCallable -from core.string_parser import JsonParser, parse_function_call - -from core.model_client import ModelClient +log = logging.getLogger(__name__) DEFAULT_REACT_AGENT_SYSTEM_PROMPT = r""" {# role/task description #} @@ -84,6 +89,10 @@ } "observation": "{{history.observation}}" {% endfor %} +{% if input_str %} +User query: +{{ input_str }} +{% endif %} """ @@ -116,33 +125,59 @@ class ReActAgent(Generator): There are `examples` which is optional, a list of string examples in the prompt. Example: - ``` - from core.openai_client import OpenAIClient - from components.agent.react_agent import ReActAgent - from core.tool_helper import FunctionTool - # define the tools - def multiply(a: int, b: int) -> int: - '''Multiply two numbers.''' - return a * b - def add(a: int, b: int) -> int: - '''Add two numbers.''' - return a + b - agent = ReActAgent( - tools=[multiply, add], - model_client=OpenAIClient, - model_kwargs={"model": "gpt-3.5-turbo"}, - ) - - Using examples: - - preset_prompt_kwargs = {"examples": examples} - agent = ReActAgent( - tools=[multiply, add], - model_client=OpenAIClient, - model_kwargs={"model": "gpt-3.5-turbo"}, - preset_prompt_kwargs=preset_prompt_kwargs, - ) - ``` + .. 
code-block:: python + + from lightrag.core.tool_helper import FunctionTool + from lightrag.components.agent.react_agent import ReActAgent + from lightrag.components.model_client import GroqAPIClient + + import time + import dotenv + # load evironment + dotenv.load_dotenv(dotenv_path=".env", override=True) + + # define the tools + def multiply(a: int, b: int) -> int: + '''Multiply two numbers.''' + return a * b + def add(a: int, b: int) -> int: + '''Add two numbers.''' + return a + b + + tools = [ + FunctionTool.from_defaults(fn=multiply), + FunctionTool.from_defaults(fn=add), + ] + + # set up examples + examples = [ + "your example, a human-like task-solving trajectory" + ] + # preset examples in the prompt + preset_prompt_kwargs = {"example": examples} + + # set up llm args + llm_model_kwargs = { + "model": "llama3-70b-8192", + "temperature": 0.0 + } + + # initialze an agent + agent = ReActAgent( + tools=tools, + model_client=GroqAPIClient(), + model_kwargs=llm_model_kwargs, + max_steps=3, + preset_prompt_kwargs=preset_prompt_kwargs + ) + + # query the agent + queries = ["What is 3 add 4?", "3*9=?"] + average_time = 0 + for query in queries: + t0 = time.time() + answer = agent(query) + """ def __init__( @@ -177,13 +212,18 @@ def __init__( def llm_tool(input: str) -> str: """ - I answer any input query with llm's world knowledge. Use me as a fallback tool or when the query is simple. + Answer any input query with llm's world knowledge. Use it as a fallback tool or when the query is simple. 
""" # use the generator to answer the query + prompt_kwargs = {"input_str": input} # wrap the query input in the local prompt_kwargs try: - return self.additional_llm_tool(input=input) + response = self.additional_llm_tool.call(prompt_kwargs=prompt_kwargs) + json_response = response.data if isinstance(response, GeneratorOutput) else response # get json data from GeneratorOutput + # print(f"response: {response}, json_response: {json_response}") + return json_response except Exception as e: - print(f"Error using the generator: {e}") + # print(f"Error using the generator: {e}") + log.error(f"Error using the generator: {e}") return None @@ -226,7 +266,8 @@ def _parse_text_response( action = json_obj_response.get(action_key, "") return StepOutput(step=step, thought=thought, action=action) except Exception as e: - print(f"Error parsing response: {e}") + # print(f"Error parsing response: {e}") + log.error(f"Error parsing response: {e}") return None def _execute_action(self, action_step: StepOutput) -> Optional[StepOutput]: @@ -236,7 +277,7 @@ def _execute_action(self, action_step: StepOutput) -> Optional[StepOutput]: action = action_step.action try: fun_name, args, kwargs = parse_function_call(action, self.tools_map) - print(f"fun_name: {fun_name}, args: {args}, kwargs: {kwargs}") + # print(f"fun_name: {fun_name}, args: {args}, kwargs: {kwargs}") fun: Union[Callable, AsyncCallable] = self.tools_map[fun_name].fn result = fun(*args, **kwargs) action_step.fun_name = fun_name @@ -246,13 +287,14 @@ def _execute_action(self, action_step: StepOutput) -> Optional[StepOutput]: action_step.observation = result return action_step except Exception as e: - print(f"Error executing {action}: {e}") + # print(f"Error executing {action}: {e}") + log.error(f"Error executing {action}: {e}") # pass the error as observation so that the agent can continue and correct the error in the next step action_step.observation = f"Error executing {action}: {e}" return action_step def _run_one_step( - 
self, input: str, step: int, prompt_kwargs: Dict, model_kwargs: Dict + self, step: int, prompt_kwargs: Dict, model_kwargs: Dict ) -> str: """ Run one step of the agent. @@ -263,16 +305,22 @@ def _run_one_step( # call the super class Generator to get the response response = super().call( - input=input, prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs - ) + prompt_kwargs=prompt_kwargs, model_kwargs=model_kwargs + ) # response is GeneratorOutput + + # get json response data from generator output + json_response = response.data if isinstance(response, GeneratorOutput) else response + parsed_response = self._parse_text_response( - json_obj_response=response, step=step + json_obj_response=json_response, step=step ) # execute the action if parsed_response and parsed_response.action: parsed_response = self._execute_action(parsed_response) + printc(f"step: {step}, response: {parsed_response}", color="blue") else: - print(f"Failed to parse response for step {step}") + # print(f"Failed to parse response for step {step}") + log.error(f"Failed to parse response for step {step}") self.step_history.append(parsed_response) return response @@ -280,14 +328,17 @@ def _run_one_step( def call( self, input: str, - promt_kwargs: Optional[Dict] = {}, + prompt_kwargs: Optional[Dict] = {}, model_kwargs: Optional[Dict] = {}, ) -> Any: r"""prompt_kwargs: additional prompt kwargs to either replace or add to the preset prompt kwargs.""" + # wrap up the input in the prompt_kwargs + prompt_kwargs["input_str"] = input + printc(f"input_query: {input}", color="cyan") for i in range(self.max_steps): step = i + 1 try: - self._run_one_step(input, step, promt_kwargs, model_kwargs) + self._run_one_step(step, prompt_kwargs, model_kwargs) if ( self.step_history[-1].fun_name and self.step_history[-1].fun_name == "finish" @@ -295,10 +346,15 @@ def call( break except Exception as e: error_message = f"Error running step {step}: {e}" - print(error_message) - - answer = self.step_history[-1].observation - 
print(f"step_history: {self.step_history}") + # print(error_message) + log.error(error_message) + try: + answer = self.step_history[-1].observation + except: + answer = None + printc(f"answer: {answer}", color="magneta") + # print(f"step_history: {self.step_history}") + log.info(f"step_history: {self.step_history}") self.reset() return answer @@ -306,84 +362,3 @@ def _extra_repr(self) -> str: s = f"tools={self.tools}, max_steps={self.max_steps}, " s += super()._extra_repr() return s - - -if __name__ == "__main__": - from components.model_client import GroqAPIClient - import utils.setup_env - - def multiply(a: int, b: int) -> int: - """ - Multiply two numbers. - """ - return a * b - - def add(a: int, b: int) -> int: - """ - Add two numbers. - """ - return a + b - - def search(query: str) -> str: - """ - Search the web for the given query. - """ - return "python programming is a great way to learn programming" - - tools = [ - FunctionTool.from_defaults(fn=multiply), - FunctionTool.from_defaults(fn=add), - # FunctionTool.from_defaults(fn=search), - ] - llm_model_kwargs = { - "model": "llama3-70b-8192", # llama3 is not good with string formatting, llama3 8b is also bad at following instruction, 70b is better but still not as good as gpt-3.5-turbo - # mistral also not good: mixtral-8x7b-32768, but with better prompt, it can still work - "temperature": 0.0, - } - - examples = [ - # r""" - # User: What is 9 - 3? - # You: { - # "thought": "I need to subtract 3 from 9, but there is no subtraction tool, so I ask llm_tool to answer the query.", - # "action": "llm_tool('What is 9 - 3?')" - # } - # """ - ] - agent = ReActAgent( - # examples=examples, - tools=tools, - max_steps=5, - model_client=GroqAPIClient, - model_kwargs=llm_model_kwargs, - ) - print(agent) - queries = [ - # "What is 2 times 3?", - # "What is 3 plus 4?", - # "What is the capital of France? 
and what is 4 times 5 then add 3?", # this is actually two queries, or a multi-hop query - "Li adapted her pet Apple in 2017 when Apple was only 2 months old, now we are at year 2024, how old is Li's pet Apple?", - ] - """ - Results: mixtral-8x7b-32768, 0.9s per query - llama3-70b-8192, 1.8s per query - gpt-3.5-turbo, 2.2s per query - """ - import time - - for i in range(3): - agent = ReActAgent( - tools=tools, - max_steps=5, - model_client=GroqAPIClient, - model_kwargs=llm_model_kwargs, - ) - print(agent.tools) - - # average_time = 0 - # for query in queries: - # t0 = time.time() - # answer = agent(query) - # average_time += time.time() - t0 - # print(f"Answer: {answer}") - # print(f"Average time: {average_time / len(queries)}") diff --git a/lightrag/components/data_process/document_splitter.py b/lightrag/components/data_process/document_splitter.py index 68dae9d3..1a2d4598 100644 --- a/lightrag/components/data_process/document_splitter.py +++ b/lightrag/components/data_process/document_splitter.py @@ -152,3 +152,16 @@ def _concatenate_units( def _extra_repr(self) -> str: s = f"split_by={self.split_by}, split_length={self.split_length}, split_overlap={self.split_overlap}" return s + +if __name__ == "__main__": + from lightrag.core.document_splitter import DocumentSplitter + from lightrag.core.types import Document + + doc1 = Document(text="This is a simple test to check splitting.") + # doc2 = Document(text="This is another test document. 
It is also a long document.") + splitter = DocumentSplitter(split_by="word", split_length=5, split_overlap=2) + # print(splitter) + splitted_docs = splitter([doc1]) + # print(splitted_docs) + for doc in splitted_docs: + print(doc.text) \ No newline at end of file diff --git a/lightrag/components/data_process/text_splitter.py b/lightrag/components/data_process/text_splitter.py new file mode 100644 index 00000000..ad7bd7a6 --- /dev/null +++ b/lightrag/components/data_process/text_splitter.py @@ -0,0 +1,334 @@ +""" +Splitting texts is commonly used as a preprocessing step before embedding and retrieving texts. + +We encourage you to process your data here and define your own embedding and retrieval methods. These methods can highly depend on the product environment and may extend beyond the scope of this library. + +However, the following approaches are commonly shared: + +* **Document Storage:** Define how to store the documents, both raw and chunked. For example, LlamaIndex uses `Document Stores `_ to manage ingested document chunks. + +* **Document Chunking:** Segment documents into manageable chunks suitable for further processing. + +* **Vectorization:** Embed each chunk and store the resulting vectors in Vector Stores. For example, LLama index utilizes `Vector Stores `_. + +* **Retrieval:** Leverage vectors for context retrieval. +""" + +from copy import deepcopy +from typing import List, Literal +from tqdm import tqdm +import logging + +from lightrag.core.component import Component +from lightrag.core.types import Document +from lightrag.components.retriever.bm25_retriever import split_text_tokenized + +# TODO: +# More splitters such as PDF/JSON/HTML Splitter can be built on TextSplitter. 
+ +log = logging.getLogger(__name__) + +DocumentSplitterInputType = List[Document] +DocumentSplitterOutputType = List[Document] + +# customizable seperators map +SEPARATORS = {"page": "\f", "passage": "\n\n", "word": " ", "sentence": ".", "token": ""} + +DEFAULT_CHUNK_SIZE = 1024 +DEFAULT_CHUNK_OVERLAP = 20 + +class TextSplitter(Component): + """ + Text Splitter for Chunking Documents in Batch + + The ``TextSplitter`` is designed for splitting plain text into manageable chunks. + It supports 2 types of splitting. + + * Type 1: Specify the exact text splitting point such as space<" "> and periods<".">. It is intuitive: + "Hello, world!" -> ["Hello, " ,"world!"] + + * Type 2: Use :class:`tokenizer `. It works as: + "Hello, world!" -> ['Hello', ',', ' world', '!'] + + .. note:: + The punctuation is considered as a token. + + This aligns with how models see text in the form of tokens. (`Reference `_) + + Simple text splitting(Type 1) can underestimate the number of tokens. Tokenizer reflects the real token numbers the models take in. + But the Tokenizer here only works at world level. + + * **Definitions** + + ``split_by``: Specifies the text-splitting criterion using predefined keys like "word", "sentence", "page", "passage", and "token". The splitter utilizes the corresponding separator from the ``SEPARATORS`` dictionary. + + ``SEPARATORS``: Maps ``split_by`` criterions to their exact text separators, e.g., spaces<" "> for "word" or periods<"."> for "sentence". + + Usage: **SEPARATORS[``split_by``]=separator** + + .. note:: + For option ``token``, its separator is "" because we directly split by a tokenizer, instead of text point. + + * **Overview**: + ``TextSplitter`` first utilizes ``split_by`` to specify the text-splitting criterion and breaks the long text into smaller texts. + Then we create a sliding window with length= ``chunk_size``. It moves at step= ``chunk_size`` - ``chunk_overlap``. + The texts inside each window will get concatenated to a smaller chunk. 
The generated chunks from the splitted text will be returned. + + * **Splitting Details** + Type 1: + The ``TextSplitter`` utilizes Python's ``str.split(separator)`` method. + Developers can refer to + + .. code-block:: none + + { + "page": "\\f", + "passage": "\\n", + "word": " ", + "sentence": "." + } + for exact points of text division. + + .. note:: + Developers need to determine how to assign text to each data chunk for the embedding and retrieval tasks. + The ``TextSplitter`` ``split_by`` cases: + + - "word": Splits the text at every space (" "), treating spaces as the boundaries between words. + + - "sentence": Splits the text at every period ("."), treating these as the ends of sentences. + + - "page": Splits the text at form feed characters ("\\f"), which are often used to represent page breaks in documents. + + - "passage": Splits the text at double newline characters ("\\n\\n"), useful for distinguishing between paragraphs or sections. + + Type 2: + We implement a tokenizer using ``cl100k_base`` encoding that aligns with how models see text in the form of tokens. + E.g. "tiktoken is great!" -> ["t", "ik", "token", " is", " great", "!"] This helps developers control the token usage and budget better. + + + * **Customization** + You can also customize the ``SEPARATORS``. For example, by defining ``SEPARATORS`` = {"question": "?"} and setting ``split_by`` = "question", the document will be split at each ``?``, ideal for processing text structured + as a series of questions. If you need to customize :class:`tokenizer `, please check `Reference `_. + + * **Concatenating Details** + Type 1/Type 2 create a list of split texts. ``TextSplitter`` then reattaches the specified separator to each piece of the split text, except for the last segment. + This approach maintains the original spacing and punctuation, which is critical in contexts like natural language processing where text formatting can impact interpretations and outcomes. + E.g. "hello world!" 
split by "word" will be kept as "hello " and "world!" + + * **Use Cases** + This functionality is ideal for segmenting texts into sentences, words, pages, or passages, which can then be processed further for NLP applications. + + To handle PDF content, developers need to first extract the text using tools like ``PyPDF2`` or ``PDFMiner`` before splitting. + + Example: + .. code-block:: python + + from lightrag.core.text_splitter import TextSplitter + from lightrag.core.types import Document + + # configure the splitter setting + text_splitter_settings = { + "split_by": "word", + "chunk_size": 20, + "chunk_overlap": 2, + } + + # set up the document splitter + text_splitter = TextSplitter( + split_by=text_splitter_settings["split_by"], + chunk_size=text_splitter_settings["chunk_size"], + chunk_overlap=text_splitter_settings["chunk_overlap"], + ) + + doc1 = Document( + meta_data={"title": "Luna's Profile"}, + text="lots of more nonsense text." * 2 + + "Luna is a domestic shorthair." + + "lots of nonsense text." * 3, + id="doc1", + ) + doc2 = Document( + meta_data={"title": "Luna's Hobbies"}, + text="lots of more nonsense text." * 2 + + "Luna loves to eat lickable treats." + + "lots of more nonsense text." * 2 + + "Luna loves to play cat wand." + + "lots of more nonsense text." * 2 + + "Luna likes to sleep all the afternoon", + id="doc2", + ) + documents = [doc1, doc2] + + splitted_docs = (text_splitter.call(documents=documents)) + + for doc in splitted_docs: + print("*" * 50) + print(doc) + print("*" * 50) + """ + def __init__( + self, + split_by: Literal["word", "sentence", "page", "passage", "token"] = "word", + chunk_size: int = DEFAULT_CHUNK_SIZE, + chunk_overlap: int = DEFAULT_CHUNK_OVERLAP, + batch_size: int = 1000 + ): + """ + Initializes the TextSplitter with the specified parameters for text splitting. + + Args: + split_by (str): The specific criterion to use for splitting the text. 
+ Valid options are 'word' to split by ' ', 'sentence' to split by '.', + 'page' to split by '\\f', 'passage' to split by '\\n\\n'. + chunk_size (int): The size of chunks to generate after splitting. Must be greater than 0. + chunk_overlap (int): The number of characters of overlap between chunks. Must be non-negative + and less than chunk_size. + batch_size (int): The size of documents to process in each batch. + Raises: + ValueError: If the provided split_by is not supported, chunk_size is not greater than 0, + or chunk_overlap is not valid as per the given conditions. + """ + super().__init__() + + # variable value checks + self.split_by = split_by + if split_by not in SEPARATORS: + options = ", ".join(f"'{key}'" for key in SEPARATORS.keys()) + log.error(f"Invalid options for split_by. You must select from {options}.") + raise ValueError(f"Invalid options for split_by. You must select from {options}.") + + if chunk_overlap >= chunk_size: + log.error(f"chunk_overlap can't be larger than or equal to chunk_size. Received chunk_size: {chunk_size}, chunk_overlap: {chunk_overlap}") + raise ValueError( + f"chunk_overlap can't be larger than or equal to chunk_size. Received chunk_size: {chunk_size}, chunk_overlap: {chunk_overlap}" + ) + + if chunk_size <= 0: + log.error(f"chunk_size must be greater than 0. Received value: {chunk_size}") + raise ValueError(f"chunk_size must be greater than 0. Received value: {chunk_size}") + self.chunk_size = chunk_size + + if chunk_overlap < 0: + log.error(f"chunk_overlap must be non-negative. Received value: {chunk_overlap}") + raise ValueError(f"chunk_overlap must be non-negative. Received value: {chunk_overlap}") + self.chunk_overlap = chunk_overlap + + self.batch_size = batch_size + + def split_text(self, text: str) -> List[str]: + """ + Splits the provided text into chunks. + + Splits based on the specified split_by, chunk size, and chunk overlap settings. + + Args: + text (str): The text to split. 
+ + Returns: + List[str]: A list of text chunks. + """ + log.info(f"Splitting text with split_by: {self.split_by}, chunk_size: {self.chunk_size}, chunk_overlap: {self.chunk_overlap}") + separator = SEPARATORS[self.split_by] + splits = self._split_text(text, separator) + log.info(f"Text split into {len(splits)} parts.") + chunks = self._concatenate_splits(splits, self.chunk_size, self.chunk_overlap, separator) + log.info(f"Text concatenated into {len(chunks)} chunks.") + return chunks + + def call(self, documents: DocumentSplitterInputType) -> DocumentSplitterOutputType: + """ + Process the splitting task on a list of documents in batch. + + Batch processes a list of documents, splitting each document's text according to the configured + split_by, chunk size, and chunk overlap. + + Args: + documents (List[Document]): A list of Document objects to process. + + Returns: + List[Document]: A list of new Document objects, each containing a chunk of text from the original documents. + + Raises: + TypeError: If 'documents' is not a list or contains non-Document objects. + ValueError: If any document's text is None. + """ + + if not isinstance(documents, list) or any(not isinstance(doc, Document) for doc in documents): + log.error("Input should be a list of Documents.") + raise TypeError("Input should be a list of Documents.") + + split_docs = [] + # Using range and batch_size to create batches + for start_idx in tqdm(range(0, len(documents), self.batch_size), desc="Splitting Documents in Batches"): + batch_docs = documents[start_idx:start_idx + self.batch_size] + + for doc in batch_docs: + if not isinstance(doc, Document): + log.error(f"Each item in documents should be an instance of Document, but got {type(doc).__name__}.") + raise TypeError(f"Each item in documents should be an instance of Document, but got {type(doc).__name__}.") + + if doc.text is None: + log.error(f"Text should not be None. Doc id: {doc.id}") + raise ValueError(f"Text should not be None. 
Doc id: {doc.id}") + + text_splits = self.split_text(doc.text) + meta_data = deepcopy(doc.meta_data) + + split_docs.extend([ + Document( + text=txt, + meta_data=meta_data, + parent_doc_id=f"{doc.id}", + order=i, + vector=[], + ) + for i, txt in enumerate(text_splits) + ]) + log.info(f"Processed {len(documents)} documents into {len(split_docs)} split documents.") + return split_docs + + def _split_text( + self, text: str, separator: str) -> List[str]: + """Split text based on the specified separator.""" + if self.split_by == "token": + splits = split_text_tokenized(text) + else: + splits = text.split(separator) + log.info(f"Text split by '{separator}' into {len(splits)} parts.") + return splits + + def _concatenate_splits( + self, splits: List[str], chunk_size: int, chunk_overlap: int, separator: str + ) -> List[str]: + """ + Concatenates split text chunks based on the specified chunk size and overlap. + """ + chunks = [] + # we use a window to get the text for each trunk, the window size is chunk_size, step is chunk_size - chunk_overlap + step = chunk_size - chunk_overlap + idx = 0 + + for idx in range(0, len(splits), step): + # 1. if the window exceeds the list of splitted string, break and process the last chunk + # 2. 
if the window ends exactly the same with the splits, then break and treat the splits[idx:len(splits)] as the last chunk + if idx+chunk_size >= len(splits): + break + current_splits = splits[idx:idx+chunk_size] + # add the separator between each unit and concatenate the string + # this won't be the last chunk, so we need to add the separator at the end + chunk = separator.join(current_splits) + separator + chunks.append(chunk) + + if idx < len(splits): + last_chunk = separator.join(splits[idx:]) + if len(last_chunk) > 0: + chunks.append(last_chunk) + log.info(f"Concatenated into {len(chunks)} chunks.") + return chunks + + def _extra_repr(self) -> str: + s = f"split_by={self.split_by}, chunk_size={self.chunk_size}, chunk_overlap={self.chunk_overlap}" + return s + + +# test the execution llamaindex and langchain \ No newline at end of file diff --git a/lightrag/components/model_client/transformers_client.py b/lightrag/components/model_client/transformers_client.py index f45dda8d..cf9aeba5 100644 --- a/lightrag/components/model_client/transformers_client.py +++ b/lightrag/components/model_client/transformers_client.py @@ -307,4 +307,4 @@ def convert_inputs_to_api_kwargs( final_model_kwargs["query"] = input return final_model_kwargs else: - raise ValueError(f"model_type {model_type} is not supported") + raise ValueError(f"model_type {model_type} is not supported") \ No newline at end of file diff --git a/lightrag/components/retriever/bm25_retriever.py b/lightrag/components/retriever/bm25_retriever.py index 0d598e83..044c73a9 100644 --- a/lightrag/components/retriever/bm25_retriever.py +++ b/lightrag/components/retriever/bm25_retriever.py @@ -42,6 +42,17 @@ def split_text_by_word_fn_then_lower_tokenized(x: str) -> List[str]: final_tokens.append(tokenizer.decode([token])) return final_tokens +def split_text_tokenized(x: str) -> List[str]: + tokenizer = Tokenizer() + # words = x.lower().split(" ") + tokens = tokenizer.encode(x) + # print(tokens) + final_tokens: List[str] = 
[] + for token in tokens: + final_tokens.append(tokenizer.decode([token])) + # print(final_tokens) + return final_tokens + class InMemoryBM25Retriever(Retriever[str, RetrieverStrQueryType]): __doc__ = r"""Fast Implementation of Best Matching 25 ranking function. @@ -106,6 +117,7 @@ class InMemoryBM25Retriever(Retriever[str, RetrieverStrQueryType]): 2. Pass the documents from the :meth:`build_index_from_documents` method: .. code-block:: python + retriever = InMemoryBM25Retriever(top_k=1) retriever.build_index_from_documents(documents) output = retriever("hello") diff --git a/lightrag/core/base_data_class.py b/lightrag/core/base_data_class.py index c920e304..078f7ae8 100644 --- a/lightrag/core/base_data_class.py +++ b/lightrag/core/base_data_class.py @@ -168,7 +168,7 @@ class DataClass: Describing: - We defined :ref:`DataClassFormatType` to categorize DataClass description formats + We defined :ref:`DataClassFormatType ` to categorize DataClass description formats as input or output in LLM prompt. 
diff --git a/lightrag/tests/test_gt_text_splitter.py b/lightrag/tests/test_gt_text_splitter.py new file mode 100644 index 00000000..8c3aa4d2 --- /dev/null +++ b/lightrag/tests/test_gt_text_splitter.py @@ -0,0 +1,143 @@ +import unittest +from lightrag.core.types import Document +from lightrag.components.data_process.text_splitter import TextSplitter # Import your TextSplitter +from lightrag.components.data_process.document_splitter import DocumentSplitter # Import the ground truth splitter + + +class TestTextSplitterComparison(unittest.TestCase): + + def setUp(self): + self.text_splitter = TextSplitter(split_by="word", chunk_size=5, chunk_overlap=2) + self.ground_truth_splitter = DocumentSplitter(split_by="word", split_length=5, split_overlap=2) + + def compare_splits(self, text): + expected = self.ground_truth_splitter.split_text(text) + result = self.text_splitter.split_text(text) + + print(f"expected: {expected}") + print(f"result: {result}") + self.assertEqual(result, expected) + + def test_exact_chunk_size(self): + text = "one two three four five" + self.compare_splits(text) + + def test_less_than_chunk_size(self): + text = "one two" + self.compare_splits(text) + + def test_single_word(self): + text = "word" + self.compare_splits(text) + + def test_overlap_handling(self): + text = "one two three four five six seven" + self.compare_splits(text) + + def test_multiple_chunks_with_overlap(self): + text = "one two three four five six seven eight nine ten eleven" + self.compare_splits(text) + + def test_end_index_matches_length(self): + text = "one two three four five six" + self.compare_splits(text) + + def test_long_text(self): + text = " ".join(["word"] * 50) + self.compare_splits(text) + + def test_split_by_sentence(self): + self.text_splitter = TextSplitter(split_by="sentence", chunk_size=1, chunk_overlap=0) + self.ground_truth_splitter = DocumentSplitter(split_by="sentence", split_length=1, split_overlap=0) + text = "This is a test. It should work well." 
+ + self.compare_splits(text) + + def test_split_by_page(self): + self.text_splitter = TextSplitter(split_by="page", chunk_size=1, chunk_overlap=0) + self.ground_truth_splitter = DocumentSplitter(split_by="page", split_length=1, split_overlap=0) + text = "This is a test\fThis is another page" + self.compare_splits(text) + + def test_split_by_passage(self): + self.text_splitter = TextSplitter(split_by="passage", chunk_size=1, chunk_overlap=0) + self.ground_truth_splitter = DocumentSplitter(split_by="passage", split_length=1, split_overlap=0) + text = "This is a test\n\nThis is another passage" + self.compare_splits(text) + + def test_empty_text(self): + text = "" + self.compare_splits(text) + + def test_special_characters(self): + text = "one! two@ three# four$ five% six^ seven& eight* nine( ten)" + self.compare_splits(text) + + def test_multiple_spaces(self): + text = "one two three four five" + self.compare_splits(text) + + def test_newlines(self): + text = "one\ntwo\nthree\nfour\nfive\nsix" + self.compare_splits(text) + + def test_tabs(self): + text = "one\ttwo\tthree\tfour\tfive\tsix" + self.compare_splits(text) + + def test_varied_delimiters(self): + text = "one. two, three; four: five! six? seven" + self.compare_splits(text) + + def test_text_with_punctuation(self): + text = "Hello, world! This is a test. Let's see how it works." + self.compare_splits(text) + + def test_long_paragraph(self): + text = ( + "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. " + "Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. " + "Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. " + "Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." 
+ ) + self.compare_splits(text) + + def test_trailing_whitespace(self): + text = "one two three four five " + self.compare_splits(text) + + def test_leading_whitespace(self): + text = " one two three four five" + self.compare_splits(text) + + def test_mixed_whitespace(self): + text = " one two three four five " + self.compare_splits(text) + + def test_chunk_size_greater_than_overlap(self): + self.text_splitter = TextSplitter(split_by="word", chunk_size=4, chunk_overlap=2) + self.ground_truth_splitter = DocumentSplitter(split_by="word", split_length=4, split_overlap=2) + text = "one two three four five six seven eight nine ten" + self.compare_splits(text) + + def test_overlap_zero(self): + self.text_splitter = TextSplitter(split_by="word", chunk_size=4, chunk_overlap=0) + self.ground_truth_splitter = DocumentSplitter(split_by="word", split_length=4, split_overlap=0) + text = "one two three four five six seven eight nine ten" + self.compare_splits(text) + + def test_overlap_zero_end(self): + self.text_splitter = TextSplitter(split_by="word", chunk_size=5, chunk_overlap=0) + self.ground_truth_splitter = DocumentSplitter(split_by="word", split_length=5, split_overlap=0) + text = "one two three four five six seven eight nine ten" + self.compare_splits(text) + + def test_invalid_parameters(self): + with self.assertRaises(ValueError): + TextSplitter(split_by="word", chunk_size=-1, chunk_overlap=2) + with self.assertRaises(ValueError): + TextSplitter(split_by="word", chunk_size=5, chunk_overlap=6) + + +if __name__ == '__main__': + unittest.main() diff --git a/poetry.lock b/poetry.lock index 5a81cc89..901ab897 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3183,6 +3183,25 @@ traitlets = ">=5.1" docs = ["myst-parser", "pydata-sphinx-theme", "sphinx", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] test = ["pep440", "pre-commit", "pytest", "testpath"] +[[package]] +name = "nbsphinx" +version = "0.9.4" +description = "Jupyter Notebook Tools for Sphinx" +optional = false 
+python-versions = ">=3.6" +files = [ + {file = "nbsphinx-0.9.4-py3-none-any.whl", hash = "sha256:22cb1d974a8300e8118ca71aea1f649553743c0c5830a54129dcd446e6a8ba17"}, + {file = "nbsphinx-0.9.4.tar.gz", hash = "sha256:042a60806fc23d519bc5bef59d95570713913fe442fda759d53e3aaf62104794"}, +] + +[package.dependencies] +docutils = ">=0.18.1" +jinja2 = "*" +nbconvert = ">=5.3,<5.4 || >5.4" +nbformat = "*" +sphinx = ">=1.8" +traitlets = ">=5" + [[package]] name = "nest-asyncio" version = "1.6.0" @@ -3647,6 +3666,20 @@ sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-d test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] xml = ["lxml (>=4.9.2)"] +[[package]] +name = "pandoc" +version = "2.3" +description = "Pandoc Documents for Python" +optional = false +python-versions = "*" +files = [ + {file = "pandoc-2.3.tar.gz", hash = "sha256:e772c2c6d871146894579828dbaf1efd538eb64fc7e71d4a6b3a11a18baef90d"}, +] + +[package.dependencies] +plumbum = "*" +ply = "*" + [[package]] name = "pandocfilters" version = "1.5.1" diff --git a/pyproject.toml b/pyproject.toml index 30596e9d..6e03ff7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,10 +7,7 @@ authors = ["Your Name "] license = "MIT" packages = [ - { include = "use_cases", from = "." }, - { include = "benchmarks", from = "." }, - { include = "docs", from = "." }, - { include = "developer_notes", from = "." }, + { include = "lightrag", from = "." 
} ] [tool.poetry.dependencies] @@ -59,8 +56,11 @@ pydata-sphinx-theme = "^0.15.3" sphinx-design = "^0.6.0" sphinx-copybutton = "^0.5.2" sphinx = "^7.3.7" +nbsphinx = "^0.9.4" +nbconvert = "^7.16.4" +pandoc = "^2.3" [build-system] requires = ["poetry-core>=1.0.0"] -build-backend = "poetry.core.masonry.api" +build-backend = "poetry.core.masonry.api" \ No newline at end of file diff --git a/use_cases/agent/react_agent.ipynb b/use_cases/agent/react_agent.ipynb new file mode 100644 index 00000000..19e7bc6c --- /dev/null +++ b/use_cases/agent/react_agent.ipynb @@ -0,0 +1,1351 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# ReAct Agent Use Case" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 1. Q&A Chatbot\n", + "In this tutorial, we will implement ``LightRAG ReAct`` to build a Q&A chatbot on [HotpotQA](https://arxiv.org/pdf/1809.09600) dataset. \n", + "\n", + "To learn more about ``LightRAG ReAct``, please refer to our developer notes." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# 2. HotpotQA Dataset\n", + "We are using [HotpotQA](https://arxiv.org/pdf/1809.09600). It is a Wikipedia-based multi-hop question and answer dataset." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/alleria/Documents/sylphAI/lightrag_package/LightRAG/.venv/lib/python3.11/site-packages/datasets/table.py:1421: FutureWarning: promote has been superseded by promote_options='default'.\n", + " table = cls._concat_blocks(blocks, axis=0)\n" + ] + } + ], + "source": [ + "# load the dataset\n", + "from datasets import load_dataset\n", + "dataset = load_dataset(path=\"hotpot_qa\", name=\"fullwiki\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "len of eval: 7405\n", + "example: {'id': '5a8b57f25542995d1e6f1371', 'question': 'Were Scott Derrickson and Ed Wood of the same nationality?', 'answer': 'yes', 'type': 'comparison', 'level': 'hard', 'supporting_facts': {'title': ['Scott Derrickson', 'Ed Wood'], 'sent_id': [0, 0]}, 'context': {'title': ['Adam Collis', 'Ed Wood (film)', 'Tyler Bates', 'Doctor Strange (2016 film)', 'Hellraiser: Inferno', 'Sinister (film)', 'Deliver Us from Evil (2014 film)', 'Woodson, Arkansas', 'Conrad Brooks', 'The Exorcism of Emily Rose'], 'sentences': [['Adam Collis is an American filmmaker and actor.', ' He attended the Duke University from 1986 to 1990 and the University of California, Los Angeles from 2007 to 2010.', ' He also studied cinema at the University of Southern California from 1991 to 1997.', ' Collis first work was the assistant director for the Scott Derrickson\\'s short \"Love in the Ruins\" (1995).', ' In 1998, he played \"Crankshaft\" in Eric Koyanagi\\'s \"Hundred Percent\".'], ['Ed Wood is a 1994 American biographical period comedy-drama film directed and produced by Tim Burton, and starring Johnny Depp as cult filmmaker Ed Wood.', \" The film concerns the period in Wood's life when he made his best-known films as well as his relationship with actor 
Bela Lugosi, played by Martin Landau.\", ' Sarah Jessica Parker, Patricia Arquette, Jeffrey Jones, Lisa Marie, and Bill Murray are among the supporting cast.'], ['Tyler Bates (born June 5, 1965) is an American musician, music producer, and composer for films, television, and video games.', ' Much of his work is in the action and horror film genres, with films like \"Dawn of the Dead, 300, Sucker Punch,\" and \"John Wick.\"', ' He has collaborated with directors like Zack Snyder, Rob Zombie, Neil Marshall, William Friedkin, Scott Derrickson, and James Gunn.', ' With Gunn, he has scored every one of the director\\'s films; including \"Guardians of the Galaxy\", which became one of the highest grossing domestic movies of 2014, and its 2017 sequel.', ' In addition, he is also the lead guitarist of the American rock band Marilyn Manson, and produced its albums \"The Pale Emperor\" and \"Heaven Upside Down\".'], ['Doctor Strange is a 2016 American superhero film based on the Marvel Comics character of the same name, produced by Marvel Studios and distributed by Walt Disney Studios Motion Pictures.', ' It is the fourteenth film of the Marvel Cinematic Universe (MCU).', ' The film was directed by Scott Derrickson, who wrote it with Jon Spaihts and C. 
Robert Cargill, and stars Benedict Cumberbatch as Stephen Strange, along with Chiwetel Ejiofor, Rachel McAdams, Benedict Wong, Michael Stuhlbarg, Benjamin Bratt, Scott Adkins, Mads Mikkelsen, and Tilda Swinton.', ' In \"Doctor Strange\", surgeon Strange learns the mystic arts after a career-ending car accident.'], ['Hellraiser: Inferno (also known as Hellraiser V: Inferno) is a 2000 American horror film.', ' It is the fifth installment in the \"Hellraiser\" series and the first \"Hellraiser\" film to go straight-to-DVD.', ' It was directed by Scott Derrickson and released on October 3, 2000.', \" The film concerns a corrupt detective who discovers Lemarchand's box at a crime scene.\", \" The film's reviews were mixed.\"], ['Sinister is a 2012 supernatural horror film directed by Scott Derrickson and written by Derrickson and C. Robert Cargill.', ' It stars Ethan Hawke as fictional true-crime writer Ellison Oswalt who discovers a box of home movies in his attic that puts his family in danger.'], ['Deliver Us from Evil is a 2014 American supernatural horror film directed by Scott Derrickson and produced by Jerry Bruckheimer.', ' The film is officially based on a 2001 non-fiction book entitled \"Beware the Night\" by Ralph Sarchie and Lisa Collier Cool, and its marketing campaign highlighted that it was \"inspired by actual accounts\".', ' The film stars Eric Bana, Édgar Ramírez, Sean Harris, Olivia Munn, and Joel McHale in the main roles and was released on July 2, 2014.'], ['Woodson is a census-designated place (CDP) in Pulaski County, Arkansas, in the United States.', ' Its population was 403 at the 2010 census.', ' It is part of the Little Rock–North Little Rock–Conway Metropolitan Statistical Area.', ' Woodson and its accompanying Woodson Lake and Wood Hollow are the namesake for Ed Wood Sr., a prominent plantation owner, trader, and businessman at the turn of the 20th century.', ' Woodson is adjacent to the Wood Plantation, the largest of the plantations own by 
Ed Wood Sr.'], ['Conrad Brooks (born Conrad Biedrzycki on January 3, 1931 in Baltimore, Maryland) is an American actor.', ' He moved to Hollywood, California in 1948 to pursue a career in acting.', ' He got his start in movies appearing in Ed Wood films such as \"Plan 9 from Outer Space\", \"Glen or Glenda\", and \"Jail Bait.\"', ' He took a break from acting during the 1960s and 1970s but due to the ongoing interest in the films of Ed Wood, he reemerged in the 1980s and has become a prolific actor.', ' He also has since gone on to write, produce and direct several films.'], ['The Exorcism of Emily Rose is a 2005 American legal drama horror film directed by Scott Derrickson and starring Laura Linney and Tom Wilkinson.', ' The film is loosely based on the story of Anneliese Michel and follows a self-proclaimed agnostic who acts as defense counsel (Linney) representing a parish priest (Wilkinson), accused by the state of negligent homicide after he performed an exorcism.']]}}\n", + "attributes in each sample: ['id', 'question', 'answer', 'type', 'level', 'supporting_facts', 'context']\n" + ] + } + ], + "source": [ + "# check the data sample\n", + "test_sample = dataset[\"validation\"][0]\n", + "print(f\"len of eval: {len(dataset['validation'])}\")\n", + "print(f\"example: {test_sample}\")\n", + "print(f\"attributes in each sample: {list(test_sample.keys())}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "question: Were Scott Derrickson and Ed Wood of the same nationality?\n", + "answer: yes\n" + ] + } + ], + "source": [ + "# Each sample contains a question and a corresponding answer.\n", + "print(f\"question: {test_sample.get('question')}\")\n", + "print(f\"answer: {test_sample.get('answer')}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 3. 
Set up\n", + "Please make sure you have set the model client APIs before running the agent. Now import the necessary packages." + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "import dotenv\n", + "from lightrag.components.model_client import OpenAIClient\n", + "from lightrag.components.agent.react_agent import ReActAgent\n", + "from lightrag.core.tool_helper import FunctionTool\n", + "\n", + "import time\n", + "\n", + "# load evironment, please set the relative path to your .env file that includes the api key\n", + "dotenv.load_dotenv(dotenv_path=\"../../.env\", override=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 4. Create Agent\n", + "To create an gent, we need to define the basic components.\n", + "\n", + "## Tools\n", + "Firstly, we need to specify what functions the agent will need to answer the question. In this case, we are answering the Wikipedia-based questions, we will allow the agent to **search** Wikipedia api. The [ReAct Paper](https://arxiv.org/pdf/2210.03629) includes a **lookup** function that serves as Ctrl+F functionality on the browser.\n", + "\n", + "As ``LightRAG ReAct`` has a built in ``finish`` function, we don't need to define by ourselves." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import requests\n", + "from bs4 import BeautifulSoup\n", + "import re\n", + "import string\n", + "\n", + "# copy code from the paper\n", + "def clean_str(p):\n", + " return p.encode().decode(\"unicode-escape\").encode(\"latin1\").decode(\"utf-8\")\n", + "\n", + "# normalization copied from the paper's code\n", + "def normalize_answer(s):\n", + " def remove_articles(text):\n", + " return re.sub(r\"\\b(a|an|the)\\b\", \" \", text)\n", + " \n", + " def white_space_fix(text):\n", + " return \" \".join(text.split())\n", + "\n", + " def remove_punc(text):\n", + " exclude = set(string.punctuation)\n", + " return \"\".join(ch for ch in text if ch not in exclude)\n", + "\n", + " def lower(text):\n", + " return text.lower()\n", + "\n", + " return white_space_fix(remove_articles(remove_punc(lower(s))))\n", + "\n", + "def search(entity: str) -> str:\n", + " \"\"\"\n", + " searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.\n", + " \"\"\"\n", + " # Format the entity for URL encoding\n", + " entity_formatted = entity.replace(\" \", \"+\")\n", + " url = f\"https://en.wikipedia.org/w/index.php?search={entity_formatted}\"\n", + " \n", + " # Fetch the page\n", + " response = requests.get(url)\n", + " soup = BeautifulSoup(response.text, 'html.parser')\n", + " \n", + " # Check if the exact page was found or suggest similar items\n", + " # when
is detected, it means the entity page is not found on wikipedia\n", + " result_divs = soup.find_all(\"div\", {\"class\": \"mw-search-result-heading\"})\n", + " \n", + " if result_divs: # this means the searched entity page is not in wikipedia, wikipedia will show a list of similar entities\n", + " # get Similar results\n", + " similar_titles = [div.a.get_text() for div in result_divs]\n", + " return f\"Could not find exact page for '{entity}'. Similar topics: {similar_titles[:5]}\" # return the top 5 similar titles\n", + " else:\n", + " # the paper uses page to represent content in

\n", + " # Extract xontent\n", + " page_list = [p.get_text().strip() for p in soup.find_all(\"p\") + soup.find_all(\"ul\")]\n", + " # TODO: Recursive search, if find any concept that needs more search then call search again\n", + " # if any(\"may refer to:\" in p for p in page_list):\n", + " # search(entity)\n", + "\n", + " # restructure & clean the page content following the paper's logic\n", + " page = ''\n", + " for p in page_list:\n", + " if len(p.split(\" \")) > 2:\n", + " page += clean_str(p)\n", + " if not p.endswith(\"\\n\"):\n", + " page += \"\\n\"\n", + " paragraphs = page.split(\"\\n\")\n", + " paragraphs = [p.strip() for p in paragraphs if p.strip()]\n", + " \n", + " sentences = []\n", + " for p in paragraphs:\n", + " sentences += p.split('. ')\n", + " sentences = [s.strip() + '.' for s in sentences if s.strip()]\n", + " \n", + " # return the first 5 sentences\n", + " if sentences:\n", + " return ' '.join(sentences[:5]) if len(sentences)>=5 else ' '.join(sentences)\n", + " else:\n", + " return \"No content found on this page.\"\n", + " \n", + " # TODO: clean the paragraphs and return the searched content\n", + "\n", + "\n", + "def lookup(text: str, keyword: str) -> str:\n", + " \"\"\"\n", + " returns the sentences containing keyword in the current passage.\n", + " \"\"\"\n", + " sentences = text.split('.')\n", + " matching_sentences = [sentence.strip() + '.' 
for sentence in sentences if keyword.lower() in sentence.lower()]\n", + " if not matching_sentences:\n", + " return \"No sentences found with the keyword.\"\n", + " else:\n", + " return ' '.join(matching_sentences) # Join all matching sentences into a single string" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "# set up tools for the agent\n", + "tools = [FunctionTool.from_defaults(fn=search), FunctionTool.from_defaults(fn=lookup)]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Examples\n", + "The next thing to add is examples. Few shot prompt engineering is a common practice to improve the model performance.\n", + "\n", + "Let's use the paper's examples. The paper has 6 examples altogether." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "examples = [\n", + "\"\"\"Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\n", + "Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\n", + "Action 1: search(\"Colorado orogeny\")\n", + "Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\n", + "Thought 2: It does not mention the eastern sector. So I need to look up eastern sector.\n", + "Action 2: lookup(\"eastern sector\")\n", + "Observation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\n", + "Thought 3: The eastern sector of Colorado orogeny extends into the High Plains. 
So I need to search High Plains and find its elevation range.\n", + "Action 3: search(\"High Plains\")\n", + "Observation 3: High Plains refers to one of two distinct land regions:\n", + "Thought 4: I need to instead search High Plains (United States).\n", + "Action 4: search(\"High Plains (United States)\")\n", + "Observation 4: The High Plains are a subregion of the Great Plains. From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\n", + "Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\n", + "Action 5: finish(\"1,800 to 7,000 ft\")\"\"\",\n", + "\"\"\"Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\n", + "Thought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.\n", + "Action 1: search(\"Milhouse\")\n", + "Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\n", + "Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\".\n", + "Action 2: lookup(\"named after\")\n", + "Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \n", + "Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\n", + "Action 3: finish(\"Richard Nixon\")\"\"\",\n", + "\"\"\"Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\n", + "Thought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\n", + "Action 1: search(\"Adam Clayton Powell\")\n", + "Observation 1: Could not find [\"Adam Clayton Powell\"]. 
Similar: ['Adam Clayton Powell III', 'Seventh Avenue (Manhattan)', 'Adam Clayton Powell Jr. State Office Building', 'Isabel Washington Powell', 'Adam Powell', 'Adam Clayton Powell (film)', 'Giancarlo Esposito'].\n", + "Thought 2: To find the documentary, I can search Adam Clayton Powell (film).\n", + "Action 2: search(\"Adam Clayton Powell (film)\")\n", + "Observation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\n", + "The film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\n", + "Thought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\n", + "Action 3: finish(\"The Saimaa Gesture\")\"\"\",\n", + "\"\"\"Question: What profession does Nicholas Ray and Elia Kazan have in common?\n", + "Thought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\n", + "Action 1: search(\"Nicholas Ray\")\n", + "Observation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 – June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\n", + "Thought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\n", + "Action 2: search(\"Elia Kazan\")\n", + "Observation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\n", + "Thought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. 
So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\n", + "Action 3: finish(\"director, screenwriter, actor\")\"\"\",\n", + "\"\"\"Question: Which magazine was started first Arthur's Magazine or First for Women?\n", + "Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.\n", + "Action 1: search(\"Arthur's Magazine\")\n", + "Observation 1: Arthur's Magazine (1844-€“1846) was an American literary periodical published in Philadelphia in the 19th century. \n", + "Thought 2: Arthur's Magazine was started in 1844. I need to search First for Women next.\n", + "Action 2: search(\"First for Women\")\n", + "Observation 2: First for Women is a woman's magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \n", + "Thought 3: First for Women was started in 1989. 1844 (Arthur's Magazine) < 1989 (First for Women), so Arthur's Magazine was started first.\n", + "Action 3: finish(\"Arthur's Magazine\")\"\"\",\n", + "\"\"\"Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\n", + "Thought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\n", + "Action 1: search(\"Pavel Urysohn\")\n", + "Observation 1: Pavel Samuilovich Urysohn (February 3, 1898 – August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\n", + "Thought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\n", + "Action 2: search(\"Leonid Levin\")\n", + "Observation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \n", + "Thought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. 
\n", + "Action 3: finish(\"yes\")\"\"\"\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "# Preset the examples as prompt_kwargs; the examples will be included in the system prompt\n", + "\n", + "preset_prompt_kwargs = {\"examples\": examples} " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Model\n", + "\n", + "Next, we can choose the model to call. In this example we will use the OpenAIClient ``gpt-3.5-turbo`` model. We will set the ``temperature`` at 0.0 to make the response as consistent as possible." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [], + "source": [ + "gpt_model_kwargs = {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"temperature\": 0.0,\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Agent\n", + "Combining the previous components, we can define the agent." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "ReActAgent(\n", + " tools=[FunctionTool(search), FunctionTool(lookup), FunctionTool(llm_tool), FunctionTool(finish)], max_steps=3, model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.0}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {# role/task description #}\n", + " You task is to answer user's query with minimum steps and maximum accuracy using the tools provided.\n", + " {# REACT instructions #}\n", + " Each step you will read the previous Thought, Action, and Observation(execution result of the action)steps and then provide the next Thought and Action.\n", + " \n", + " You only have access to the following tools:\n", + " {# tools #}\n", + " {% for tool in tools %}\n", + " {{ loop.index }}. 
ToolName: {{ tool.metadata.name }}\n", + " Tool Description: {{ tool.metadata.description }}\n", + " Tool Parameters: {{ tool.metadata.fn_schema_str }} {#tool args can be misleading, especially if we already have type hints and docstring in the function#}\n", + " {% endfor %}\n", + " {# output is always more robust to use json than string #}\n", + " ---\n", + " Your output must be in valid JSON format(raw Python string format) with two keys:\n", + " {\n", + " \"thought\": \"\",\n", + " \"action\": \"ToolName(, )\"\n", + " }\n", + " - Must double quote the JSON str.\n", + " - Inside of the JSON str, Must use escape double quote and escape backslash for string.\n", + " For example:\n", + " \"action\": \"finish(\\\"John's.\\\")\"\n", + " ---\n", + " {# Specifications TODO: preference between the usage of llm tool vs the other tool #}\n", + " Process:\n", + " - Step 1: Read the user query and potentially divide it into subqueries. And get started with the first subquery.\n", + " - Call one available tool at a time to solve each subquery/subquestion. \\\n", + " - At step 'finish', join all subqueries answers and finish the task.\n", + " Remember:\n", + " - Action must call one of the above tools with Took Name. It can not be empty.\n", + " - Read the Tool Description and ensure your args and kwarg follow what each tool expects in types. e.g. (a=1, b=2) if it is keyword argument or (1, 2) if it is positional.\n", + " - You will always end with 'finish' action to finish the task. 
The answer can be the final answer or failure message.\n", + " - When the initial query is simple, use minimum steps to answer the query.\n", + " {#Examples can be here#}\n", + " {# Check if there are any examples #}\n", + " {% if examples %}\n", + " \n", + " {% for example in examples %}\n", + " {{ example }}\n", + " {% endfor %}\n", + " \n", + " {% endif %}\n", + " <>\n", + " -----------------\n", + " {# History #}\n", + " {% for history in step_history %}\n", + " Step {{history.step}}:\n", + " {\n", + " \"thought\": \"{{history.thought}}\",\n", + " \"action\": \"{{history.action}}\",\n", + " }\n", + " \"observation\": \"{{history.observation}}\"\n", + " {% endfor %}\n", + " {% if input_str %}\n", + " User query:\n", + " {{ input_str }}\n", + " {% endif %}\n", + " , preset_prompt_kwargs: {'examples': ['Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?\\nThought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.\\nAction 1: search(\"Colorado orogeny\")\\nObservation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas.\\nThought 2: It does not mention the eastern sector. So I need to look up eastern sector.\\nAction 2: lookup(\"eastern sector\")\\nObservation 2: (Result 1 / 1) The eastern sector extends into the High Plains and is called the Central Plains orogeny.\\nThought 3: The eastern sector of Colorado orogeny extends into the High Plains. So I need to search High Plains and find its elevation range.\\nAction 3: search(\"High Plains\")\\nObservation 3: High Plains refers to one of two distinct land regions:\\nThought 4: I need to instead search High Plains (United States).\\nAction 4: search(\"High Plains (United States)\")\\nObservation 4: The High Plains are a subregion of the Great Plains. 
From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]\\nThought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.\\nAction 5: finish(\"1,800 to 7,000 ft\")', 'Question: Musician and satirist Allie Goertz wrote a song about the \"The Simpsons\" character Milhouse, who Matt Groening named after who?\\nThought 1: The question simplifies to \"The Simpsons\" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.\\nAction 1: search(\"Milhouse\")\\nObservation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.\\nThought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up \"named after\".\\nAction 2: lookup(\"named after\")\\nObservation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous. \\nThought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.\\nAction 3: finish(\"Richard Nixon\")', 'Question: Which documentary is about Finnish rock groups, Adam Clayton Powell or The Saimaa Gesture?\\nThought 1: I need to search Adam Clayton Powell and The Saimaa Gesture, and find which documentary is about Finnish rock groups.\\nAction 1: search(\"Adam Clayton Powell\")\\nObservation 1: Could not find [\"Adam Clayton Powell\"]. Similar: [\\'Adam Clayton Powell III\\', \\'Seventh Avenue (Manhattan)\\', \\'Adam Clayton Powell Jr. 
State Office Building\\', \\'Isabel Washington Powell\\', \\'Adam Powell\\', \\'Adam Clayton Powell (film)\\', \\'Giancarlo Esposito\\'].\\nThought 2: To find the documentary, I can search Adam Clayton Powell (film).\\nAction 2: search(\"Adam Clayton Powell (film)\")\\nObservation 2: Adam Clayton Powell is a 1989 American documentary film directed by Richard Kilberg.\\nThe film is about the rise and fall of influential African-American politician Adam Clayton Powell Jr.[3][4] It was later aired as part of the PBS series The American Experience.\\nThought 3: Adam Clayton Powell (film) is a documentary about an African-American politician, not Finnish rock groups. So the documentary about Finnish rock groups must instead be The Saimaa Gesture.\\nAction 3: finish(\"The Saimaa Gesture\")', 'Question: What profession does Nicholas Ray and Elia Kazan have in common?\\nThought 1: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common.\\nAction 1: search(\"Nicholas Ray\")\\nObservation 1: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 – June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause.\\nThought 2: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions.\\nAction 2: search(\"Elia Kazan\")\\nObservation 2: Elia Kazan was an American film and theatre director, producer, screenwriter and actor.\\nThought 3: Professions of Elia Kazan are director, producer, screenwriter, and actor. 
So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor.\\nAction 3: finish(\"director, screenwriter, actor\")', 'Question: Which magazine was started first Arthur\\'s Magazine or First for Women?\\nThought 1: I need to search Arthur\\'s Magazine and First for Women, and find which was started first.\\nAction 1: search(\"Arthur\\'s Magazine\")\\nObservation 1: Arthur\\'s Magazine (1844-\\x80\\x931846) was an American literary periodical published in Philadelphia in the 19th century. \\nThought 2: Arthur\\'s Magazine was started in 1844. I need to search First for Women next.\\nAction 2: search(\"First for Women\")\\nObservation 2: First for Women is a woman\\'s magazine published by Bauer Media Group in the USA.[1] The magazine was started in 1989. \\nThought 3: First for Women was started in 1989. 1844 (Arthur\\'s Magazine) < 1989 (First for Women), so Arthur\\'s Magazine was started first.\\nAction 3: finish(\"Arthur\\'s Magazine\")', 'Question: Were Pavel Urysohn and Leonid Levin known for the same type of work?\\nThought 1: I need to search Pavel Urysohn and Leonid Levin, find their types of work, then find if they are the same.\\nAction 1: search(\"Pavel Urysohn\")\\nObservation 1: Pavel Samuilovich Urysohn (February 3, 1898 â\\x80\\x93 August 17, 1924) was a Soviet mathematician who is best known for his contributions in dimension theory.\\nThought 2: Pavel Urysohn is a mathematician. I need to search Leonid Levin next and find its type of work.\\nAction 2: search(\"Leonid Levin\")\\nObservation 2: Leonid Anatolievich Levin is a Soviet-American mathematician and computer scientist. \\nThought 3: Leonid Levin is a mathematician and computer scientist. So Pavel Urysohn and Leonid Levin have the same type of work. 
\\nAction 3: finish(\"yes\")'], 'tools': [FunctionTool(search), FunctionTool(lookup), FunctionTool(llm_tool), FunctionTool(finish)]}, prompt_variables: ['examples', 'step_history', 'input_str', 'tools']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): JsonParser()\n", + " (additional_llm_tool): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.0}, \n", + " (prompt): Prompt(\n", + " template: \n", + " {% if task_desc_str or output_format_str or tools_str or examples_str or chat_history_str or context_str or steps_str %}\n", + " \n", + " {% endif %}\n", + " {# task desc #}\n", + " {% if task_desc_str %}\n", + " {{task_desc_str}}\n", + " {% endif %}\n", + " {# output format #}\n", + " {% if output_format_str %}\n", + " \n", + " {{output_format_str}}\n", + " \n", + " {% endif %}\n", + " {# tools #}\n", + " {% if tools_str %}\n", + " \n", + " {{tools_str}}\n", + " \n", + " {% endif %}\n", + " {# example #}\n", + " {% if examples_str %}\n", + " \n", + " {{examples_str}}\n", + " \n", + " {% endif %}\n", + " {# chat history #}\n", + " {% if chat_history_str %}\n", + " \n", + " {{chat_history_str}}\n", + " \n", + " {% endif %}\n", + " {#contex#}\n", + " {% if context_str %}\n", + " \n", + " {{context_str}}\n", + " \n", + " {% endif %}\n", + " {# steps #}\n", + " {% if steps_str %}\n", + " \n", + " {{steps_str}}\n", + " \n", + " {% endif %}\n", + " {% if task_desc_str or output_format_str or tools_str or examples_str or chat_history_str or context_str or steps_str %}\n", + " \n", + " {% endif %}\n", + " {% if input_str %}\n", + " \n", + " {{input_str}}\n", + " \n", + " {% endif %}\n", + " {% if output_str %}\n", + " \n", + " {{output_str}}\n", + " \n", + " {% endif %}\n", + " You:\n", + " , prompt_variables: ['context_str', 'input_str', 'steps_str', 'output_format_str', 'output_str', 'chat_history_str', 'tools_str', 'examples_str', 'task_desc_str']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " )\n", + ")" + ] 
+ }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# max_steps refers to how many thought-action rounds we allow the model to perform\n", + "# to save resources, let's use 3 here\n", + "agent = ReActAgent(\n", + " tools=tools, max_steps=3, model_client=OpenAIClient(),\n", + " model_kwargs=gpt_model_kwargs, preset_prompt_kwargs=preset_prompt_kwargs\n", + ")\n", + "agent" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import importlib\n", + "import lightrag\n", + "importlib.reload(lightrag)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 5. Q & A\n", + "Next we can use the agent to answer our questions. Let's run 5 examples. We will use the validation data." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Dataset({\n", + " features: ['id', 'question', 'answer', 'type', 'level', 'supporting_facts', 'context'],\n", + " num_rows: 7405\n", + "})" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val_dataset = dataset[\"validation\"]\n", + "val_dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "``LightRAG`` provides a ``printc`` function. You can utilize it to show colored console output for the agent." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m2024-06-15 23:13:47 - [react_agent.py:330:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n", + "\u001b[34m2024-06-15 23:13:48 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.\n", + "Action 1: search(\"Scott Derrickson\")\n", + "Observation 1: Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.\u001b[0m\n", + "\u001b[34m2024-06-15 23:13:50 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.\n", + "Action 2: search(\"Ed Wood\")\n", + "Observation 2: Edward Davis Wood Jr. (October 10, 1924 – December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. 
Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\u001b[0m\n", + "\u001b[34m2024-06-15 23:13:51 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: Scott Derrickson is an American filmmaker and Ed Wood is also an American filmmaker. They are of the same nationality.\n", + "Action 3: finish(\"yes\")\n", + "Observation 3: yes\u001b[0m\n", + "\u001b[36m2024-06-15 23:13:51 - [react_agent.py:345:call] - answer: yes\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.', action='search(\"Scott Derrickson\")', fun_name='search', fun_args=['Scott Derrickson'], fun_kwargs={}, observation='Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.'), StepOutput(step=2, thought='Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.', action='search(\"Ed Wood\")', fun_name='search', fun_args=['Ed Wood'], fun_kwargs={}, observation=\"Edward Davis Wood Jr. (October 10, 1924\\xa0– December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. 
In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\"), StepOutput(step=3, thought='Scott Derrickson is an American filmmaker and Ed Wood is also an American filmmaker. They are of the same nationality.', action='finish(\"yes\")', fun_name='finish', fun_args=['yes'], fun_kwargs={}, observation='yes')]\n", + "\u001b[33m2024-06-15 23:13:51 - [2706144185.py:12:] - question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: yes\u001b[0m\n", + "\u001b[36m2024-06-15 23:13:51 - [react_agent.py:330:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n", + "\u001b[34m2024-06-15 23:13:53 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.\n", + "Action 1: search(\"Corliss Archer Kiss and Tell film\")\n", + "Observation 1: Could not find exact page for 'Corliss Archer Kiss and Tell film'. 
Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n", + "\u001b[34m2024-06-15 23:13:55 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n", + "Action 2: search(\"Corliss Archer Kiss and Tell film\")\n", + "Observation 2: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\",\n", + " \"action\": \"search(\\\"Corliss Archer Kiss and Tell film\\\")\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:13:57 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n", + "Action 3: search(\"Corliss Archer Kiss and Tell film\")\n", + "Observation 3: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n", + "\u001b[36m2024-06-15 23:13:57 - [react_agent.py:345:call] - answer: Could not find exact page for 'Corliss Archer Kiss and Tell film'. 
Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=2, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=3, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. 
Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\")]\n", + "\u001b[33m2024-06-15 23:13:57 - [2706144185.py:12:] - question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: could not find exact page for corliss archer kiss and tell film similar topics kiss for corliss corliss archer kiss and tell 1945 film kiss and tell play meet corliss archer\u001b[0m\n", + "\u001b[36m2024-06-15 23:13:57 - [react_agent.py:330:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n", + "\u001b[34m2024-06-15 23:13:59 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for a science fantasy young adult series told in first person, with companion books narrating the stories of enslaved worlds and alien species.\n", + "Action 1: search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")\n", + "Observation 1: Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\u001b[0m\n", + "\u001b[34m2024-06-15 23:14:00 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The search results suggest 'Animorphs' as a similar topic, I should search for 'Animorphs' to see if it matches the criteria.\n", + "Action 2: search('Animorphs')\n", + "Observation 2: Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. 
Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\u001b[0m\n", + "\u001b[34m2024-06-15 23:14:01 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The series that matches the criteria is 'Animorphs'. I should provide this as the answer.\n", + "Action 3: finish(\"Animorphs\")\n", + "Observation 3: Animorphs\u001b[0m\n", + "\u001b[36m2024-06-15 23:14:01 - [react_agent.py:345:call] - answer: Animorphs\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for a science fantasy young adult series told in first person, with companion books narrating the stories of enslaved worlds and alien species.', action='search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")', fun_name='search', fun_args=['science fantasy young adult series first person companion books enslaved worlds alien species'], fun_kwargs={}, observation=\"Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. 
Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\"), StepOutput(step=2, thought=\"The search results suggest 'Animorphs' as a similar topic, I should search for 'Animorphs' to see if it matches the criteria.\", action=\"search('Animorphs')\", fun_name='search', fun_args=['Animorphs'], fun_kwargs={}, observation=\"Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\"), StepOutput(step=3, thought=\"The series that matches the criteria is 'Animorphs'. 
I should provide this as the answer.\", action='finish(\"Animorphs\")', fun_name='finish', fun_args=['Animorphs'], fun_kwargs={}, observation='Animorphs')]\n", + "\u001b[33m2024-06-15 23:14:01 - [2706144185.py:12:] - question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: animorphs\u001b[0m\n", + "\u001b[36m2024-06-15 23:14:01 - [react_agent.py:330:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n", + "\u001b[34m2024-06-15 23:14:03 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.\n", + "Action 1: search(\"Laleli Mosque\")\n", + "Observation 1: The Laleli Mosque (Turkish: Laleli Camii, lit. 'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. 
The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. 
The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\u001b[0m\n", + "\u001b[34m2024-06-15 23:14:04 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.\n", + "Action 2: search(\"Esma Sultan Mansion\")\n", + "Observation 2: The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. 
Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].\u001b[0m\n", + "\u001b[34m2024-06-15 23:14:05 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.\n", + "Action 3: finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")\n", + "Observation 3: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n", + "\u001b[36m2024-06-15 23:14:05 - [react_agent.py:345:call] - answer: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.', action='search(\"Laleli Mosque\")', fun_name='search', fun_args=['Laleli Mosque'], fun_kwargs={}, observation=\"The Laleli Mosque (Turkish: Laleli Camii, lit.\\u2009'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. 
The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. 
The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\"), StepOutput(step=2, thought='I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.', action='search(\"Esma Sultan Mansion\")', fun_name='search', fun_args=['Esma Sultan Mansion'], fun_kwargs={}, observation='The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. 
Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].'), StepOutput(step=3, thought='I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.', action='finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")', fun_name='finish', fun_args=['No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.'], fun_kwargs={}, observation='No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.')]\n", + "\u001b[33m2024-06-15 23:14:05 - [2706144185.py:12:] - question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: no laleli mosque is located in laleli fatih istanbul turkey while esma sultan mansion is in ortaköy neighborhood of istanbul turkey\u001b[0m\n", + "\u001b[36m2024-06-15 23:14:06 - [react_agent.py:330:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n", + "\u001b[34m2024-06-15 23:14:07 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\n", + "Action 1: search('Big Stone Gap director')\n", + "Observation 1: Could not find exact page for 'Big Stone Gap director'. 
Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Union High School (Big Stone Gap, Virginia)', 'Star Wood', 'Dagmara Domińczyk']\u001b[0m\n", + "\u001b[34m2024-06-15 23:14:09 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\n", + "Action 2: search('Big Stone Gap film location')\n", + "Observation 2: Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\",\n", + " \"action\": \"search('Big Stone Gap film location')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:14:11 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\n", + "Action 3: search('Big Stone Gap film location')\n", + "Observation 3: Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\u001b[0m\n", + "\u001b[36m2024-06-15 23:14:11 - [react_agent.py:345:call] - answer: Could not find exact page for 'Big Stone Gap film location'. 
Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\u001b[0m\n", + "step_history: [StepOutput(step=1, thought=\"I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\", action=\"search('Big Stone Gap director')\", fun_name='search', fun_args=['Big Stone Gap director'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap director'. Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Union High School (Big Stone Gap, Virginia)', 'Star Wood', 'Dagmara Domińczyk']\"), StepOutput(step=2, thought=\"I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\", action=\"search('Big Stone Gap film location')\", fun_name='search', fun_args=['Big Stone Gap film location'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film location'. Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\"), StepOutput(step=3, thought=\"I need to search for the New York city where the romantic comedy 'Big Stone Gap' is based.\", action=\"search('Big Stone Gap film location')\", fun_name='search', fun_args=['Big Stone Gap film location'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film location'. 
Similar topics: ['Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'The Beach (film)']\")]\n", + "\u001b[33m2024-06-15 23:14:11 - [2706144185.py:12:] - question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: could not find exact page for big stone gap film location similar topics big stone gap virginia adriana trigiani dagmara domińczyk big lebowski beach film\u001b[0m\n" + ] + } + ], + "source": [ + "from lightrag.utils.logger import printc\n", + "\n", + "num_questions = 5\n", + "for i in range(num_questions):\n", + " question = val_dataset[i][\"question\"]\n", + " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", + " \n", + " # get the agent's response\n", + " pred_answer = agent(question)\n", + " pred_answer = normalize_answer(pred_answer)\n", + " \n", + " printc(f\"question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 6. Evaluation\n", + "\n", + "Now you will see that we have the ``exact correct answer`` for some questions:\n", + "\n", + "question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: ``yes`` pred answer: ``yes``\n", + "\n", + "question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: ``animorphs``, pred answer: ``animorphs``\n", + "\n", + "Sometimes the agent performs correctly but not in the same format with the ground truth. E.g. ground truth: ``no``, pred answer: ``no, they are not the same``. This is what we can tolerate.\n", + "\n", + "But how to evaluate if the agent is doing well, or if our tools, examples, and prompt implementations work well? We need to evaluate it.\n", + "\n", + "1. 
Exact Match(EM)\n", + "Exact Match is what the paper is using. Only when the normalized agent response is the same as the ground truth answer, we count it as correct. The paper's EM for react agent is around 30%(gpt-3).\n", + "\n", + "2. Fuzzy Match(FM)\n", + "EM doesn't make much sense in question answering. So we propose fuzzy match based on the LLMs' lengthy output nature. If the ground truth answer is included in the agent response, then we count it as correct. FM is not necessarily correct. \n", + "E.g. question: Harry Potter and Dumbledore, who is older? ground truth: ``dumbledore``, pred answer: ``harry potter is older than dumbledore.``\n", + "the model mentioned the ground truth but still provided a wrong answer. So FM serves as reference.\n", + "\n", + "Let's use ``LightRAG eval`` module and evaluate on 10 questions and keep the paper's practice of setting ``max_step`` to `7`." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m2024-06-15 23:15:11 - [react_agent.py:330:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:13 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.\n", + "Action 1: search(\"Scott Derrickson\")\n", + "Observation 1: Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. 
He graduated from Biola University with a B.A.\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:15 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.\n", + "Action 2: search(\"Ed Wood\")\n", + "Observation 2: Edward Davis Wood Jr. (October 10, 1924 – December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:15 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: Scott Derrickson is an American filmmaker and Ed Wood is also American. 
So, they are of the same nationality.\n", + "Action 3: finish(\"yes\")\n", + "Observation 3: yes\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:15 - [react_agent.py:345:call] - answer: yes\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search Scott Derrickson and Ed Wood, find their nationalities, then determine if they are the same.', action='search(\"Scott Derrickson\")', fun_name='search', fun_args=['Scott Derrickson'], fun_kwargs={}, observation='Scott Derrickson (born July 16, 1966) is an American filmmaker. He is best known for his work in the horror genre, directing films such as The Exorcism of Emily Rose (2005), Sinister (2012) and The Black Phone (2021). He is also known for the superhero film Doctor Strange (2016), based on the Marvel Comics character.. Scott Derrickson grew up in Denver, Colorado. He graduated from Biola University with a B.A.'), StepOutput(step=2, thought='Scott Derrickson is an American filmmaker. I need to search Ed Wood next to find his nationality.', action='search(\"Ed Wood\")', fun_name='search', fun_args=['Ed Wood'], fun_kwargs={}, observation=\"Edward Davis Wood Jr. (October 10, 1924\\xa0– December 10, 1978) was an American filmmaker, actor, screenwriter, and pulp novelist.. In the 1950s, Wood directed several low-budget science fiction, crime and horror films that later became cult classics, notably Glen or Glenda (1953), Jail Bait (1954), Bride of the Monster (1955), Plan 9 from Outer Space (1957) and Night of the Ghouls (1959).[1] In the 1960s and 1970s, he moved towards sexploitation and pornographic films such as The Sinister Urge (1960), Orgy of the Dead (1965) and Necromania (1971), and wrote over 80 lurid pulp crime and sex novels.. 
Notable for their campy aesthetics, technical errors, unsophisticated special effects, use of poorly-matched stock footage, eccentric casts, idiosyncratic stories and non sequitur dialogue, Wood's films remained largely obscure until he was posthumously awarded a Golden Turkey Award for Worst Director of All Time in 1980, renewing public interest in his life and work.[2]. Following the publication of Rudolph Grey's 1992 oral biography Nightmare of Ecstasy: The Life and Art of Edward D.\"), StepOutput(step=3, thought='Scott Derrickson is an American filmmaker and Ed Wood is also American. So, they are of the same nationality.', action='finish(\"yes\")', fun_name='finish', fun_args=['yes'], fun_kwargs={}, observation='yes')]\n", + "\u001b[33m2024-06-15 23:15:15 - [3641068398.py:26:] - No. 1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: yes\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:15 - [react_agent.py:330:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:17 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.\n", + "Action 1: search(\"Corliss Archer Kiss and Tell film\")\n", + "Observation 1: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:19 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n", + "Action 2: search(\"Corliss Archer Kiss and Tell film\")\n", + "Observation 2: Could not find exact page for 'Corliss Archer Kiss and Tell film'. 
Similar topics: ['Corliss Archer', 'A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\",\n", + " \"action\": \"search(\\\"Corliss Archer Kiss and Tell film\\\")\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:15:21 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.\n", + "Action 3: search(\"Corliss Archer Kiss and Tell film\")\n", + "Observation 3: Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:23 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n", + "Action 4: search(\"Corliss Archer film Kiss and Tell cast\")\n", + "Observation 4: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:25 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n", + "Action 5: search(\"Corliss Archer film Kiss and Tell cast\")\n", + "Observation 5: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. 
Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:27 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n", + "Action 6: search(\"Corliss Archer film Kiss and Tell cast\")\n", + "Observation 6: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:29 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.\n", + "Action 7: search(\"Corliss Archer film Kiss and Tell cast\")\n", + "Observation 7: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:29 - [react_agent.py:345:call] - answer: Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to find the woman who portrayed Corliss Archer in the film Kiss and Tell, then search for the government position she held.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. 
Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=2, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['Corliss Archer', 'A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=3, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell.', action='search(\"Corliss Archer Kiss and Tell film\")', fun_name='search', fun_args=['Corliss Archer Kiss and Tell film'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer Kiss and Tell film'. Similar topics: ['A Kiss for Corliss', 'Corliss Archer', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Meet Corliss Archer']\"), StepOutput(step=4, thought='I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\"), StepOutput(step=5, thought='I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. 
Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\"), StepOutput(step=6, thought='I need to search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\"), StepOutput(step=7, thought='I should search for the woman who portrayed Corliss Archer in the film Kiss and Tell to find out the government position she held.', action='search(\"Corliss Archer film Kiss and Tell cast\")', fun_name='search', fun_args=['Corliss Archer film Kiss and Tell cast'], fun_kwargs={}, observation=\"Could not find exact page for 'Corliss Archer film Kiss and Tell cast'. Similar topics: ['A Kiss for Corliss', 'Kiss and Tell (1945 film)', 'Kiss and Tell (play)', 'Janet Waldo', 'Kathryn Card']\")]\n", + "\u001b[33m2024-06-15 23:15:29 - [3641068398.py:26:] - No. 
2, question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: could not find exact page for corliss archer film kiss and tell cast similar topics kiss for corliss kiss and tell 1945 film kiss and tell play janet waldo kathryn card\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:29 - [react_agent.py:330:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:32 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for a science fantasy young adult series told in first person and involving companion books about enslaved worlds and alien species.\n", + "Action 1: search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")\n", + "Observation 1: Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:34 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The search did not return an exact match. I should search for 'Animorphs' as it involves young adult science fantasy, first-person narration, enslaved worlds, and alien species.\n", + "Action 2: search('Animorphs')\n", + "Observation 2: Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. 
Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:35 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The series that matches the description is Animorphs. I should provide this as the answer.\n", + "Action 3: finish(\"Animorphs\")\n", + "Observation 3: Animorphs\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:35 - [react_agent.py:345:call] - answer: Animorphs\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for a science fantasy young adult series told in first person and involving companion books about enslaved worlds and alien species.', action='search(\"science fantasy young adult series first person companion books enslaved worlds alien species\")', fun_name='search', fun_args=['science fantasy young adult series first person companion books enslaved worlds alien species'], fun_kwargs={}, observation=\"Could not find exact page for 'science fantasy young adult series first person companion books enslaved worlds alien species'. Similar topics: ['Animorphs', 'Last Legionary', 'Barsoom', 'List of The Outer Limits (1995 TV series) episodes', 'LGBT themes in speculative fiction']\"), StepOutput(step=2, thought=\"The search did not return an exact match. 
I should search for 'Animorphs' as it involves young adult science fantasy, first-person narration, enslaved worlds, and alien species.\", action=\"search('Animorphs')\", fun_name='search', fun_args=['Animorphs'], fun_kwargs={}, observation=\"Animorphs is a science fantasy series of youth books written by Katherine Applegate and her husband Michael Grant,[2] writing together under the name K. A. Applegate,[3] and published by Scholastic.[4] It is told in first person, with all six main characters taking turns narrating the books through their own perspectives. Horror, war, imperialism, dehumanization, sanity, morality, innocence, leadership, freedom, family, and growing up are the core themes of the series.. Published between June 1996 and May 2001, the series consists of 54 books and includes ten companion books, eight of which fit into the series' continuity (the Animorphs Chronicles and Megamorphs books) and two that are gamebooks not fitting into the continuity (the Alternamorphs books)..\"), StepOutput(step=3, thought='The series that matches the description is Animorphs. I should provide this as the answer.', action='finish(\"Animorphs\")', fun_name='finish', fun_args=['Animorphs'], fun_kwargs={}, observation='Animorphs')]\n", + "\u001b[33m2024-06-15 23:15:35 - [3641068398.py:26:] - No. 
3, question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: animorphs\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:35 - [react_agent.py:330:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:36 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.\n", + "Action 1: search(\"Laleli Mosque\")\n", + "Observation 1: The Laleli Mosque (Turkish: Laleli Camii, lit. 'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. 
The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:37 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.\n", + "Action 2: search(\"Esma Sultan Mansion\")\n", + "Observation 2: The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. 
It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:39 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.\n", + "Action 3: finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")\n", + "Observation 3: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:39 - [react_agent.py:345:call] - answer: No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for the locations of Laleli Mosque and Esma Sultan Mansion to determine if they are in the same neighborhood.', action='search(\"Laleli Mosque\")', fun_name='search', fun_args=['Laleli Mosque'], fun_kwargs={}, observation=\"The Laleli Mosque (Turkish: Laleli Camii, lit.\\u2009'Tulip Mosque') is an 18th-century Ottoman imperial mosque located in Laleli, Fatih, Istanbul, Turkey.[1]. 
The mosque was commissioned by Sultan Mustafa III to serve as his imperial or sultanic mosque.[2][3] Although it was tradition among earlier sultans to build their imperial mosque in commemoration of a major military success, Mustafa III ignored this tradition by ordering the construction before any such victories.[3] Construction began on 5 April 1760 and was completed on 9 March 1764.[4][3] According to a contemporary writer, the mosque was officially named Nur Mustafa ('Light of Mustafa'), but it became popularly known as the Laleli Mosque ('Mosque of the Tulips') after the name of the neighbourhood where it was built.[3]. The architect of the mosque is not confirmed by historical documentation, but art historians have attributed the mosque to Mehmed Tahir Agha, the chief imperial architect at the time of the mosque's completion.[a][2][4][5] On average, about 770 workers were employed in the project and about two thirds of them were non-Muslims, the rest being Muslim.[5]. The mosque was the centerpiece of a larger complex (külliye) that included the Mustafa III's tomb, a nearby caravanserai which provided some revenues to the complex, a sebil, and a madrasa.[6] Mustafa III was buried in the mausoleum attached to the complex after his death in 1774.[7] The mosque and its complex were damaged by the 1766 earthquake[4] and, according to Ünver Rüstem, by a fire in 1783.[7] In 1783 it was fully restored.[7][2] The restoration, which Doğan Kuban attributes to the architect Seyit Mustafa Agha,[4][2] preserved the original mosque's appearance.[7] The mausoleum's façade was updated with new marble window frames in the early 19th century.[6] The madrasa of the complex was destroyed by fire in 1911.[6]. 
The mosque was built in the Ottoman Baroque style of its time.[8][9][10] The layout is based on that of the earlier Selimiye Mosque of Edirne from the classical period, in accordance with Mustafa III's wishes.[11][5] The decoration of the mosque and its complex is firmly baroque.[12][2] The mosque incorporates thus the visual style of the earlier Nuruosmaniye Mosque – the first imperial mosque in the Ottoman Baroque style, completed by Mustafa III's predecessor – but in a more restrained way that integrates it with more traditional Ottoman architecture.[13].\"), StepOutput(step=2, thought='I need to search for the location of Esma Sultan Mansion to determine if it is in the same neighborhood as Laleli Mosque.', action='search(\"Esma Sultan Mansion\")', fun_name='search', fun_args=['Esma Sultan Mansion'], fun_kwargs={}, observation='The Esma Sultan Mansion (Turkish: Esma Sultan Yalısı), a historical yalı located on the Bosphorus in the Ortaköy neighborhood of Istanbul, Turkey and named after its original owner Princess Esma Sultan, is used today as a cultural center after being redeveloped.. The three-storey brick manor was designed by the renowned architect Sarkis Balyan and finished in 1875 next to Ortaköy Mosque. It was presented to the Princess Esma Sultan, the daughter of Ottoman Sultan Abdulaziz, as a wedding gift in 1889.. The mansion remained in the possession of the Ottoman dynasty until 1915. 
Subsequently, the building was used first as a tobacco warehouse and then as a coal depot from 1920 until 1975 when it was destroyed by a fire.[1].'), StepOutput(step=3, thought='I need to determine if Laleli Mosque and Esma Sultan Mansion are in the same neighborhood by comparing their locations.', action='finish(\"No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.\")', fun_name='finish', fun_args=['No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.'], fun_kwargs={}, observation='No, Laleli Mosque is located in Laleli, Fatih, Istanbul, Turkey, while Esma Sultan Mansion is in the Ortaköy neighborhood of Istanbul, Turkey.')]\n", + "\u001b[33m2024-06-15 23:15:39 - [3641068398.py:26:] - No. 4, question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: no laleli mosque is located in laleli fatih istanbul turkey while esma sultan mansion is in ortaköy neighborhood of istanbul turkey\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:39 - [react_agent.py:330:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:43 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\n", + "Action 1: search('Big Stone Gap film director')\n", + "Observation 1: Could not find exact page for 'Big Stone Gap film director'. Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'C. C. 
Swiney']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:45 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n", + "Action 2: search('Big Stone Gap film')\n", + "Observation 2: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n", + " \"action\": \"search('Big Stone Gap film')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:15:47 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n", + "Action 3: search('Big Stone Gap film')\n", + "Observation 3: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n", + " \"action\": \"search('Big Stone Gap film')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:15:49 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n", + "Action 4: search('Big Stone Gap film')\n", + "Observation 4: Could not find exact page for 'Big Stone Gap film'. 
Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n", + " \"action\": \"search('Big Stone Gap film')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:15:51 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n", + "Action 5: search('Big Stone Gap film')\n", + "Observation 5: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n", + " \"action\": \"search('Big Stone Gap film')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:15:53 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n", + "Action 6: search('Big Stone Gap film')\n", + "Observation 6: Could not find exact page for 'Big Stone Gap film'. 
Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\",\n", + " \"action\": \"search('Big Stone Gap film')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:15:54 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\n", + "Action 7: search('Big Stone Gap film')\n", + "Observation 7: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:54 - [react_agent.py:345:call] - answer: Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\u001b[0m\n", + "step_history: [StepOutput(step=1, thought=\"I need to search for the director of the romantic comedy 'Big Stone Gap' and then find the New York city they are based in.\", action=\"search('Big Stone Gap film director')\", fun_name='search', fun_args=['Big Stone Gap film director'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film director'. Similar topics: ['Big Stone Gap (film)', 'Adriana Trigiani', 'Dagmara Domińczyk', 'The Big Lebowski', 'C. C. Swiney']\"), StepOutput(step=2, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. 
Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=3, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=4, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=5, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=6, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. 
Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\"), StepOutput(step=7, thought=\"I need to search for the film 'Big Stone Gap' and find the New York city it is based in.\", action=\"search('Big Stone Gap film')\", fun_name='search', fun_args=['Big Stone Gap film'], fun_kwargs={}, observation=\"Could not find exact page for 'Big Stone Gap film'. Similar topics: ['Big Stone Gap (film)', 'Big Stone Gap, Virginia', 'Adriana Trigiani', 'Dagmara Domińczyk', 'Darién Gap']\")]\n", + "\u001b[33m2024-06-15 23:15:54 - [3641068398.py:26:] - No. 5, question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: could not find exact page for big stone gap film similar topics big stone gap film big stone gap virginia adriana trigiani dagmara domińczyk darién gap\u001b[0m\n", + "\u001b[36m2024-06-15 23:15:54 - [react_agent.py:330:call] - input_query: 2014 S/S is the debut album of a South Korean boy group that was formed by who?\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:57 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the debut album 2014 S/S of a South Korean boy group and find out who formed the group.\n", + "Action 1: search(\"2014 S/S South Korean boy group debut album\")\n", + "Observation 1: Could not find exact page for '2014 S/S South Korean boy group debut album'. 
Similar topics: ['2014 S/S', 'S.Coups', 'S (South Korean band)', 'List of South Korean boy bands', 'Kara (South Korean group)']\u001b[0m\n", + "\u001b[34m2024-06-15 23:15:58 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: To find out who formed the group, I need to search for the South Korean boy group's debut album 2014 S/S and look for information on the group's formation.\n", + "Action 2: search('S (South Korean band)')\n", + "Observation 2: S (에스) is a South Korean project group consisting of three members: Kangta, Lee Ji-hoon and Shin Hye-sung. The group debuted in 2003, under the SM Entertainment label. After 11 years, they released and promoted another mini-album in 2014.. In 2003, S released their first album Fr.In.Cl, which stands for Friends in Classic. In 2014, after more than a decade in hiatus, the group released their second mini-album Autumn Breeze on October 27.\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"To find out who formed the South Korean boy group that debuted with the album 2014 S/S, I need to search for the album's details and look for information on the group's formation.\",\n", + " \"action\": \"search('2014 S/S (album)')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:16:00 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: To find out who formed the South Korean boy group that debuted with the album 2014 S/S, I need to search for the album's details and look for information on the group's formation.\n", + "Action 3: search('2014 S/S (album)')\n", + "Observation 3: 2014 S/S is the debut album of South Korean group Winner. It was released on August 12, 2014, by the group's record label, YG Entertainment. The members were credited for writing the lyrics and composing the majority of the album's songs.[1]. 
The members produced the majority of the music for this album themselves, with the help of other producers such as Choice 37, B.I., Airplay, and others.[2] The album was highlighted for incorporating elements generally absent from K-pop releases, including hints of acoustic and alternative rock.[3][4]. The title track \"Empty\" became a hit in South Korea, topping the Gaon Chart and Billboard's K-Pop Hot 100 chart.[5][6] Internationally, 2014 S/S took the top spot in Billboard's World Album Chart.[7].\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to search for Winner's formation details.\",\n", + " \"action\": \"search('Winner (South Korean band)')\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:16:02 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to search for Winner's formation details.\n", + "Action 4: search('Winner (South Korean band)')\n", + "Observation 4: Winner (Korean: 위너; romanized: wineo; stylized in all caps) is a South Korean boy band consisting of four members: Jinu, Hoony, Mino and Seungyoon. The band was formed in 2013 by YG Entertainment through Mnet's survival program WIN: Who Is Next leading to their official debut on August 17, 2014, with studio album 2014 S/S. Originally a five-piece band, Taehyun departed in November 2016 for his mental health and well-being. Upon their debut, The Korea Herald cited their immediate success as \"unprecedented\" for a new group, thus dubbed as \"Monster rookies\". 
Winner is often cited as a \"self-producing\" band with all members actively taking on roles from producing and choreographing to designing and marketing.\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:03 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to provide the name of the group that formed Winner.\n", + "Action 5: finish(\"YG Entertainment\")\n", + "Observation 5: YG Entertainment\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:03 - [react_agent.py:345:call] - answer: YG Entertainment\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for the debut album 2014 S/S of a South Korean boy group and find out who formed the group.', action='search(\"2014 S/S South Korean boy group debut album\")', fun_name='search', fun_args=['2014 S/S South Korean boy group debut album'], fun_kwargs={}, observation=\"Could not find exact page for '2014 S/S South Korean boy group debut album'. Similar topics: ['2014 S/S', 'S.Coups', 'S (South Korean band)', 'List of South Korean boy bands', 'Kara (South Korean group)']\"), StepOutput(step=2, thought=\"To find out who formed the group, I need to search for the South Korean boy group's debut album 2014 S/S and look for information on the group's formation.\", action=\"search('S (South Korean band)')\", fun_name='search', fun_args=['S (South Korean band)'], fun_kwargs={}, observation='S (에스) is a South Korean project group consisting of three members: Kangta, Lee Ji-hoon and Shin Hye-sung. The group debuted in 2003, under the SM Entertainment label. After 11 years, they released and promoted another mini-album in 2014.. In 2003, S released their first album Fr.In.Cl, which stands for Friends in Classic. 
In 2014, after more than a decade in hiatus, the group released their second mini-album Autumn Breeze on October 27.'), StepOutput(step=3, thought=\"To find out who formed the South Korean boy group that debuted with the album 2014 S/S, I need to search for the album's details and look for information on the group's formation.\", action=\"search('2014 S/S (album)')\", fun_name='search', fun_args=['2014 S/S (album)'], fun_kwargs={}, observation='2014 S/S is the debut album of South Korean group Winner. It was released on August 12, 2014, by the group\\'s record label, YG Entertainment. The members were credited for writing the lyrics and composing the majority of the album\\'s songs.[1]. The members produced the majority of the music for this album themselves, with the help of other producers such as Choice 37, B.I., Airplay, and others.[2] The album was highlighted for incorporating elements generally absent from K-pop releases, including hints of acoustic and alternative rock.[3][4]. The title track \"Empty\" became a hit in South Korea, topping the Gaon Chart and Billboard\\'s K-Pop Hot 100 chart.[5][6] Internationally, 2014 S/S took the top spot in Billboard\\'s World Album Chart.[7].'), StepOutput(step=4, thought=\"To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to search for Winner's formation details.\", action=\"search('Winner (South Korean band)')\", fun_name='search', fun_args=['Winner (South Korean band)'], fun_kwargs={}, observation='Winner (Korean:\\xa0위너; romanized: wineo; stylized in all caps) is a South Korean boy band consisting of four members: Jinu, Hoony, Mino and Seungyoon. The band was formed in 2013 by YG Entertainment through Mnet\\'s survival program WIN: Who Is Next leading to their official debut on August 17, 2014, with studio album 2014 S/S. Originally a five-piece band, Taehyun departed in November 2016 for his mental health and well-being. 
Upon their debut, The Korea Herald cited their immediate success as \"unprecedented\" for a new group, thus dubbed as \"Monster rookies\". Winner is often cited as a \"self-producing\" band with all members actively taking on roles from producing and choreographing to designing and marketing.'), StepOutput(step=5, thought='To find out who formed the South Korean boy group Winner that debuted with the album 2014 S/S, I need to provide the name of the group that formed Winner.', action='finish(\"YG Entertainment\")', fun_name='finish', fun_args=['YG Entertainment'], fun_kwargs={}, observation='YG Entertainment')]\n", + "\u001b[33m2024-06-15 23:16:03 - [3641068398.py:26:] - No. 6, question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?, ground truth: yg entertainment, pred answer: yg entertainment\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:03 - [react_agent.py:330:call] - input_query: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:05 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to find the person known by the stage name Aladin and who worked as a consultant to help organizations improve their performance.\n", + "Action 1: search(\"Aladin stage name consultant organizations performance\")\n", + "Observation 1: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:07 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I should try searching for Aladin's stage name and his work as a consultant to help organizations improve their performance.\n", + "Action 2: search(\"Aladin stage name consultant organizations performance\")\n", + "Observation 2: Could not find exact page for 'Aladin stage name consultant organizations performance'. 
Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:08 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n", + "Action 3: search(\"Aladin stage name consultant organizations performance\")\n", + "Observation 3: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:10 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n", + "Action 4: search(\"Aladin stage name consultant organizations performance\")\n", + "Observation 4: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:12 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n", + "Action 5: search(\"Aladin stage name consultant organizations performance\")\n", + "Observation 5: Could not find exact page for 'Aladin stage name consultant organizations performance'. 
Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:13 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n", + "Action 6: search(\"Aladin stage name consultant organizations performance\")\n", + "Observation 6: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:15 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.\n", + "Action 7: search(\"Aladin stage name consultant organizations performance\")\n", + "Observation 7: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:15 - [react_agent.py:345:call] - answer: Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to find the person known by the stage name Aladin and who worked as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. 
Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=2, thought=\"I should try searching for Aladin's stage name and his work as a consultant to help organizations improve their performance.\", action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=3, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=4, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. 
Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=5, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=6, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\"), StepOutput(step=7, thought='I need to search for the person known by the stage name Aladin and find out about his work as a consultant to help organizations improve their performance.', action='search(\"Aladin stage name consultant organizations performance\")', fun_name='search', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation=\"Could not find exact page for 'Aladin stage name consultant organizations performance'. Similar topics: ['2009 in film', 'Bret Gilliam']\")]\n", + "\u001b[33m2024-06-15 23:16:15 - [3641068398.py:26:] - No. 
7, question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?, ground truth: eenasul fateh, pred answer: could not find exact page for aladin stage name consultant organizations performance similar topics 2009 in film bret gilliam\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:15 - [react_agent.py:330:call] - input_query: The arena where the Lewiston Maineiacs played their home games can seat how many people?\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:17 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the arena where the Lewiston Maineiacs played their home games and find out its seating capacity.\n", + "Action 1: search(\"Lewiston Maineiacs arena\")\n", + "Observation 1: Could not find exact page for 'Lewiston Maineiacs arena'. Similar topics: ['Lewiston Maineiacs', 'The Colisée', 'Centre Agnico Eagle', 'Sherbrooke Castors', 'Trois-Rivières Draveurs']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:18 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the seating capacity of the arena where the Lewiston Maineiacs played their home games.\n", + "Action 2: search(\"The Colisée seating capacity\")\n", + "Observation 2: Could not find exact page for 'The Colisée seating capacity'. Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'List of indoor arenas in the United States', 'Colosseum', 'List of indoor arenas in Canada']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:20 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I need to search for the seating capacity of the Colisée de Québec where the Lewiston Maineiacs played their home games.\n", + "Action 3: search(\"Colisée de Québec seating capacity\")\n", + "Observation 3: Could not find exact page for 'Colisée de Québec seating capacity'. 
Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'Videotron Centre', 'List of indoor arenas by capacity', 'List of indoor arenas in Canada']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:22 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n", + "Action 4: search(\"Videotron Centre seating capacity\")\n", + "Observation 4: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:24 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n", + "Action 5: search(\"Videotron Centre seating capacity\")\n", + "Observation 5: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:25 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n", + "Action 6: search(\"Videotron Centre seating capacity\")\n", + "Observation 6: Could not find exact page for 'Videotron Centre seating capacity'. 
Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:27 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.\n", + "Action 7: search(\"Videotron Centre seating capacity\")\n", + "Observation 7: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:27 - [react_agent.py:345:call] - answer: Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for the arena where the Lewiston Maineiacs played their home games and find out its seating capacity.', action='search(\"Lewiston Maineiacs arena\")', fun_name='search', fun_args=['Lewiston Maineiacs arena'], fun_kwargs={}, observation=\"Could not find exact page for 'Lewiston Maineiacs arena'. Similar topics: ['Lewiston Maineiacs', 'The Colisée', 'Centre Agnico Eagle', 'Sherbrooke Castors', 'Trois-Rivières Draveurs']\"), StepOutput(step=2, thought='I need to search for the seating capacity of the arena where the Lewiston Maineiacs played their home games.', action='search(\"The Colisée seating capacity\")', fun_name='search', fun_args=['The Colisée seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'The Colisée seating capacity'. 
Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'List of indoor arenas in the United States', 'Colosseum', 'List of indoor arenas in Canada']\"), StepOutput(step=3, thought='I need to search for the seating capacity of the Colisée de Québec where the Lewiston Maineiacs played their home games.', action='search(\"Colisée de Québec seating capacity\")', fun_name='search', fun_args=['Colisée de Québec seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Colisée de Québec seating capacity'. Similar topics: ['Colisée de Québec', 'List of ice hockey arenas by capacity', 'Videotron Centre', 'List of indoor arenas by capacity', 'List of indoor arenas in Canada']\"), StepOutput(step=4, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\"), StepOutput(step=5, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. 
Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\"), StepOutput(step=6, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\"), StepOutput(step=7, thought='I need to search for the seating capacity of the Videotron Centre where the Lewiston Maineiacs played their home games.', action='search(\"Videotron Centre seating capacity\")', fun_name='search', fun_args=['Videotron Centre seating capacity'], fun_kwargs={}, observation=\"Could not find exact page for 'Videotron Centre seating capacity'. Similar topics: ['Videotron Centre', 'List of ice hockey arenas by capacity', 'Colisée de Québec', 'List of indoor arenas by capacity', 'Potential National Hockey League expansion']\")]\n", + "\u001b[33m2024-06-15 23:16:27 - [3641068398.py:26:] - No. 
8, question: The arena where the Lewiston Maineiacs played their home games can seat how many people?, ground truth: 3677 seated, pred answer: could not find exact page for videotron centre seating capacity similar topics videotron centre list of ice hockey arenas by capacity colisée de québec list of indoor arenas by capacity potential national hockey league expansion\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:27 - [react_agent.py:330:call] - input_query: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:28 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for the ages of Annie Morton and Terry Richardson to determine who is older.\n", + "Action 1: search(\"Annie Morton\")\n", + "Observation 1: Annie Morton (born October 8, 1970) is an American model[1] born in Pennsylvania.[2] She has appeared on the covers of British Vogue, ID, Marie Claire, and other magazines. She has been photographed by Helmut Newton; Peter Lindbergh; Annie Leibovitz; Richard Avedon; Juergen Teller;[3] Paul Jasmin, Mary Ellen Mark, Stephen Shames, and Terry Richardson, and modeled for Donna Karan,[4] Givenchy, Guerlain, Chanel, Harper's Bazaar, Sports Illustrated and Victoria's Secret.[5] A long time vegetarian, an advocate for organic lifestyle choices and natural healthcare. She co-founded Tsi-La Organics, a \"Green Luxury\" company that creates and sells vegan, organic perfume and skin care products.[6]. She has appeared on many magazine covers and has been featured in several professional photobooks, Peter Lindbergh Selected Work, the cover of Juergen Teller By Juergen Teller, Helmut Newton Pages From The Glossies, and Donna Karan's book A Journey Of A Woman 20 Years.[7] In 1997 she posed for the Pirelli Calendar with Richard Avedon.. 
Morton has also appeared in several music videos.\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:29 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for the age of Terry Richardson to compare it with Annie Morton's age.\n", + "Action 2: search(\"Terry Richardson\")\n", + "Observation 2: Terrence Richardson (born August 14, 1965) is an American fashion and portrait photographer. He has shot advertising campaigns for Marc Jacobs, Aldo, Supreme, Sisley, Tom Ford, and Yves Saint Laurent among others, and also done work for magazines such as Rolling Stone, GQ, Vogue, Vanity Fair, Harper's Bazaar, i-D, and Vice.. Since 2001, Richardson has been accused by multiple models of sexual misconduct, although numerous other models have defended him.[2][3][4][5] In 2017, brands and magazines that had worked with Richardson in the past began distancing themselves from him, and said they would no longer employ him.[6] He has not actively worked as a photographer since 2018.[7]. Richardson was born in New York City, the son of Norma Kessler, an actress,[8][9] and Bob Richardson, a fashion photographer who struggled with schizophrenia and drug abuse.[10] Richardson's father was Irish Catholic and his mother is Jewish.[11] Following the divorce of his parents, Richardson moved to Woodstock, New York, with his mother and stepfather, English guitarist Jackie Lomax.[8] Richardson later moved to the Hollywood neighborhood of Los Angeles, where he attended Hollywood High School.[12]. 
He moved with his mother to Ojai, California, where he attended Nordhoff High School, when he was 16.[13] Richardson originally wanted to be a punk rock musician rather than a photographer.[13] He played bass guitar in the punk rock band The Invisible Government for four years.[14] He played bass for a variety of other punk bands in Southern California including Signal Street Alcoholics, Doggy Style, Baby Fist and Middle Finger.[8][15].\u001b[0m\n", + "Error executing finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n", + "\u001b[34m2024-06-15 23:16:31 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.\n", + "Action 3: finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\")\n", + "Observation 3: Error executing finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n", + "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n", + "\u001b[34m2024-06-15 23:16:32 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I have found the birth dates of Annie Morton and Terry Richardson. 
Now, I need to calculate who is older.\n", + "Action 4: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n", + "Observation 4: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n", + "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n", + "\u001b[34m2024-06-15 23:16:33 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.\n", + "Action 5: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n", + "Observation 5: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n", + "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n", + "\u001b[34m2024-06-15 23:16:34 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I have found the birth dates of Annie Morton and Terry Richardson. 
Now, I need to calculate who is older.\n", + "Action 6: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n", + "Observation 6: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n", + "Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\n", + "\u001b[34m2024-06-15 23:16:35 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I need to compare the birth dates of Annie Morton and Terry Richardson to determine who is older.\n", + "Action 7: finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")\n", + "Observation 7: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:35 - [react_agent.py:345:call] - answer: Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected 'else' after 'if' expression (, line 1)\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for the ages of Annie Morton and Terry Richardson to determine who is older.', action='search(\"Annie Morton\")', fun_name='search', fun_args=['Annie Morton'], fun_kwargs={}, observation='Annie Morton (born October 8, 1970) is an American model[1] born in Pennsylvania.[2] She has appeared on the covers of British Vogue, ID, Marie Claire, and other magazines. 
She has been photographed by Helmut Newton; Peter Lindbergh; Annie Leibovitz; Richard Avedon; Juergen Teller;[3] Paul Jasmin, Mary Ellen Mark, Stephen Shames, and Terry Richardson, and modeled for Donna Karan,[4] Givenchy, Guerlain, Chanel, Harper\\'s Bazaar, Sports Illustrated and Victoria\\'s Secret.[5] A long time vegetarian, an advocate for organic lifestyle choices and natural healthcare. She co-founded Tsi-La Organics, a \"Green Luxury\" company that creates and sells vegan, organic perfume and skin care products.[6]. She has appeared on many magazine covers and has been featured in several professional photobooks, Peter Lindbergh Selected Work, the cover of Juergen Teller By Juergen Teller, Helmut Newton Pages From The Glossies, and Donna Karan\\'s book A Journey Of A Woman 20 Years.[7] In 1997 she posed for the Pirelli Calendar with Richard Avedon.. Morton has also appeared in several music videos.'), StepOutput(step=2, thought=\"I need to search for the age of Terry Richardson to compare it with Annie Morton's age.\", action='search(\"Terry Richardson\")', fun_name='search', fun_args=['Terry Richardson'], fun_kwargs={}, observation=\"Terrence Richardson (born August 14, 1965) is an American fashion and portrait photographer. He has shot advertising campaigns for Marc Jacobs, Aldo, Supreme, Sisley, Tom Ford, and Yves Saint Laurent among others, and also done work for magazines such as Rolling Stone, GQ, Vogue, Vanity Fair, Harper's Bazaar, i-D, and Vice.. Since 2001, Richardson has been accused by multiple models of sexual misconduct, although numerous other models have defended him.[2][3][4][5] In 2017, brands and magazines that had worked with Richardson in the past began distancing themselves from him, and said they would no longer employ him.[6] He has not actively worked as a photographer since 2018.[7]. 
Richardson was born in New York City, the son of Norma Kessler, an actress,[8][9] and Bob Richardson, a fashion photographer who struggled with schizophrenia and drug abuse.[10] Richardson's father was Irish Catholic and his mother is Jewish.[11] Following the divorce of his parents, Richardson moved to Woodstock, New York, with his mother and stepfather, English guitarist Jackie Lomax.[8] Richardson later moved to the Hollywood neighborhood of Los Angeles, where he attended Hollywood High School.[12]. He moved with his mother to Ojai, California, where he attended Nordhoff High School, when he was 16.[13] Richardson originally wanted to be a punk rock musician rather than a photographer.[13] He played bass guitar in the punk rock band The Invisible Government for four years.[14] He played bass for a variety of other punk bands in Southern California including Signal Street Alcoholics, Doggy Style, Baby Fist and Middle Finger.[8][15].\"), StepOutput(step=3, thought='I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=4, thought='I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=5, thought='I have found the birth dates of Annie Morton and Terry Richardson. 
Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=6, thought='I have found the birth dates of Annie Morton and Terry Richardson. Now, I need to calculate who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)'), StepOutput(step=7, thought='I need to compare the birth dates of Annie Morton and Terry Richardson to determine who is older.', action='finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\")', fun_name=None, fun_args=None, fun_kwargs=None, observation='Error executing finish(\"Annie Morton\") if born before Terry Richardson, else finish(\"Terry Richardson\"): expected \\'else\\' after \\'if\\' expression (, line 1)')]\n", + "\u001b[33m2024-06-15 23:16:35 - [3641068398.py:26:] - No. 
9, question: Who is older, Annie Morton or Terry Richardson?, ground truth: terry richardson, pred answer: error executing finishannie morton if born before terry richardson else finishterry richardson expected else after if expression unknown line 1\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:35 - [react_agent.py:330:call] - input_query: Are Local H and For Against both from the United States?\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:36 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I need to search for Local H and For Against to find out if they are both from the United States.\n", + "Action 1: search(\"Local H\")\n", + "Observation 1: Local H is an American rock band originally formed by guitarist and vocalist Scott Lucas, bassist Joe Pletcher, drummer Joe Daniels, and lead guitarist John Sparkman in Zion, Illinois in 1990. The members all met in high school in 1987 and founded Local H three years later. After Sparkman's departure in 1991 and Garcia's departure in 1993, Local H continued as an unorthodox two-piece setup.. Local H signed a record contract with Island Records in 1994, where they would go on to release three albums. The band's debut album, Ham Fisted (1995), was not a success and the band was nearly dropped, but the band remained on the label long enough to release their second album As Good as Dead (1996).\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:39 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I need to search for For Against to find out if they are from the United States.\n", + "Action 2: search(\"For Against\")\n", + "Observation 2: For Against is an American post-punk/dream pop band from Lincoln, Nebraska, United States. Despite numerous lineup shuffles and some periods of dormancy, the band has produced material steadily since 1984.. 
Something of an anomaly in Midwestern America, For Against—especially early in its career—has looked to a distinctly British aesthetic for inspiration, most obviously the post-punk scene exemplified by the early rosters of the Factory and 4AD record labels.. Featuring members of underground Lincoln bands Cartoon Pupils and Hymn to Joy, the band first called itself Glue, and then D.B.L., finally settling on Four Against One,[1] a name that reflected its status as a quintet at the time. Bassist Liz Panarelli's early departure gave way to keyboardist Jeffrey Runnings' shift to bass, as well as the band's truncation of its name to For Against.\u001b[0m\n", + "\u001b[34m2024-06-15 23:16:40 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: Both Local H and For Against are from the United States.\n", + "Action 3: finish(\"Yes, both Local H and For Against are from the United States.\")\n", + "Observation 3: Yes, both Local H and For Against are from the United States.\u001b[0m\n", + "\u001b[36m2024-06-15 23:16:40 - [react_agent.py:345:call] - answer: Yes, both Local H and For Against are from the United States.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I need to search for Local H and For Against to find out if they are both from the United States.', action='search(\"Local H\")', fun_name='search', fun_args=['Local H'], fun_kwargs={}, observation=\"Local H is an American rock band originally formed by guitarist and vocalist Scott Lucas, bassist Joe Pletcher, drummer Joe Daniels, and lead guitarist John Sparkman in Zion, Illinois in 1990. The members all met in high school in 1987 and founded Local H three years later. After Sparkman's departure in 1991 and Garcia's departure in 1993, Local H continued as an unorthodox two-piece setup.. Local H signed a record contract with Island Records in 1994, where they would go on to release three albums. 
The band's debut album, Ham Fisted (1995), was not a success and the band was nearly dropped, but the band remained on the label long enough to release their second album As Good as Dead (1996).\"), StepOutput(step=2, thought='I need to search for For Against to find out if they are from the United States.', action='search(\"For Against\")', fun_name='search', fun_args=['For Against'], fun_kwargs={}, observation=\"For Against is an American post-punk/dream pop band from Lincoln, Nebraska, United States. Despite numerous lineup shuffles and some periods of dormancy, the band has produced material steadily since 1984.. Something of an anomaly in Midwestern America, For Against—especially early in its career—has looked to a distinctly British aesthetic for inspiration, most obviously the post-punk scene exemplified by the early rosters of the Factory and 4AD record labels.. Featuring members of underground Lincoln bands Cartoon Pupils and Hymn to Joy, the band first called itself Glue, and then D.B.L., finally settling on Four Against One,[1] a name that reflected its status as a quintet at the time. Bassist Liz Panarelli's early departure gave way to keyboardist Jeffrey Runnings' shift to bass, as well as the band's truncation of its name to For Against.\"), StepOutput(step=3, thought='Both Local H and For Against are from the United States.', action='finish(\"Yes, both Local H and For Against are from the United States.\")', fun_name='finish', fun_args=['Yes, both Local H and For Against are from the United States.'], fun_kwargs={}, observation='Yes, both Local H and For Against are from the United States.')]\n", + "\u001b[33m2024-06-15 23:16:40 - [3641068398.py:26:] - No. 
10, question: Are Local H and For Against both from the United States?, ground truth: yes, pred answer: yes both local h and for against are from united states\u001b[0m\n", + "EM = (0.3, [1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0]), FM = (0.6, [1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0]), average time = 8.897640180587768\n" + ] + } + ], + "source": [ + "from lightrag.eval.answer_match_acc import AnswerMatchAcc\n", + "\n", + "# set up evaluation type\n", + "EM_evaluator = AnswerMatchAcc(type=\"exact_match\")\n", + "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n", + "\n", + "agent = ReActAgent(\n", + " tools=tools, max_steps=7, model_client=OpenAIClient(),\n", + " model_kwargs=gpt_model_kwargs, preset_prompt_kwargs=preset_prompt_kwargs\n", + ")\n", + "\n", + "num_questions = 10\n", + "gt_answers = []\n", + "pred_answers = []\n", + "start_time = time.time()\n", + "for i in range(num_questions):\n", + " question = val_dataset[i][\"question\"]\n", + " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", + " gt_answers.append(gt_answer)\n", + " \n", + " # get the agent's response\n", + " pred_answer = agent(question)\n", + " pred_answer = normalize_answer(pred_answer)\n", + " pred_answers.append(pred_answer)\n", + " \n", + " printc(f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n", + "\n", + "end_time = time.time()\n", + " \n", + "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", + "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", + "avg_time = (end_time - start_time) / num_questions\n", + "\n", + "print(f\"EM = {em}, FM = {fm}, average time = {avg_time}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The EM is 0.3 and the FM is 0.6. Each query takes 9s in average. 
(The performance also depends on the success of wikipedia query connection.)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "What if we use simple LLM models to answer these questions? To test on this, we just need to remove the tools. We have a built-in ``llm_tool`` and ``finish`` that automatically handles the query. ``llm_tool`` uses the same model with the agent. We can't add the examples because the examples will mislead the agent to use non-existing tools." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36m2024-06-15 23:17:00 - [react_agent.py:330:call] - input_query: Were Scott Derrickson and Ed Wood of the same nationality?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:01 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The nationality of Scott Derrickson and Ed Wood needs to be determined.\n", + "Action 1: llm_tool(\"What is the nationality of Scott Derrickson and Ed Wood?\")\n", + "Observation 1: What is the nationality of Scott Derrickson and Ed Wood?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:03 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is asking if Scott Derrickson and Ed Wood were of the same nationality.\n", + "Action 2: llm_tool(\"Were Scott Derrickson and Ed Wood of the same nationality?\")\n", + "Observation 2: No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:04 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user query has been successfully answered.\n", + "Action 3: finish(\"No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\")\n", + "Observation 3: No, Scott Derrickson and Ed Wood were not of the same nationality. 
Scott Derrickson is American, while Ed Wood was also American.\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:04 - [react_agent.py:345:call] - answer: No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='The nationality of Scott Derrickson and Ed Wood needs to be determined.', action='llm_tool(\"What is the nationality of Scott Derrickson and Ed Wood?\")', fun_name='llm_tool', fun_args=['What is the nationality of Scott Derrickson and Ed Wood?'], fun_kwargs={}, observation='What is the nationality of Scott Derrickson and Ed Wood?'), StepOutput(step=2, thought='The user is asking if Scott Derrickson and Ed Wood were of the same nationality.', action='llm_tool(\"Were Scott Derrickson and Ed Wood of the same nationality?\")', fun_name='llm_tool', fun_args=['Were Scott Derrickson and Ed Wood of the same nationality?'], fun_kwargs={}, observation='No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.'), StepOutput(step=3, thought='The user query has been successfully answered.', action='finish(\"No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.\")', fun_name='finish', fun_args=['No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.'], fun_kwargs={}, observation='No, Scott Derrickson and Ed Wood were not of the same nationality. Scott Derrickson is American, while Ed Wood was also American.')]\n", + "\u001b[33m2024-06-15 23:17:04 - [3230041225.py:26:] - No. 
1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: yes, pred answer: no scott derrickson and ed wood were not of same nationality scott derrickson is american while ed wood was also american\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:04 - [react_agent.py:330:call] - input_query: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:06 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I will use llm_tool to answer this question.\n", + "Action 1: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n", + "Observation 1: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:08 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I will use llm_tool to quickly find the answer.\n", + "Action 2: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n", + "Observation 2: The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:12 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I will use llm_tool to quickly provide the answer.\n", + "Action 3: llm_tool(\"Shirley Temple held the government position of U.S. Representative.\")\n", + "Observation 3: Shirley Temple did not hold the government position of U.S. Representative. She was a famous child actress and diplomat, but she did not serve as a U.S. 
Representative.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:13 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I will use llm_tool to quickly find the answer.\n", + "Action 4: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n", + "Observation 4: The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:15 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I will use llm_tool to quickly provide the answer.\n", + "Action 5: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n", + "Observation 5: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:17 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I will use llm_tool to quickly find the answer to the user's query.\n", + "Action 6: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")\n", + "Observation 6: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:18 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I will use llm_tool to quickly find the answer to the user's query.\n", + "Action 7: llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. 
Representative.\")\n", + "Observation 7: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:18 - [react_agent.py:345:call] - answer: Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I will use llm_tool to answer this question.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?'), StepOutput(step=2, thought='I will use llm_tool to quickly find the answer.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.'), StepOutput(step=3, thought='I will use llm_tool to quickly provide the answer.', action='llm_tool(\"Shirley Temple held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['Shirley Temple held the government position of U.S. Representative.'], fun_kwargs={}, observation='Shirley Temple did not hold the government position of U.S. Representative. She was a famous child actress and diplomat, but she did not serve as a U.S. 
Representative.'), StepOutput(step=4, thought='I will use llm_tool to quickly find the answer.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='The woman who portrayed Corliss Archer in the film Kiss and Tell was Shirley Temple. She later held the government position of U.S. Representative.'), StepOutput(step=5, thought='I will use llm_tool to quickly provide the answer.', action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?'), StepOutput(step=6, thought=\"I will use llm_tool to quickly find the answer to the user's query.\", action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell and also held the government position of U.S. Representative?'), StepOutput(step=7, thought=\"I will use llm_tool to quickly find the answer to the user's query.\", action='llm_tool(\"The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. Representative.\")', fun_name='llm_tool', fun_args=['The woman who portrayed Corliss Archer in the film Kiss and Tell held the government position of U.S. 
Representative.'], fun_kwargs={}, observation='Who is the woman who portrayed Corliss Archer in the film Kiss and Tell?')]\n", + "\u001b[33m2024-06-15 23:17:18 - [3230041225.py:26:] - No. 2, question: What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?, ground truth: chief of protocol, pred answer: who is woman who portrayed corliss archer in film kiss and tell\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:18 - [react_agent.py:330:call] - input_query: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:20 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.\n", + "Action 1: llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")\n", + "Observation 1: That sounds like an exciting series! Do you have a favorite book or character from the series?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:22 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\n", + "Action 2: llm_tool(\"Science fantasy young adult series with companion books about enslaved worlds and alien species.\")\n", + "Observation 2: That sounds like an exciting series! 
Can you tell me more about the main plot or any specific themes in the books?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:24 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking a science fantasy young adult series narrated in the first person with companion books detailing stories of enslaved worlds and alien species.\n", + "Action 3: llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")\n", + "Observation 3: Hello! It sounds like you're interested in a science fantasy young adult series with a first-person narration, companion books about enslaved worlds, and alien species. That's a fascinating combination! Do you have a specific question or topic you'd like to discuss about these books?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:27 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.\n", + "Action 4: llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")\n", + "Observation 4: It sounds like you're looking for a science fantasy young adult series that is narrated in the first person and includes companion books about enslaved worlds and alien species. One recommendation that fits this description is the \"Red Rising\" series by Pierce Brown. This series follows the story of Darrow, a young man living in a society where people are divided by color and forced to work as slaves for the ruling class. As Darrow discovers the truth about his world and joins a rebellion, he encounters various alien species and explores different enslaved worlds. 
The series includes multiple companion books that delve deeper into the world and its characters.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:29 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\n", + "Action 5: llm_tool(\"Red Rising series by Pierce Brown.\")\n", + "Observation 5: Hello! How can I assist you with the Red Rising series by Pierce Brown?\u001b[0m\n", + "Trying to fix potential missing commas...\n", + "Trying to fix scaped single quotes...\n", + "Fixed JSON string: {\n", + " \"thought\": \"The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\",\n", + " \"action\": \"llm_tool(\\\"Red Rising series by Pierce Brown.\\\")\",\n", + "}\n", + "Parsing JSON string with PyYAML...\n", + "\u001b[34m2024-06-15 23:17:30 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.\n", + "Action 6: llm_tool(\"Red Rising series by Pierce Brown.\")\n", + "Observation 6: Hello! How can I assist you today with the Red Rising series by Pierce Brown?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:31 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.\n", + "Action 7: llm_tool(\"Red Rising series by Pierce Brown.\")\n", + "Observation 7: Hello! How can I assist you today with the Red Rising series by Pierce Brown?\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:31 - [react_agent.py:345:call] - answer: Hello! 
How can I assist you today with the Red Rising series by Pierce Brown?\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation='That sounds like an exciting series! Do you have a favorite book or character from the series?'), StepOutput(step=2, thought='The user is inquiring about a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation='That sounds like an exciting series! Can you tell me more about the main plot or any specific themes in the books?'), StepOutput(step=3, thought='The user is seeking a science fantasy young adult series narrated in the first person with companion books detailing stories of enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation=\"Hello! It sounds like you're interested in a science fantasy young adult series with a first-person narration, companion books about enslaved worlds, and alien species. 
That's a fascinating combination! Do you have a specific question or topic you'd like to discuss about these books?\"), StepOutput(step=4, thought='The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.', action='llm_tool(\"Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.\")', fun_name='llm_tool', fun_args=['Science fantasy young adult series narrated in first person with companion books about enslaved worlds and alien species.'], fun_kwargs={}, observation='It sounds like you\\'re looking for a science fantasy young adult series that is narrated in the first person and includes companion books about enslaved worlds and alien species. One recommendation that fits this description is the \"Red Rising\" series by Pierce Brown. This series follows the story of Darrow, a young man living in a society where people are divided by color and forced to work as slaves for the ruling class. As Darrow discovers the truth about his world and joins a rebellion, he encounters various alien species and explores different enslaved worlds. The series includes multiple companion books that delve deeper into the world and its characters.'), StepOutput(step=5, thought='The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.', action='llm_tool(\"Red Rising series by Pierce Brown.\")', fun_name='llm_tool', fun_args=['Red Rising series by Pierce Brown.'], fun_kwargs={}, observation='Hello! 
How can I assist you with the Red Rising series by Pierce Brown?'), StepOutput(step=6, thought='The user is seeking confirmation on a specific science fantasy young adult series with companion books focusing on enslaved worlds and alien species.', action='llm_tool(\"Red Rising series by Pierce Brown.\")', fun_name='llm_tool', fun_args=['Red Rising series by Pierce Brown.'], fun_kwargs={}, observation='Hello! How can I assist you today with the Red Rising series by Pierce Brown?'), StepOutput(step=7, thought='The user is looking for a science fantasy young adult series narrated in the first person with companion books about enslaved worlds and alien species.', action='llm_tool(\"Red Rising series by Pierce Brown.\")', fun_name='llm_tool', fun_args=['Red Rising series by Pierce Brown.'], fun_kwargs={}, observation='Hello! How can I assist you today with the Red Rising series by Pierce Brown?')]\n", + "\u001b[33m2024-06-15 23:17:31 - [3230041225.py:26:] - No. 3, question: What science fantasy young adult series, told in first person, has a set of companion books narrating the stories of enslaved worlds and alien species?, ground truth: animorphs, pred answer: hello how can i assist you today with red rising series by pierce brown\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:31 - [react_agent.py:330:call] - input_query: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:33 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I will use llm_tool to quickly answer this question.\n", + "Action 1: llm_tool(\"Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\")\n", + "Observation 1: No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. 
The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:35 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is asking about the location of two specific landmarks.\n", + "Action 2: finish(\"No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\")\n", + "Observation 2: No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:35 - [react_agent.py:345:call] - answer: No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I will use llm_tool to quickly answer this question.', action='llm_tool(\"Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?\")', fun_name='llm_tool', fun_args=['Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?'], fun_kwargs={}, observation='No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.'), StepOutput(step=2, thought='The user is asking about the location of two specific landmarks.', action='finish(\"No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. 
The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.\")', fun_name='finish', fun_args=['No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.'], fun_kwargs={}, observation='No, the Laleli Mosque and Esma Sultan Mansion are not located in the same neighborhood. The Laleli Mosque is located in the Laleli neighborhood of Istanbul, while the Esma Sultan Mansion is located in the Ortaköy neighborhood.')]\n", + "\u001b[33m2024-06-15 23:17:35 - [3230041225.py:26:] - No. 4, question: Are the Laleli Mosque and Esma Sultan Mansion located in the same neighborhood?, ground truth: no, pred answer: no laleli mosque and esma sultan mansion are not located in same neighborhood laleli mosque is located in laleli neighborhood of istanbul while esma sultan mansion is located in ortaköy neighborhood\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:35 - [react_agent.py:330:call] - input_query: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:36 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n", + "Action 1: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n", + "Observation 1: Hello! 
How can I assist you today?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:38 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the specific New York city where the movie 'Big Stone Gap' is based.\n", + "Action 2: llm_tool(\"New York city where the movie 'Big Stone Gap' is based in\")\n", + "Observation 2: What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:40 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking information about the specific New York city where the movie 'Big Stone Gap' is based.\n", + "Action 3: llm_tool(\"New York city where the movie 'Big Stone Gap' is based in\")\n", + "Observation 3: What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:41 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n", + "Action 4: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n", + "Observation 4: Hello! How can I assist you today?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:42 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n", + "Action 5: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n", + "Observation 5: Hello! How can I assist you today?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:44 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n", + "Action 6: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n", + "Observation 6: Hello! 
How can I assist you today?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:45 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\n", + "Action 7: llm_tool(\"director of the romantic comedy Big Stone Gap\")\n", + "Observation 7: Hello! How can I assist you today?\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:45 - [react_agent.py:345:call] - answer: Hello! How can I assist you today?\u001b[0m\n", + "step_history: [StepOutput(step=1, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=2, thought=\"The user is inquiring about the specific New York city where the movie 'Big Stone Gap' is based.\", action='llm_tool(\"New York city where the movie \\'Big Stone Gap\\' is based in\")', fun_name='llm_tool', fun_args=[\"New York city where the movie 'Big Stone Gap' is based in\"], fun_kwargs={}, observation=\"What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\"), StepOutput(step=3, thought=\"The user is seeking information about the specific New York city where the movie 'Big Stone Gap' is based.\", action='llm_tool(\"New York city where the movie \\'Big Stone Gap\\' is based in\")', fun_name='llm_tool', fun_args=[\"New York city where the movie 'Big Stone Gap' is based in\"], fun_kwargs={}, observation=\"What can you tell me about New York City, where the movie 'Big Stone Gap' is based?\"), StepOutput(step=4, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', 
fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=5, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=6, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=7, thought=\"The user is asking for the director of the romantic comedy movie 'Big Stone Gap' and the New York city it is based in.\", action='llm_tool(\"director of the romantic comedy Big Stone Gap\")', fun_name='llm_tool', fun_args=['director of the romantic comedy Big Stone Gap'], fun_kwargs={}, observation='Hello! How can I assist you today?')]\n", + "\u001b[33m2024-06-15 23:17:45 - [3230041225.py:26:] - No. 
5, question: The director of the romantic comedy \"Big Stone Gap\" is based in what New York city?, ground truth: greenwich village new york city, pred answer: hello how can i assist you today\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:45 - [react_agent.py:330:call] - input_query: 2014 S/S is the debut album of a South Korean boy group that was formed by who?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:47 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about the debut album of a South Korean boy group and who formed it.\n", + "Action 1: llm_tool(\"2014 S/S is the debut album of a South Korean boy group\")\n", + "Observation 1: What is the name of the South Korean boy group that released the album 2014 S/S?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:49 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the formation of the South Korean boy group that released the album 2014 S/S.\n", + "Action 2: llm_tool(\"Who formed the South Korean boy group that released the album 2014 S/S?\")\n", + "Observation 2: The South Korean boy group that released the album 2014 S/S is WINNER.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:50 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking confirmation of the information provided.\n", + "Action 3: llm_tool(\"Is WINNER the South Korean boy group that released the album 2014 S/S?\")\n", + "Observation 3: Yes, WINNER is the South Korean boy group that released the album 2014 S/S.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:52 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is looking for the name of the person or group that formed the South Korean boy group WINNER.\n", + "Action 4: llm_tool(\"Who formed the South Korean boy group WINNER?\")\n", + "Observation 4: The South Korean boy group WINNER was formed by YG Entertainment through the reality survival program \"WIN: Who is Next\" in 
2013.\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:53 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is asking about the formation of the South Korean boy group that released the album 2014 S/S.\n", + "Action 5: finish(\"YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\")\n", + "Observation 5: YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:53 - [react_agent.py:345:call] - answer: YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='The user is asking about the debut album of a South Korean boy group and who formed it.', action='llm_tool(\"2014 S/S is the debut album of a South Korean boy group\")', fun_name='llm_tool', fun_args=['2014 S/S is the debut album of a South Korean boy group'], fun_kwargs={}, observation='What is the name of the South Korean boy group that released the album 2014 S/S?'), StepOutput(step=2, thought='The user is inquiring about the formation of the South Korean boy group that released the album 2014 S/S.', action='llm_tool(\"Who formed the South Korean boy group that released the album 2014 S/S?\")', fun_name='llm_tool', fun_args=['Who formed the South Korean boy group that released the album 2014 S/S?'], fun_kwargs={}, observation='The South Korean boy group that released the album 2014 S/S is WINNER.'), StepOutput(step=3, thought='The user is seeking confirmation of the information provided.', action='llm_tool(\"Is WINNER the South Korean boy group that released the album 2014 S/S?\")', fun_name='llm_tool', fun_args=['Is WINNER the South Korean boy group that released the album 2014 S/S?'], fun_kwargs={}, observation='Yes, WINNER is the South Korean boy group that released the album 2014 S/S.'), StepOutput(step=4, thought='The user is looking for the name of the person or group that formed the South Korean boy 
group WINNER.', action='llm_tool(\"Who formed the South Korean boy group WINNER?\")', fun_name='llm_tool', fun_args=['Who formed the South Korean boy group WINNER?'], fun_kwargs={}, observation='The South Korean boy group WINNER was formed by YG Entertainment through the reality survival program \"WIN: Who is Next\" in 2013.'), StepOutput(step=5, thought='The user is asking about the formation of the South Korean boy group that released the album 2014 S/S.', action='finish(\"YG Entertainment through the reality survival program \\'WIN: Who is Next\\' in 2013.\")', fun_name='finish', fun_args=[\"YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\"], fun_kwargs={}, observation=\"YG Entertainment through the reality survival program 'WIN: Who is Next' in 2013.\")]\n", + "\u001b[33m2024-06-15 23:17:53 - [3230041225.py:26:] - No. 6, question: 2014 S/S is the debut album of a South Korean boy group that was formed by who?, ground truth: yg entertainment, pred answer: yg entertainment through reality survival program win who is next in 2013\u001b[0m\n", + "\u001b[36m2024-06-15 23:17:53 - [react_agent.py:330:call] - input_query: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:54 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.\n", + "Action 1: llm_tool(\"Aladin stage name consultant organizations performance\")\n", + "Observation 1: Hello! 
How can I assist you with the information you provided?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:56 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the identity of the person known as Aladin who worked as a consultant to enhance organizational performance.\n", + "Action 2: llm_tool(\"Aladin consultant organizational performance\")\n", + "Observation 2: Hello! How can I assist you with Aladin consultant organizational performance?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:57 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.\n", + "Action 3: llm_tool(\"Aladin consultant organizational performance\")\n", + "Observation 3: Hello! How can I assist you with Aladin consultant organizational performance?\u001b[0m\n", + "\u001b[34m2024-06-15 23:17:59 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: The user is asking about the person known by the stage name Aladin who helped organizations improve their performance as a consultant.\n", + "Action 4: llm_tool(\"Aladin consultant organizations performance\")\n", + "Observation 4: Hello! How can I assist you with the performance of Aladin consultant organizations?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:00 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is asking for information about a person known as Aladin who worked as a consultant to enhance organizational performance.\n", + "Action 5: llm_tool(\"Aladin consultant organizations performance\")\n", + "Observation 5: Hello! 
How can I assist you with the Aladin consultant organizations performance?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:01 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.\n", + "Action 6: llm_tool(\"Aladin consultant organizations performance\")\n", + "Observation 6: Hello! How can I assist you with the Aladin consultant organizations performance?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:03 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: The user is looking for information about the person known as Aladin, who assisted organizations in improving their performance as a consultant.\n", + "Action 7: llm_tool(\"Aladin consultant organizations performance\")\n", + "Observation 7: Hello! How can I assist you with the Aladin consultant organizations performance data?\u001b[0m\n", + "\u001b[36m2024-06-15 23:18:03 - [react_agent.py:345:call] - answer: Hello! How can I assist you with the Aladin consultant organizations performance data?\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='The user is asking about a person known by the stage name Aladin who worked as a consultant to help organizations improve their performance.', action='llm_tool(\"Aladin stage name consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin stage name consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the information you provided?'), StepOutput(step=2, thought='The user is inquiring about the identity of the person known as Aladin who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizational performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizational performance'], fun_kwargs={}, observation='Hello! 
How can I assist you with Aladin consultant organizational performance?'), StepOutput(step=3, thought='The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizational performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizational performance'], fun_kwargs={}, observation='Hello! How can I assist you with Aladin consultant organizational performance?'), StepOutput(step=4, thought='The user is asking about the person known by the stage name Aladin who helped organizations improve their performance as a consultant.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the performance of Aladin consultant organizations?'), StepOutput(step=5, thought='The user is asking for information about a person known as Aladin who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the Aladin consultant organizations performance?'), StepOutput(step=6, thought='The user is seeking confirmation of the identity of the person known as Aladin, who worked as a consultant to enhance organizational performance.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! 
How can I assist you with the Aladin consultant organizations performance?'), StepOutput(step=7, thought='The user is looking for information about the person known as Aladin, who assisted organizations in improving their performance as a consultant.', action='llm_tool(\"Aladin consultant organizations performance\")', fun_name='llm_tool', fun_args=['Aladin consultant organizations performance'], fun_kwargs={}, observation='Hello! How can I assist you with the Aladin consultant organizations performance data?')]\n", + "\u001b[33m2024-06-15 23:18:03 - [3230041225.py:26:] - No. 7, question: Who was known by his stage name Aladin and helped organizations improve their performance as a consultant?, ground truth: eenasul fateh, pred answer: hello how can i assist you with aladin consultant organizations performance data\u001b[0m\n", + "\u001b[36m2024-06-15 23:18:03 - [react_agent.py:330:call] - input_query: The arena where the Lewiston Maineiacs played their home games can seat how many people?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:05 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about the seating capacity of the arena where the Lewiston Maineiacs played their home games.\n", + "Action 1: llm_tool(\"Seating capacity of the arena where the Lewiston Maineiacs played their home games.\")\n", + "Observation 1: The seating capacity of the arena where the Lewiston Maineiacs played their home games was 3,677.\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:06 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the seating capacity of the arena where the Lewiston Maineiacs played their home games.\n", + "Action 2: finish(\"3,677.\")\n", + "Observation 2: 3,677.\u001b[0m\n", + "\u001b[36m2024-06-15 23:18:06 - [react_agent.py:345:call] - answer: 3,677.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='The user is asking about the seating capacity of the arena where the Lewiston 
Maineiacs played their home games.', action='llm_tool(\"Seating capacity of the arena where the Lewiston Maineiacs played their home games.\")', fun_name='llm_tool', fun_args=['Seating capacity of the arena where the Lewiston Maineiacs played their home games.'], fun_kwargs={}, observation='The seating capacity of the arena where the Lewiston Maineiacs played their home games was 3,677.'), StepOutput(step=2, thought='The user is inquiring about the seating capacity of the arena where the Lewiston Maineiacs played their home games.', action='finish(\"3,677.\")', fun_name='finish', fun_args=['3,677.'], fun_kwargs={}, observation='3,677.')]\n", + "\u001b[33m2024-06-15 23:18:06 - [3230041225.py:26:] - No. 8, question: The arena where the Lewiston Maineiacs played their home games can seat how many people?, ground truth: 3677 seated, pred answer: 3677\u001b[0m\n", + "\u001b[36m2024-06-15 23:18:06 - [react_agent.py:330:call] - input_query: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:07 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: I will use the llm_tool to get the information about the ages of Annie Morton and Terry Richardson.\n", + "Action 1: llm_tool(\"Annie Morton age\")\n", + "Observation 1: Hello! How old is Annie Morton?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:09 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n", + "Action 2: llm_tool(\"Annie Morton age\")\n", + "Observation 2: Hello! 
How old is Annie Morton?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:10 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: I will use the llm_tool to get the information about the ages of Terry Richardson.\n", + "Action 3: llm_tool(\"Terry Richardson age\")\n", + "Observation 3: How old is Terry Richardson?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:12 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n", + "Action 4: llm_tool(\"Annie Morton age\")\n", + "Observation 4: Hello! How old is Annie Morton?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:13 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n", + "Action 5: llm_tool(\"Annie Morton age\")\n", + "Observation 5: Hello! How old is Annie Morton?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:14 - [react_agent.py:314:_run_one_step] - step: 6, response: Thought 6: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n", + "Action 6: llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")\n", + "Observation 6: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:16 - [react_agent.py:314:_run_one_step] - step: 7, response: Thought 7: I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.\n", + "Action 7: llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")\n", + "Observation 7: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n", + "\u001b[36m2024-06-15 23:18:16 - [react_agent.py:345:call] - answer: Who is older, Annie Morton or Terry Richardson?\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='I will use the llm_tool to get the information about the ages of Annie Morton and Terry 
Richardson.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! How old is Annie Morton?'), StepOutput(step=2, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! How old is Annie Morton?'), StepOutput(step=3, thought='I will use the llm_tool to get the information about the ages of Terry Richardson.', action='llm_tool(\"Terry Richardson age\")', fun_name='llm_tool', fun_args=['Terry Richardson age'], fun_kwargs={}, observation='How old is Terry Richardson?'), StepOutput(step=4, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! How old is Annie Morton?'), StepOutput(step=5, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Annie Morton age\")', fun_name='llm_tool', fun_args=['Annie Morton age'], fun_kwargs={}, observation='Hello! 
How old is Annie Morton?'), StepOutput(step=6, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")', fun_name='llm_tool', fun_args=['Who is older, Annie Morton or Terry Richardson?'], fun_kwargs={}, observation='Who is older, Annie Morton or Terry Richardson?'), StepOutput(step=7, thought='I will use the llm_tool to compare the ages of Annie Morton and Terry Richardson to determine who is older.', action='llm_tool(\"Who is older, Annie Morton or Terry Richardson?\")', fun_name='llm_tool', fun_args=['Who is older, Annie Morton or Terry Richardson?'], fun_kwargs={}, observation='Who is older, Annie Morton or Terry Richardson?')]\n", + "\u001b[33m2024-06-15 23:18:16 - [3230041225.py:26:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth: terry richardson, pred answer: who is older annie morton or terry richardson\u001b[0m\n", + "\u001b[36m2024-06-15 23:18:16 - [react_agent.py:330:call] - input_query: Are Local H and For Against both from the United States?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:17 - [react_agent.py:314:_run_one_step] - step: 1, response: Thought 1: The user is asking about the origin of two entities, Local H and For Against.\n", + "Action 1: llm_tool(\"Local H origin\")\n", + "Observation 1: Hello! How can I assist you today?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:18 - [react_agent.py:314:_run_one_step] - step: 2, response: Thought 2: The user is inquiring about the origin of For Against.\n", + "Action 2: llm_tool(\"For Against origin\")\n", + "Observation 2: Hello! 
How can I assist you today regarding the topic of origin?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:20 - [react_agent.py:314:_run_one_step] - step: 3, response: Thought 3: The user wants to know if both Local H and For Against are from the United States.\n", + "Action 3: llm_tool(\"Local H and For Against origin\")\n", + "Observation 3: Hello! How can I assist you today with the information about Local H and their stance on For Against?\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:23 - [react_agent.py:314:_run_one_step] - step: 4, response: Thought 4: Confirming the origin of Local H and For Against.\n", + "Action 4: llm_tool(\"Are Local H and For Against both from the United States?\")\n", + "Observation 4: Yes, both Local H and For Against are from the United States.\u001b[0m\n", + "\u001b[34m2024-06-15 23:18:24 - [react_agent.py:314:_run_one_step] - step: 5, response: Thought 5: The user is seeking confirmation about the origin of Local H and For Against.\n", + "Action 5: finish(\"Yes, both Local H and For Against are from the United States.\")\n", + "Observation 5: Yes, both Local H and For Against are from the United States.\u001b[0m\n", + "\u001b[36m2024-06-15 23:18:24 - [react_agent.py:345:call] - answer: Yes, both Local H and For Against are from the United States.\u001b[0m\n", + "step_history: [StepOutput(step=1, thought='The user is asking about the origin of two entities, Local H and For Against.', action='llm_tool(\"Local H origin\")', fun_name='llm_tool', fun_args=['Local H origin'], fun_kwargs={}, observation='Hello! How can I assist you today?'), StepOutput(step=2, thought='The user is inquiring about the origin of For Against.', action='llm_tool(\"For Against origin\")', fun_name='llm_tool', fun_args=['For Against origin'], fun_kwargs={}, observation='Hello! 
How can I assist you today regarding the topic of origin?'), StepOutput(step=3, thought='The user wants to know if both Local H and For Against are from the United States.', action='llm_tool(\"Local H and For Against origin\")', fun_name='llm_tool', fun_args=['Local H and For Against origin'], fun_kwargs={}, observation='Hello! How can I assist you today with the information about Local H and their stance on For Against?'), StepOutput(step=4, thought='Confirming the origin of Local H and For Against.', action='llm_tool(\"Are Local H and For Against both from the United States?\")', fun_name='llm_tool', fun_args=['Are Local H and For Against both from the United States?'], fun_kwargs={}, observation='Yes, both Local H and For Against are from the United States.'), StepOutput(step=5, thought='The user is seeking confirmation about the origin of Local H and For Against.', action='finish(\"Yes, both Local H and For Against are from the United States.\")', fun_name='finish', fun_args=['Yes, both Local H and For Against are from the United States.'], fun_kwargs={}, observation='Yes, both Local H and For Against are from the United States.')]\n", + "\u001b[33m2024-06-15 23:18:24 - [3230041225.py:26:] - No. 
10, question: Are Local H and For Against both from the United States?, ground truth: yes, pred answer: yes both local h and for against are from united states\u001b[0m\n", + "EM = (0.0, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]), FM = (0.4, [0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, 1.0]), average time = 8.354214406013488\n" + ] + } + ], + "source": [ + "from lightrag.eval.answer_match_acc import AnswerMatchAcc\n", + "\n", + "# set up evaluation type\n", + "EM_evaluator = AnswerMatchAcc(type=\"exact_match\")\n", + "FM_evaluator = AnswerMatchAcc(type=\"fuzzy_match\")\n", + "\n", + "agent = ReActAgent(\n", + " max_steps=7, model_client=OpenAIClient(),\n", + " model_kwargs=gpt_model_kwargs\n", + ")\n", + "\n", + "num_questions = 10\n", + "gt_answers = []\n", + "pred_answers = []\n", + "start_time = time.time()\n", + "for i in range(num_questions):\n", + " question = val_dataset[i][\"question\"]\n", + " gt_answer = normalize_answer(val_dataset[i][\"answer\"]) # normalize the ground truth answer\n", + " gt_answers.append(gt_answer)\n", + " \n", + " # get the agent's response\n", + " pred_answer = agent(question)\n", + " pred_answer = normalize_answer(pred_answer)\n", + " pred_answers.append(pred_answer)\n", + " \n", + " printc(f\"No. {i+1}, question: {question}, ground truth: {gt_answer}, pred answer: {pred_answer}\", color=\"yellow\")\n", + "\n", + "end_time = time.time()\n", + " \n", + "em = EM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", + "fm = FM_evaluator.compute(pred_answers=pred_answers, gt_answers=gt_answers)\n", + "avg_time = (end_time - start_time) / num_questions\n", + "\n", + "print(f\"EM = {em}, FM = {fm}, average time = {avg_time}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Without the tools and examples, EM=0 and FM=0.4. We saw hallucinations and nonsense:\n", + "\n", + "2024-06-15 23:17:04 - [3230041225.py:26:] - No. 
1, question: Were Scott Derrickson and Ed Wood of the same nationality?, ground truth: ``yes``, pred answer: ``no scott derrickson and ed wood were not of same nationality scott derrickson is american while ed wood was also american``\n", + "\n", + "2024-06-15 23:18:16 - [3230041225.py:26:] - No. 9, question: Who is older, Annie Morton or Terry Richardson?, ground truth: ``terry richardson``, pred answer: ``who is older annie morton or terry richardson``\n", + "\n", + "Therefore, using the ReAct agent outperforms the base LLM.\n", + "Meanwhile, the ``LightRAG ReAct agent`` achieves better performance (EM=0.3) on the same 10 questions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 7. Future Improvement" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO:\n", + "# 1. advanced, add history to react\n", + "# 2. add training, few shot\n", + "# 3. llm as judge\n", + "# 4. add picture\n", + "# 5. better json handling, we need to store the answer output" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "lightrag-project", + "language": "python", + "name": "light-rag-project" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}