diff --git a/docs/source/tutorials/logging.rst b/docs/source/tutorials/logging.rst index 04d31de9..1f566758 100644 --- a/docs/source/tutorials/logging.rst +++ b/docs/source/tutorials/logging.rst @@ -1,3 +1,19 @@ +Logger Example +-------------- + +.. raw:: html + +
+ + Open In Colab + + + GitHub + Open Source Code + +
+ + Logging ==================== diff --git a/docs/source/tutorials/logging_tracing.rst b/docs/source/tutorials/logging_tracing.rst index 26d8f605..7cf5ba5f 100644 --- a/docs/source/tutorials/logging_tracing.rst +++ b/docs/source/tutorials/logging_tracing.rst @@ -1,3 +1,15 @@ +.. raw:: html + +
+ + Open In Colab + + + GitHub + Open Source Code + +
+ + .. _logging_tracing: Tracing diff --git a/docs/source/tutorials/rag_playbook.rst b/docs/source/tutorials/rag_playbook.rst index 685bb4ea..9175a09f 100644 --- a/docs/source/tutorials/rag_playbook.rst +++ b/docs/source/tutorials/rag_playbook.rst @@ -1,11 +1,9 @@ -.. -.. Try Quickstart in Colab -.. - .. raw:: html
- + + Try RAG playbook in Colab + GitHub Open Source Code diff --git a/docs/source/tutorials/tool_helper.rst b/docs/source/tutorials/tool_helper.rst index 6b3736fd..4b607a26 100644 --- a/docs/source/tutorials/tool_helper.rst +++ b/docs/source/tutorials/tool_helper.rst @@ -1,3 +1,15 @@ +.. raw:: html + +
+ + Open In Colab + + + GitHub + Open Source Code + +
+ .. _tool_helper: Function calls diff --git a/docs/source/use_cases/classification.rst b/docs/source/use_cases/classification.rst index d0aaa489..0ba09159 100644 --- a/docs/source/use_cases/classification.rst +++ b/docs/source/use_cases/classification.rst @@ -1,10 +1,9 @@ -.. -.. Try Quickstart in Colab -.. - .. raw:: html
+ + Open In Colab + GitHub diff --git a/docs/source/use_cases/rag_opt.rst b/docs/source/use_cases/rag_opt.rst index 73b824bf..072fe3f5 100644 --- a/docs/source/use_cases/rag_opt.rst +++ b/docs/source/use_cases/rag_opt.rst @@ -1,10 +1,9 @@ -.. -.. Try Quickstart in Colab -.. - .. raw:: html
+ + Open In Colab + GitHub diff --git a/notebooks/tutorials/adalflow_classification_optimization.ipynb b/notebooks/tutorials/adalflow_classification_optimization.ipynb new file mode 100644 index 00000000..0afb97df --- /dev/null +++ b/notebooks/tutorials/adalflow_classification_optimization.ipynb @@ -0,0 +1,463 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 🤗 Welcome to AdalFlow!\n", + "## The PyTorch library to auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! ⭐ Star us on Github ⭐\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "## 📖 Outline\n", + "\n", + "This is the code for a classification optimization tutorial ![image.png]()\n" + ], + "metadata": { + "id": "xHF95Kr4CzGq" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. Setup `openai` and `groq` API key in the environment variables\n", + "\n", + "You can choose to use different client. 
You can import the model client you prefer. We support `Anthropic`, `Cohere`, `Google`, `GROQ`, `OpenAI`, `Transformer` and more in development. We will use OpenAI here as an example. Please refer to our [full installation guide](https://adalflow.sylph.ai/get_started/installation.html)" + ], + "metadata": { + "id": "Kof5M6DRaKhh" + } + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "id": "tAp3eDjOCma1" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai] # also install the package for the model client you'll use\n", + "!pip install datasets\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Set Environment Variables\n", + "\n", + "Run the following code and pass your api key.\n", + "\n", + "Note: for normal `.py` projects, follow our [official installation guide](https://adalflow.sylph.ai/get_started/installation.html).\n", + "\n", + "*Go to [OpenAI](https://platform.openai.com/docs/introduction) to get API keys if you don't already have them.*" + ], + "metadata": { + "id": "KapUyHMM07pJ" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ONfzF9Puzdd_", + "outputId": "e5c3cfc5-69cb-448a-c248-a8cebda5ba71" + }, + "execution_count": 43, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "API keys have been set.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from dataclasses import dataclass, field\n", + "from typing import 
List, Dict, Union, Optional, Tuple, Any, Callable\n", + "from datasets import load_dataset\n", + "from adalflow.components.model_client import OpenAIClient\n", + "import adalflow as adal\n", + "from adalflow.core.component import Component\n", + "from adalflow.datasets.types import TrecData\n", + "from adalflow.eval.answer_match_acc import AnswerMatchAcc\n", + "\n", + "\n", + "_COARSE_LABELS = [\n", + " \"ABBR\",\n", + " \"DESC\",\n", + " \"ENTY\",\n", + " \"HUM\",\n", + " \"LOC\",\n", + " \"NUM\"\n", + "]\n", + "\n", + "_COARSE_LABELS_DESC = [\n", + " \"Abbreviation: Questions about abbreviations and their meanings\",\n", + " \"Description: Questions seeking descriptions of people, things, or concepts\",\n", + " \"Entity: Questions about entities (e.g., animals, colors, inventions)\",\n", + " \"Human: Questions about people or organizations\",\n", + " \"Location: Questions about places, cities, countries\",\n", + " \"Numeric: Questions seeking numeric answers (e.g., dates, amounts, distances)\"\n", + "]\n", + "\n", + "\n", + "template = r\"\"\"\n", + " {{system_prompt}}\n", + " {% if output_format_str is not none %}\n", + " {{output_format_str}}\n", + " {% endif %}\n", + " {% if few_shot_demos is not none %}\n", + " Here are some examples:\n", + " {{few_shot_demos}}\n", + " {% endif %}\n", + " \n", + " \n", + " {{input_str}}\n", + " \n", + " \"\"\"\n", + "\n", + "task_desc_template = r\"\"\"You are a classifier. Given a question, you need to classify it into one of the following classes:\n", + " Format: class_index. class_name, class_description\n", + " {% if classes %}\n", + " {% for class in classes %}\n", + " {{loop.index-1}}. 
{{class.label}}, {{class.desc}}\n", + " {% endfor %}\n", + " {% endif %}\n", + " - Do not try to answer the question:\n", + " \"\"\"\n", + "\n", + "@dataclass\n", + "class TRECExtendedData(TrecData):\n", + " rationale: str = field(\n", + " metadata={\n", + " \"desc\": \"Your step-by-step reasoning to classify the question to class_name\"\n", + " },\n", + " default=None,\n", + " )\n", + " __input_fields__ = [\"question\"]\n", + " __output_fields__ = [\"rationale\", \"class_name\"] # it is important to have the rationale before the class_name" + ], + "metadata": { + "id": "ZZIEtZYHNVjo" + }, + "execution_count": 49, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "class TRECClassifierStructuredOutput(adal.Component):\n", + "\n", + " def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):\n", + " super().__init__()\n", + "\n", + " label_desc = [\n", + " {\"label\": label, \"desc\": desc}\n", + " for label, desc in zip(_COARSE_LABELS, _COARSE_LABELS_DESC)\n", + " ]\n", + "\n", + " task_desc_str = adal.Prompt(\n", + " template=task_desc_template, prompt_kwargs={\"classes\": label_desc}\n", + " )()\n", + "\n", + " self.data_class = TRECExtendedData\n", + " self.data_class.set_task_desc(task_desc_str)\n", + "\n", + " self.parser = adal.DataClassParser(\n", + " data_class=self.data_class, return_data_class=True, format_type=\"yaml\"\n", + " )\n", + "\n", + " prompt_kwargs = {\n", + " \"system_prompt\": adal.Parameter(\n", + " data=self.parser.get_task_desc_str(),\n", + " role_desc=\"Task description\",\n", + " requires_opt=True,\n", + " param_type=adal.ParameterType.PROMPT,\n", + " ),\n", + " \"output_format_str\": adal.Parameter(\n", + " data=self.parser.get_output_format_str(),\n", + " role_desc=\"Output format requirements\",\n", + " requires_opt=False,\n", + " param_type=adal.ParameterType.PROMPT,\n", + " ),\n", + " \"few_shot_demos\": adal.Parameter(\n", + " data=None,\n", + " requires_opt=True,\n", + " role_desc=\"Few shot examples to 
help the model\",\n", + " param_type=adal.ParameterType.DEMOS,\n", + " ),\n", + " }\n", + "\n", + " self.llm = adal.Generator(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " prompt_kwargs=prompt_kwargs,\n", + " template=template,\n", + " output_processors=self.parser,\n", + " use_cache=True,\n", + " )\n", + "\n", + " def _prepare_input(self, question: str):\n", + " input_data = self.data_class(question=question)\n", + " input_str = self.parser.get_input_str(input_data)\n", + " prompt_kwargs = {\n", + " \"input_str\": adal.Parameter(\n", + " data=input_str, requires_opt=False, role_desc=\"input to the LLM\"\n", + " )\n", + " }\n", + " return prompt_kwargs\n", + "\n", + " def call(\n", + " self, question: str, id: Optional[str] = None\n", + " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", + " prompt_kwargs = self._prepare_input(question)\n", + " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", + " return output" + ], + "metadata": { + "id": "3Q3H9XC4Ncfi" + }, + "execution_count": 50, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "class TrecClassifierAdal(adal.AdalComponent):\n", + " def __init__(\n", + " self,\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " teacher_model_config: Dict,\n", + " backward_engine_model_config: Dict,\n", + " text_optimizer_model_config: Dict,\n", + " ):\n", + " task = TRECClassifierStructuredOutput(model_client, model_kwargs)\n", + " eval_fn = AnswerMatchAcc(type=\"exact_match\").compute_single_item\n", + " loss_fn = adal.EvalFnToTextLoss(\n", + " eval_fn=eval_fn,\n", + " eval_fn_desc=\"exact_match: 1 if str(y) == str(y_gt) else 0\",\n", + " )\n", + " super().__init__(\n", + " task=task,\n", + " eval_fn=eval_fn,\n", + " loss_fn=loss_fn,\n", + " backward_engine_model_config=backward_engine_model_config,\n", + " text_optimizer_model_config=text_optimizer_model_config,\n", + " teacher_model_config=teacher_model_config,\n", + " )\n", + "\n", + " def 
prepare_task(self, sample: TRECExtendedData):\n", + " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", + "\n", + " def prepare_eval(\n", + " self, sample: TRECExtendedData, y_pred: adal.GeneratorOutput\n", + " ) -> float:\n", + " y_label = -1\n", + " if y_pred and y_pred.data is not None and y_pred.data.class_name is not None:\n", + " y_label = y_pred.data.class_name\n", + " return self.eval_fn, {\"y\": y_label, \"y_gt\": sample.class_name}\n", + "\n", + " def prepare_loss(\n", + " self, sample: TRECExtendedData, y_pred: adal.Parameter, *args, **kwargs\n", + " ) -> Tuple[Callable[..., Any], Dict]:\n", + " full_response = y_pred.full_response\n", + " y_label = -1\n", + " if (\n", + " full_response\n", + " and full_response.data is not None\n", + " and full_response.data.class_name is not None\n", + " ):\n", + " y_label = full_response.data.class_name\n", + "\n", + " y_pred.eval_input = y_label\n", + " y_gt = adal.Parameter(\n", + " name=\"y_gt\",\n", + " data=sample.class_name,\n", + " eval_input=sample.class_name,\n", + " requires_opt=False,\n", + " )\n", + " return self.loss_fn, {\"kwargs\": {\"y\": y_pred, \"y_gt\": y_gt}}" + ], + "metadata": { + "id": "HpkQYsh2NevT" + }, + "execution_count": 51, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def train(\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " train_batch_size=4,\n", + " raw_shots: int = 0,\n", + " bootstrap_shots: int = 1,\n", + " max_steps=12,\n", + " num_workers=4,\n", + " strategy=\"constrained\",\n", + " optimization_order=\"sequential\",\n", + " debug=False,\n", + "):\n", + " print(\"Starting training process...\")\n", + "\n", + " # Define the model configuration for all components\n", + " gpt_4o_model = {\n", + " \"model\": \"gpt-4-turbo-preview\",\n", + " \"temperature\": 0,\n", + " \"max_tokens\": 1000,\n", + " \"top_p\": 1,\n", + " \"frequency_penalty\": 0,\n", + " \"presence_penalty\": 0\n", + " }\n", + " 
print(f\"Component model configuration: {gpt_4o_model}\")\n", + "\n", + " try:\n", + " print(\"Initializing ADAL component...\")\n", + " adal_component = TrecClassifierAdal(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " text_optimizer_model_config=gpt_4o_model,\n", + " backward_engine_model_config=gpt_4o_model,\n", + " teacher_model_config=gpt_4o_model,\n", + " )\n", + " print(\"ADAL component initialized successfully\")\n", + "\n", + " print(\"Initializing trainer...\")\n", + " trainer = adal.Trainer(\n", + " train_batch_size=train_batch_size,\n", + " adaltask=adal_component,\n", + " strategy=strategy,\n", + " max_steps=max_steps,\n", + " num_workers=num_workers,\n", + " raw_shots=raw_shots,\n", + " bootstrap_shots=bootstrap_shots,\n", + " debug=debug,\n", + " weighted_sampling=True,\n", + " optimization_order=optimization_order,\n", + " exclude_input_fields_from_bootstrap_demos=True,\n", + " )\n", + " print(\"Trainer initialized successfully\")\n", + "\n", + " print(\"Loading datasets...\")\n", + " train_dataset, val_dataset, test_dataset = load_datasets()\n", + " print(f\"Datasets loaded - Train size: {len(train_dataset)}, Val size: {len(val_dataset)}, Test size: {len(test_dataset)}\")\n", + "\n", + " print(\"Starting model training...\")\n", + " trainer.fit(\n", + " train_dataset=train_dataset,\n", + " val_dataset=test_dataset,\n", + " debug=debug,\n", + " )\n", + " print(\"Training completed successfully\")\n", + "\n", + " except Exception as e:\n", + " print(f\"Error occurred: {str(e)}\")\n", + " raise" + ], + "metadata": { + "id": "PEj6xiZ5dVaj" + }, + "execution_count": 52, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.components.model_client.openai_client import OpenAIClient\n", + "\n", + "\n", + "gpt_4o_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-4o-mini\",\n", + " \"max_tokens\": 2000,\n", + "\n", + " },\n", + "}\n", + "\n", + 
"\n", + "train(\n", + " model_client=OpenAIClient(),\n", + " model_kwargs=gpt_4o_model,\n", + " )" + ], + "metadata": { + "id": "GnlZBQOMEj6E", + "collapsed": true + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/AdalFlow/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/AdalFlow/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." + ], + "metadata": { + "id": "AmkbyxmuruUu" + } + } + ] +} diff --git a/notebooks/tutorials/adalflow_function_calls.ipynb b/notebooks/tutorials/adalflow_function_calls.ipynb new file mode 100644 index 00000000..622448c9 --- /dev/null +++ b/notebooks/tutorials/adalflow_function_calls.ipynb @@ -0,0 +1,737 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Function calls\n", + "\n", + "Tools are the means an LLM can use to interact with the world beyond its internal knowledge. Technically speaking, retrievers are tools that help an LLM get more relevant context, and memory is a tool for an LLM to carry out a conversation. Deciding when, which, and how to use a tool, and even creating a tool, is an agentic behavior: a function call is the process of showing an LLM a list of function definitions and prompting it to choose one or a few of them. 
Many places use tools and function calls interchangeably.\n", + "\n", + "In this notebook we will cover function calls, including:\n", + "\n", + "- Function call walkthrough\n", + "\n", + "- Overall design\n", + "\n", + "- Function call in action\n", + "\n", + "It follows the tutorial here: https://adalflow.sylph.ai/tutorials/tool_helper.html#" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()\n" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-4c_AGBt3PlR", + "outputId": "21a26437-9f95-4478-84e9-ba4369956b6f" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "Please enter your GROQ API key: ··········\n", + "API keys have been set.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from dataclasses import dataclass\n", + "from typing import List\n", + "import numpy as np\n", + "import time\n", + "import asyncio\n", + "\n", + "\n", + "\n", + "def multiply(a: int, b: int) -> int:\n", + " \"\"\"Multiply two numbers.\"\"\"\n", + " time.sleep(1)\n", + " return a * b\n", + "\n", + "\n", + "def add(a: int, b: int) -> int:\n", + " \"\"\"Add two 
numbers.\"\"\"\n", + " time.sleep(1)\n", + " return a + b\n", + "\n", + "\n", + "async def divide(a: float, b: float) -> float:\n", + " \"\"\"Divide two numbers.\"\"\"\n", + " await asyncio.sleep(1)\n", + " return float(a) / b\n", + "\n", + "\n", + "async def search(query: str) -> List[str]:\n", + " \"\"\"Search for query and return a list of results.\"\"\"\n", + " await asyncio.sleep(1)\n", + " return [\"result1\" + query, \"result2\" + query]\n", + "\n", + "\n", + "def numpy_sum(arr: np.ndarray) -> float:\n", + " \"\"\"Sum the elements of an array.\"\"\"\n", + " return np.sum(arr)\n", + "\n", + "\n", + "x = 2\n", + "\n", + "@dataclass\n", + "class Point:\n", + " x: int\n", + " y: int\n", + "\n", + "\n", + "def add_points(p1: Point, p2: Point) -> Point:\n", + " return Point(p1.x + p2.x, p1.y + p2.y)" + ], + "metadata": { + "id": "GMKuuP7xR9Nt" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "## Function Tool" + ], + "metadata": { + "id": "jCA7HMjtT16P" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.func_tool import FunctionTool\n", + "\n", + "functions =[multiply, add, divide, search, numpy_sum, add_points]\n", + "tools = [\n", + " FunctionTool(fn=fn) for fn in functions\n", + "]\n", + "for tool in tools:\n", + " print(tool)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fgOEoLoDSBqh", + "outputId": "7e636e2c-9a5d-44f1-f0fe-fe8a6bea474d" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']}))\n", + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two 
numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']}))\n", + "FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']}))\n", + "FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']}))\n", + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']}))\n", + "FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "print(tools[-2].definition.to_dict())" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "CYJaHFhGSEzH", + "outputId": "9ab36c6c-7509-4e7f-ce85-11dae889c8c2" + }, + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "{'func_name': 'numpy_sum', 'func_desc': 'numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', 'func_parameters': 
{'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']}}\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "context_map = {tool.definition.func_name: tool for tool in tools}" + ], + "metadata": { + "id": "_O4bQgXrSKb6" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "function_name = \"add\"\n", + "function_to_call = context_map[function_name]\n", + "function_args = {\"a\": 1, \"b\": 2}\n", + "function_response = function_to_call.call(**function_args)" + ], + "metadata": { + "id": "-RgWWMdISL1u" + }, + "execution_count": 8, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.tool_manager import ToolManager\n", + "\n", + "tool_manager = ToolManager(tools=functions)\n", + "print(tool_manager)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "6CT7Tez1SOai", + "outputId": "e486d882-9179-4db3-f077-6adfc9fc6579" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ToolManager(Tools: [FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: 
FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))], Additional Context: {})\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## ToolManager" + ], + "metadata": { + "id": "jzFqNnN_T-cu" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.tool_manager import ToolManager\n", + "\n", + "tool_manager = ToolManager(tools=functions)\n", + "print(tool_manager)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "JX7MibWiUF3U", + "outputId": "20707186-5ec3-49a4-d553-c3160c3daa84" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ToolManager(Tools: [FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='multiply', func_desc='multiply(a: int, b: int) -> int\\nMultiply two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: False, definition: 
FunctionDefinition(func_name='add', func_desc='add(a: int, b: int) -> int\\nAdd two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'int'}, 'b': {'type': 'int'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='divide', func_desc='divide(a: float, b: float) -> float\\nDivide two numbers.', func_parameters={'type': 'object', 'properties': {'a': {'type': 'float'}, 'b': {'type': 'float'}}, 'required': ['a', 'b']})), FunctionTool(fn: , async: True, definition: FunctionDefinition(func_name='search', func_desc='search(query: str) -> List[str]\\nSearch for query and return a list of results.', func_parameters={'type': 'object', 'properties': {'query': {'type': 'str'}}, 'required': ['query']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='numpy_sum', func_desc='numpy_sum(arr: numpy.ndarray) -> float\\nSum the elements of an array.', func_parameters={'type': 'object', 'properties': {'arr': {'type': 'ndarray'}}, 'required': ['arr']})), FunctionTool(fn: , async: False, definition: FunctionDefinition(func_name='add_points', func_desc='add_points(p1: __main__.Point, p2: __main__.Point) -> __main__.Point\\nNone', func_parameters={'type': 'object', 'properties': {'p1': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}, 'p2': {'type': \"{'type': 'Point', 'properties': {'x': {'type': 'int'}, 'y': {'type': 'int'}}, 'required': ['x', 'y']}\"}}, 'required': ['p1', 'p2']}))], Additional Context: {})\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Function Call end-to-end" + ], + "metadata": { + "id": "9Bw2fs--UKX7" + } + }, + { + "cell_type": "code", + "source": [ + "template = r\"\"\"You have these tools available:\n", + "{% if tools %}\n", + "\n", + "{% for tool in tools %}\n", + "{{ loop.index }}.\n", + "{{tool}}\n", + "------------------------\n", + "{% endfor %}\n", + "\n", + "{% 
endif %}\n", + "\n", + "{{output_format_str}}\n", + "\n", + "\n", + "User: {{input_str}}\n", + "You:\n", + "\"\"\"" + ], + "metadata": { + "id": "TywPQMIVUOqh" + }, + "execution_count": 11, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.prompt_builder import Prompt\n", + "\n", + "prompt = Prompt(template=template)\n", + "small_tool_manager = ToolManager(tools=tools[:2])\n", + "\n", + "renered_prompt = prompt(tools=small_tool_manager.yaml_definitions)\n", + "print(renered_prompt)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-vMajeXoUQ5A", + "outputId": "ca68601b-e9c8-41c3-a6fa-777f225e68e3" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "You have these tools available:\n", + "\n", + "1.\n", + "func_name: multiply\n", + "func_desc: 'multiply(a: int, b: int) -> int\n", + "\n", + " Multiply two numbers.'\n", + "func_parameters:\n", + " type: object\n", + " properties:\n", + " a:\n", + " type: int\n", + " b:\n", + " type: int\n", + " required:\n", + " - a\n", + " - b\n", + "------------------------\n", + "2.\n", + "func_name: add\n", + "func_desc: 'add(a: int, b: int) -> int\n", + "\n", + " Add two numbers.'\n", + "func_parameters:\n", + " type: object\n", + " properties:\n", + " a:\n", + " type: int\n", + " b:\n", + " type: int\n", + " required:\n", + " - a\n", + " - b\n", + "------------------------\n", + "\n", + "\n", + "None\n", + "\n", + "\n", + "User: None\n", + "You:\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.types import Function\n", + "\n", + "output_data_class = Function\n", + "output_format_str = output_data_class.to_json_signature(exclude=[\"thought\", \"args\"])\n", + "\n", + "renered_prompt= prompt(output_format_str=output_format_str)\n", + "print(renered_prompt)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "V9-90IFRUUNT", + 
"outputId": "ed2f829e-c656-43c6-a454-8a7c32d5dafe" + }, + "execution_count": 13, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "You have these tools available:\n", + "\n", + "{\n", + " \"name\": \"The name of the function (str) (optional)\",\n", + " \"kwargs\": \"The keyword arguments of the function (Optional[Dict[str, object]]) (optional)\"\n", + "}\n", + "\n", + "\n", + "User: None\n", + "You:\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.types import FunctionExpression\n", + "\n", + "output_data_class = FunctionExpression\n", + "output_format_str = output_data_class.to_json_signature(exclude=[\"thought\"])\n", + "print(prompt(output_format_str=output_format_str))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "p3kPMhWaUYT1", + "outputId": "a3de7117-c3eb-404e-e2e7-8a5187b32f6b" + }, + "execution_count": 14, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "You have these tools available:\n", + "\n", + "{\n", + " \"action\": \"FuncName() Valid function call expression. Example: \\\"FuncName(a=1, b=2)\\\" Follow the data type specified in the function parameters.e.g. 
for Type object with x,y properties, use \\\"ObjectType(x=1, y=2) (str) (required)\"\n", + "}\n", + "\n", + "\n", + "User: None\n", + "You:\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.components.output_parsers import JsonOutputParser\n", + "\n", + "func_parser = JsonOutputParser(data_class=Function, exclude_fields=[\"thought\", \"args\"])\n", + "instructions = func_parser.format_instructions()\n", + "print(instructions)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "MvGyoUmMUatR", + "outputId": "e819866b-f6e3-4c88-f9f1-22d725a28865" + }, + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Your output should be formatted as a standard JSON instance with the following schema:\n", + "```\n", + "{\n", + " \"name\": \"The name of the function (str) (optional)\",\n", + " \"kwargs\": \"The keyword arguments of the function (Optional[Dict[str, object]]) (optional)\"\n", + "}\n", + "```\n", + "-Make sure to always enclose the JSON output in triple backticks (```). 
Please do not add anything other than valid JSON output!\n", + "-Use double quotes for the keys and string values.\n", + "-DO NOT mistaken the \"properties\" and \"type\" in the schema as the actual fields in the JSON output.\n", + "-Follow the JSON formatting conventions.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Function Output Format" + ], + "metadata": { + "id": "9W7DiGcpUme5" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.core.generator import Generator\n", + "from adalflow.core.types import ModelClientType\n", + "\n", + "model_kwargs = {\"model\": \"gpt-4o-mini\"}\n", + "prompt_kwargs = {\n", + " \"tools\": tool_manager.yaml_definitions,\n", + " \"output_format_str\": func_parser.format_instructions(),\n", + "}\n", + "generator = Generator(\n", + " model_client=ModelClientType.OPENAI(),\n", + " model_kwargs=model_kwargs,\n", + " template=template,\n", + " prompt_kwargs=prompt_kwargs,\n", + " output_processors=func_parser,\n", + ")" + ], + "metadata": { + "id": "z5tNhoruUp6o" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "queries = [\n", + " \"add 2 and 3\",\n", + " \"search for something\",\n", + " \"add points (1, 2) and (3, 4)\",\n", + " \"sum numpy array with arr = np.array([[1, 2], [3, 4]])\",\n", + " \"multiply 2 with local variable x\",\n", + " \"divide 2 by 3\",\n", + " \"Add 5 to variable y\",\n", + "]\n", + "\n", + "for idx, query in enumerate(queries):\n", + " prompt_kwargs = {\"input_str\": query}\n", + " print(f\"\\n{idx} Query: {query}\")\n", + " print(f\"{'-'*50}\")\n", + " try:\n", + " result = generator(prompt_kwargs=prompt_kwargs)\n", + " # print(f\"LLM raw output: {result.raw_response}\")\n", + " func = Function.from_dict(result.data)\n", + " print(f\"Function: {func}\")\n", + " func_output = tool_manager.execute_func(func)\n", + " print(f\"Function output: {func_output}\")\n", + " except Exception as e:\n", + " print(\n", + " f\"Failed to 
execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "9DCukn1SUs_x", + "outputId": "dcfd952c-0699-4d79-ee6d-a59373e3c75d" + }, + "execution_count": 21, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "0 Query: add 2 and 3\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='add', args=[], kwargs={'a': 2, 'b': 3})\n", + "Function output: FunctionOutput(name='add', input=Function(thought=None, name='add', args=(), kwargs={'a': 2, 'b': 3}), parsed_input=None, output=5, error=None)\n", + "\n", + "1 Query: search for something\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='search', args=[], kwargs={'query': 'something'})\n", + "Function output: FunctionOutput(name='search', input=Function(thought=None, name='search', args=(), kwargs={'query': 'something'}), parsed_input=None, output=['result1something', 'result2something'], error=None)\n", + "\n", + "2 Query: add points (1, 2) and (3, 4)\n", + "--------------------------------------------------\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "ERROR:adalflow.core.func_tool:Error at calling : 'dict' object has no attribute 'x'\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Function: Function(thought=None, name='add_points', args=[], kwargs={'p1': {'x': 1, 'y': 2}, 'p2': {'x': 3, 'y': 4}})\n", + "Function output: FunctionOutput(name='add_points', input=Function(thought=None, name='add_points', args=(), kwargs={'p1': {'x': 1, 'y': 2}, 'p2': {'x': 3, 'y': 4}}), parsed_input=None, output=None, error=\"'dict' object has no attribute 'x'\")\n", + "\n", + "3 Query: sum numpy array with arr = np.array([[1, 2], [3, 4]])\n", + "--------------------------------------------------\n", + "Function: 
Function(thought=None, name='numpy_sum', args=[], kwargs={'arr': [[1, 2], [3, 4]]})\n", + "Function output: FunctionOutput(name='numpy_sum', input=Function(thought=None, name='numpy_sum', args=(), kwargs={'arr': [[1, 2], [3, 4]]}), parsed_input=None, output=10, error=None)\n", + "\n", + "4 Query: multiply 2 with local variable x\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='multiply', args=[], kwargs={'a': 2, 'b': 'x'})\n", + "Function output: FunctionOutput(name='multiply', input=Function(thought=None, name='multiply', args=(), kwargs={'a': 2, 'b': 'x'}), parsed_input=None, output='xx', error=None)\n", + "\n", + "5 Query: divide 2 by 3\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='divide', args=[], kwargs={'a': 2.0, 'b': 3.0})\n", + "Function output: FunctionOutput(name='divide', input=Function(thought=None, name='divide', args=(), kwargs={'a': 2.0, 'b': 3.0}), parsed_input=None, output=0.6666666666666666, error=None)\n", + "\n", + "6 Query: Add 5 to variable y\n", + "--------------------------------------------------\n", + "Function: Function(thought=None, name='add', args=[], kwargs={'a': 5, 'b': 'y'})\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "ERROR:adalflow.core.func_tool:Error at calling : unsupported operand type(s) for +: 'int' and 'str'\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Function output: FunctionOutput(name='add', input=Function(thought=None, name='add', args=(), kwargs={'a': 5, 'b': 'y'}), parsed_input=None, output=None, error=\"unsupported operand type(s) for +: 'int' and 'str'\")\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## FunctionExpression Output Format" + ], + "metadata": { + "id": "O-sBTPATUwsD" + } + }, + { + "cell_type": "code", + "source": [ + "tool_manager = ToolManager(\n", + " tools=functions,\n", + " 
additional_context={\"x\": x, \"y\": 0, \"np.array\": np.array, \"np\": np},\n", + ")\n", + "func_parser = JsonOutputParser(data_class=FunctionExpression)" + ], + "metadata": { + "id": "TVRZ44N1UyWg" + }, + "execution_count": 22, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "context = r\"\"\"\n", + "Your function expression also have access to these context:\n", + "{{context_str}}\n", + "\n", + "\"\"\"" + ], + "metadata": { + "id": "9h47p4XpU2BC" + }, + "execution_count": 23, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "async def run_async_function_call(self, generator, tool_manager):\n", + " answers = []\n", + " start_time = time.time()\n", + " tasks = []\n", + " for idx, query in enumerate(queries):\n", + " tasks.append(self.process_query(idx, query, generator, tool_manager))\n", + "\n", + " results = await asyncio.gather(*tasks)\n", + " answers.extend(results)\n", + " end_time = time.time()\n", + " print(f\"Total time taken: {end_time - start_time :.2f} seconds\")\n", + " return answers\n", + "\n", + "async def process_query(self, idx, query, generator, tool_manager: ToolManager):\n", + " print(f\"\\n{idx} Query: {query}\")\n", + " print(f\"{'-'*50}\")\n", + " try:\n", + " result = generator(prompt_kwargs={\"input_str\": query})\n", + " func_expr = FunctionExpression.from_dict(result.data)\n", + " print(f\"Function_expr: {func_expr}\")\n", + " func = tool_manager.parse_func_expr(func_expr)\n", + " func_output = await tool_manager.execute_func_async(func)\n", + " print(f\"Function output: {func_output}\")\n", + " return func_output\n", + " except Exception as e:\n", + " print(\n", + " f\"Failed to execute the function for query: {query}, func: {result.data}, error: {e}\"\n", + " )\n", + " return None" + ], + "metadata": { + "id": "n9Qq7wcOU4X9" + }, + "execution_count": 24, + "outputs": [] + } + ] +} diff --git a/notebooks/tutorials/adalflow_logger.ipynb b/notebooks/tutorials/adalflow_logger.ipynb new file mode 100644 index 
00000000..135d6450 --- /dev/null +++ b/notebooks/tutorials/adalflow_logger.ipynb @@ -0,0 +1,242 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# AdalFlow RAG Playbook example\n", + "\n", + "There are different patterns to build a RAG:\n", + "\n", + "- RAG with separate data process pipeline and a RAG task pipeline. This fits into a scenario where there is lots of data in production database, and we preprocess the data to embeddings and then we build a RAG task pipeline that retrieves context in multiple stages.\n", + "\n", + "- RAG with dynamic data access and caching the embedding dynamically in a local storage.\n", + "\n", + "Here we will have a look at an example with a local DB using FAISS" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()\n" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-4c_AGBt3PlR", + "outputId": "275b050a-ce64-4b40-a5f9-4ccc12d92add" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + 
"name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "Please enter your GROQ API key: ··········\n", + "API keys have been set.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Design\n", + "\n", + "Some libraries may use hooks [2] and callbacks [3] [4], or advanced web-based debugging tools [5] [6] [7]. Hooks and callbacks are conceptually similar in that they both allow users to execute custom code at specific points during the execution of a program. Both provide mechanisms to inject additional behavior in response to certain events or conditions, without modifying the core logic. PyTorch defines, registers, and executes hooks mainly in its base classes like nn.Module and Tensor, without polluting the functional and user-facing APIs.\n", + "\n", + "At this point, our objectives are:\n", + "\n", + "1. Maximize debugging capabilities via the simple logging module to keep the source code clean.\n", + "\n", + "2. Additionally, as we can’t always control the outputs of generators, we will provide customized loggers and tracers (drop-in decorators) for them, which we will explain in Tracing. This will not break the first objective.\n", + "\n", + "In the future, when we have more complex requirements from users, we will consider adding hooks/callbacks, but we will do it in a way that keeps the functional and user-facing APIs clean." 
+ ], + "metadata": { + "id": "4NztjiLR_EQE" + } + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "\n", + "log = logging.getLogger(__name__)" + ], + "metadata": { + "id": "d2H1vYoC_F-g" + }, + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.utils.logger import get_logger\n", + "\n", + "\n", + "root_logger = get_logger()" + ], + "metadata": { + "id": "e2GxAapG_TJH" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from adalflow.utils.logger import printc\n", + "\n", + "printc(\"All logging examples are done. Feeling green!\", color=\"green\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Yk4oiBFE_asG", + "outputId": "470e30dc-1b31-40c1-9e48-30754ae54b45" + }, + "execution_count": 5, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32m2024-11-28 13:39:41 - [:3:] - All logging examples are done. Feeling green!\u001b[0m\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Set up all logs in one file\n", + "\n", + "Assume your source code is at src/task.py. 
You can log simply by:" + ], + "metadata": { + "id": "B8lmlT_9_nVP" + } + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "\n", + "log = logging.getLogger(__name__)\n", + "\n", + "class Task:\n", + " def __init__(self):\n", + " log.info(\"This is a user program child logger\")" + ], + "metadata": { + "id": "o_Ru1myM_c-J" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import logging\n", + "from adalflow.utils.logger import get_logger\n", + "\n", + "root_logger = get_logger(level=\"DEBUG\", save_dir=\"./logs\") # log to ./logs/lib.log\n", + "\n", + "# run code from the library components such as generator\n", + "# ....\n", + "\n", + "root_logger.info(\"This is the log in the main file\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "o7YPjEZk_ehg", + "outputId": "ad0f58e9-6f5c-4d00-e737-2fa1ad5ebd85" + }, + "execution_count": 7, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2024-11-28 13:39:46 - - INFO - [:9:] - This is the log in the main file\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Separate library and application logs" + ], + "metadata": { + "id": "Db1_Ob3X_gpe" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.utils.logger import get_logger\n", + "\n", + "app_logger = get_logger(name=\"my_app\", level=\"DEBUG\", save_dir=\"./logs\") # log to ./logs/my_app.log\n", + "\n", + "class Task:\n", + " def __init__(self):\n", + " app_logger.info(\"This is a user program child logger\")" + ], + "metadata": { + "id": "rQWuFnUc_gNm" + }, + "execution_count": 8, + "outputs": [] + } + ] +} diff --git a/notebooks/tutorials/adalflow_rag_optimization.ipynb b/notebooks/tutorials/adalflow_rag_optimization.ipynb new file mode 100644 index 00000000..7ae0b152 --- /dev/null +++ b/notebooks/tutorials/adalflow_rag_optimization.ipynb @@ -0,0 +1,495 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + 
"metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# 🤗 Welcome to AdalFlow!\n", + "## The PyTorch library to auto-optimize any LLM task pipelines\n", + "\n", + "Thanks for trying us out, we're here to provide you with the best LLM application development experience you can dream of 😊 any questions or concerns you may have, [come talk to us on discord,](https://discord.gg/ezzszrRZvT) we're always here to help! ⭐ Star us on Github ⭐\n", + "\n", + "\n", + "# Quick Links\n", + "\n", + "Github repo: https://github.com/SylphAI-Inc/AdalFlow\n", + "\n", + "Full Tutorials: https://adalflow.sylph.ai/index.html#.\n", + "\n", + "Deep dive on each API: check out the [developer notes](https://adalflow.sylph.ai/tutorials/index.html).\n", + "\n", + "Common use cases along with the auto-optimization: check out [Use cases](https://adalflow.sylph.ai/use_cases/index.html).\n", + "\n", + "## 📖 Outline\n", + "\n", + "In this tutorial, we will cover the auto-optimization of a standard RAG:\n", + "\n", + "- Introducing HotPotQA dataset and HotPotQAData class.\n", + "\n", + "- Convert Dspy’s Retriever to AdalFlow’s Retriever for easy comparison.\n", + "\n", + "- Build the standard RAG with Retriever and Generator components.\n", + "\n", + "- Learn how to connect the output-input between components to enable auto-text-grad optimization." + ], + "metadata": { + "id": "xHF95Kr4CzGq" + } + }, + { + "cell_type": "markdown", + "source": [ + "\n", + "# Installation\n", + "\n", + "1. Use `pip` to install the `adalflow` Python package. We will need `openai`, `groq` from the extra packages.\n", + "\n", + " ```bash\n", + " pip install adalflow[openai,groq]\n", + " ```\n", + "2. Setup `openai` and `groq` API key in the environment variables\n", + "\n", + "You can choose to use a different client. 
You can import the model client you prefer. We support `Anthropic`, `Cohere`, `Google`, `GROQ`, `OpenAI`, `Transformer` and more in development. We will use OpenAI here as an example. Please refer to our [full installation guide](https://adalflow.sylph.ai/get_started/installation.html)." + ], + "metadata": { + "id": "Kof5M6DRaKhh" + } + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "id": "tAp3eDjOCma1" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai] # also install the package for the model client you'll use\n", + "!pip install dspy\n", + "!pip install datasets\n", + "clear_output()" + ] + }, + { + "cell_type": "markdown", + "source": [ + "## Set Environment Variables\n", + "\n", + "Run the following code and pass your API key.\n", + "\n", + "Note: for normal `.py` projects, follow our [official installation guide](https://adalflow.sylph.ai/get_started/installation.html).\n", + "\n", + "*Go to [OpenAI](https://platform.openai.com/docs/introduction) to get API keys if you don't already have one.*" + ], + "metadata": { + "id": "KapUyHMM07pJ" + } + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "\n", + "print(\"API keys have been set.\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "ONfzF9Puzdd_", + "outputId": "5fc0cd30-9ae7-443a-c06c-31e9edeafd69" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "API keys have been set.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "import dspy\n", + "import re\n", + "from 
typing import List, Union, Optional, Dict, Callable, Any, Tuple\n", + "from dataclasses import dataclass, field\n", + "import adalflow as adal\n", + "from adalflow.optim.parameter import Parameter, ParameterType\n", + "from adalflow.datasets.hotpot_qa import HotPotQA, HotPotQAData\n", + "from adalflow.datasets.types import Example\n", + "from adalflow.core.types import RetrieverOutput\n", + "from adalflow.core import Component, Generator\n", + "from adalflow.core.retriever import Retriever\n", + "from adalflow.core.component import fun_to_component\n", + "from adalflow.components.model_client.openai_client import OpenAIClient" + ], + "metadata": { + "id": "aE3I05BqOmd7" + }, + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "\n", + "gpt_4o_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-4o-mini\",\n", + " \"max_tokens\": 2000,\n", + " },\n", + "}\n", + "\n", + "gpt_3_model = {\n", + " \"model_client\": OpenAIClient(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"max_tokens\": 2000,\n", + " },\n", + "}" + ], + "metadata": { + "id": "cqUUoua9fUxQ" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "def load_datasets():\n", + "\n", + " trainset = HotPotQA(split=\"train\", size=20)\n", + " valset = HotPotQA(split=\"val\", size=50)\n", + " testset = HotPotQA(split=\"test\", size=50)\n", + " print(f\"trainset, valset: {len(trainset)}, {len(valset)}, example: {trainset[0]}\")\n", + " return trainset, valset, testset\n", + "\n", + "\n", + "@dataclass\n", + "class AnswerData(adal.DataClass):\n", + " reasoning: str = field(\n", + " metadata={\"desc\": \"The reasoning to produce the answer\"},\n", + " )\n", + " answer: str = field(\n", + " metadata={\"desc\": \"The answer you produced\"},\n", + " )\n", + "\n", + " __output_fields__ = [\"reasoning\", \"answer\"]\n", + "\n", + "\n", + "dataset = 
HotPotQA(split=\"train\", size=20)\n", + "print(dataset[0], type(dataset[0]))\n", + "\n", + "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "0irHeHUkOmL8", + "outputId": "61f778a2-9ec1-4fda-daa2-bcc7f31baa78" + }, + "execution_count": 22, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\") \n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "HotPotQAData(id='5a8b57f25542995d1e6f1371', question='Were Scott Derrickson and Ed Wood of the same nationality?', answer='yes', gold_titles=\"{'Scott Derrickson', 'Ed Wood'}\")" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ] + }, + { + "cell_type": "code", + "source": [ + "class DspyRetriever(adal.Retriever):\n", + " def __init__(self, top_k: int = 3):\n", + " super().__init__()\n", + " self.top_k = top_k\n", + " self.dspy_retriever = dspy.Retrieve(k=top_k)\n", + "\n", + " def call(self, input: str, top_k: Optional[int] = None) -> List[adal.RetrieverOutput]:\n", + "\n", + " k = top_k or self.top_k\n", + "\n", + " output = self.dspy_retriever(query_or_queries=input, k=k)\n", + " final_output: List[RetrieverOutput] = []\n", + " documents = output.passages\n", + "\n", + " final_output.append(\n", + " RetrieverOutput(\n", + " query=input,\n", + " documents=documents,\n", + " doc_indices=[],\n", + " )\n", + " )\n", + " return final_output\n", + "\n", + "def test_retriever():\n", + " question = \"How many storeys are in the castle that David Gregory inherited?\"\n", + " retriever = DspyRetriever(top_k=3)\n", + " retriever_out = retriever(input=question)\n", + " 
print(f\"retriever_out: {retriever_out}\")\n", + "\n", + "\n", + "def call(\n", + " self, question: str, id: Optional[str] = None\n", + " ) -> Union[adal.GeneratorOutput, adal.Parameter]:\n", + " prompt_kwargs = self._prepare_input(question)\n", + " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", + " return output\n", + "\n", + "\n", + "def call(self, question: str, id: str = None) -> adal.GeneratorOutput:\n", + " if self.training:\n", + " raise ValueError(\n", + " \"This component is not supposed to be called in training mode\"\n", + " )\n", + "\n", + " retriever_out = self.retriever.call(input=question)\n", + "\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x[0].documents) if x and x[0] and x[0].documents else \"\"\n", + " )\n", + " retrieved_context = successor_map_fn(retriever_out)\n", + "\n", + " prompt_kwargs = {\n", + " \"context\": retrieved_context,\n", + " \"question\": question,\n", + " }\n", + "\n", + " output = self.llm.call(\n", + " prompt_kwargs=prompt_kwargs,\n", + " id=id,\n", + " )\n", + " return output\n", + "\n", + "\n", + "def forward(self, question: str, id: str = None) -> adal.Parameter:\n", + " if not self.training:\n", + " raise ValueError(\"This component is not supposed to be called in eval mode\")\n", + " retriever_out = self.retriever.forward(input=question)\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x.data[0].documents)\n", + " if x.data and x.data[0] and x.data[0].documents\n", + " else \"\"\n", + " )\n", + " retriever_out.add_successor_map_fn(successor=self.llm, map_fn=successor_map_fn)\n", + " generator_out = self.llm.forward(\n", + " prompt_kwargs={\"question\": question, \"context\": retriever_out}, id=id\n", + " )\n", + " return generator_out\n", + "\n", + "\n", + "def bicall(\n", + " self, question: str, id: str = None\n", + ") -> Union[adal.GeneratorOutput, adal.Parameter]:\n", + " \"\"\"You can also combine both the forward and call in the same function.\n", 
+ " Supports both training and eval mode by using __call__ for GradComponents\n", + " like Retriever and Generator\n", + " \"\"\"\n", + " retriever_out = self.retriever(input=question)\n", + " if isinstance(retriever_out, adal.Parameter):\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x.data[0].documents)\n", + " if x.data and x.data[0] and x.data[0].documents\n", + " else \"\"\n", + " )\n", + " retriever_out.add_successor_map_fn(\n", + " successor=self.llm, map_fn=successor_map_fn\n", + " )\n", + " else:\n", + " successor_map_fn = lambda x: ( # noqa E731\n", + " \"\\n\\n\".join(x[0].documents) if x and x[0] and x[0].documents else \"\"\n", + " )\n", + " retrieved_context = successor_map_fn(retriever_out)\n", + " prompt_kwargs = {\n", + " \"context\": retrieved_context,\n", + " \"question\": question,\n", + " }\n", + " output = self.llm(prompt_kwargs=prompt_kwargs, id=id)\n", + " return output\n", + "\n", + "task_desc_str = r\"\"\"Answer questions with short factoid answers.\n", + "\n", + "You will receive context(may contain relevant facts) and a question.\n", + "Think step by step.\"\"\"\n", + "\n", + "\n", + "class VanillaRAG(adal.GradComponent):\n", + " def __init__(self, passages_per_hop=3, model_client=None, model_kwargs=None):\n", + " super().__init__()\n", + "\n", + " self.passages_per_hop = passages_per_hop\n", + "\n", + " self.retriever = DspyRetriever(top_k=passages_per_hop)\n", + " self.llm_parser = adal.DataClassParser(\n", + " data_class=AnswerData, return_data_class=True, format_type=\"json\"\n", + " )\n", + " self.llm = Generator(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " prompt_kwargs={\n", + " \"task_desc_str\": adal.Parameter(\n", + " data=task_desc_str,\n", + " role_desc=\"Task description for the language model\",\n", + " param_type=adal.ParameterType.PROMPT,\n", + " ),\n", + " \"few_shot_demos\": adal.Parameter(\n", + " data=None,\n", + " requires_opt=True,\n", + " role_desc=\"To 
provide few shot demos to the language model\",\n", + " param_type=adal.ParameterType.DEMOS,\n", + " ),\n", + " \"output_format_str\": self.llm_parser.get_output_format_str(),\n", + " },\n", + " template=answer_template,\n", + " output_processors=self.llm_parser,\n", + " use_cache=True,\n", + " )\n", + "\n", + "\n", + "class VallinaRAGAdal(adal.AdalComponent):\n", + " def __init__(\n", + " self,\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + " backward_engine_model_config: Dict | None = None,\n", + " teacher_model_config: Dict | None = None,\n", + " text_optimizer_model_config: Dict | None = None,\n", + " ):\n", + " task = VanillaRAG(\n", + " model_client=model_client,\n", + " model_kwargs=model_kwargs,\n", + " passages_per_hop=3,\n", + " )\n", + " eval_fn = AnswerMatchAcc(type=\"fuzzy_match\").compute_single_item\n", + " loss_fn = adal.EvalFnToTextLoss(\n", + " eval_fn=eval_fn, eval_fn_desc=\"fuzzy_match: 1 if str(y) in str(y_gt) else 0\"\n", + " )\n", + " super().__init__(\n", + " task=task,\n", + " eval_fn=eval_fn,\n", + " loss_fn=loss_fn,\n", + " backward_engine_model_config=backward_engine_model_config,\n", + " teacher_model_config=teacher_model_config,\n", + " text_optimizer_model_config=text_optimizer_model_config,\n", + " )\n", + "\n", + " # tell the trainer how to call the task\n", + " def prepare_task(self, sample: HotPotQAData) -> Tuple[Callable[..., Any], Dict]:\n", + " if self.task.training:\n", + " return self.task.forward, {\"question\": sample.question, \"id\": sample.id}\n", + " else:\n", + " return self.task.call, {\"question\": sample.question, \"id\": sample.id}\n", + "\n", + "\n", + " # eval mode: get the generator output, directly engage with the eval_fn\n", + " def prepare_eval(self, sample: HotPotQAData, y_pred: adal.GeneratorOutput) -> float:\n", + " y_label = \"\"\n", + " if y_pred and y_pred.data and y_pred.data.answer:\n", + " y_label = y_pred.data.answer\n", + " return self.eval_fn, {\"y\": y_label, \"y_gt\": 
sample.answer}\n", + "\n", + "\n", + " # train mode: get the loss and get the data from the full_response\n", + " def prepare_loss(self, sample: HotPotQAData, pred: adal.Parameter):\n", + " # prepare gt parameter\n", + " y_gt = adal.Parameter(\n", + " name=\"y_gt\",\n", + " data=sample.answer,\n", + " eval_input=sample.answer,\n", + " requires_opt=False,\n", + " )\n", + "\n", + " # pred's full_response is the output of the task pipeline which is GeneratorOutput\n", + " pred.eval_input = (\n", + " pred.full_response.data.answer\n", + " if pred.full_response\n", + " and pred.full_response.data\n", + " and pred.full_response.data.answer\n", + " else \"\"\n", + " )\n", + " return self.loss_fn, {\"kwargs\": {\"y\": pred, \"y_gt\": y_gt}}\n", + "\n", + "def train_diagnose(\n", + " model_client: adal.ModelClient,\n", + " model_kwargs: Dict,\n", + ") -> Dict:\n", + "\n", + " trainset, valset, testset = load_datasets()\n", + "\n", + " adal_component = VallinaRAGAdal(\n", + " model_client,\n", + " model_kwargs,\n", + " backward_engine_model_config=gpt_4o_model,\n", + " teacher_model_config=gpt_3_model,\n", + " text_optimizer_model_config=gpt_3_model,\n", + " )\n", + " trainer = adal.Trainer(adaltask=adal_component)\n", + " trainer.diagnose(dataset=trainset, split=\"train\")\n", + " # trainer.diagnose(dataset=valset, split=\"val\")\n", + " # trainer.diagnose(dataset=testset, split=\"test\")\n" + ], + "metadata": { + "id": "ZZIEtZYHNVjo" + }, + "execution_count": 23, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "# Issues and feedback\n", + "\n", + "If you encounter any issues, please report them here: [GitHub Issues](https://github.com/SylphAI-Inc/LightRAG/issues).\n", + "\n", + "For feedback, you can use either the [GitHub discussions](https://github.com/SylphAI-Inc/LightRAG/discussions) or [Discord](https://discord.gg/ezzszrRZvT)." 
+ ], + "metadata": { + "id": "AmkbyxmuruUu" + } + } + ] +} diff --git a/notebooks/tutorials/adalflow_rag_playbook.ipynb b/notebooks/tutorials/adalflow_rag_playbook.ipynb new file mode 100644 index 00000000..27c6bda0 --- /dev/null +++ b/notebooks/tutorials/adalflow_rag_playbook.ipynb @@ -0,0 +1,522 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# AdalFlow RAG Playbook example\n", + "\n", + "There are different patterns to build a RAG:\n", + "\n", + "- RAG with separate data process pipeline and a RAG task pipeline. This fits into a scenario where there is lots of data in production database, and we preprocess the data to embeddings and then we build a RAG task pipeline that retrieves context in multiple stages.\n", + "\n", + "- RAG with dynamic data access and caching the embedding dynamically in a local storage.\n", + "\n", + "Here we will have a look at an example with a local DB using FAISS" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()\n" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")\n" + ], + "metadata": { + "colab": { + 
"base_uri": "https://localhost:8080/" + }, + "id": "-4c_AGBt3PlR", + "outputId": "a36f157b-0b18-4f3d-d5a8-09aa94743922" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "Please enter your GROQ API key: ··········\n", + "API keys have been set.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from typing import Any, List, Optional\n", + "import os\n", + "from adalflow.core import Component, Generator, Embedder, Sequential\n", + "from adalflow.core.types import Document, ModelClientType\n", + "from adalflow.core.string_parser import JsonParser\n", + "from adalflow.core.db import LocalDB\n", + "from adalflow.utils import setup_env\n", + "from adalflow.components.retriever.faiss_retriever import FAISSRetriever\n", + "from adalflow.components.data_process import (\n", + " RetrieverOutputToContextStr,\n", + " ToEmbeddings,\n", + " TextSplitter,\n", + ")\n", + "from adalflow.utils.global_config import get_adalflow_default_root_path\n" + ], + "metadata": { + "id": "V9LsGDnm3RbV" + }, + "execution_count": 4, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "configs = {\n", + " \"embedder\": {\n", + " \"batch_size\": 100,\n", + " \"model_kwargs\": {\n", + " \"model\": \"text-embedding-3-small\",\n", + " \"dimensions\": 256,\n", + " \"encoding_format\": \"float\",\n", + " },\n", + " },\n", + " \"retriever\": {\n", + " \"top_k\": 5,\n", + " },\n", + " \"generator\": {\n", + " \"model_client\": ModelClientType.OPENAI(),\n", + " \"model_kwargs\": {\n", + " \"model\": \"gpt-3.5-turbo\",\n", + " \"temperature\": 0.3,\n", + " \"stream\": False,\n", + " },\n", + " },\n", + " \"text_splitter\": {\n", + " \"split_by\": \"word\",\n", + " \"chunk_size\": 400,\n", + " \"chunk_overlap\": 200,\n", + " },\n", + "}\n" + ], + "metadata": { + "id": "kWGTZxrw3Tli" + }, + "execution_count": 5, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + 
"def prepare_data_pipeline():\n", + " splitter = TextSplitter(**configs[\"text_splitter\"])\n", + " embedder = Embedder(\n", + " model_client=ModelClientType.OPENAI(),\n", + " model_kwargs=configs[\"embedder\"][\"model_kwargs\"],\n", + " )\n", + " embedder_transformer = ToEmbeddings(\n", + " embedder=embedder, batch_size=configs[\"embedder\"][\"batch_size\"]\n", + " )\n", + " data_transformer = Sequential(splitter, embedder_transformer)\n", + " return data_transformer\n", + "\n", + "def prepare_database_with_index(\n", + " docs: List[Document],\n", + " index_file: str = \"index.faiss\",\n", + " index_path: Optional[str] = None,\n", + "):\n", + " index_path = index_path or get_adalflow_default_root_path()\n", + " index_path = os.path.join(index_path, index_file)\n", + " if os.path.exists(index_path):\n", + " return None\n", + " db = LocalDB()\n", + " db.load(docs)\n", + " data_transformer = prepare_data_pipeline()\n", + " db.transform(data_transformer, key=\"data_transformer\")\n", + " db.save_state(index_path)\n" + ], + "metadata": { + "id": "1QE0PCKs4BLz" + }, + "execution_count": 6, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "RAG_PROMPT_TEMPLATE = r\"\"\"\n", + "{{task_desc}}\n", + "\n", + "\n", + "{{input_str}}\n", + "{{context_str}}\n", + "\n", + "\"\"\"\n", + "\n", + "rag_prompt_task_desc = r\"\"\"\n", + "You are a helpful assistant.\n", + "\n", + "Your task is to answer the query that may or may not come with context information.\n", + "When context is provided, you should stick to the context and less on your prior knowledge to answer the query.\n", + "\n", + "Output JSON format:\n", + "{\n", + " \"answer\": \"The answer to the query\",\n", + "}\"\"\"\n", + "\n", + "class RAG(Component):\n", + " def __init__(\n", + " self,\n", + " index_file: str = \"index.faiss\",\n", + " index_path: Optional[str] = None,\n", + " configs: dict = configs,\n", + " ):\n", + " super().__init__()\n", + "\n", + " index_path = index_path or 
get_adalflow_default_root_path()\n", + " index_path = os.path.join(index_path, index_file)\n", + " self.index_path = index_path\n", + "\n", + " if not os.path.exists(index_path):\n", + " self.db = LocalDB()\n", + " self.register_data_transformer()\n", + " self.transformed_docs = []\n", + " else:\n", + " self.db = LocalDB.load_state(index_path)\n", + " self.transformed_docs = self.db.get_transformed_data(\"data_transformer\")\n", + "\n", + " embedder = Embedder(\n", + " model_client=ModelClientType.OPENAI(),\n", + " model_kwargs=configs[\"embedder\"][\"model_kwargs\"],\n", + " )\n", + "\n", + " self.retriever = FAISSRetriever(\n", + " **configs[\"retriever\"],\n", + " embedder=embedder,\n", + " documents=self.transformed_docs,\n", + " document_map_func=lambda doc: doc.vector,\n", + " )\n", + " self.retriever_output_processors = RetrieverOutputToContextStr(deduplicate=True)\n", + "\n", + " self.generator = Generator(\n", + " **configs[\"generator\"],\n", + " prompt_kwargs={\"task_desc_str\": rag_prompt_task_desc},\n", + " output_processors=JsonParser(),\n", + " )\n", + "\n", + " def register_data_transformer(self):\n", + " if \"data_transformer\" not in self.db.get_transformer_keys():\n", + " data_transformer = prepare_data_pipeline()\n", + " self.db.register_transformer(data_transformer, key=\"data_transformer\")\n", + " print(\"Data transformer registered\")\n", + "\n", + " def add_documents(self, docs: List[Document]):\n", + " self.db.extend(docs, apply_transformer=True)\n", + " self.db.save_state(self.index_path)\n", + "\n", + " def get_transformed_docs(self, filter_func=None):\n", + " return self.db.get_transformed_data(\"data_transformer\", filter_func)\n", + "\n", + " def prepare_retriever(self, filter_func=None):\n", + " self.transformed_docs = self.get_transformed_docs(filter_func)\n", + " self.retriever.build_index_from_documents(\n", + " self.transformed_docs, document_map_func=lambda doc: doc.vector\n", + " )\n", + "\n", + " def generate(self, query: str, 
context: Optional[str] = None) -> Any:\n", + " if not self.generator:\n", + " raise ValueError(\"Generator is not set\")\n", + " prompt_kwargs = {\"context_str\": context, \"input_str\": query}\n", + " response = self.generator(prompt_kwargs=prompt_kwargs)\n", + " return response, context\n", + "\n", + " def call(self, query: str, verbose: bool = False) -> Any:\n", + " retrieved_documents = self.retriever(query)\n", + " for i, retriever_output in enumerate(retrieved_documents):\n", + " retrieved_documents[i].documents = [\n", + " self.transformed_docs[doc_index]\n", + " for doc_index in retriever_output.doc_indices\n", + " ]\n", + " if verbose:\n", + " print(f\"retrieved_documents: \\n {retrieved_documents}\")\n", + "\n", + " context_str = self.retriever_output_processors(retrieved_documents)\n", + " if verbose:\n", + " print(f\"context_str: \\n {context_str}\")\n", + "\n", + " return self.generate(query, context=context_str)\n" + ], + "metadata": { + "id": "6Mu1HXhy4DIG" + }, + "execution_count": 7, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "# Prepare initial documents\n", + "doc1 = Document(\n", + " meta_data={\"title\": \"Li Yin's profile\"},\n", + " text=\"My name is Li Yin, I love rock climbing\" + \"lots of nonsense text\" * 500,\n", + " id=\"doc1\",\n", + ")\n", + "doc2 = Document(\n", + " meta_data={\"title\": \"Interviewing Li Yin\"},\n", + " text=\"lots of more nonsense text\" * 250\n", + " + \"Li Yin is an AI researcher and a software engineer\"\n", + " + \"lots of more nonsense text\" * 250,\n", + " id=\"doc2\",\n", + ")\n", + "\n", + "# Prepare the database (only runs once)\n", + "prepare_database_with_index([doc1, doc2], index_file=\"index.faiss\")\n", + "\n", + "# Initialize RAG\n", + "rag = RAG(index_file=\"index.faiss\")\n", + "print(rag)\n", + "\n", + "# Query the RAG system\n", + "query = \"What is Li Yin's hobby and profession?\"\n", + "response = rag.call(query)\n", + "print(f\"Response: {response}\")\n" + ], + "metadata": 
{ + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sPnx4PY34D1j", + "outputId": "f66d6f1a-70bf-40e9-a160-591fcfdcbed3" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Splitting Documents in Batches: 100%|██████████| 1/1 [00:00<00:00, 109.58it/s]\n", + "Batch embedding documents: 100%|██████████| 1/1 [00:01<00:00, 1.33s/it]\n", + "Adding embeddings to documents from batch: 1it [00:00, 6462.72it/s]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saved the state of the DB to /root/.adalflow/index.faiss\n", + "RAG(\n", + " (db): LocalDB(name='LocalDB', items=[Document(id=doc1, text='My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...', meta_data={'title': \"Li Yin's profile\"}, vector=[], parent_doc_id=None, order=None, score=None), Document(id=doc2, text='lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...', meta_data={'title': 'Interviewing Li Yin'}, vector=[], parent_doc_id=None, order=None, score=None)], transformed_items={'data_transformer': [Document(id=59f7f6ad-eb4c-4fdb-8d04-6dba1ee439bc, text='My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=0, score=None), Document(id=2486725e-47ff-4978-84fc-7937778b0e45, text='textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nons...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=1, score=None), Document(id=96993047-4cff-436d-b8ac-e02da4ae7fec, text='nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlot...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=2, score=None), 
Document(id=77742f90-0c0c-4143-802d-3557577d4935, text='of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense text...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=3, score=None), Document(id=81ba770e-c5f2-4dc5-98fc-349ab9143ef9, text='textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nons...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=4, score=None), Document(id=dff6f5e3-5929-4e3c-ba5f-79f5116c1fa3, text='nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlot...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=5, score=None), Document(id=1e7888e2-0783-40b2-ab85-067e3ba71fad, text='of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense text...', meta_data={'title': \"Li Yin's profile\"}, vector='len: 256', parent_doc_id=doc1, order=6, score=None), Document(id=2deb945f-dfb9-46d3-a60b-dae77e2f5fd8, text='lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=0, score=None), Document(id=3d9c21aa-d583-47fe-b143-710b4bc4a8b2, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=1, score=None), Document(id=a318ffea-2542-4493-ab2d-03d10a94e860, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=2, score=None), Document(id=b5c05820-7545-43a8-a4a3-691c5ccc79d1, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=3, score=None), Document(id=a739cd3e-8826-4e74-afa9-499498115621, text='textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonse...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=4, score=None), Document(id=7153cde2-b6ee-4485-91e9-9de2f4bd45ab, text='textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsens...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=5, score=None), Document(id=c3f3ed48-acc2-41b5-b4ac-a6107b651789, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=6, score=None), Document(id=7bfd84e6-0025-4cfa-8c0a-63c9de9a8d4a, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=7, score=None), Document(id=8bece98d-65f0-4dd1-9407-d1c54413bef4, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=8, score=None), Document(id=cf9ab236-af73-4af6-9302-b3c7ffdd9ca7, text='nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of m...', meta_data={'title': 'Interviewing Li Yin'}, vector='len: 256', parent_doc_id=doc2, order=9, score=None)]}, transformer_setups={'data_transformer': Sequential(\n", + " (0): TextSplitter(split_by=word, chunk_size=400, chunk_overlap=200)\n", + " (1): ToEmbeddings(\n", + " batch_size=100\n", + " (embedder): Embedder(\n", + " model_kwargs={'model': 
'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", + " (model_client): OpenAIClient()\n", + " )\n", + " (batch_embedder): BatchEmbedder(\n", + " (embedder): Embedder(\n", + " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", + " (model_client): OpenAIClient()\n", + " )\n", + " )\n", + " )\n", + " )}, mapper_setups={}, index_path='/root/.adalflow/index.faiss')\n", + " (retriever): FAISSRetriever(\n", + " top_k=5, metric=prob, dimensions=256, total_documents=17\n", + " (embedder): Embedder(\n", + " model_kwargs={'model': 'text-embedding-3-small', 'dimensions': 256, 'encoding_format': 'float'}, \n", + " (model_client): OpenAIClient()\n", + " )\n", + " )\n", + " (retriever_output_processors): RetrieverOutputToContextStr(deduplicate=True)\n", + " (generator): Generator(\n", + " model_kwargs={'model': 'gpt-3.5-turbo', 'temperature': 0.3, 'stream': False}, trainable_prompt_kwargs=[]\n", + " (prompt): Prompt(\n", + " template: \n", + " {# task desc #}\n", + " {% if task_desc_str %}\n", + " {{task_desc_str}}\n", + " {% else %}\n", + " You are a helpful assistant.\n", + " {% endif %}\n", + " {#input format#}\n", + " {% if input_format_str %}\n", + " \n", + " {{input_format_str}}\n", + " \n", + " {% endif %}\n", + " {# output format #}\n", + " {% if output_format_str %}\n", + " \n", + " {{output_format_str}}\n", + " \n", + " {% endif %}\n", + " {# tools #}\n", + " {% if tools_str %}\n", + " \n", + " {{tools_str}}\n", + " \n", + " {% endif %}\n", + " {# example #}\n", + " {% if examples_str %}\n", + " \n", + " {{examples_str}}\n", + " \n", + " {% endif %}\n", + " {# chat history #}\n", + " {% if chat_history_str %}\n", + " \n", + " {{chat_history_str}}\n", + " \n", + " {% endif %}\n", + " {#contex#}\n", + " {% if context_str %}\n", + " \n", + " {{context_str}}\n", + " \n", + " {% endif %}\n", + " \n", + " \n", + " {% if input_str %}\n", + " {{input_str}}\n", + " {% endif %}\n", + " \n", + " {# 
steps #}\n", + " {% if steps_str %}\n", + " \n", + " {{steps_str}}\n", + " \n", + " {% endif %}\n", + " , prompt_kwargs: {'task_desc_str': '\\nYou are a helpful assistant.\\n\\nYour task is to answer the query that may or may not come with context information.\\nWhen context is provided, you should stick to the context and less on your prior knowledge to answer the query.\\n\\nOutput JSON format:\\n{\\n \"answer\": \"The answer to the query\",\\n}'}, prompt_variables: ['examples_str', 'context_str', 'chat_history_str', 'tools_str', 'task_desc_str', 'input_str', 'input_format_str', 'output_format_str', 'steps_str']\n", + " )\n", + " (model_client): OpenAIClient()\n", + " (output_processors): JsonParser()\n", + " )\n", + ")\n", + "Response: (GeneratorOutput(id=None, data={'answer': \"Li Yin's hobby is rock climbing and profession is an AI researcher and a software engineer.\"}, error=None, usage=CompletionUsage(completion_tokens=25, prompt_tokens=2713, total_tokens=2738), raw_response='{\\n \"answer\": \"Li Yin\\'s hobby is rock climbing and profession is an AI researcher and a software engineer.\"\\n}', metadata=None), ' My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of textlots of more nonsense textlots of more nonsense textlots of more 
nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more 
nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of 
nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of 
nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense ')\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# Add more documents at runtime\n", + "doc3 = Document(\n", + " meta_data={\"title\": \"Apple's profile\"},\n", + " text=\"Apple is a cute dog with black and tan fur\" + \"lots of nonsense text\" * 500,\n", + " id=\"doc3\",\n", + ")\n", + "doc4 = Document(\n", + " meta_data={\"title\": \"Apple's characteristics\"},\n", + " text=\"lots of more nonsense text\" * 250\n", + " + \"Apple is energetic, loves to play with her monkey toy\"\n", + " + \"lots of more nonsense text\" * 250,\n", + " id=\"doc4\",\n", + ")\n", + "\n", + "rag.add_documents([doc3, doc4])\n", + "rag.prepare_retriever()\n", + "\n", + "# Test a new query\n", + "query = \"What is Apple's favorite toy?\"\n", + "response = rag.call(query)\n", + "print(f\"Response: {response}\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "bcC1-dCheVEC", + "outputId": "133bab3f-ff2e-40db-99dc-71d64af6283f" + }, + "execution_count": 9, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Splitting Documents in Batches: 100%|██████████| 1/1 [00:00<00:00, 114.76it/s]\n", + "Batch embedding documents: 100%|██████████| 1/1 [00:00<00:00, 1.35it/s]\n", + "Adding embeddings to 
documents from batch: 1it [00:00, 1915.21it/s]\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Saved the state of the DB to /root/.adalflow/index.faiss\n", + "Response: (GeneratorOutput(id=None, data={'answer': \"Apple's favorite toy is her monkey toy.\"}, error=None, usage=CompletionUsage(completion_tokens=16, prompt_tokens=2647, total_tokens=2663), raw_response='{\\n \"answer\": \"Apple\\'s favorite toy is her monkey toy.\"\\n}', metadata=None), ' Apple is a cute dog with black and tan furlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense 
textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots of nonsense textlots textApple is energetic, loves to play with her monkey toylots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textApple is energetic, loves to play with her monkey toylots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more 
nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense 
textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more textLi Yin is an AI researcher and a software engineerlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots 
of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more ')\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "# View all documents in the database\n", + "print(\"All documents in the database:\")\n", + "for item in rag.db.items:\n", + " print(f\"ID: {item.id}, Title: {item.meta_data['title']}, Text: {item.text[:100]}...\")\n" + ], + "metadata": { + "colab": { + "base_uri": 
"https://localhost:8080/" + }, + "id": "o9TzVv5GeZZ2", + "outputId": "bde56355-186c-4013-d702-b4530f82881b" + }, + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "All documents in the database:\n", + "ID: doc1, Title: Li Yin's profile, Text: My name is Li Yin, I love rock climbinglots of nonsense textlots of nonsense textlots of nonsense te...\n", + "ID: doc2, Title: Interviewing Li Yin, Text: lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...\n", + "ID: doc3, Title: Apple's profile, Text: Apple is a cute dog with black and tan furlots of nonsense textlots of nonsense textlots of nonsense...\n", + "ID: doc4, Title: Apple's characteristics, Text: lots of more nonsense textlots of more nonsense textlots of more nonsense textlots of more nonsense ...\n" + ] + } + ] + } + ] +} diff --git a/notebooks/tutorials/adalflow_tracing.ipynb b/notebooks/tutorials/adalflow_tracing.ipynb new file mode 100644 index 00000000..014c1b5e --- /dev/null +++ b/notebooks/tutorials/adalflow_tracing.ipynb @@ -0,0 +1,183 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "# Tracing\n", + "\n", + "In particular, we provide two tracing methods to help you develop and improve the Generator:\n", + "\n", + "1. Trace the history change(states) on prompt during your development process. 
Developers typically go through a long process of prompt optimization and it is frustrating to lose track of the prompt changes when your current change actually makes the performance much worse.\n" + ], + "metadata": { + "id": "lLGpv1fLLIjF" + } + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "sfKEfaYC3Go7" + }, + "outputs": [], + "source": [ + "from IPython.display import clear_output\n", + "\n", + "!pip install -U adalflow[openai,groq,faiss-cpu]\n", + "\n", + "clear_output()\n" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from getpass import getpass\n", + "\n", + "# Prompt user to enter their API keys securely\n", + "openai_api_key = getpass(\"Please enter your OpenAI API key: \")\n", + "groq_api_key = getpass(\"Please enter your GROQ API key: \")\n", + "\n", + "# Set environment variables\n", + "os.environ['OPENAI_API_KEY'] = openai_api_key\n", + "os.environ['GROQ_API_KEY'] = groq_api_key\n", + "\n", + "print(\"API keys have been set.\")\n" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-4c_AGBt3PlR", + "outputId": "85aba038-ee9c-463d-bdbd-027cbfff0094" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Please enter your OpenAI API key: ··········\n", + "Please enter your GROQ API key: ··········\n", + "API keys have been set.\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "We created a GeneratorStateLogger to handle the logging and saving into JSON files. To further simplify the development process, we provide a class decorator, trace_generator_states, which can be added to any of your task components with a single line of code. It will automatically track any attributes of type Generator."
+ ], + "metadata": { + "id": "yWi2uEiE6UIf" + } + }, + { + "cell_type": "code", + "source": [ + "from adalflow.tracing import trace_generator_states\n", + "from adalflow.core import Component, Generator\n", + "import adalflow as adal\n", + "from adalflow.components.model_client import OpenAIClient\n", + "\n", + "template_doc = r\"\"\" You are a doctor User: {{input_str}}\"\"\"\n", + "\n", + "@trace_generator_states()\n", + "class DocQA(adal.Component):\n", + " def __init__(self):\n", + " super(DocQA, self).__init__()\n", + " self.generator = Generator(\n", + " template=template_doc,\n", + " model_client=OpenAIClient(),\n", + " model_kwargs={\"model\": \"gpt-4o-mini\"},\n", + " )\n", + "\n", + " def call(self, query: str) -> str:\n", + " return self.doc(prompt_kwargs={\"input_str\": query}).data\n" + ], + "metadata": { + "id": "qk9pkcCVzdek" + }, + "execution_count": 13, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Here is the folder structure showing where the trace is generated as a .json file, followed by an example output." + ], + "metadata": { + "id": "LAZUSnYn-lnI" + } + }, + { + "cell_type": "markdown", + "source": [ + "![image.png]()" + ], + "metadata": { + "id": "cVofNXVW-EMo" + } + }, + { + "cell_type": "code", + "source": [ + "'''\n", + " {\n", + " \"doc\": [\n", + " {\n", + " \"prompt_states\": {\n", + " \"type\": \"Prompt\",\n", + " \"data\": {\n", + " \"_components\": {\n", + " \"_ordered_dict\": true,\n", + " \"data\": []\n", + " },\n", + " \"_parameters\": {\n", + " \"_ordered_dict\": true,\n", + " \"data\": []\n", + " },\n", + " \"training\": false,\n", + " \"teacher_mode\": false,\n", + " \"tracing\": false,\n", + " \"name\": \"Prompt\",\n", + " \"_init_args\": {\n", + " \"template\": null,\n", + " \"prompt_kwargs\": {}\n", + " },\n", + " \"template\": \" You are a doctor User: {{input_str}}\",\n", + " \"prompt_variables\": [\n", + " \"input_str\"\n", + " ],\n", + " \"prompt_kwargs\": {}\n", + " }\n", + " },\n", + "
\"time_stamp\": \"2024-11-29T12:36:33.302956\"\n", + " }\n", + " ]\n", + "}\n", + "'''" + ], + "metadata": { + "id": "dPd9i6_t7ERJ" + }, + "execution_count": null, + "outputs": [] + } + ] +} diff --git a/tutorials/adalflow_function_calls.py b/tutorials/adalflow_function_calls.py new file mode 100644 index 00000000..184e2b88 --- /dev/null +++ b/tutorials/adalflow_function_calls.py @@ -0,0 +1,108 @@ +""" +This script demonstrates the usage of AdalFlow's Tool Helper functionality. +It can be run independently to showcase function calling capabilities. +""" + +from adalflow.components import Generator +from adalflow.components.model_client import OpenAIClient +from adalflow.utils import setup_env +from typing import List, Dict +import json + + +def setup_generator(): + """Initialize and configure the Generator with OpenAI client.""" + setup_env() + generator = Generator( + model_client=OpenAIClient(), + model_kwargs={"model": "gpt-3.5-turbo", "temperature": 0, "max_tokens": 1000}, + ) + return generator + + +def define_tools() -> List[Dict]: + """Define the available tools/functions that can be called.""" + return [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get the weather in a location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
San Francisco, CA", + }, + "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}, + }, + "required": ["location", "unit"], + }, + }, + } + ] + + +def get_weather(location: str, unit: str) -> str: + """Mock function to simulate weather data retrieval.""" + # This is a mock implementation + weather_data = { + "San Francisco, CA": {"celsius": 20, "fahrenheit": 68}, + "New York, NY": {"celsius": 22, "fahrenheit": 72}, + } + + if location in weather_data: + temp = weather_data[location][unit] + return f"The temperature in {location} is {temp}°{'C' if unit == 'celsius' else 'F'}" + return f"Weather data not available for {location}" + + +def process_function_calls(generator: Generator, query: str): + """Process user query and handle any function calls.""" + # Get the response from the model + response = generator.generate(prompt_kwargs={"query": query}, tools=define_tools()) + + # Check if the response includes a function call + if hasattr(response, "tool_calls") and response.tool_calls: + for tool_call in response.tool_calls: + if tool_call.function.name == "get_weather": + # Parse the function arguments + args = json.loads(tool_call.function.arguments) + + # Call the function with the provided arguments + weather_result = get_weather(args["location"], args["unit"]) + + # Generate final response incorporating the function result + final_response = generator.generate( + prompt_kwargs={"query": query}, + tools=define_tools(), + tool_results=[ + {"tool_call_id": tool_call.id, "output": weather_result} + ], + ) + return final_response + + return response + + +def main(): + """Main function to demonstrate tool helper functionality.""" + # Initialize generator + generator = setup_generator() + + # Example queries + queries = [ + "What's the weather like in San Francisco?", + "Tell me the temperature in New York in Celsius", + ] + + # Process each query + for query in queries: + print(f"\nQuery: {query}") + response = process_function_calls(generator, query) + 
print(f"Response: {response}") + + +if __name__ == "__main__": + main() diff --git a/tutorials/adalflow_logger.py b/tutorials/adalflow_logger.py new file mode 100644 index 00000000..e4c4bb7e --- /dev/null +++ b/tutorials/adalflow_logger.py @@ -0,0 +1,143 @@ +""" +This script demonstrates the usage of AdalFlow's Logger functionality. +It can be run independently to showcase logging capabilities. +""" + +from adalflow.components import Generator +from adalflow.components.model_client import OpenAIClient +from adalflow.utils import setup_env +from adalflow.utils.logger import get_logger +import logging +from typing import Dict, Any +import json + + +def setup_logging(log_file: str = "adalflow.log") -> logging.Logger: + """ + Initialize and configure the logger. + + Args: + log_file: Name of the log file + + Returns: + Configured logger instance + """ + logger = get_logger(__name__) + + # Add file handler if not already present + if not any(isinstance(handler, logging.FileHandler) for handler in logger.handlers): + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter( + logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + ) + logger.addHandler(file_handler) + + return logger + + +def setup_generator() -> Generator: + """ + Initialize and configure the Generator with OpenAI client. + + Returns: + Configured Generator instance + """ + setup_env() + return Generator( + model_client=OpenAIClient(), + model_kwargs={"model": "gpt-3.5-turbo", "temperature": 0, "max_tokens": 1000}, + ) + + +def process_query( + generator: Generator, query: str, logger: logging.Logger +) -> Dict[str, Any]: + """ + Process a query using the generator and log the interaction. 
+ + Args: + generator: The configured Generator instance + query: User query to process + logger: Logger instance for recording the interaction + + Returns: + Dictionary containing the query and response + """ + logger.info(f"Processing query: {query}") + + try: + # Generate response + response = generator.generate(prompt_kwargs={"query": query}) + + # Log successful response + logger.info(f"Generated response: {response}") + + return {"query": query, "response": str(response), "status": "success"} + + except Exception as e: + # Log error if generation fails + logger.error(f"Error processing query: {str(e)}") + return {"query": query, "response": None, "status": "error", "error": str(e)} + + +def analyze_logs(log_file: str, logger: logging.Logger) -> Dict[str, int]: + """ + Analyze the log file to gather statistics. + + Args: + log_file: Path to the log file + logger: Logger instance for recording the analysis + + Returns: + Dictionary containing log statistics + """ + stats = {"total_queries": 0, "successful_queries": 0, "failed_queries": 0} + + try: + with open(log_file, "r") as f: + for line in f: + if "Processing query:" in line: + stats["total_queries"] += 1 + if "Generated response:" in line: + stats["successful_queries"] += 1 + if "Error processing query:" in line: + stats["failed_queries"] += 1 + + logger.info(f"Log analysis complete: {json.dumps(stats, indent=2)}") + return stats + + except Exception as e: + logger.error(f"Error analyzing logs: {str(e)}") + return stats + + +def main(): + """Main function to demonstrate logger functionality.""" + # Setup + log_file = "adalflow.log" + logger = setup_logging(log_file) + generator = setup_generator() + + # Example queries + queries = [ + "What is artificial intelligence?", + "Explain the concept of machine learning.", + "Tell me about neural networks.", + ] + + # Process queries + results = [] + for query in queries: + result = process_query(generator, query, logger) + results.append(result) + 
print(f"\nQuery: {query}") + print(f"Response: {result['response']}") + + # Analyze logs + stats = analyze_logs(log_file, logger) + print("\nLog Analysis:") + print(json.dumps(stats, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/tutorials/adalflow_tracing.py b/tutorials/adalflow_tracing.py new file mode 100644 index 00000000..d49008e6 --- /dev/null +++ b/tutorials/adalflow_tracing.py @@ -0,0 +1,88 @@ +""" +This script demonstrates the usage of AdalFlow's tracing functionality. +It shows how to track Generator states and changes during development. +""" + +import os +from getpass import getpass +from adalflow.tracing import trace_generator_states +from adalflow.core import Generator +import adalflow as adal +from adalflow.components.model_client import OpenAIClient + + +def setup_environment(): + """Setup API keys and environment variables.""" + # In a production environment, you might want to use environment variables + # or a configuration file instead of getpass + if "OPENAI_API_KEY" not in os.environ: + openai_api_key = getpass("Please enter your OpenAI API key: ") + os.environ["OPENAI_API_KEY"] = openai_api_key + + if "GROQ_API_KEY" not in os.environ: + groq_api_key = getpass("Please enter your GROQ API key: ") + os.environ["GROQ_API_KEY"] = groq_api_key + + print("API keys have been set.") + + +# Define the template for the doctor QA system +template_doc = r""" You are a doctor User: {{input_str}}""" + + +@trace_generator_states() +class DocQA(adal.Component): + """ + A component that uses a Generator to answer medical questions. + The @trace_generator_states decorator automatically tracks changes + to any Generator attributes in this class. + """ + + def __init__(self): + super(DocQA, self).__init__() + self.generator = Generator( + template=template_doc, + model_client=OpenAIClient(), + model_kwargs={"model": "gpt-4-turbo-preview"}, + ) + + def call(self, query: str) -> str: + """ + Process a medical query and return the response. 
+ + Args: + query: The medical question to be answered + + Returns: + The generated response from the doctor AI + """ + return self.generator(prompt_kwargs={"input_str": query}).data + + +def main(): + """Main function to demonstrate tracing functionality.""" + # Setup environment + setup_environment() + + # Initialize the DocQA component + doc_qa = DocQA() + + # Example queries + queries = [ + "What are the common symptoms of the flu?", + "How can I manage my allergies?", + "What should I do for a minor burn?", + ] + + # Process each query + for query in queries: + print(f"\nQuery: {query}") + response = doc_qa.call(query) + print(f"Response: {response}") + + print("\nNote: Generator states have been logged to the traces directory.") + print("You can find the logs in: ./traces/DocQA/generator_state_trace.json") + + +if __name__ == "__main__": + main()