From f0c702098201ad7119ec971ce921ee3b98f860ce Mon Sep 17 00:00:00 2001 From: Hamza Tahir Date: Thu, 25 Jan 2024 11:58:45 +0100 Subject: [PATCH] removed errors all over the place --- llm-agents/src/requirements.txt | 2 + llm-agents/src/run.ipynb | 244 ++++-------------------- llm-agents/src/run.py | 4 +- llm-agents/src/steps/agent_creator.py | 16 +- llm-agents/src/steps/index_generator.py | 9 +- llm-agents/src/steps/url_scraper.py | 6 +- 6 files changed, 59 insertions(+), 222 deletions(-) diff --git a/llm-agents/src/requirements.txt b/llm-agents/src/requirements.txt index ad771251..cddc0d82 100644 --- a/llm-agents/src/requirements.txt +++ b/llm-agents/src/requirements.txt @@ -8,3 +8,5 @@ gcsfs==2023.5.0 faiss-cpu==1.7.3 unstructured==0.5.7 tiktoken +bs4 +typing_extensions \ No newline at end of file diff --git a/llm-agents/src/run.ipynb b/llm-agents/src/run.ipynb index 680a3a78..b041bbbb 100644 --- a/llm-agents/src/run.ipynb +++ b/llm-agents/src/run.ipynb @@ -43,30 +43,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35mNote: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\u001b[0m\n", - "\u001b[1;35mNumExpr defaulting to 8 threads.\u001b[0m\n", - "\n", - "\u001b[2;36m ____..--\u001b[0m\u001b[2;32m' .-'\u001b[0m\u001b[2;36m'-. ,---. .--. ,---. ,---. .---. \u001b[0m\n", - "\u001b[2;36m | | .'_ _ \\ | \\ | | | \\ \u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m | | ,_| \u001b[0m\n", - "\u001b[2;36m | .-\u001b[0m\u001b[2;32m' '\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m ` \u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m ' | , \\ | | | , \\\u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m , | ,-.\u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m \u001b[0m\n", - "\u001b[2;36m |.-\u001b[0m\u001b[2;32m'.'\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m . \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m_ o _\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m | | |\\_ \\| | | |\\_ \u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m| | \\ \u001b[0m\u001b[2;32m'_ '\u001b[0m\u001b[2;36m`\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m \u001b[0m\n", - "\u001b[2;36m \u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m _/ | \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m_,_\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m___| | \u001b[0m\u001b[1;2;35m_\u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m_\\ | | \u001b[0m\u001b[1;2;35m_\u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m_/ | | > \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m_\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m \u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m \u001b[0m\n", - "\u001b[2;36m .\u001b[0m\u001b[2;32m'._\u001b[0m\u001b[2;32m(\u001b[0m\u001b[2;32m \u001b[0m\u001b[2;32m)\u001b[0m\u001b[2;32m_ '\u001b[0m\u001b[2;36m \\ .---. | \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m_ o _\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m | | \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m_ o _\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m | | \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m . .-' \u001b[0m\n", - "\u001b[2;36m .\u001b[0m\u001b[2;32m' \u001b[0m\u001b[2;32m(\u001b[0m\u001b[2;32m_'\u001b[0m\u001b[2;36mo._\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m \\ `-\u001b[0m\u001b[2;32m' / | \u001b[0m\u001b[2;32m(\u001b[0m\u001b[2;32m_,_\u001b[0m\u001b[2;32m)\u001b[0m\u001b[2;32m\\ | | \u001b[0m\u001b[2;32m(\u001b[0m\u001b[2;32m_,_\u001b[0m\u001b[2;32m)\u001b[0m\u001b[2;32m | | `-'\u001b[0m\u001b[2;36m`-'|___ \u001b[0m\n", - "\u001b[2;36m | \u001b[0m\u001b[1;2;36m(\u001b[0m\u001b[2;36m_,_\u001b[0m\u001b[1;2;36m)\u001b[0m\u001b[2;36m| \\ \u001b[0m\u001b[2;35m/\u001b[0m\u001b[2;36m | | | | | | | | | \\ \u001b[0m\n", - "\u001b[2;36m |_________| `\u001b[0m\u001b[2;32m'-..-'\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;32m'--'\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;32m'--'\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;32m'--'\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;32m'--'\u001b[0m\u001b[2;36m `--------` \u001b[0m\n", - "\u001b[2;36m \u001b[0m\n", - "\u001b[1mversion: 0.54.1\u001b[0m\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!zenml version" ] @@ -97,40 +76,9 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35mNote: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\u001b[0m\n", - "\u001b[1;35mNumExpr defaulting to 8 threads.\u001b[0m\n", - "\u001b[?25l\u001b[32m⠋\u001b[0m Initializing ZenML repository at \n", - "/home/wjayesh/apps/zenml-projects/langchain-llamaindex-slackbot/src.\n", - "\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[32m⠙\u001b[0m Initializing ZenML repository at \n", - "/home/wjayesh/apps/zenml-projects/langchain-llamaindex-slackbot/src.\n", - "\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[32m⠹\u001b[0m Initializing ZenML repository at \n", - "/home/wjayesh/apps/zenml-projects/langchain-llamaindex-slackbot/src.\n", - "\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[32m⠸\u001b[0m Initializing ZenML repository at \n", - "/home/wjayesh/apps/zenml-projects/langchain-llamaindex-slackbot/src.\n", - "\u001b[1;35mSetting the repo active workspace to 'default'.\u001b[0m\n", - "\u001b[33mSetting the repo active stack to default.\u001b[0m\n", - "\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[2;36mZenML repository initialized at \u001b[0m\n", - "\u001b[2;35m/home/wjayesh/apps/zenml-projects/langchain-llamaindex-slackbot/\u001b[0m\u001b[2;95msrc.\u001b[0m\n", - "\u001b[2;32m⠼\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;36mInitializing ZenML repository at \u001b[0m\n", - "\u001b[2;36m/home/wjayesh/apps/zenml-projects/langchain-llamaindex-slackbot/src.\u001b[0m\n", - "\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[32m⠼\u001b[0m Initializing ZenML repository at \n", - "/home/wjayesh/apps/zenml-projects/langchain-llamaindex-slackbot/src.\n", - "\n", - "\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[2;36mThe local active stack was initialized to \u001b[0m\u001b[2;32m'default'\u001b[0m\u001b[2;36m. This local configuration \u001b[0m\n", - "\u001b[2;36mwill only take effect when you're running ZenML from the initialized repository \u001b[0m\n", - "\u001b[2;36mroot, or from a subdirectory. For more information on repositories and \u001b[0m\n", - "\u001b[2;36mconfigurations, please visit \u001b[0m\n", - "\u001b[2;4;94mhttps://docs.zenml.io/user-guide/starter-guide/understand-stacks.\u001b[0m\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!rm -rf .zen\n", "!zenml init" @@ -172,6 +120,8 @@ "outputs": [], "source": [ "# automatically restart kernel\n", + "import IPython\n", + "\n", "IPython.Application.instance().kernel.do_shutdown(restart=True)" ] }, @@ -196,36 +146,9 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35mNote: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\u001b[0m\n", - "\u001b[1;35mNumExpr defaulting to 8 threads.\u001b[0m\n", - "\u001b[1;35mMigrating the ZenML global configuration from version 0.54.1 to version 0.55.0...\u001b[0m\n", - "\u001b[?25l\u001b[3m Stack Configuration \u001b[0m\n", - "┏━━━━━━━━━━━━━━━━┯━━━━━━━━━━━━━━━━┓\n", - "┃\u001b[1m \u001b[0m\u001b[1mCOMPONENT_TYPE\u001b[0m\u001b[1m \u001b[0m│\u001b[1m \u001b[0m\u001b[1mCOMPONENT_NAME\u001b[0m\u001b[1m \u001b[0m┃\n", - "┠────────────────┼────────────────┨\n", - "┃ ARTIFACT_STORE │ default ┃\n", - "┠────────────────┼────────────────┨\n", - "┃ ORCHESTRATOR │ default ┃\n", - "┗━━━━━━━━━━━━━━━━┷━━━━━━━━━━━━━━━━┛\n", - "\u001b[2;3m 'default' stack (ACTIVE) \u001b[0m\n", - "\u001b[32m⠋\u001b[0m Describing the stack...\n", - "\u001b[2K\u001b[1A\u001b[2K\u001b[2;36mStack \u001b[0m\u001b[2;32m'default'\u001b[0m\u001b[2;36m with id \u001b[0m\u001b[2;32m'a4cd4161-6ee5-411c-8adf-559ac084ceb5'\u001b[0m\u001b[2;36m is unowned.\u001b[0m\n", - "\u001b[2;32m⠋\u001b[0m\u001b[2;36m \u001b[0m\u001b[2;36mDescribing the stack...\u001b[0m\n", - "\u001b[2K\u001b[1A\u001b[2K\u001b[32m⠋\u001b[0m Describing the stack...\n", - "\n", - "\u001b[1A\u001b[2K\u001b[1A\u001b[2K\u001b[2;36mDashboard URL: \u001b[0m\n", - "\u001b[2;4;94mhttps://1cf18d95-zenml.cloudinfra.zenml.io/workspaces/default/stacks/a4cd4161-6e\u001b[0m\n", - "\u001b[2;4;94me5-411c-8adf-559ac084ceb5/configuration\u001b[0m\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "!zenml stack describe" ] @@ -276,21 +199,12 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35mNote: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\u001b[0m\n", - "\u001b[1;35mNumExpr defaulting to 8 threads.\u001b[0m\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "from typing import Annotated, List\n", - "\n", + "from typing import List\n", + "from typing_extensions import Annotated\n", "from steps.url_scraping_utils import get_all_pages, get_nested_readme_urls\n", "from zenml import step, log_artifact_metadata\n", "\n", @@ -340,7 +254,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -377,7 +291,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -421,7 +335,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -431,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -535,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -582,7 +496,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -593,51 +507,9 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1;35mInitiating a new run for the pipeline: \u001b[0m\u001b[1;36mzenml_agent_creation_pipeline\u001b[1;35m.\u001b[0m\n", - "\u001b[33mThe \u001b[0m\u001b[1;36mBaseParameters\u001b[33m class to define step parameters is deprecated. Check out our docs https://docs.zenml.io/user-guide/advanced-guide/pipelining-features/configure-steps-pipelines for information on how to parameterize your steps. As a quick fix to get rid of this warning, make sure your parameter class inherits from \u001b[0m\u001b[1;36mpydantic.BaseModel\u001b[33m instead of the \u001b[0m\u001b[1;36mBaseParameters\u001b[33m class.\u001b[0m\n", - "\u001b[1;35mRegistered new version: \u001b[0m\u001b[1;36m(version 15)\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mNew model version \u001b[0m\u001b[1;36m6\u001b[1;35m was created.\u001b[0m\n", - "\u001b[1;35mExecuting a new run.\u001b[0m\n", - "\u001b[1;35mUsing user: \u001b[0m\u001b[1;36mjayesh.ext@zenml.io\u001b[1;35m\u001b[0m\n", - "\u001b[1;35mUsing stack: \u001b[0m\u001b[1;36mdefault\u001b[1;35m\u001b[0m\n", - "\u001b[1;35m artifact_store: \u001b[0m\u001b[1;36mdefault\u001b[1;35m\u001b[0m\n", - "\u001b[1;35m orchestrator: \u001b[0m\u001b[1;36mdefault\u001b[1;35m\u001b[0m\n", - "\u001b[1;35mCaching \u001b[0m\u001b[1;36menabled\u001b[1;35m explicitly for \u001b[0m\u001b[1;36murl_scraper\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mUsing cached version of \u001b[0m\u001b[1;36murl_scraper\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mStep \u001b[0m\u001b[1;36murl_scraper\u001b[1;35m has started.\u001b[0m\n", - "\u001b[1;35mCaching \u001b[0m\u001b[1;36menabled\u001b[1;35m explicitly for \u001b[0m\u001b[1;36mweb_url_loader\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mUsing cached version of \u001b[0m\u001b[1;36mweb_url_loader\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mStep \u001b[0m\u001b[1;36mweb_url_loader\u001b[1;35m has started.\u001b[0m\n", - "\u001b[1;35mCaching \u001b[0m\u001b[1;36menabled\u001b[1;35m explicitly for \u001b[0m\u001b[1;36mindex_generator\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mUsing cached version of \u001b[0m\u001b[1;36mindex_generator\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mStep \u001b[0m\u001b[1;36mindex_generator\u001b[1;35m has started.\u001b[0m\n", - "\u001b[1;35mCaching \u001b[0m\u001b[1;36mdisabled\u001b[1;35m explicitly for \u001b[0m\u001b[1;36magent_creator\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mStep \u001b[0m\u001b[1;36magent_creator\u001b[1;35m has started.\u001b[0m\n", - "\u001b[1;35mLoading faiss with AVX2 support.\u001b[0m\n", - "\u001b[1;35mSuccessfully loaded faiss with AVX2 support.\u001b[0m\n", - "\u001b[1;35mStep \u001b[0m\u001b[1;36magent_creator\u001b[1;35m has finished in \u001b[0m\u001b[1;36m10.859s\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mPipeline run has finished in \u001b[0m\u001b[1;36m20.340s\u001b[1;35m.\u001b[0m\n", - "\u001b[1;35mDashboard URL: https://1cf18d95-zenml.cloudinfra.zenml.io/workspaces/default/pipelines/0cb1ec5c-66a4-4641-9621-ac3cb1703cce/runs/3b86c4bb-c3f7-4b0f-b3c5-f473d07fc8c6/dag\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "PipelineRunResponse(id=UUID('3b86c4bb-c3f7-4b0f-b3c5-f473d07fc8c6'), permission_denied=False, body=PipelineRunResponseBody(created=datetime.datetime(2024, 1, 25, 7, 34, 42), updated=datetime.datetime(2024, 1, 25, 7, 34, 42), user=UserResponse(id=UUID('8b4899df-2396-4226-9780-0c75e7e1bf75'), permission_denied=False, body=UserResponseBody(created=datetime.datetime(2023, 11, 10, 10, 9, 15), updated=datetime.datetime(2024, 1, 24, 10, 54, 53), active=True, activation_token=None, full_name='Jayesh Sharma', email_opted_in=True, is_service_account=False), metadata=None, name='jayesh.ext@zenml.io'), status=, stack=StackResponse(id=UUID('a4cd4161-6ee5-411c-8adf-559ac084ceb5'), permission_denied=False, body=StackResponseBody(created=datetime.datetime(2023, 11, 30, 9, 39, 29), updated=datetime.datetime(2023, 11, 30, 9, 39, 29), user=None), metadata=None, name='default'), pipeline=PipelineResponse(id=UUID('0cb1ec5c-66a4-4641-9621-ac3cb1703cce'), permission_denied=False, body=PipelineResponseBody(created=datetime.datetime(2024, 1, 25, 7, 34, 40), updated=datetime.datetime(2024, 1, 25, 7, 34, 40), user=UserResponse(id=UUID('8b4899df-2396-4226-9780-0c75e7e1bf75'), permission_denied=False, body=UserResponseBody(created=datetime.datetime(2023, 11, 10, 10, 9, 15), updated=datetime.datetime(2024, 1, 24, 10, 54, 53), active=True, activation_token=None, full_name='Jayesh Sharma', email_opted_in=True, is_service_account=False), metadata=None, name='jayesh.ext@zenml.io'), status=[], version='15'), metadata=None, name='zenml_agent_creation_pipeline'), build=None, schedule=None, code_reference=None), metadata=PipelineRunResponseMetadata(workspace=WorkspaceResponse(id=UUID('f3a544f2-afb5-4672-934a-7a465c66201c'), permission_denied=False, body=WorkspaceResponseBody(created=datetime.datetime(2023, 10, 23, 15, 34, 47), updated=datetime.datetime(2023, 10, 23, 15, 34, 47)), metadata=None, name='default'), run_metadata={}, steps={}, config=PipelineConfiguration(enable_cache=True, enable_artifact_metadata=None, enable_artifact_visualization=None, enable_step_logs=None, settings={}, extra={}, failure_hook_source=None, success_hook_source=None, model=Model(name='zenml_agent', license='Apache', description='ZenML Agent with a vector store tool.', audience=None, use_cases=None, limitations=None, trade_offs=None, ethics=None, tags=['agent', 'llm', 'rag'], version='6', save_models_to_registry=True, suppress_class_validation_warnings=True, was_created_in_this_run=True), parameters=None, name='zenml_agent_creation_pipeline'), start_time=datetime.datetime(2024, 1, 25, 7, 34, 40), end_time=None, client_environment={'environment': 'notebook', 'os': 'linux', 'linux_distro': 'ubuntu', 'linux_distro_like': 'debian', 'linux_distro_version': '20.04', 'python_version': '3.9.5'}, orchestrator_environment={}, orchestrator_run_id=None), name='zenml_agent_creation_pipeline-2024_01_25-07_34_40_439769')" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "version = \"0.54.1\"\n", "docs_url = f\"https://docs.zenml.io/v/{version}/\"\n", @@ -706,7 +578,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -756,26 +628,9 @@ }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m{\n", - " \"action\": \"Final Answer\",\n", - " \"action_input\": \"I am ZenML Agent, a technical assistant built using ZenML. I am here to assist you with any questions or tasks you may have. How can I help you today?\"\n", - "}\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\u001b[1;35m\u001b[0m\u001b[1;36mversion\u001b[1;35m \u001b[0m\u001b[1;36m6\u001b[1;35m is numeric and will be fetched using version number.\u001b[0m\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "ask_agent(\"Who are you?\")" ] @@ -809,7 +664,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -829,33 +684,9 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n", - "\u001b[32;1m\u001b[1;3m{\n", - " \"action\": \"zenml-qa-tool\",\n", - " \"action_input\": \"What is an orchestrator in ZenML?\"\n", - "}\u001b[0m\n", - "Observation: \u001b[36;1m\u001b[1;3mAn orchestrator in ZenML refers to a component that manages and coordinates the execution of ML pipelines. It is responsible for scheduling and executing the various steps and tasks within a pipeline, ensuring that they are executed in the correct order and with the necessary dependencies. The orchestrator in ZenML helps automate the workflow, making it easier to manage and monitor the entire ML pipeline from data ingestion to model deployment.\u001b[0m\n", - "Thought:\u001b[32;1m\u001b[1;3m{\n", - " \"action\": \"Final Answer\",\n", - " \"action_input\": \"An orchestrator in ZenML refers to a component that manages and coordinates the execution of ML pipelines. It automates the workflow, ensuring that the steps and tasks within a pipeline are executed in the correct order and with the necessary dependencies.\"\n", - "}\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\u001b[1;35m\u001b[0m\u001b[1;36mversion\u001b[1;35m \u001b[0m\u001b[1;36m5\u001b[1;35m is numeric and will be fetched using version number.\u001b[0m\n", - "\u001b[33mProvided model configuration does not match existing model \u001b[0m\u001b[1;36mzenml_agent\u001b[33m with the following changes: {'license': {'config': None, 'db': 'Apache'}}. If you want to update the model configuration, please use the \u001b[0m\u001b[1;36mzenml model update\u001b[33m command.\u001b[0m\n", - "\u001b[33mProvided model version configuration does not match existing model version \u001b[0m\u001b[1;36mzenml_agent::5\u001b[33m with the following changes: {'description': {'config': None, 'db': 'ZenML Agent with a vector store tool.'}, 'tags added': [], 'tags removed': ['agent', 'rag', 'llm']}. If you want to update the model version configuration, please use the \u001b[0m\u001b[1;36mzenml model version update\u001b[33m command.\u001b[0m\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "ask_agent(\"What is an orchestrator in ZenML?\", version=ModelStages.PRODUCTION)" ] @@ -863,7 +694,7 @@ ], "metadata": { "kernelspec": { - "display_name": "slackbot", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -877,10 +708,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.5" - }, - "orig_nbformat": 4 + "version": "3.8.10" + } }, "nbformat": 4, - "nbformat_minor": 2 + "nbformat_minor": 4 } diff --git a/llm-agents/src/run.py b/llm-agents/src/run.py index e45ba949..10f653b9 100644 --- a/llm-agents/src/run.py +++ b/llm-agents/src/run.py @@ -14,11 +14,11 @@ import logging -from pipelines.index_builder import docs_to_agent_pipeline +from pipelines.agent_creator import docs_to_agent_pipeline def main(): - version = "0.54.1" + version = "0.55.0" docs_url = f"https://docs.zenml.io/v/{version}/" website_url = "https://zenml.io" repo_url = f"https://github.com/zenml-io/zenml/tree/{version}/examples" diff --git a/llm-agents/src/steps/agent_creator.py b/llm-agents/src/steps/agent_creator.py index f6711e14..1374a640 100644 --- a/llm-agents/src/steps/agent_creator.py +++ b/llm-agents/src/steps/agent_creator.py @@ -1,5 +1,5 @@ -import logging -from typing import Annotated, Dict, cast +from typing import Dict, Optional +from typing_extensions import Annotated from agent.agent_executor_materializer import AgentExecutorMaterializer from agent.prompt import PREFIX, SUFFIX @@ -8,7 +8,7 @@ from langchain.schema.vectorstore import VectorStore from langchain.tools.vectorstore.tool import VectorStoreQATool from langchain.agents import AgentExecutor -from zenml.steps import BaseParameters +from pydantic import BaseModel from zenml import step, ArtifactConfig, log_artifact_metadata @@ -17,7 +17,7 @@ CHARACTER = "technical assistant" -class AgentParameters(BaseParameters): +class AgentParameters(BaseModel): """Parameters for the agent.""" llm: Dict = { @@ -33,8 +33,10 @@ class Config: @step(output_materializers=AgentExecutorMaterializer, enable_cache=False) def agent_creator( - vector_store: VectorStore, config: AgentParameters -) -> Annotated[AgentExecutor, ArtifactConfig(name="agent", is_model_artifact=True)]: + vector_store: VectorStore, config: AgentParameters = AgentParameters() +) -> Annotated[ + AgentExecutor, ArtifactConfig(name="agent", is_model_artifact=True) +]: """Create an agent from a vector store. Args: @@ -81,7 +83,7 @@ def agent_creator( "temperature": config.llm["temperature"], "model_name": config.llm["model_name"], }, - } + }, ) return agent_executor diff --git a/llm-agents/src/steps/index_generator.py b/llm-agents/src/steps/index_generator.py index fdf36fd7..5e0ad6bc 100644 --- a/llm-agents/src/steps/index_generator.py +++ b/llm-agents/src/steps/index_generator.py @@ -12,7 +12,8 @@ # or implied. See the License for the specific language governing # permissions and limitations under the License. -from typing import Annotated, List +from typing_extensions import Annotated +from typing import List from langchain.docstore.document import Document from langchain.embeddings import OpenAIEmbeddings @@ -25,7 +26,9 @@ @step(enable_cache=True) -def index_generator(documents: List[Document]) -> Annotated[VectorStore, "vector_store"]: +def index_generator( + documents: List[Document], +) -> Annotated[VectorStore, "vector_store"]: embeddings = OpenAIEmbeddings() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) @@ -36,7 +39,7 @@ def index_generator(documents: List[Document]) -> Annotated[VectorStore, "vector metadata={ "embedding_type": "OpenAIEmbeddings", "vector_store_type": "FAISS", - } + }, ) return FAISS.from_documents(compiled_texts, embeddings) diff --git a/llm-agents/src/steps/url_scraper.py b/llm-agents/src/steps/url_scraper.py index 16bed1e6..b7341cca 100644 --- a/llm-agents/src/steps/url_scraper.py +++ b/llm-agents/src/steps/url_scraper.py @@ -12,8 +12,8 @@ # or implied. See the License for the specific language governing # permissions and limitations under the License. -from typing import Annotated, List - +from typing import List +from typing_extensions import Annotated from steps.url_scraping_utils import get_all_pages, get_nested_readme_urls from zenml import step, log_artifact_metadata @@ -45,6 +45,6 @@ def url_scraper( artifact_name="urls", metadata={ "count": len(all_urls), - } + }, ) return all_urls