From 7a6600be7aae4882b76c778a6cd24bbbeadb4288 Mon Sep 17 00:00:00 2001 From: Alexej Penner Date: Mon, 30 Oct 2023 15:25:44 +0100 Subject: [PATCH 1/5] Allow running on remote orchestrator --- .../src/local_testing_slackbot.py | 3 ++- .../src/pipelines/index_builder.py | 23 ++++++++++++++++--- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/langchain-llamaindex-slackbot/src/local_testing_slackbot.py b/langchain-llamaindex-slackbot/src/local_testing_slackbot.py index f492a5ff..7bd8f8f8 100644 --- a/langchain-llamaindex-slackbot/src/local_testing_slackbot.py +++ b/langchain-llamaindex-slackbot/src/local_testing_slackbot.py @@ -77,7 +77,7 @@ def reply_in_thread(body: dict, say, context): thread_ts = event.get("thread_ts", None) or event["ts"] if context["bot_user_id"] in event["text"]: - logger.debug(f"Received message: {event['text']}") + logger.info(f"Received message: {event['text']}") if event.get("thread_ts", None): full_thread = [ f"{msg['text']}" @@ -107,6 +107,7 @@ def reply_in_thread(body: dict, say, context): question=event["text"], verbose=True, ) + logger.info(output) say(text=output, thread_ts=thread_ts) diff --git a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py index f0275226..3c71e67c 100644 --- a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py +++ b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py @@ -11,17 +11,34 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing # permissions and limitations under the License. - +import os from steps.index_generator import index_generator from steps.url_scraper import url_scraper from steps.web_url_loader import web_url_loader from zenml import pipeline +from zenml.config import DockerSettings pipeline_name = "zenml_docs_index_generation" +docker_settings = DockerSettings( + requirements=[ + "langchain==0.0.263", + "openai==0.27.2", + "slack-bolt==1.16.2", + "slack-sdk==3.20.0", + "fastapi", + "flask", + "uvicorn", + "gcsfs==2023.5.0", + "faiss-cpu==1.7.3", + "unstructured==0.5.7", + "tiktoken", + "bs4" + ], + environment={"OPENAI_API_KEY": os.getenv("OPENAI_API_KEY")} +) - -@pipeline(name=pipeline_name) +@pipeline(name=pipeline_name, settings={"docker": docker_settings}) def docs_to_index_pipeline( docs_url: str = "", repo_url: str = "", From 7acd93842ada1576868790fc8ef695862e828f6b Mon Sep 17 00:00:00 2001 From: Alexej Penner Date: Mon, 30 Oct 2023 15:27:32 +0100 Subject: [PATCH 2/5] Update zenml version in requirements --- langchain-llamaindex-slackbot/src/requirements-slackbot.txt | 2 +- langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/langchain-llamaindex-slackbot/src/requirements-slackbot.txt b/langchain-llamaindex-slackbot/src/requirements-slackbot.txt index 62066bd2..24f3dffa 100644 --- a/langchain-llamaindex-slackbot/src/requirements-slackbot.txt +++ b/langchain-llamaindex-slackbot/src/requirements-slackbot.txt @@ -2,7 +2,7 @@ langchain==0.0.263 openai==0.27.2 slack-bolt==1.16.2 slack-sdk==3.20.0 -zenml[connectors-gcp]==0.45.3 +zenml[connectors-gcp]==0.45.5 fastapi flask uvicorn diff --git a/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt b/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt index d0ceb3dc..53f6d8e2 100644 --- a/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt +++ b/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt @@ -2,7 +2,7 @@ langchain>=0.0.125,<=0.0.263 openai>=0.27.2,<=0.27.8 slack-bolt==1.16.2 slack-sdk==3.20.0 -zenml==0.44.1 +zenml==0.45.5 fastapi flask uvicorn From 47df80c2ee4952e62c8e9104935832e15ffbe9de Mon Sep 17 00:00:00 2001 From: Alexej Penner Date: Thu, 2 Nov 2023 11:14:35 +0100 Subject: [PATCH 3/5] Move secrets to zenml secrets store --- .../src/local_testing_slackbot.py | 11 ++++++++--- .../src/requirements-zenml-io-qa.txt | 3 ++- .../src/steps/index_generator.py | 3 +++ .../src/steps/url_scraper.py | 2 +- 4 files changed, 14 insertions(+), 5 deletions(-) diff --git a/langchain-llamaindex-slackbot/src/local_testing_slackbot.py b/langchain-llamaindex-slackbot/src/local_testing_slackbot.py index 7bd8f8f8..3c1fc5f9 100644 --- a/langchain-llamaindex-slackbot/src/local_testing_slackbot.py +++ b/langchain-llamaindex-slackbot/src/local_testing_slackbot.py @@ -18,10 +18,15 @@ get_vector_store, ) from zenml.logger import get_logger +from zenml.client import Client + +SLACK_BOT_TOKEN = (Client().get_secret("langchain_project_secret") + .secret_values["slack_bot_token"]) +SLACK_APP_TOKEN = (Client().get_secret("langchain_project_secret") + .secret_values["slack_app_token"]) +OPENAI_API_KEY = (Client().get_secret("langchain_project_secret") + .secret_values["openai_api_key"]) -SLACK_BOT_TOKEN = os.getenv("SLACK_BOT_TOKEN") -SLACK_APP_TOKEN = os.getenv("SLACK_APP_TOKEN") -OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") PIPELINE_NAME = os.getenv("PIPELINE_NAME", "zenml_docs_index_generation") logger = get_logger(__name__) diff --git a/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt b/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt index 53f6d8e2..1fc6508c 100644 --- a/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt +++ b/langchain-llamaindex-slackbot/src/requirements-zenml-io-qa.txt @@ -2,7 +2,7 @@ langchain>=0.0.125,<=0.0.263 openai>=0.27.2,<=0.27.8 slack-bolt==1.16.2 slack-sdk==3.20.0 -zenml==0.45.5 +zenml==0.45.6 fastapi flask uvicorn @@ -11,3 +11,4 @@ faiss-cpu>=1.7.3,<=1.7.4 unstructured>=0.5.7,<=0.7.8 lanarky==0.7.12 tiktoken +bs4 \ No newline at end of file diff --git a/langchain-llamaindex-slackbot/src/steps/index_generator.py b/langchain-llamaindex-slackbot/src/steps/index_generator.py index 1b3d065d..7e57b888 100644 --- a/langchain-llamaindex-slackbot/src/steps/index_generator.py +++ b/langchain-llamaindex-slackbot/src/steps/index_generator.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express # or implied. See the License for the specific language governing # permissions and limitations under the License. +import os from typing import List @@ -21,10 +22,12 @@ ) from langchain.vectorstores import FAISS, VectorStore from zenml import step +from zenml.client import Client @step(enable_cache=False) def index_generator(documents: List[Document]) -> VectorStore: + os.environ["OPENAI_API_KEY"] = Client().get_secret("langchain_project_secret").secret_values["openai_api_key"] embeddings = OpenAIEmbeddings() text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) diff --git a/langchain-llamaindex-slackbot/src/steps/url_scraper.py b/langchain-llamaindex-slackbot/src/steps/url_scraper.py index fe376aee..cbd8b2be 100644 --- a/langchain-llamaindex-slackbot/src/steps/url_scraper.py +++ b/langchain-llamaindex-slackbot/src/steps/url_scraper.py @@ -16,6 +16,7 @@ from steps.url_scraping_utils import get_all_pages from zenml import step +from zenml.client import Client @step(enable_cache=True) @@ -36,5 +37,4 @@ def url_scraper( Returns: List of URLs to scrape. """ - # examples_readme_urls = get_nested_readme_urls(repo_url) return get_all_pages(docs_url) From 16d9cfcaffae200fdedcb665642bee3a83ddb0f3 Mon Sep 17 00:00:00 2001 From: Alexej Penner Date: Thu, 2 Nov 2023 13:44:05 +0100 Subject: [PATCH 4/5] Removed explicit API_KEY as env var in docker --- langchain-llamaindex-slackbot/src/pipelines/index_builder.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py index 3c71e67c..c67e11b5 100644 --- a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py +++ b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py @@ -34,8 +34,7 @@ "unstructured==0.5.7", "tiktoken", "bs4" - ], - environment={"OPENAI_API_KEY": os.getenv("OPENAI_API_KEY")} + ] ) @pipeline(name=pipeline_name, settings={"docker": docker_settings}) From 1a248875f9096dd384992425435de53eac3c9ac1 Mon Sep 17 00:00:00 2001 From: Alexej Penner Date: Wed, 8 Nov 2023 14:23:28 +0100 Subject: [PATCH 5/5] Docker settings set to download files --- langchain-llamaindex-slackbot/.gitignore | 2 +- langchain-llamaindex-slackbot/src/pipelines/index_builder.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/langchain-llamaindex-slackbot/.gitignore b/langchain-llamaindex-slackbot/.gitignore index 6fc2f233..84ad0a94 100644 --- a/langchain-llamaindex-slackbot/.gitignore +++ b/langchain-llamaindex-slackbot/.gitignore @@ -129,7 +129,7 @@ dmypy.json .pyre/ # Zenml -.zen/ +src/.zen/ # MLflow mlruns/ diff --git a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py index c67e11b5..1335c3b1 100644 --- a/langchain-llamaindex-slackbot/src/pipelines/index_builder.py +++ b/langchain-llamaindex-slackbot/src/pipelines/index_builder.py @@ -18,6 +18,7 @@ from steps.web_url_loader import web_url_loader from zenml import pipeline from zenml.config import DockerSettings +from zenml.config.docker_settings import SourceFileMode pipeline_name = "zenml_docs_index_generation" docker_settings = DockerSettings( @@ -34,7 +35,8 @@ "unstructured==0.5.7", "tiktoken", "bs4" - ] + ], + source_files=SourceFileMode.DOWNLOAD ) @pipeline(name=pipeline_name, settings={"docker": docker_settings})