Merge branch 'opea-project:main' into add-faqgen-docker-compose-example
astafevav authored Nov 5, 2024
2 parents 40d3c61 + 2d9aeb3 commit 5ca7097
Showing 80 changed files with 1,258 additions and 164 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/_example-workflow.yml
@@ -40,6 +40,11 @@ on:
default: "main"
required: false
type: string
inject_commit:
default: false
required: false
type: string

jobs:
####################################################################################################
# Image Build
@@ -83,6 +88,7 @@ jobs:
          docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
          service_list: ${{ inputs.services }}
          registry: ${OPEA_IMAGE_REPO}opea
          inject_commit: ${{ inputs.inject_commit }}
          tag: ${{ inputs.tag }}

####################################################################################################
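Callers thread the new `inject_commit` input through their `with:` blocks; the manual workflow diffs below show the real call sites. A minimal caller sketch (job name and input values invented for illustration):

```
# Sketch of a job calling the reusable workflow with the new input;
# the example value is hypothetical.
jobs:
  build-chatqna:
    uses: ./.github/workflows/_example-workflow.yml
    with:
      example: ChatQnA
      inject_commit: true
    secrets: inherit
```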
6 changes: 5 additions & 1 deletion .github/workflows/_run-docker-compose.yml
@@ -141,7 +141,11 @@ jobs:
          flag=${flag#test_}
          yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml")
          echo $yaml_file
          docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true
          container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2)
          for container_name in $container_list; do
            cid=$(docker ps -aq --filter "name=$container_name")
            if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
          done
          docker system prune -f
          docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
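The new cleanup loop derives container names straight from the compose file instead of relying on `docker compose rm`. A quick illustration of the extraction, with a hypothetical compose snippet:

```
# Given a compose file containing lines such as:
#   container_name: rag-agent-endpoint
#   container_name: react-agent-endpoint
# the grep/cut pipeline emits the values after the colon; the leading
# space is stripped by word-splitting in the for-loop:
grep container_name compose.yaml | cut -d':' -f2
# -> " rag-agent-endpoint"
# -> " react-agent-endpoint"
```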
6 changes: 6 additions & 0 deletions .github/workflows/manual-example-workflow.yml
@@ -50,6 +50,11 @@ on:
      description: 'OPEA branch for image build'
      required: false
      type: string
    inject_commit:
      default: true
      description: "inject commit into docker images, true or false"
      required: false
      type: string

permissions: read-all
jobs:
@@ -101,4 +106,5 @@ jobs:
      test_k8s: ${{ fromJSON(inputs.test_k8s) }}
      test_gmc: ${{ fromJSON(inputs.test_gmc) }}
      opea_branch: ${{ inputs.opea_branch }}
      inject_commit: ${{ inputs.inject_commit }}
    secrets: inherit
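Assuming the GitHub CLI, the new input can be supplied when dispatching the workflow manually; a sketch, with the other inputs left at their defaults:

```
# Hypothetical manual dispatch; -f sets workflow_dispatch inputs.
gh workflow run manual-example-workflow.yml \
  -f opea_branch=main \
  -f inject_commit=true
```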
7 changes: 7 additions & 0 deletions .github/workflows/manual-image-build.yml
@@ -30,6 +30,12 @@ on:
      description: 'OPEA branch for image build'
      required: false
      type: string
    inject_commit:
      default: true
      description: "inject commit into docker images, true or false"
      required: false
      type: string

jobs:
  get-test-matrix:
    runs-on: ubuntu-latest
@@ -56,4 +62,5 @@ jobs:
      services: ${{ inputs.services }}
      tag: ${{ inputs.tag }}
      opea_branch: ${{ inputs.opea_branch }}
      inject_commit: ${{ inputs.inject_commit }}
    secrets: inherit
71 changes: 71 additions & 0 deletions .github/workflows/nightly-docker-build-publish.yml
@@ -0,0 +1,71 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Nightly build/publish latest docker images

on:
  schedule:
    - cron: "30 1 * * *"
  workflow_dispatch:

env:
  EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"
  TAG: "latest"
  PUBLISH_TAGS: "latest"

jobs:
  get-build-matrix:
    runs-on: ubuntu-latest
    outputs:
      examples_json: ${{ steps.get-matrix.outputs.examples_json }}
      EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }}
      TAG: ${{ steps.get-matrix.outputs.TAG }}
      PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }}
    steps:
      - name: Create Matrix
        id: get-matrix
        run: |
          examples=($(echo ${EXAMPLES} | tr ',' ' '))
          examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
          echo "examples_json=$examples_json" >> $GITHUB_OUTPUT
          echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT
          echo "TAG=$TAG" >> $GITHUB_OUTPUT
          echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
  build:
    needs: get-build-matrix
    strategy:
      matrix:
        example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
      fail-fast: false
    uses: ./.github/workflows/_example-workflow.yml
    with:
      node: gaudi
      example: ${{ matrix.example }}
      inject_commit: true
    secrets: inherit

  get-image-list:
    needs: get-build-matrix
    uses: ./.github/workflows/_get-image-list.yml
    with:
      examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}

  publish:
    needs: [get-build-matrix, get-image-list, build]
    strategy:
      matrix:
        image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
    runs-on: "docker-build-gaudi"
    steps:
      - uses: docker/login-action@<version>
        with:
          username: ${{ secrets.DOCKERHUB_USER }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Image Publish
        uses: opea-project/validation/actions/image-publish@main
        with:
          local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }}
          image_name: opea/${{ matrix.image }}
          publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }}
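The matrix step's shell-to-JSON conversion can be sanity-checked in isolation; a minimal sketch with a shortened, deliberately duplicated example list:

```
# sort -u drops duplicates; jq -R quotes each line as a JSON string,
# and jq -sc collects the strings into a compact array.
EXAMPLES="ChatQnA,CodeGen,ChatQnA"
examples=($(echo ${EXAMPLES} | tr ',' ' '))
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "$examples_json"   # -> ["ChatQnA","CodeGen"]
```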
76 changes: 51 additions & 25 deletions AgentQnA/README.md
@@ -81,17 +81,13 @@ flowchart LR
3. A hierarchical agent design can further improve performance.
Expert worker agents, such as a retrieval agent, a knowledge-graph agent, or a SQL agent, can each provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate that information into a comprehensive answer.

### Roadmap
## Deployment with docker

- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
- v1.0 or later: agents use open-source llm backend.
- v1.1 or later: add safeguards
1. Build agent docker image

## Getting started
Note: this step is optional. The docker images will be pulled automatically when you run the docker compose commands; building them manually is only needed if pulling the images fails.

1. Build agent docker image <br>
First, clone the opea GenAIComps repo
First, clone the opea GenAIComps repo.

```
export WORKDIR=<your-work-directory>
@@ -106,35 +102,63 @@ flowchart LR
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
```

2. Launch tool services <br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

3. Set up environment for this example <br>
First, clone this repo
2. Set up environment for this example <br>
First, clone this repo.

```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```

Second, set up env vars
Second, set up env vars.

```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPENAI_API_KEY
# for using open-source llms
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> # so that LLMs need not be re-downloaded every time
# optional: OPENAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
```

4. Launch agent services<br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use OpenAI GPT-4o-mini as the LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use the OpenAI LLM, run the command below.
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)

First, launch the mega-service.

```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```

Then, ingest data into the vector database. Here we provide an example. You can ingest your own data.

```
bash run_ingest_data.sh
```

4. Launch other tools. <br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

5. Launch agent services<br>
We provide two options for the agents' `llm_engine`: (1) open-source LLMs and (2) OpenAI models via API calls.

To use open-source LLMs on Gaudi2, run the commands below.

```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
bash launch_agent_service_tgi_gaudi.sh
```

To use OpenAI models, run the commands below.

```
cd docker_compose/intel/cpu/xeon
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```

@@ -143,10 +167,12 @@ flowchart LR
First, look at the logs of the agent docker containers:

```
docker logs docgrader-agent-endpoint
# worker agent
docker logs rag-agent-endpoint
```

```
# supervisor agent
docker logs react-agent-endpoint
```

@@ -170,4 +196,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: application/json"

## How to register your own tools with the agent

You can take a look at the tools YAML and Python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
You can take a look at the tools YAML and Python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
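For orientation, a tool entry in the tools YAML pairs a natural-language description with a callable implementation. The sketch below is hypothetical: the field names follow the pattern used in this example's tools files, but the tool, function, and schema are invented.

```
# Hypothetical tool definition, for illustration only.
search_weather:
  description: Get the current weather for a given city.
  callable_api: tools.py:search_weather # Python function implementing the tool
  args_schema:
    city:
      type: str
      description: name of the city
  return_output: weather_info
```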
3 changes: 3 additions & 0 deletions AgentQnA/docker_compose/intel/cpu/xeon/README.md
@@ -0,0 +1,3 @@
# Deployment on Xeon

We deploy the retrieval tool on Xeon. For LLMs, we support OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
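A minimal launch sketch for this deployment, reusing the commands from the main README (export your `OPENAI_API_KEY` first):

```
export OPENAI_API_KEY=<your-openai-key>
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```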
8 changes: 4 additions & 4 deletions AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
@@ -2,11 +2,10 @@
# SPDX-License-Identifier: Apache-2.0

services:
  worker-docgrader-agent:
  worker-rag-agent:
    image: opea/agent-langchain:latest
    container_name: docgrader-agent-endpoint
    container_name: rag-agent-endpoint
    volumes:
      - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
      - ${TOOLSET_PATH}:/home/user/tools/
    ports:
      - "9095:9095"
@@ -36,8 +35,9 @@ services:
  supervisor-react-agent:
    image: opea/agent-langchain:latest
    container_name: react-agent-endpoint
    depends_on:
      - worker-rag-agent
    volumes:
      - ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
      - ${TOOLSET_PATH}:/home/user/tools/
    ports:
      - "9090:9090"
Changes to an additional file (path not shown):
@@ -7,7 +7,7 @@ export recursion_limit_worker=12
export recursion_limit_supervisor=10
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=512
export max_new_tokens=4096
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
39 changes: 5 additions & 34 deletions AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -2,37 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

services:
  tgi-server:
    image: ghcr.io/huggingface/tgi-gaudi:2.0.5
    container_name: tgi-server
    ports:
      - "8085:80"
    volumes:
      - ${HF_CACHE_DIR}:/data
    environment:
      no_proxy: ${no_proxy}
      http_proxy: ${http_proxy}
      https_proxy: ${https_proxy}
      HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
      HF_HUB_DISABLE_PROGRESS_BARS: 1
      HF_HUB_ENABLE_HF_TRANSFER: 0
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      PT_HPU_ENABLE_LAZY_COLLECTIVES: true
      ENABLE_HPU_GRAPH: true
      LIMIT_HPU_GRAPH: true
      USE_FLASH_ATTENTION: true
      FLASH_ATTENTION_RECOMPUTE: true
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
  worker-docgrader-agent:
  worker-rag-agent:
    image: opea/agent-langchain:latest
    container_name: docgrader-agent-endpoint
    depends_on:
      - tgi-server
    container_name: rag-agent-endpoint
    volumes:
      # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
      - ${TOOLSET_PATH}:/home/user/tools/
@@ -41,7 +13,7 @@ services:
    ipc: host
    environment:
      ip_address: ${ip_address}
      strategy: rag_agent
      strategy: rag_agent_llama
      recursion_limit: ${recursion_limit_worker}
      llm_engine: tgi
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -66,8 +38,7 @@ services:
    image: opea/agent-langchain:latest
    container_name: react-agent-endpoint
    depends_on:
      - tgi-server
      - worker-docgrader-agent
      - worker-rag-agent
    volumes:
      # - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
      - ${TOOLSET_PATH}:/home/user/tools/
@@ -76,7 +47,7 @@ services:
    ipc: host
    environment:
      ip_address: ${ip_address}
      strategy: react_langgraph
      strategy: react_llama
      recursion_limit: ${recursion_limit_supervisor}
      llm_engine: tgi
      HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
Changes to an additional file (path not shown):
@@ -15,7 +15,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8085"
export temperature=0.01
export max_new_tokens=512
export max_new_tokens=4096

# agent related environment variables
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
@@ -27,17 +27,3 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080

docker compose -f compose.yaml up -d

sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"