# Tightening groundedness prompt #433
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# End-to-end test workflow.
#
# Builds and starts the datadb, recipedb, server and chat services with
# Docker Compose, builds and runs a separate `promptflow` container on the
# same Docker network, executes a promptflow run named `base_run` inside it,
# and finally runs check_evaluation_results.py against that run.
name: End-to-End tests

# `on` is handled as a plain key by GitHub's workflow loader.
on:
  push:
    branches:
      - develop
  pull_request:
    branches:
      - master
      - main

jobs:
  test:
    runs-on: ubuntu-latest
    # Deployment environment that supplies the secrets referenced below.
    environment: "GitHub Actions 1"
    # Job-level environment: visible to every step, and dumped to `.env`
    # by the steps that run `env > .env`.
    env:
      ASSISTANTS_API_KEY: ${{ secrets.ASSISTANTS_API_KEY }}
      ASSISTANTS_API_VERSION: ${{ secrets.ASSISTANTS_API_VERSION }}
      ASSISTANTS_API_TYPE: ${{ secrets.ASSISTANTS_API_TYPE }}
      ASSISTANTS_ID: ${{ secrets.ASSISTANTS_ID }}
      ASSISTANTS_BASE_URL: ${{ secrets.ASSISTANTS_BASE_URL }}
      ASSISTANTS_MODEL: ${{ secrets.ASSISTANTS_MODEL }}
      ASSISTANTS_BOT_NAME: ${{ secrets.ASSISTANTS_BOT_NAME }}
      POSTGRES_DATA_HOST: ${{ secrets.POSTGRES_DATA_HOST }}
      POSTGRES_DATA_PORT: ${{ secrets.POSTGRES_DATA_PORT }}
      POSTGRES_DATA_DB: ${{ secrets.POSTGRES_DATA_DB }}
      POSTGRES_DATA_USER: ${{ secrets.POSTGRES_DATA_USER }}
      POSTGRES_DATA_PASSWORD: ${{ secrets.POSTGRES_DATA_PASSWORD }}
      DATA_DB_CONN_STRING: ${{ secrets.DATA_DB_CONN_STRING }}
      POSTGRES_RECIPE_HOST: ${{ secrets.POSTGRES_RECIPE_HOST }}
      POSTGRES_RECIPE_PORT: ${{ secrets.POSTGRES_RECIPE_PORT }}
      POSTGRES_RECIPE_DB: ${{ secrets.POSTGRES_RECIPE_DB }}
      POSTGRES_RECIPE_USER: ${{ secrets.POSTGRES_RECIPE_USER }}
      POSTGRES_RECIPE_PASSWORD: ${{ secrets.POSTGRES_RECIPE_PASSWORD }}
      # Assembled from the individual POSTGRES_RECIPE_* secrets.
      RECIPE_DB_CONN_STRING: "postgresql://${{ secrets.POSTGRES_RECIPE_USER }}:${{ secrets.POSTGRES_RECIPE_PASSWORD }}@${{ secrets.POSTGRES_RECIPE_HOST }}:${{ secrets.POSTGRES_RECIPE_PORT }}/${{ secrets.POSTGRES_RECIPE_DB }}"
      RECIPES_OPENAI_API_TYPE: ${{ secrets.RECIPES_OPENAI_API_TYPE }}
      RECIPES_OPENAI_API_KEY: ${{ secrets.RECIPES_OPENAI_API_KEY }}
      RECIPES_MODEL: ${{ secrets.RECIPES_MODEL }}
      RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME: ${{ secrets.RECIPES_OPENAI_TEXT_COMPLETION_DEPLOYMENT_NAME }}
      RECIPES_OPENAI_API_ENDPOINT: ${{ secrets.RECIPES_OPENAI_API_ENDPOINT }}
      RECIPES_OPENAI_API_VERSION: ${{ secrets.RECIPES_OPENAI_API_VERSION }}
      RECIPES_BASE_URL: ${{ secrets.RECIPES_BASE_URL }}
      RECIPES_MEMORY_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_MEMORY_SIMILARITY_CUTOFF }}
      RECIPES_RECIPE_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_RECIPE_SIMILARITY_CUTOFF }}
      RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF: ${{ secrets.RECIPES_HELPER_FUNCTION_SIMILARITY_CUTOFF }}
      RECIPES_MODEL_TEMP: ${{ secrets.RECIPES_MODEL_TEMP }}
      RECIPES_MODEL_MAX_TOKENS: ${{ secrets.RECIPES_MODEL_MAX_TOKENS }}
      IMAGE_HOST: ${{ secrets.IMAGE_HOST }}
      RECIPE_SERVER_API: ${{ secrets.RECIPE_SERVER_API }}
      CHAT_URL: ${{ secrets.CHAT_URL }}
      CHAINLIT_AUTH_SECRET: ${{ secrets.CHAINLIT_AUTH_SECRET }}
      USER_LOGIN: ${{ secrets.USER_LOGIN }}
      USER_PASSWORD: ${{ secrets.USER_PASSWORD }}

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Expose GitHub Runtime
        uses: crazy-max/ghaction-github-runtime@v2

      - name: Spin up Docker containers
        run: |
          # Dump the step environment (including the secrets above) to .env.
          # NOTE(review): presumably consumed by docker compose - confirm.
          env > .env

          echo "Installing demo data ..."
          # Get demo data
          pip3 install gdown==5.2.0
          cd data && python3 download_demo_data.py && cd ..

          # TODO this should be enhanced to use a buildx bake to leverage layer caching for faster builds, or push to repo and simply have a pull for the run
          # TODO docker-compose files should be refactored to use scopes instead of different versions for each environment
          echo "Starting docker containers for dbs and server ..."
          # Use the Compose v2 plugin (`docker compose`), matching the log
          # steps below; the standalone v1 `docker-compose` binary is no
          # longer available on current ubuntu-latest runner images.
          docker compose pull
          docker compose up -d --build datadb recipedb server chat

          # TODO: For some reason, maybe buildkit, in Github docker compose builds the image differently, and it doesn't work. Individual image build works.
          # NOTE(review): build args are recorded in the image metadata, so
          # the API key passed here ends up in the image history.
          docker build \
            --build-arg OPENAI_API_KEY="$ASSISTANTS_API_KEY" \
            --build-arg OPENAI_API_ENDPOINT="$ASSISTANTS_BASE_URL" \
            --build-arg OPENAI_API_VERSION="$ASSISTANTS_API_VERSION" \
            --build-arg CHAT_URL="$CHAT_URL" \
            --no-cache -t promptflow -f ./flows/chainlit-ui-evaluation/Dockerfile .

          # NOTE(review): RECIPES_OPENAI_API_VERSION is fed from
          # ASSISTANTS_API_VERSION although a RECIPES_OPENAI_API_VERSION value
          # exists in the job env - confirm this is intentional.
          docker run \
            --env RECIPES_MODEL_MAX_TOKENS="${RECIPES_MODEL_MAX_TOKENS}" \
            --env RECIPES_MODEL_TEMP="${RECIPES_MODEL_TEMP}" \
            --env RECIPES_OPENAI_API_TYPE="${RECIPES_OPENAI_API_TYPE}" \
            --env RECIPES_OPENAI_API_KEY="${RECIPES_OPENAI_API_KEY}" \
            --env RECIPES_OPENAI_API_VERSION="${ASSISTANTS_API_VERSION}" \
            --env RECIPES_MODEL="${RECIPES_MODEL}" \
            --env RECIPES_BASE_URL="${RECIPES_BASE_URL}" \
            --env USER_LOGIN="${USER_LOGIN}" \
            --env USER_PASSWORD="${USER_PASSWORD}" \
            --env CHAT_URL="${CHAT_URL}" \
            --network=data-recipes-ai_default -d --name promptflow promptflow

      - name: Check logs
        run: |
          docker ps
          echo "logs datadb ..."
          docker compose logs datadb
          echo "logs promptflow ..."
          docker logs promptflow
          echo "logs chat ..."
          docker compose logs chat
          echo "logs server ..."
          docker compose logs server
          docker ps
          # Pause before the test step.
          # NOTE(review): presumably gives the services time to start - confirm.
          sleep 10

      # Debugging GitHub actions interactively, by connecting to the runner ...
      # Get ssh connection details for runner.
      # See here https://github.com/marketplace/actions/debugging-with-ssh
      # Basically, uncomment this, then get connection string in actions output, then connect with
      #
      # ssh -i <YOUR GITHUB SSH KEY> <CONN STRING ON ACTIONS>
      #
      # - name: DEBUG - Setup upterm session
      #   uses: lhotari/action-upterm@v1
      # - name: DEBUG - Run Selenium outside of promptflow
      #   run: |
      #     docker exec promptflow python call_assistant.py

      - name: Run tests
        run: |
          env > .env
          # Single quotes keep the ${data.*} placeholders literal so that
          # promptflow, not the shell, resolves them.
          docker exec promptflow pf run create --flow . --data ./data.jsonl --stream \
            --column-mapping query='${data.query}' context='${data.context}' chat_history='${data.chat_history}' \
            --name base_run

      - name: Check logs post-tests
        # Still print the logs when the test step failed.
        if: ${{ !cancelled() }}
        run: |
          docker ps
          echo "logs datadb ..."
          docker compose logs datadb
          echo "logs promptflow ..."
          docker logs promptflow
          echo "logs chat ..."
          docker compose logs chat
          echo "logs server ..."
          docker compose logs server

      - name: Show results
        run: |
          docker exec promptflow pf run show-details -n base_run
          echo "Getting metrics ..."
          docker exec promptflow pf run show-metrics -n base_run
          # docker exec promptflow pf run visualize -n base_run
          echo "Checking results ..."
          docker exec promptflow python3 check_evaluation_results.py