# .github/workflows/test.yml

# Workflow that runs the Python test jobs on GitHub-hosted macOS runners.
name: Python Tests on M1 Mac

# Trigger on pushes to main and on pull requests that target main.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

# Least privilege: the jobs only need to read the repository contents.
permissions:
  contents: read

jobs:
  # Installs the package and runs the exo.inference.test_inference_engine
  # module on a macos-14 runner.
  unit_test:
    runs-on: macos-14
    steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: '3.12'

    # hashFiles() only matches files inside GITHUB_WORKSPACE, so hashing the
    # cache directory under $HOME always produced an empty string. Use an
    # explicit, versioned key instead; bump the suffix to discard the cache.
    # restore-keys lets a cache saved under an older key still be reused.
    - name: Cache huggingface hub models
      uses: actions/cache@v4
      with:
        path: ~/.cache/huggingface/hub
        key: ${{ runner.os }}-huggingface-hub-${{ github.job }}-v1
        restore-keys: |
          ${{ runner.os }}-huggingface-hub-

    - name: Install dependencies
      run: |
        python3 -m pip install --upgrade pip
        pip install .

    - name: Run tests
      run: |
        # Informational only: list cached model files, never fail on a cold cache
        ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true

        # Run unit tests
        METAL_XCODE=1 python3 -m exo.inference.test_inference_engine

  # Starts two local instances, each broadcasting to the other's listen port,
  # and passes only if both logs contain "Connected to peer".
  discovery_integration_test:
    runs-on: macos-latest
    steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Pinned to the same version as unit_test; '3.x' floats to whatever
        # release is newest at run time.
        python-version: '3.12'

    - name: Install dependencies
      run: |
        python3 -m pip install --upgrade pip
        pip install .

    - name: Run discovery integration test
      run: |
        # Start first instance
        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 > output1.log 2>&1 &
        PID1=$!

        # Start second instance
        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 > output2.log 2>&1 &
        PID2=$!

        # Wait for discovery
        sleep 10

        # Stop both instances. The default run shell is `bash -e`, so tolerate
        # an instance that has already exited; otherwise the step would abort
        # here and the logs below would never be printed.
        kill $PID1 $PID2 || true

        # Check outputs
        if grep -q "Connected to peer" output1.log && grep -q "Connected to peer" output2.log; then
          echo "Test passed: Both instances discovered each other"
          exit 0
        else
          echo "Test failed: Devices did not discover each other"
          echo "Output of first instance:"
          cat output1.log
          echo "Output of second instance:"
          cat output2.log
          exit 1
        fi

  # Starts two local instances, sends chat-completion requests to the API on
  # port 8000, and passes only if both checked responses contain
  # "Michael Jackson".
  chatgpt_api_integration_test:
    runs-on: macos-latest
    steps:
    - uses: actions/checkout@v4

    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        # Pinned to the same version as unit_test; '3.x' floats to whatever
        # release is newest at run time.
        python-version: '3.12'

    # hashFiles() only matches files inside GITHUB_WORKSPACE, so hashing a
    # directory under $HOME always produced an empty string. Use explicit,
    # versioned keys instead; bump the suffix to discard a cache.
    - name: Cache huggingface hub models
      uses: actions/cache@v4
      with:
        path: ~/.cache/huggingface/hub
        key: ${{ runner.os }}-huggingface-hub-${{ github.job }}-v1
        restore-keys: |
          ${{ runner.os }}-huggingface-hub-

    - name: Cache tinygrad downloaded models
      uses: actions/cache@v4
      with:
        path: ~/Library/Caches/tinygrad/downloads
        key: ${{ runner.os }}-tinygrad-downloads-${{ github.job }}-v1
        restore-keys: |
          ${{ runner.os }}-tinygrad-downloads-

    - name: Install dependencies
      run: |
        python3 -m pip install --upgrade pip
        pip install .

    - name: Run chatgpt api integration test
      run: |
        # Informational only: list cached model files, never fail on a cold cache
        ls ~/.cache/huggingface/hub/models--mlx-community--Meta-Llama-3-8B-Instruct-4bit/**/* || true

        # Start first instance
        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5678 --broadcast-port 5679 --chatgpt-api-port 8000 --chatgpt-api-response-timeout-secs 900 > output1.log 2>&1 &
        PID1=$!

        # Start second instance
        DEBUG_DISCOVERY=9 DEBUG=9 python3 main.py --listen-port 5679 --broadcast-port 5678 --chatgpt-api-port 8001 --chatgpt-api-response-timeout-secs 900 > output2.log 2>&1 &
        PID2=$!

        # Wait for discovery
        sleep 10

        # The default run shell is `bash -e`: a failing curl would abort the
        # step before any logs are printed. Tolerate curl errors here and let
        # the response check at the end report the failure with full logs.

        # first one to load the model
        curl -s http://localhost:8000/v1/chat/completions \
            -H "Content-Type: application/json" \
            -d '{
              "model": "llama-3-8b",
              "messages": [{"role": "user", "content": "Keep responses concise. Placeholder to load model..."}],
              "temperature": 0.7
            }' || true

        response_1=$(curl -s http://localhost:8000/v1/chat/completions \
          -H "Content-Type: application/json" \
          -d '{
            "model": "llama-3-8b",
            "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
            "temperature": 0.7
          }') || true
        echo "Response 1: $response_1"

        # NOTE(review): this request also targets port 8000, although the
        # success message below mentions both nodes - confirm whether port
        # 8001 was intended.
        response_2=$(curl -s http://localhost:8000/v1/chat/completions \
          -H "Content-Type: application/json" \
          -d '{
            "model": "llama-3-8b",
            "messages": [{"role": "user", "content": "Keep responses concise. Who was the king of pop?"}],
            "temperature": 0.7
          }') || true
        echo "Response 2: $response_2"

        # Stop both instances; tolerate an instance that has already exited so
        # the diagnostics below are still printed.
        kill $PID1 $PID2 || true

        echo ""
        if ! echo "$response_1" | grep -q "Michael Jackson" || ! echo "$response_2" | grep -q "Michael Jackson"; then
          echo "Test failed: Response does not contain 'Michael Jackson'"
          echo "Response 1: $response_1"
          echo ""
          echo "Response 2: $response_2"
          echo "Output of first instance:"
          cat output1.log
          echo "Output of second instance:"
          cat output2.log
          exit 1
        else
          echo "Test passed: Response from both nodes contains 'Michael Jackson'"
        fi