# This workflow installs Python dependencies, builds a TorchRec wheel on a CPU host, and runs the unit tests on a GPU host
# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
name: Unit Test CI
on:
  # push:
  #   paths-ignore:
  #     - "docs/*"
  #     - "third_party/*"
  #     - .gitignore
  #     - "*.md"
  pull_request:
    # paths-ignore:
    #   - "docs/*"
    #   - "third_party/*"
    #   - .gitignore
    #   - "*.md"
  workflow_dispatch:
jobs:
  # build on cpu hosts and upload to GHA
  build_on_cpu:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    strategy:
      matrix:
        include:
          - os: linux.2xlarge
            # Ideally we would also run on 3.9 and 3.10, but we are limited in resources.
            python-version: 3.8
            python-tag: "py38"
            cuda-tag: "cu118"
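            # To extend the matrix, hypothetical entries (not currently enabled) would look like:
            #   - os: linux.2xlarge
            #     python-version: 3.9
            #     python-tag: "py39"
            #     cuda-tag: "cu118"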
    steps:
      - name: Check ldd --version
        run: ldd --version
      # Checkout the repository to the GitHub Actions runner
      - name: Checkout
        uses: actions/checkout@v2
      - name: Update pip
        run: |
          sudo yum update -y
          sudo yum -y install git python3-pip
          sudo pip3 install --upgrade pip
      - name: Setup conda
        run: |
          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
          bash ~/miniconda.sh -b -p $HOME/miniconda -u
      - name: setup Path
        run: |
          echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
          echo "CONDA=/home/ec2-user/miniconda" >> $GITHUB_ENV
      - name: create conda env
        run: |
          conda create --name build_binary python=${{ matrix.python-version }}
          conda info
      - name: check python version without conda
        run: |
          python --version
      - name: check python version inside conda env
        run: |
          conda run -n build_binary python --version
      - name: Install C/C++ compilers
        run: |
          sudo yum install -y gcc gcc-c++
      - name: Install PyTorch and CUDA
        shell: bash
        run: |
          conda install -n build_binary -y pytorch pytorch-cuda=11.8 -c pytorch-nightly -c nvidia
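      # The nightly conda packages above provide a CUDA 11.8 build of PyTorch,
      # matching the cu118 tag baked into the wheel name later in this job.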
      - name: Install Dependencies
        shell: bash
        run: |
          conda run -n build_binary python -m pip install -r requirements.txt
      - name: Test Installation of dependencies
        run: |
          conda run -n build_binary python -c "import torch.distributed"
          echo "torch.distributed succeeded"
          conda run -n build_binary python -c "import skbuild"
          echo "skbuild succeeded"
          conda run -n build_binary python -c "import numpy"
          echo "numpy succeeded"
      # For conda run commands that contain quotes, the inner quotes have to be
      # escaped with "\" and the argument wrapped in double quotes.
      # See https://github.com/conda/conda/issues/10972
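      # Illustrative (hypothetical) example of that escaping:
      #   conda run -n build_binary python -c "print(\"hello\")"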
      - name: Build TorchRec Binary
        run: |
          export CU_VERSION=${{ matrix.cuda-tag }}
          export CHANNEL="nightly"
          conda run -n build_binary \
            python setup.py bdist_wheel \
            --python-tag=${{ matrix.python-tag }}
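      # setup.py bdist_wheel writes the wheel into dist/, which the next step
      # uploads as a GHA artifact for the GPU job to install.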
      - name: Upload wheel as GHA artifact
        uses: actions/upload-artifact@v2
        with:
          name: torchrec_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
          path: dist/*.whl
  # download from GHA, test on gpu
  test_on_gpu:
    runs-on: ${{ matrix.os }}
    timeout-minutes: 30
    strategy:
      matrix:
        os: [linux.g5.12xlarge.nvidia.gpu]
        python-version: [3.8]
        cuda-tag: ["cu118"]
    needs: build_on_cpu
    # The glibc version on this host should match the one used to build the
    # binary; in this case it is 2.26.
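    # The `Check ldd --version` step below prints the runner's glibc version so
    # that match can be verified in the CI logs.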
    steps:
      - name: Check ldd --version
        run: ldd --version
      - name: check cpu info
        shell: bash
        run: |
          cat /proc/cpuinfo
      - name: check distribution info
        shell: bash
        run: |
          cat /proc/version
      - name: Display EC2 information
        shell: bash
        run: |
          set -euo pipefail
          function get_ec2_metadata() {
            # Pulled from instance metadata endpoint for EC2
            # see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
            category=$1
            curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
          }
          echo "ami-id: $(get_ec2_metadata ami-id)"
          echo "instance-id: $(get_ec2_metadata instance-id)"
          echo "instance-type: $(get_ec2_metadata instance-type)"
      - name: check gpu info
        shell: bash
        run: |
          sudo yum install lshw -y
          sudo lshw -C display
      # Checkout the repository to the GitHub Actions runner
      - name: Checkout
        uses: actions/checkout@v2
      - name: Update pip
        run: |
          sudo yum update -y
          sudo yum -y install git python3-pip
          sudo pip3 install --upgrade pip
      - name: Setup conda
        run: |
          wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
          bash ~/miniconda.sh -b -p $HOME/miniconda
      - name: setup Path
        run: |
          echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
          echo "CONDA=/home/ec2-user/miniconda" >> $GITHUB_ENV
      - name: create conda env
        run: |
          conda create --name build_binary python=${{ matrix.python-version }}
          conda info
      - name: check python version without conda
        run: |
          python --version
      - name: check python version inside conda env
        run: |
          conda run -n build_binary python --version
      - name: Install C/C++ compilers
        run: |
          sudo yum install -y gcc gcc-c++
      - name: Install PyTorch and CUDA
        shell: bash
        run: |
          conda run -n build_binary \
            python -m pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cu118
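      # torch comes from the nightly cu118 wheel index so its CUDA build matches
      # the cu118-tagged TorchRec wheel produced by build_on_cpu.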
      - name: Test torch installation
        shell: bash
        run: |
          conda run -n build_binary \
            python -c "import torch"
      - name: Install FBGEMM
        shell: bash
        run: |
          conda run -n build_binary \
            python -m pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cu118
      - name: Test fbgemm installation
        shell: bash
        run: |
          conda run -n build_binary \
            python -c "import fbgemm_gpu"
      - name: Test cuda
        shell: bash
        run: |
          conda run -n build_binary \
            python -c "import torch; print(torch.cuda.is_available()); print(torch.cuda.device_count())"
          nvidia-smi
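      # The checks above confirm that the driver and CUDA runtime are visible to
      # the runner before any GPU tests run.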
      # download wheel from GHA
      - name: Download wheel
        uses: actions/download-artifact@v2
        with:
          name: torchrec_${{ matrix.python-version }}_${{ matrix.cuda-tag }}.whl
      - name: Display structure of downloaded files
        run: ls -R
      - name: Install TorchRec GPU
        run: |
          rm -r dist || true
          conda run -n build_binary python -m pip install *.whl
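      # download-artifact extracts the wheel into the working directory; any
      # leftover dist/ is removed beforehand as a precaution so `*.whl` picks up
      # the wheel built by build_on_cpu.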
      - name: Install Dependencies
        shell: bash
        run: |
          conda run -n build_binary python -m pip install -r requirements.txt
      - name: Test torchrec installation
        shell: bash
        run: |
          conda run -n build_binary \
            python -c "import torchrec"
      # Run unit tests
      - name: Test with pytest
        run: |
          conda run -n build_binary \
            python -m pip install pytest
          conda run -n build_binary \
            python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors
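      # pytest flags: -v for verbose test names, -s to disable output capture,
      # -W ignore::pytest.PytestCollectionWarning to silence collection warnings,
      # and --continue-on-collection-errors to keep running when some test modules
      # fail to collect.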