# Patch: install pre-release fbgemm-gpu wheels in CI and switch the CI jobs to
# a Conda-provided GCC 11.4 toolchain (with its libstdc++ preloaded).
#
# Notes for whoever applies this patch:
#   * `--pre` is an option of the `pip install` sub-command, so it is written
#     as `pip install --pre ...` everywhere (pip rejects `pip --pre install`).
#   * `all_libcxx_libs` is read with `mapfile -t` so that it is a real bash
#     array: `${all_libcxx_libs[0]}` is then the first libstdc++ path only,
#     instead of the whole newline-joined `find` output.
#   * `conda update` is invoked through "$HOME/miniconda/bin/conda" because
#     that step runs before the miniconda bin directory is added to
#     $GITHUB_PATH.
#   * Leading whitespace of the hunks was reconstructed with conventional
#     indentation, and the `index` post-image hashes predate the fixes above;
#     re-verify against the target files (or regenerate) before applying.
diff --git a/.github/scripts/install_fbgemm.sh b/.github/scripts/install_fbgemm.sh
index 08765d62f..acab6e61c 100644
--- a/.github/scripts/install_fbgemm.sh
+++ b/.github/scripts/install_fbgemm.sh
@@ -12,7 +12,7 @@ echo "CHANNEL"
 echo "$CHANNEL"
 
 if [ "$CHANNEL" = "nightly" ]; then
-    ${CONDA_RUN} pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/"$CU_VERSION"
+    ${CONDA_RUN} pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/"$CU_VERSION"
 elif [ "$CHANNEL" = "test" ]; then
-    ${CONDA_RUN} pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/test/"$CU_VERSION"
+    ${CONDA_RUN} pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/test/"$CU_VERSION"
 fi
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 51217790e..275e0b1fd 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -33,7 +33,8 @@ jobs:
     - name: Setup conda
       run: |
         wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
-        bash ~/miniconda.sh -b -p $HOME/miniconda
+        bash ~/miniconda.sh -b -p $HOME/miniconda -u
+        "$HOME/miniconda/bin/conda" update -n base -c defaults -y conda
     - name: setup Path
       run: |
         echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
@@ -51,7 +52,26 @@ jobs:
     - name: Install gcc
       shell: bash
       run: |
-        sudo apt-get install build-essential
+        conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
+        echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
+        cc_path=$(conda run -n build_binary printenv CC)
+        cxx_path=$(conda run -n build_binary printenv CXX)
+        ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
+        ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
+        ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
+        ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
+
+        conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
+        echo "[TEST] Enumerating libstdc++.so files ..."
+        mapfile -t all_libcxx_libs < <(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
+        for f in "${all_libcxx_libs[@]}"; do
+          echo "$f";
+          objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
+          echo ""
+        done
+
+        echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
+        conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
     - name: setup Path
       run: |
         echo /usr/local/bin >> $GITHUB_PATH
@@ -61,7 +81,7 @@ jobs:
         conda install -n build_binary --yes pytorch cpuonly -c pytorch-nightly
     - name: Install fbgemm
       run: |
-        conda run -n build_binary pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
+        conda run -n build_binary pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
     - name: Install torchmetrics
       run: |
         conda run -n build_binary pip install torchmetrics==1.0.3
diff --git a/.github/workflows/pyre.yml b/.github/workflows/pyre.yml
index ed25404e7..e9cae9ca0 100644
--- a/.github/workflows/pyre.yml
+++ b/.github/workflows/pyre.yml
@@ -20,7 +20,7 @@ jobs:
     - name: Install dependencies
       run: >
         pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu &&
-        pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu &&
+        pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu &&
         pip install -r requirements.txt &&
         pip install pyre-check-nightly==$(cat .pyre_configuration | grep version | awk '{print $2}' | sed 's/\"//g')
     - name: Pyre check
diff --git a/.github/workflows/release_build.yml b/.github/workflows/release_build.yml
index 1ea837d4b..f6431d2f6 100644
--- a/.github/workflows/release_build.yml
+++ b/.github/workflows/release_build.yml
@@ -48,6 +48,7 @@ jobs:
       run: |
         wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
         bash ~/miniconda.sh -b -p $HOME/miniconda -u
+        "$HOME/miniconda/bin/conda" update -n base -c defaults -y conda
     - name: setup Path
       run: |
         echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
@@ -62,9 +63,28 @@ jobs:
     - name: check python version
       run: |
         conda run -n build_binary python --version
-    - name: Install C/C++ compilers
-      run: |
-        sudo yum install -y gcc gcc-c++
+    - name: Install gcc
+      run: |
+        conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
+        echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
+        cc_path=$(conda run -n build_binary printenv CC)
+        cxx_path=$(conda run -n build_binary printenv CXX)
+        ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
+        ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
+        ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
+        ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
+
+        conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
+        echo "[TEST] Enumerating libstdc++.so files ..."
+        mapfile -t all_libcxx_libs < <(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
+        for f in "${all_libcxx_libs[@]}"; do
+          echo "$f";
+          objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
+          echo ""
+        done
+
+        echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
+        conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
     - name: Install PyTorch and CUDA
       shell: bash
       run: |
@@ -73,7 +93,7 @@ jobs:
       shell: bash
       run: |
         conda run -n build_binary pip install numpy
-        conda run -n build_binary pip install fbgemm-gpu
+        conda run -n build_binary pip install --pre fbgemm-gpu
     - name: Install Dependencies
       shell: bash
       run: |
@@ -179,7 +199,7 @@ jobs:
       shell: bash
       run: |
         conda run -n build_binary pip install numpy
-        conda run -n build_binary pip install fbgemm-gpu
+        conda run -n build_binary pip install --pre fbgemm-gpu
     - name: Install torchmetrics
       shell: bash
       run: |
diff --git a/.github/workflows/unittest_ci.yml b/.github/workflows/unittest_ci.yml
index 0b3fe0638..7c4b92eaa 100644
--- a/.github/workflows/unittest_ci.yml
+++ b/.github/workflows/unittest_ci.yml
@@ -81,7 +81,7 @@ jobs:
         conda run -n build_binary \
           python -c "import torch.distributed"
         conda run -n build_binary \
-          pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
+          pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
         conda run -n build_binary \
           python -c "import fbgemm_gpu"
         echo "fbgemm_gpu succeeded"
diff --git a/.github/workflows/unittest_ci_cpu.yml b/.github/workflows/unittest_ci_cpu.yml
index 1efe64178..cb00b1d23 100644
--- a/.github/workflows/unittest_ci_cpu.yml
+++ b/.github/workflows/unittest_ci_cpu.yml
@@ -45,17 +45,41 @@ jobs:
         conda info
         python --version
         conda run -n build_binary python --version
+
+        echo "[INSTALL] Installing gcc..."
+        conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
+
+        echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
+        cc_path=$(conda run -n build_binary printenv CC)
+        cxx_path=$(conda run -n build_binary printenv CXX)
+        ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
+        ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
+        ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
+        ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
+
+        conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
+        echo "[INSTALL] Enumerating libstdc++.so files ..."
+        mapfile -t all_libcxx_libs < <(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
+        for f in "${all_libcxx_libs[@]}"; do
+          echo "$f";
+          objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
+          echo ""
+        done
+
+        echo "[INSTALL] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
+        conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
+
         conda run -n build_binary \
           pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu
         conda run -n build_binary \
-          python -c "import torch"
+          python -c "import torch; print(torch.__version__, torch.version.cuda); "
         echo "torch succeeded"
         conda run -n build_binary \
           python -c "import torch.distributed"
         conda run -n build_binary \
-          pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
+          pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
         conda run -n build_binary \
-          python -c "import fbgemm_gpu"
+          python -c "import torch; import fbgemm_gpu; print(fbgemm_gpu.__version__)"
         echo "fbgemm_gpu succeeded"
         conda run -n build_binary \
           pip install -r requirements.txt
@@ -73,9 +97,6 @@ jobs:
           python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
           --ignore-glob=**/test_utils/
         echo "Starting C++ Tests"
-        conda install -n build_binary -y gxx_linux-64
-        conda run -n build_binary \
-          x86_64-conda-linux-gnu-g++ --version
         conda install -n build_binary -c anaconda redis -y
         conda run -n build_binary redis-server --daemonize yes
         mkdir cpp-build
diff --git a/torchrec/distributed/utils.py b/torchrec/distributed/utils.py
index 8a3db1209..830fef412 100644
--- a/torchrec/distributed/utils.py
+++ b/torchrec/distributed/utils.py
@@ -525,7 +525,9 @@ def create_global_tensor_shape_stride_from_metadata(
     """
     size = None
     if parameter_sharding.sharding_type == ShardingType.COLUMN_WISE.value:
-        row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[0]  # pyre-ignore[16]
+        row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[
+            0
+        ]  # pyre-ignore[16]
         col_dim = 0
         for shard in parameter_sharding.sharding_spec.shards:
             col_dim += shard.shard_sizes[1]
@@ -551,4 +553,6 @@ def create_global_tensor_shape_stride_from_metadata(
         for _ in range(devices_per_node):
             row_dim += parameter_sharding.sharding_spec.shards[0].shard_sizes[0]
         size = torch.Size([row_dim, col_dim])
-    return size, (size[1], 1) if size else (torch.Size([0, 0]), (0, 1))  # pyre-ignore[7]
+    return size, (
+        (size[1], 1) if size else (torch.Size([0, 0]), (0, 1))
+    )  # pyre-ignore[7]