Skip to content

Commit

Permalink
Update gcc version for FBGEMM install in CI (#2654)
Browse files Browse the repository at this point in the history
Summary:

TorchRec CI is currently failing because of an incompatible GLIBCXX version. The cause is that FBGEMM now requires g++ 11.1+ to build binaries that reference GLIBCXX_3.4.29 (as of pytorch/pytorch#141035).

As recommended in https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/utils_build.bash and pytorch/FBGEMM#3423, install GCC through conda so that the GLIBCXX version in use can be controlled.

Differential Revision: D67607624
  • Loading branch information
sarckk authored and facebook-github-bot committed Jan 6, 2025
1 parent 504642a commit a3999b1
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 20 deletions.
4 changes: 2 additions & 2 deletions .github/scripts/install_fbgemm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ echo "CHANNEL"
echo "$CHANNEL"

if [ "$CHANNEL" = "nightly" ]; then
${CONDA_RUN} pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/"$CU_VERSION"
${CONDA_RUN} pip --pre install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/"$CU_VERSION"
elif [ "$CHANNEL" = "test" ]; then
${CONDA_RUN} pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/test/"$CU_VERSION"
${CONDA_RUN} pip --pre install fbgemm-gpu --index-url https://download.pytorch.org/whl/test/"$CU_VERSION"
fi
26 changes: 23 additions & 3 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ jobs:
- name: Setup conda
run: |
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p $HOME/miniconda
bash ~/miniconda.sh -b -p $HOME/miniconda -u
conda update -n base -c defaults -y conda
- name: setup Path
run: |
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
Expand All @@ -51,7 +52,26 @@ jobs:
- name: Install gcc
shell: bash
run: |
sudo apt-get install build-essential
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
cc_path=$(conda run -n build_binary printenv CC)
cxx_path=$(conda run -n build_binary printenv CXX)
ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
echo "[TEST] Enumerating libstdc++.so files ..."
all_libcxx_libs=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
for f in $all_libcxx_libs; do
echo "$f";
objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
echo ""
done
echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
- name: setup Path
run: |
echo /usr/local/bin >> $GITHUB_PATH
Expand All @@ -61,7 +81,7 @@ jobs:
conda install -n build_binary --yes pytorch cpuonly -c pytorch-nightly
- name: Install fbgemm
run: |
conda run -n build_binary pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
conda run -n build_binary pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
- name: Install torchmetrics
run: |
conda run -n build_binary pip install torchmetrics==1.0.3
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pyre.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Install dependencies
run: >
pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu &&
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu &&
pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu &&
pip install -r requirements.txt &&
pip install pyre-check-nightly==$(cat .pyre_configuration | grep version | awk '{print $2}' | sed 's/\"//g')
- name: Pyre check
Expand Down
30 changes: 25 additions & 5 deletions .github/workflows/release_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
run: |
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p $HOME/miniconda -u
conda update -n base -c defaults -y conda
- name: setup Path
run: |
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
Expand All @@ -62,9 +63,28 @@ jobs:
- name: check python version
run: |
conda run -n build_binary python --version
- name: Install C/C++ compilers
run: |
sudo yum install -y gcc gcc-c++
- name: Install gcc
run: |
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
cc_path=$(conda run -n build_binary printenv CC)
cxx_path=$(conda run -n build_binary printenv CXX)
ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
echo "[TEST] Enumerating libstdc++.so files ..."
all_libcxx_libs=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
for f in $all_libcxx_libs; do
echo "$f";
objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
echo ""
done
echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
- name: Install PyTorch and CUDA
shell: bash
run: |
Expand All @@ -73,7 +93,7 @@ jobs:
shell: bash
run: |
conda run -n build_binary pip install numpy
conda run -n build_binary pip install fbgemm-gpu
conda run -n build_binary pip install --pre fbgemm-gpu
- name: Install Dependencies
shell: bash
run: |
Expand Down Expand Up @@ -179,7 +199,7 @@ jobs:
shell: bash
run: |
conda run -n build_binary pip install numpy
conda run -n build_binary pip install fbgemm-gpu
conda run -n build_binary pip install --pre fbgemm-gpu
- name: Install torchmetrics
shell: bash
run: |
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/unittest_ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ jobs:
conda run -n build_binary \
python -c "import torch.distributed"
conda run -n build_binary \
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/${{ matrix.cuda-tag }}
conda run -n build_binary \
python -c "import fbgemm_gpu"
echo "fbgemm_gpu succeeded"
Expand Down
33 changes: 27 additions & 6 deletions .github/workflows/unittest_ci_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,41 @@ jobs:
conda info
python --version
conda run -n build_binary python --version
echo "[INSTALL] Installing gcc..."
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
cc_path=$(conda run -n build_binary printenv CC)
cxx_path=$(conda run -n build_binary printenv CXX)
ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
echo "[INSTALL] Enumerating libstdc++.so files ..."
all_libcxx_libs=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
for f in $all_libcxx_libs; do
echo "$f";
objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
echo ""
done
echo "[INSTALL] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]}"
conda run -n build_binary \
pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu
conda run -n build_binary \
python -c "import torch"
python -c "import torch; print(torch.__version__, torch.version.cuda); "
echo "torch succeeded"
conda run -n build_binary \
python -c "import torch.distributed"
conda run -n build_binary \
pip install fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
pip install --pre fbgemm-gpu --index-url https://download.pytorch.org/whl/nightly/cpu
conda run -n build_binary \
python -c "import fbgemm_gpu"
python -c "import torch; import fbgemm_gpu; print(fbgemm_gpu.__version__)"
echo "fbgemm_gpu succeeded"
conda run -n build_binary \
pip install -r requirements.txt
Expand All @@ -73,9 +97,6 @@ jobs:
python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
--ignore-glob=**/test_utils/
echo "Starting C++ Tests"
conda install -n build_binary -y gxx_linux-64
conda run -n build_binary \
x86_64-conda-linux-gnu-g++ --version
conda install -n build_binary -c anaconda redis -y
conda run -n build_binary redis-server --daemonize yes
mkdir cpp-build
Expand Down
8 changes: 6 additions & 2 deletions torchrec/distributed/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,9 @@ def create_global_tensor_shape_stride_from_metadata(
"""
size = None
if parameter_sharding.sharding_type == ShardingType.COLUMN_WISE.value:
row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[0] # pyre-ignore[16]
row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[
0
] # pyre-ignore[16]
col_dim = 0
for shard in parameter_sharding.sharding_spec.shards:
col_dim += shard.shard_sizes[1]
Expand All @@ -551,4 +553,6 @@ def create_global_tensor_shape_stride_from_metadata(
for _ in range(devices_per_node):
row_dim += parameter_sharding.sharding_spec.shards[0].shard_sizes[0]
size = torch.Size([row_dim, col_dim])
return size, (size[1], 1) if size else (torch.Size([0, 0]), (0, 1)) # pyre-ignore[7]
return size, (
(size[1], 1) if size else (torch.Size([0, 0]), (0, 1))
) # pyre-ignore[7]

0 comments on commit a3999b1

Please sign in to comment.