Skip to content

Commit

Permalink
Update gcc version for FBGEMM install in CI (#2654)
Browse files Browse the repository at this point in the history
Summary:

TorchRec CI is currently failing because of an incompatible GLIBCXX version. The cause is that FBGEMM now requires g++ 11.1+ to build binaries that reference GLIBCXX_3.4.29 (as of pytorch/pytorch#141035).

As recommended in https://github.com/pytorch/FBGEMM/blob/main/.github/scripts/utils_build.bash and pytorch/FBGEMM#3423, install GCC using conda to control the GLIBCXX version being used.

Differential Revision: D67607624
  • Loading branch information
sarckk authored and facebook-github-bot committed Jan 3, 2025
1 parent 00d8ed2 commit b0167a6
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 10 deletions.
24 changes: 22 additions & 2 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ jobs:
- name: Setup conda
run: |
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p $HOME/miniconda
bash ~/miniconda.sh -b -p $HOME/miniconda -u
conda update -n base -c defaults -y conda
- name: setup Path
run: |
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
Expand All @@ -51,7 +52,26 @@ jobs:
- name: Install gcc
shell: bash
run: |
sudo apt-get install build-essential
# Install a pinned GCC toolchain from conda-forge into the build_binary env so
# the libstdc++ (GLIBCXX) version used by the build is controlled by conda.
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
# CC/CXX are read from inside the env; the conventional names (cc, gcc, c++,
# g++) are then symlinked next to the binaries those variables point at.
cc_path=$(conda run -n build_binary printenv CC)
cxx_path=$(conda run -n build_binary printenv CXX)
ln -sf "${cc_path}" "$(dirname "${cc_path}")/cc"
ln -sf "${cc_path}" "$(dirname "${cc_path}")/gcc"
ln -sf "${cxx_path}" "$(dirname "${cxx_path}")/c++"
ln -sf "${cxx_path}" "$(dirname "${cxx_path}")/g++"
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
echo "[TEST] Enumerating libstdc++.so files ..."
# Capture the listing with a command substitution first so a failing
# find/sort still surfaces through the assignment's exit status, then split it
# into a real array, one path per element. Previously this was a plain string,
# so "${all_libcxx_libs[0]}" expanded to the whole newline-joined listing.
libcxx_listing=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
all_libcxx_libs=()
if [ -n "${libcxx_listing}" ]; then
  mapfile -t all_libcxx_libs <<< "${libcxx_listing}"
fi
for f in "${all_libcxx_libs[@]}"; do
  echo "$f"
  # Print the distinct GLIBCXX_x.y.z symbol versions exported by this library.
  objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
  echo ""
done
echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
# Only the first (lowest-sorted) library is preloaded; this is empty if none was found.
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]:-}"
- name: setup Path
run: |
echo /usr/local/bin >> $GITHUB_PATH
Expand Down
26 changes: 23 additions & 3 deletions .github/workflows/release_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ jobs:
run: |
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p $HOME/miniconda -u
conda update -n base -c defaults -y conda
- name: setup Path
run: |
echo "/home/ec2-user/miniconda/bin" >> $GITHUB_PATH
Expand All @@ -62,9 +63,28 @@ jobs:
- name: check python version
run: |
conda run -n build_binary python --version
- name: Install C/C++ compilers
run: |
sudo yum install -y gcc gcc-c++
- name: Install gcc
run: |
# Install a pinned GCC toolchain from conda-forge into the build_binary env so
# the libstdc++ (GLIBCXX) version used by the build is controlled by conda.
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
# CC/CXX are read from inside the env; the conventional names (cc, gcc, c++,
# g++) are then symlinked next to the binaries those variables point at.
cc_path=$(conda run -n build_binary printenv CC)
cxx_path=$(conda run -n build_binary printenv CXX)
ln -sf "${cc_path}" "$(dirname "${cc_path}")/cc"
ln -sf "${cc_path}" "$(dirname "${cc_path}")/gcc"
ln -sf "${cxx_path}" "$(dirname "${cxx_path}")/c++"
ln -sf "${cxx_path}" "$(dirname "${cxx_path}")/g++"
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
echo "[TEST] Enumerating libstdc++.so files ..."
# Capture the listing with a command substitution first so a failing
# find/sort still surfaces through the assignment's exit status, then split it
# into a real array, one path per element. Previously this was a plain string,
# so "${all_libcxx_libs[0]}" expanded to the whole newline-joined listing.
libcxx_listing=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
all_libcxx_libs=()
if [ -n "${libcxx_listing}" ]; then
  mapfile -t all_libcxx_libs <<< "${libcxx_listing}"
fi
for f in "${all_libcxx_libs[@]}"; do
  echo "$f"
  # Print the distinct GLIBCXX_x.y.z symbol versions exported by this library.
  objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
  echo ""
done
echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
# Only the first (lowest-sorted) library is preloaded; this is empty if none was found.
conda env config vars set -n build_binary LD_PRELOAD="${all_libcxx_libs[0]:-}"
- name: Install PyTorch and CUDA
shell: bash
run: |
Expand Down
27 changes: 24 additions & 3 deletions .github/workflows/unittest_ci_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,30 @@ jobs:
conda info
python --version
conda run -n build_binary python --version
# Install a pinned GCC toolchain from conda-forge into the build_binary env,
# expose it under the conventional compiler names, and list the GLIBCXX symbol
# versions provided by the env's libstdc++.
echo "[INSTALL] Installing gcc..."
conda install -n build_binary -c conda-forge -y gxx_linux-64=11.4.0 sysroot_linux-64=2.17
echo "[INSTALL] Setting the C/C++ compiler symlinks ..."
# CC/CXX are read from inside the env via `conda run ... printenv`.
cc_path=$(conda run -n build_binary printenv CC)
cxx_path=$(conda run -n build_binary printenv CXX)
# Create cc/gcc and c++/g++ symlinks next to the binaries CC/CXX point at.
ln -sf "${cc_path}" "$(dirname "$cc_path")/cc"
ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"
conda_prefix=$(conda run -n build_binary printenv CONDA_PREFIX)
echo "[INSTALL] Enumerating libstdc++.so files ..."
# NOTE(review): this is a plain string (newline-separated paths), not an array;
# the unquoted `for` below relies on word splitting to iterate over it.
all_libcxx_libs=$(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)
for f in $all_libcxx_libs; do
echo "$f";
# Print the distinct GLIBCXX_x.y.z symbol versions exported by this library.
objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
echo ""
done
echo "[INSTALL] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
# Capture the env's current LD_PRELOAD value.
# NOTE(review): the code that consumes current_value is not visible in this
# excerpt — confirm LD_PRELOAD is actually set for the env afterwards.
current_value=$(conda run -n build_binary printenv LD_PRELOAD)
conda run -n build_binary \
pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu
conda run -n build_binary \
Expand Down Expand Up @@ -73,9 +97,6 @@ jobs:
python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
--ignore-glob=**/test_utils/
echo "Starting C++ Tests"
conda install -n build_binary -y gxx_linux-64
conda run -n build_binary \
x86_64-conda-linux-gnu-g++ --version
conda install -n build_binary -c anaconda redis -y
conda run -n build_binary redis-server --daemonize yes
mkdir cpp-build
Expand Down
8 changes: 6 additions & 2 deletions torchrec/distributed/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -525,7 +525,9 @@ def create_global_tensor_shape_stride_from_metadata(
"""
size = None
if parameter_sharding.sharding_type == ShardingType.COLUMN_WISE.value:
row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[0] # pyre-ignore[16]
row_dim = parameter_sharding.sharding_spec.shards[0].shard_sizes[
0
] # pyre-ignore[16]
col_dim = 0
for shard in parameter_sharding.sharding_spec.shards:
col_dim += shard.shard_sizes[1]
Expand All @@ -551,4 +553,6 @@ def create_global_tensor_shape_stride_from_metadata(
for _ in range(devices_per_node):
row_dim += parameter_sharding.sharding_spec.shards[0].shard_sizes[0]
size = torch.Size([row_dim, col_dim])
return size, (size[1], 1) if size else (torch.Size([0, 0]), (0, 1)) # pyre-ignore[7]
return size, (
(size[1], 1) if size else (torch.Size([0, 0]), (0, 1))
) # pyre-ignore[7]

0 comments on commit b0167a6

Please sign in to comment.