Merge pull request easybuilders#19066 from Flamefire/20231024093827_new_pr_PyTorch201

{ai}[foss/2022a] PyTorch v2.0.1
branfosj authored Oct 26, 2023
2 parents 9331a9b + 0e3ef19 commit c07c4b1
Showing 19 changed files with 1,388 additions and 0 deletions.
147 changes: 147 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1-foss-2022a.eb
@@ -0,0 +1,147 @@
name = 'PyTorch'
version = '2.0.1'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2022a'}

source_urls = [GITHUB_RELEASE]
sources = ['%(namelower)s-v%(version)s.tar.gz']
patches = [
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
'PyTorch-1.12.1_add-hypothesis-suppression.patch',
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
'PyTorch-1.12.1_skip-test_round_robin.patch',
'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
'PyTorch-1.13.1_fix-protobuf-dependency.patch',
'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch',
'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
'PyTorch-2.0.1_disable-test-sharding.patch',
'PyTorch-2.0.1_fix-numpy-compat.patch',
'PyTorch-2.0.1_fix-shift-ops.patch',
'PyTorch-2.0.1_fix-skip-decorators.patch',
'PyTorch-2.0.1_fix-test_memory_profiler.patch',
'PyTorch-2.0.1_fix-test-ops-conf.patch',
'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch',
'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch',
'PyTorch-2.0.1_fix-vsx-loadu.patch',
'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch',
'PyTorch-2.0.1_no-cuda-stubs-rpath.patch',
'PyTorch-2.0.1_remove-test-requiring-online-access.patch',
'PyTorch-2.0.1_skip-diff-test-on-ppc.patch',
'PyTorch-2.0.1_skip-failing-gradtest.patch',
'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
]
checksums = [
{'pytorch-v2.0.1.tar.gz': '9c564ca440265c69400ef5fdd48bf15e28af5aa4bed84c95efaad960a6699998'},
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
{'PyTorch-1.12.1_add-hypothesis-suppression.patch':
'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
{'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
'1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
{'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
'5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
{'PyTorch-1.13.1_fix-protobuf-dependency.patch':
'8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
{'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
{'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
'72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
{'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
'481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
{'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch':
'da44961d6c204403ba0c4b88cedccf06a7a3d24f29c4398545f96efae7a45c95'},
{'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
'02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
{'PyTorch-2.0.1_disable-test-sharding.patch': 'a1ed7f21c9a269ea039a07a3d6574f885787b30ca5687143c96e096d31066cca'},
{'PyTorch-2.0.1_fix-numpy-compat.patch': 'f3e5798193e0909a415d824f13772973200965db84476c1737824f2735f2db94'},
{'PyTorch-2.0.1_fix-shift-ops.patch': '5ee655d5dba56d801d5618543b6ca299fa874939a3471f7b5449bfcb7f3f18c7'},
{'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
{'PyTorch-2.0.1_fix-test_memory_profiler.patch':
'fd03117c46f59c1c62227d31c410c4cdd98fd35410976758cb9e7ec947582ddb'},
{'PyTorch-2.0.1_fix-test-ops-conf.patch': '0f995e4f89baf3cbeb8666cbfe694666a2ef2bc53d97d6301f768b3ff9001fa4'},
{'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch':
'20f9172ae696da0c5c7b3bae6f0bf1221192cb1cbac3a44526a415087834bee7'},
{'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch':
'1b37194f55ae678f3657b8728dfb896c18ffe8babe90987ce468c4fa9274f357'},
{'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
{'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch':
'57e2985a5b7085c2786e4b0c4a5f0c81f6b2ae9d5804bbd552b06e8b1570f4c4'},
{'PyTorch-2.0.1_no-cuda-stubs-rpath.patch': '8902e58a762240f24cdbf0182e99ccdfc2a93492869352fcb4ca0ec7e407f83a'},
{'PyTorch-2.0.1_remove-test-requiring-online-access.patch':
'721ab0d35ed0ff8a46cb84ced5a98c0fb8ce6143cf6cea80b1360d3d7f64f584'},
{'PyTorch-2.0.1_skip-diff-test-on-ppc.patch': 'f6e39cd774e5663df25507a73d37ad598157c2eadb2f47ca20a537dbe4b3e14f'},
{'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
{'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
'7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
{'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
'166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
('CMake', '3.23.1'),
('hypothesis', '6.46.7'),
# For tests
('pytest-rerunfailures', '11.1'),
('pytest-shard', '0.1.2'),
]

dependencies = [
('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions
('Python', '3.10.4'),
('protobuf', '3.19.4'),
('protobuf-python', '3.19.4'),
('pybind11', '2.9.2'),
('SciPy-bundle', '2022.05'),
('PyYAML', '6.0'),
('MPFR', '4.1.0'),
('GMP', '6.2.1'),
('numactl', '2.0.14'),
('FFmpeg', '4.4.2'),
('Pillow', '9.1.1'),
('expecttest', '0.1.3'),
('networkx', '2.8.4'),
('sympy', '1.10.1'),
]

excluded_tests = {
'': [
        # This test seems to take too long, at least on NVIDIA Ampere.
'distributed/test_distributed_spawn',
# Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
'distributions/test_constraints',
# no xdoctest
'doctests',
# failing on broadwell
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'test_native_mha',
# intermittent failures on various systems
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'distributed/rpc/test_tensorpipe_agent',
]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

# test_quantization in particular has a few corner cases triggered by random input values
# that cannot easily be avoided, see https://github.com/pytorch/pytorch/issues/107030.
# So allow a small number of tests to fail, as the tests "usually" succeed.
max_failed_tests = 2

tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
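
Each entry in the checksums list above pairs a source or patch file with its SHA256 digest, which EasyBuild verifies before building. For illustration, a minimal standalone sketch of the equivalent check in Python (the local file path is assumed, not part of the easyconfig):

import hashlib

def sha256sum(path):
    # Stream the file in 1 MiB chunks so a large tarball need not fit in memory.
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()

expected = '9c564ca440265c69400ef5fdd48bf15e28af5aa4bed84c95efaad960a6699998'
if sha256sum('pytorch-v2.0.1.tar.gz') != expected:
    raise ValueError('checksum mismatch for pytorch-v2.0.1.tar.gz')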
109 changes: 109 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch
@@ -0,0 +1,109 @@
The `Vectorized` class template specializations for VSX are missing the
left and right shift operators.
Add a backported version of the fixed operators from https://github.com/pytorch/pytorch/pull/109886.

Author: Alexander Grund (TU Dresden)

diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h
index 7c300c8087c..84c84286740 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h
@@ -348,6 +348,7 @@ Vectorized<int16_t> inline minimum(
return a.minimum(b);
}

+DEFINE_SHIFT_FUNCS(int16_t)

} // namespace
} // namespace vec
diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h
index c98ab6215e6..e1e86d3b53a 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h
@@ -279,6 +279,8 @@ Vectorized<int32_t> inline minimum(
return a.minimum(b);
}

+DEFINE_SHIFT_FUNCS(int32_t)
+
} // namespace
} // namespace vec
} // namespace at
diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
index a4171026a2b..70613d90443 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
@@ -231,6 +231,8 @@ Vectorized<int64_t> inline minimum(
return a.minimum(b);
}

+DEFINE_SHIFT_FUNCS(int64_t)
+
} // namespace
} // namespace vec
} // namespace at
diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
index dab38458184..52032cdd817 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
@@ -2,6 +2,7 @@
#include <cstdint>
#include <c10/macros/Macros.h>
#include <ATen/cpu/vec/intrinsics.h>
+#include <ATen/cpu/vec/vec_base.h>

using vbool8 = __attribute__((altivec(vector__))) __attribute__((altivec(bool__))) char;
using vbool16 = __attribute__((altivec(vector__))) __attribute__((altivec(bool__))) short;
@@ -18,6 +19,11 @@ using vuint64 = __attribute__((altivec(vector__))) unsigned long long;
using vfloat32 = __attribute__((altivec(vector__))) float;
using vfloat64 = __attribute__((altivec(vector__))) double;

+inline auto make_vuint(vint8 v){ return reinterpret_cast<vuint8>(v); }
+inline auto make_vuint(vint16 v){ return reinterpret_cast<vuint16>(v); }
+inline auto make_vuint(vint32 v){ return reinterpret_cast<vuint32>(v); }
+inline auto make_vuint(vint64 v){ return reinterpret_cast<vuint64>(v); }
+
#if !defined(vec_float)
C10_ALWAYS_INLINE vfloat32 vec_float(const vint32& vec_in) {
vfloat32 vec_out;
@@ -448,6 +454,40 @@ const vfloat64 vd_imag_half = vfloat64{0.0, 0.5};
const vfloat64 vd_sqrt2_2 = vfloat64{0.70710678118654757, 0.70710678118654757};
const vfloat64 vd_pi_2 = vfloat64{M_PI / 2.0, 0.0};

+template<typename T>
+Vectorized<T> VsxShiftRightArith(const Vectorized<T>& a, const Vectorized<T>& b) {
+ const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT - std::is_signed_v<T>);
+ const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
+ const auto shift = Vectorized<T>::blendv(b, max_shift, mask);
+ return Vectorized<T>{
+ vec_sra(a.vec0(), make_vuint(shift.vec0())),
+ vec_sra(a.vec1(), make_vuint(shift.vec1()))};
+}
+
+template<typename T>
+Vectorized<T> VsxShiftLeftArith(const Vectorized<T>& a, const Vectorized<T>& b) {
+ const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT);
+ const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
+ Vectorized<T> ret(
+ vec_sl(a.vec0(), make_vuint(b.vec0())),
+ vec_sl(a.vec1(), make_vuint(b.vec1())));
+ return Vectorized<T>::blendv(ret, Vectorized<T>(0), mask);
+}
+
+#define DEFINE_SHIFT_FUNCS(operand_type) \
+ template <> \
+ Vectorized<operand_type> C10_ALWAYS_INLINE operator>>( \
+ const Vectorized<operand_type>& a, \
+ const Vectorized<operand_type>& b) { \
+ return VsxShiftRightArith(a, b); \
+ } \
+ template <> \
+ Vectorized<operand_type> C10_ALWAYS_INLINE operator<<( \
+ const Vectorized<operand_type>& a, \
+ const Vectorized<operand_type>& b) { \
+ return VsxShiftLeftArith(a, b); \
+ } \
+
} // namespace
} // namespace vec
} // namespace at
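
For reference, a scalar Python model of the per-lane semantics these operators implement (function names and the 16-bit lane width here are illustrative, not from the patch): arithmetic right shifts clamp out-of-range counts so the sign bit fills the result, and out-of-range left shifts are defined to yield 0 instead of invoking C++ undefined behavior.

def shift_right_arith(a, b, bits=16):
    # Mirrors VsxShiftRightArith: counts that are negative or >= bits-1 are
    # clamped to bits-1, giving 0 for non-negative a and -1 for negative a.
    max_shift = bits - 1
    shift = max_shift if (b < 0 or b >= max_shift) else b
    return a >> shift  # Python's >> on ints is an arithmetic shift

def shift_left(a, b, bits=16):
    # Mirrors VsxShiftLeftArith: out-of-range counts are blended to 0.
    if b < 0 or b >= bits:
        return 0
    r = (a << b) & ((1 << bits) - 1)  # truncate to the lane width
    return r - (1 << bits) if r >= (1 << (bits - 1)) else r  # reinterpret as signed

assert shift_right_arith(-4, 1) == -2
assert shift_right_arith(-4, 100) == -1  # clamped shift: sign fill
assert shift_left(1, 100) == 0           # defined as 0, not UB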
19 changes: 19 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_avoid-test_quantization-failures.patch
@@ -0,0 +1,19 @@
The quantized values returned by hypothesis as test inputs might still cause overflows.
Hence reduce their maximum value by a factor that should avoid most such cases.
See e.g. https://github.com/pytorch/pytorch/issues/111471

Author: Alexander Grund (TU Dresden)

diff --git a/torch/testing/_internal/hypothesis_utils.py b/torch/testing/_internal/hypothesis_utils.py
index 15e7b4512a4..67df4d74e9d 100644
--- a/torch/testing/_internal/hypothesis_utils.py
+++ b/torch/testing/_internal/hypothesis_utils.py
@@ -36,6 +36,8 @@ _ENFORCED_ZERO_POINT = defaultdict(lambda: None, {
def _get_valid_min_max(qparams):
scale, zero_point, quantized_type = qparams
adjustment = 1 + torch.finfo(torch.float).eps
+ # provide some leeway for scaling values without overflowing long
+ adjustment *= 1e4
_long_type_info = torch.iinfo(torch.long)
long_min, long_max = _long_type_info.min / adjustment, _long_type_info.max / adjustment
# make sure intermediate results are within the range of long
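
The effect of the extra factor can be checked directly (a quick sketch requiring torch, not part of the patch):

import torch

adjustment = (1 + torch.finfo(torch.float).eps) * 1e4  # patched value
long_max = torch.iinfo(torch.long).max / adjustment
# Generated quantized values are now bounded around 9.2e14 instead of
# 9.2e18, leaving roughly four orders of magnitude of headroom before
# intermediate scaling can overflow int64.
print(f'{long_max:.3e}')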
18 changes: 18 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_disable-test-sharding.patch
@@ -0,0 +1,18 @@
Our error checking doesn't work well with PyTorch's parallel/sharded test runs.
As the overall gain is low, disable sharding and always run the full test suite in a single process.

Author: Alexander Grund (TU Dresden)

diff --git a/test/run_test.py b/test/run_test.py
index 9619cb2626e..ddfb200148f 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -815,7 +815,7 @@ def run_test_ops(test_module, test_directory, options):
]
default_unittest_args.extend(rerun_options)

- if 'slow-gradcheck' in os.getenv("BUILD_ENVIRONMENT", ""):
+ if True:
extra_unittest_args = default_unittest_args.copy()
# there are a lot of tests that take up a lot of space in slowgrad check, so don't bother parallelizing
# it's also on periodic so we don't care about TTS as much
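
The replaced condition keys off the BUILD_ENVIRONMENT variable; a quick standalone sketch of what the upstream gate checked (values illustrative):

import os

# Upstream: the serial, non-sharded path was taken only in slow-gradcheck CI jobs.
serial = 'slow-gradcheck' in os.getenv('BUILD_ENVIRONMENT', '')
print(serial)  # False in a normal EasyBuild environment

# The patch replaces this check with a constant True, so every test module
# takes the serial path and EasyBuild's error checking sees a single process.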