Merge pull request easybuilders#19066 from Flamefire/20231024093827_new_pr_PyTorch201

{ai}[foss/2022a] PyTorch v2.0.1
branfosj authored Oct 26, 2023
2 parents 9331a9b + 0e3ef19 commit c07c4b1
Showing 19 changed files with 1,388 additions and 0 deletions.
147 changes: 147 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1-foss-2022a.eb
@@ -0,0 +1,147 @@
name = 'PyTorch'
version = '2.0.1'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2022a'}

source_urls = [GITHUB_RELEASE]
sources = ['%(namelower)s-v%(version)s.tar.gz']
patches = [
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
'PyTorch-1.12.1_add-hypothesis-suppression.patch',
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
'PyTorch-1.12.1_skip-test_round_robin.patch',
'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
'PyTorch-1.13.1_fix-protobuf-dependency.patch',
'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch',
'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
'PyTorch-2.0.1_disable-test-sharding.patch',
'PyTorch-2.0.1_fix-numpy-compat.patch',
'PyTorch-2.0.1_fix-shift-ops.patch',
'PyTorch-2.0.1_fix-skip-decorators.patch',
'PyTorch-2.0.1_fix-test_memory_profiler.patch',
'PyTorch-2.0.1_fix-test-ops-conf.patch',
'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch',
'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch',
'PyTorch-2.0.1_fix-vsx-loadu.patch',
'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch',
'PyTorch-2.0.1_no-cuda-stubs-rpath.patch',
'PyTorch-2.0.1_remove-test-requiring-online-access.patch',
'PyTorch-2.0.1_skip-diff-test-on-ppc.patch',
'PyTorch-2.0.1_skip-failing-gradtest.patch',
'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
]
checksums = [
{'pytorch-v2.0.1.tar.gz': '9c564ca440265c69400ef5fdd48bf15e28af5aa4bed84c95efaad960a6699998'},
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
{'PyTorch-1.12.1_add-hypothesis-suppression.patch':
'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
{'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
'1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
{'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
'5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
{'PyTorch-1.13.1_fix-protobuf-dependency.patch':
'8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
{'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
{'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
'72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
{'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
'481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
{'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch':
'da44961d6c204403ba0c4b88cedccf06a7a3d24f29c4398545f96efae7a45c95'},
{'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
'02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
{'PyTorch-2.0.1_disable-test-sharding.patch': 'a1ed7f21c9a269ea039a07a3d6574f885787b30ca5687143c96e096d31066cca'},
{'PyTorch-2.0.1_fix-numpy-compat.patch': 'f3e5798193e0909a415d824f13772973200965db84476c1737824f2735f2db94'},
{'PyTorch-2.0.1_fix-shift-ops.patch': '5ee655d5dba56d801d5618543b6ca299fa874939a3471f7b5449bfcb7f3f18c7'},
{'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
{'PyTorch-2.0.1_fix-test_memory_profiler.patch':
'fd03117c46f59c1c62227d31c410c4cdd98fd35410976758cb9e7ec947582ddb'},
{'PyTorch-2.0.1_fix-test-ops-conf.patch': '0f995e4f89baf3cbeb8666cbfe694666a2ef2bc53d97d6301f768b3ff9001fa4'},
{'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch':
'20f9172ae696da0c5c7b3bae6f0bf1221192cb1cbac3a44526a415087834bee7'},
{'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch':
'1b37194f55ae678f3657b8728dfb896c18ffe8babe90987ce468c4fa9274f357'},
{'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
{'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch':
'57e2985a5b7085c2786e4b0c4a5f0c81f6b2ae9d5804bbd552b06e8b1570f4c4'},
{'PyTorch-2.0.1_no-cuda-stubs-rpath.patch': '8902e58a762240f24cdbf0182e99ccdfc2a93492869352fcb4ca0ec7e407f83a'},
{'PyTorch-2.0.1_remove-test-requiring-online-access.patch':
'721ab0d35ed0ff8a46cb84ced5a98c0fb8ce6143cf6cea80b1360d3d7f64f584'},
{'PyTorch-2.0.1_skip-diff-test-on-ppc.patch': 'f6e39cd774e5663df25507a73d37ad598157c2eadb2f47ca20a537dbe4b3e14f'},
{'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
{'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
'7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
{'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
'166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
('CMake', '3.23.1'),
('hypothesis', '6.46.7'),
# For tests
('pytest-rerunfailures', '11.1'),
('pytest-shard', '0.1.2'),
]

dependencies = [
('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions
('Python', '3.10.4'),
('protobuf', '3.19.4'),
('protobuf-python', '3.19.4'),
('pybind11', '2.9.2'),
('SciPy-bundle', '2022.05'),
('PyYAML', '6.0'),
('MPFR', '4.1.0'),
('GMP', '6.2.1'),
('numactl', '2.0.14'),
('FFmpeg', '4.4.2'),
('Pillow', '9.1.1'),
('expecttest', '0.1.3'),
('networkx', '2.8.4'),
('sympy', '1.10.1'),
]

excluded_tests = {
'': [
        # This test seems to take too long, at least on NVIDIA Ampere.
'distributed/test_distributed_spawn',
# Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
'distributions/test_constraints',
# no xdoctest
'doctests',
# failing on broadwell
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'test_native_mha',
# intermittent failures on various systems
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'distributed/rpc/test_tensorpipe_agent',
]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

# test_quantization in particular has a few corner cases triggered by random input values
# that cannot easily be avoided, see https://github.com/pytorch/pytorch/issues/107030.
# So allow a small number of tests to fail, as the tests "usually" succeed.
max_failed_tests = 2

tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
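
Each entry in the checksums list above pairs a source or patch file with its SHA256 digest, which EasyBuild verifies before building. For illustration, a minimal standalone sketch of the equivalent check in Python (the local file path is assumed, not part of the easyconfig):

import hashlib

def sha256sum(path):
    # Stream the file in 1 MiB chunks so a large tarball need not fit in memory.
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(1 << 20), b''):
            digest.update(chunk)
    return digest.hexdigest()

expected = '9c564ca440265c69400ef5fdd48bf15e28af5aa4bed84c95efaad960a6699998'
if sha256sum('pytorch-v2.0.1.tar.gz') != expected:
    raise ValueError('checksum mismatch for pytorch-v2.0.1.tar.gz')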
109 changes: 109 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch
@@ -0,0 +1,109 @@
The `Vectorized` class template specializations for VSX are missing the
left and right shift operators.
Add a backported version of the fixed operators from https://github.com/pytorch/pytorch/pull/109886.

Author: Alexander Grund (TU Dresden)

diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h
index 7c300c8087c..84c84286740 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int16_vsx.h
@@ -348,6 +348,7 @@ Vectorized<int16_t> inline minimum(
return a.minimum(b);
}

+DEFINE_SHIFT_FUNCS(int16_t)

} // namespace
} // namespace vec
diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h
index c98ab6215e6..e1e86d3b53a 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int32_vsx.h
@@ -279,6 +279,8 @@ Vectorized<int32_t> inline minimum(
return a.minimum(b);
}

+DEFINE_SHIFT_FUNCS(int32_t)
+
} // namespace
} // namespace vec
} // namespace at
diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
index a4171026a2b..70613d90443 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vec256_int64_vsx.h
@@ -231,6 +231,8 @@ Vectorized<int64_t> inline minimum(
return a.minimum(b);
}

+DEFINE_SHIFT_FUNCS(int64_t)
+
} // namespace
} // namespace vec
} // namespace at
diff --git a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
index dab38458184..52032cdd817 100644
--- a/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
+++ b/aten/src/ATen/cpu/vec/vec256/vsx/vsx_helpers.h
@@ -2,6 +2,7 @@
#include <cstdint>
#include <c10/macros/Macros.h>
#include <ATen/cpu/vec/intrinsics.h>
+#include <ATen/cpu/vec/vec_base.h>

using vbool8 = __attribute__((altivec(vector__))) __attribute__((altivec(bool__))) char;
using vbool16 = __attribute__((altivec(vector__))) __attribute__((altivec(bool__))) short;
@@ -18,6 +19,11 @@ using vuint64 = __attribute__((altivec(vector__))) unsigned long long;
using vfloat32 = __attribute__((altivec(vector__))) float;
using vfloat64 = __attribute__((altivec(vector__))) double;

+inline auto make_vuint(vint8 v){ return reinterpret_cast<vuint8>(v); }
+inline auto make_vuint(vint16 v){ return reinterpret_cast<vuint16>(v); }
+inline auto make_vuint(vint32 v){ return reinterpret_cast<vuint32>(v); }
+inline auto make_vuint(vint64 v){ return reinterpret_cast<vuint64>(v); }
+
#if !defined(vec_float)
C10_ALWAYS_INLINE vfloat32 vec_float(const vint32& vec_in) {
vfloat32 vec_out;
@@ -448,6 +454,40 @@ const vfloat64 vd_imag_half = vfloat64{0.0, 0.5};
const vfloat64 vd_sqrt2_2 = vfloat64{0.70710678118654757, 0.70710678118654757};
const vfloat64 vd_pi_2 = vfloat64{M_PI / 2.0, 0.0};

+template<typename T>
+Vectorized<T> VsxShiftRightArith(const Vectorized<T>& a, const Vectorized<T>& b) {
+ const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT - std::is_signed_v<T>);
+ const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
+ const auto shift = Vectorized<T>::blendv(b, max_shift, mask);
+ return Vectorized<T>{
+ vec_sra(a.vec0(), make_vuint(shift.vec0())),
+ vec_sra(a.vec1(), make_vuint(shift.vec1()))};
+}
+
+template<typename T>
+Vectorized<T> VsxShiftLeftArith(const Vectorized<T>& a, const Vectorized<T>& b) {
+ const Vectorized<T> max_shift(sizeof(T) * CHAR_BIT);
+ const auto mask = (b < Vectorized<T>(0)) | (b >= max_shift);
+ Vectorized<T> ret(
+ vec_sl(a.vec0(), make_vuint(b.vec0())),
+ vec_sl(a.vec1(), make_vuint(b.vec1())));
+ return Vectorized<T>::blendv(ret, Vectorized<T>(0), mask);
+}
+
+#define DEFINE_SHIFT_FUNCS(operand_type) \
+ template <> \
+ Vectorized<operand_type> C10_ALWAYS_INLINE operator>>( \
+ const Vectorized<operand_type>& a, \
+ const Vectorized<operand_type>& b) { \
+ return VsxShiftRightArith(a, b); \
+ } \
+ template <> \
+ Vectorized<operand_type> C10_ALWAYS_INLINE operator<<( \
+ const Vectorized<operand_type>& a, \
+ const Vectorized<operand_type>& b) { \
+ return VsxShiftLeftArith(a, b); \
+ } \
+
} // namespace
} // namespace vec
} // namespace at
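
For reference, a scalar Python model of the per-lane semantics these operators implement (function names and the 16-bit lane width here are illustrative, not from the patch): arithmetic right shifts clamp out-of-range counts so the sign bit fills the result, and out-of-range left shifts are defined to yield 0 instead of invoking C++ undefined behavior.

def shift_right_arith(a, b, bits=16):
    # Mirrors VsxShiftRightArith: counts that are negative or >= bits-1 are
    # clamped to bits-1, giving 0 for non-negative a and -1 for negative a.
    max_shift = bits - 1
    shift = max_shift if (b < 0 or b >= max_shift) else b
    return a >> shift  # Python's >> on ints is an arithmetic shift

def shift_left(a, b, bits=16):
    # Mirrors VsxShiftLeftArith: out-of-range counts are blended to 0.
    if b < 0 or b >= bits:
        return 0
    r = (a << b) & ((1 << bits) - 1)  # truncate to the lane width
    return r - (1 << bits) if r >= (1 << (bits - 1)) else r  # reinterpret as signed

assert shift_right_arith(-4, 1) == -2
assert shift_right_arith(-4, 100) == -1  # clamped shift: sign fill
assert shift_left(1, 100) == 0           # defined as 0, not UB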
19 changes: 19 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_avoid-test_quantization-failures.patch
@@ -0,0 +1,19 @@
The quantized values returned by hypothesis as test inputs might still cause overflows.
Hence reduce their maximum value by a factor that should avoid most such cases.
See e.g. https://github.com/pytorch/pytorch/issues/111471

Author: Alexander Grund (TU Dresden)

diff --git a/torch/testing/_internal/hypothesis_utils.py b/torch/testing/_internal/hypothesis_utils.py
index 15e7b4512a4..67df4d74e9d 100644
--- a/torch/testing/_internal/hypothesis_utils.py
+++ b/torch/testing/_internal/hypothesis_utils.py
@@ -36,6 +36,8 @@ _ENFORCED_ZERO_POINT = defaultdict(lambda: None, {
def _get_valid_min_max(qparams):
scale, zero_point, quantized_type = qparams
adjustment = 1 + torch.finfo(torch.float).eps
+ # provide some leeway for scaling values without overflowing long
+ adjustment *= 1e4
_long_type_info = torch.iinfo(torch.long)
long_min, long_max = _long_type_info.min / adjustment, _long_type_info.max / adjustment
# make sure intermediate results are within the range of long
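
The effect of the extra factor can be checked directly (a quick sketch requiring torch, not part of the patch):

import torch

adjustment = (1 + torch.finfo(torch.float).eps) * 1e4  # patched value
long_max = torch.iinfo(torch.long).max / adjustment
# Generated quantized values are now bounded around 9.2e14 instead of
# 9.2e18, leaving roughly four orders of magnitude of headroom before
# intermediate scaling can overflow int64.
print(f'{long_max:.3e}')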
18 changes: 18 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_disable-test-sharding.patch
@@ -0,0 +1,18 @@
Our error checking doesn't work well with PyTorch's parallel/sharded test runs.
As the overall gain is low, disable sharding and always run the full test suite in a single process.

Author: Alexander Grund (TU Dresden)

diff --git a/test/run_test.py b/test/run_test.py
index 9619cb2626e..ddfb200148f 100755
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -815,7 +815,7 @@ def run_test_ops(test_module, test_directory, options):
]
default_unittest_args.extend(rerun_options)

- if 'slow-gradcheck' in os.getenv("BUILD_ENVIRONMENT", ""):
+ if True:
extra_unittest_args = default_unittest_args.copy()
# there are a lot of tests that take up a lot of space in slowgrad check, so don't bother parallelizing
# it's also on periodic so we don't care about TTS as much
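
The replaced condition keys off the BUILD_ENVIRONMENT variable; a quick standalone sketch of what the upstream gate checked (values illustrative):

import os

# Upstream: the serial, non-sharded path was taken only in slow-gradcheck CI jobs.
serial = 'slow-gradcheck' in os.getenv('BUILD_ENVIRONMENT', '')
print(serial)  # False in a normal EasyBuild environment

# The patch replaces this check with a constant True, so every test module
# takes the serial path and EasyBuild's error checking sees a single process.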