mirror of
https://code.it4i.cz/sccs/easyconfigs-it4i.git
synced 2025-04-08 07:52:11 +01:00
new file: h/HyperQueue/HyperQueue-0.10.0.eb
new file: p/PyTorch/PyTorch-1.11.0-foss-2021a-CUDA-11.3.1.eb modified: p/PyTorch/PyTorch-1.11.0-fosscuda-2020b.eb new file: p/PyTorch/PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch new file: p/PyTorch/PyTorch-1.11.0_fix_sharded_imports.patch new file: p/PyTorch/PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch new file: p/PyTorch/PyTorch-1.11.0_increase-distributed-test-timeout.patch new file: p/PyTorch/PyTorch-1.11.0_increase_c10d_gloo_timeout.patch new file: p/PyTorch/PyTorch-1.11.0_increase_test_tolerances_TF32.patch new file: p/PyTorch/PyTorch-1.11.0_skip_failing_ops_tests.patch
This commit is contained in:
parent
ce632b3121
commit
094a092a73
24
h/HyperQueue/HyperQueue-0.10.0.eb
Normal file
24
h/HyperQueue/HyperQueue-0.10.0.eb
Normal file
@ -0,0 +1,24 @@
|
||||
# IT4Innovations
|
||||
# JK 2022
|
||||
|
||||
easyblock = 'PackedBinary'
|
||||
|
||||
name = 'HyperQueue'
|
||||
version = '0.10.0'
|
||||
|
||||
homepage = 'https://it4innovations.github.io/hyperqueue/'
|
||||
description = """HyperQueue lets you build a computation plan consisting of a large amount of tasks and then execute it transparently over a system like SLURM/PBS. It dynamically groups jobs into SLURM/PBS jobs and distributes them to fully utilize allocated nodes. You thus do not have to manually aggregate your tasks into SLURM/PBS jobs."""
|
||||
|
||||
toolchain = SYSTEM
|
||||
|
||||
source_urls = ['https://github.com/It4innovations/hyperqueue/releases/download/v%(version)s/']
|
||||
sources = ['hq-v%(version)s-linux-x64.tar.gz']
|
||||
checksums = ['2513d5ce7e8b31ace17f5054058c3fed7900ef61e3aa0f27d66f794533cd152c']
|
||||
|
||||
sanity_check_paths = {
|
||||
'files': ['hq'],
|
||||
'dirs': [],
|
||||
}
|
||||
|
||||
|
||||
moduleclass = 'devel'
|
119
p/PyTorch/PyTorch-1.11.0-foss-2021a-CUDA-11.3.1.eb
Normal file
119
p/PyTorch/PyTorch-1.11.0-foss-2021a-CUDA-11.3.1.eb
Normal file
@ -0,0 +1,119 @@
|
||||
name = 'PyTorch'
|
||||
version = '1.11.0'
|
||||
versionsuffix = '-CUDA-%(cudaver)s'
|
||||
|
||||
homepage = 'https://pytorch.org/'
|
||||
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
|
||||
PyTorch is a deep learning framework that puts Python first."""
|
||||
|
||||
toolchain = {'name': 'foss', 'version': '2021a'}
|
||||
|
||||
sources = [{
|
||||
'filename': '%(name)s-%(version)s.tar.gz',
|
||||
'git_config': {
|
||||
'url': 'https://github.com/pytorch',
|
||||
'repo_name': 'pytorch',
|
||||
'tag': 'v%(version)s',
|
||||
'recursive': True,
|
||||
},
|
||||
}]
|
||||
patches = [
|
||||
'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
|
||||
'PyTorch-1.7.0_disable-dev-shm-test.patch',
|
||||
'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch',
|
||||
'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch',
|
||||
'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch',
|
||||
'PyTorch-1.10.0_skip_cmake_rpath.patch',
|
||||
'PyTorch-1.11.0_increase-distributed-test-timeout.patch',
|
||||
'PyTorch-1.11.0_skip_failing_ops_tests.patch',
|
||||
'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch',
|
||||
'PyTorch-1.11.0_fix_sharded_imports.patch',
|
||||
'PyTorch-1.11.0_increase_test_tolerances_TF32.patch',
|
||||
'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch',
|
||||
'PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch',
|
||||
]
|
||||
checksums = [
|
||||
None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone'
|
||||
'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
|
||||
'622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch
|
||||
'89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6', # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
|
||||
# PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
|
||||
'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea',
|
||||
# PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
|
||||
'313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707',
|
||||
'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_cmake_rpath.patch
|
||||
# PyTorch-1.11.0_increase-distributed-test-timeout.patch
|
||||
'087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f',
|
||||
'8eaca92d64fcadb0552d28e9c7ea5c4bc669d2fe33004e45a3519ce8d0d136a2', # PyTorch-1.11.0_skip_failing_ops_tests.patch
|
||||
'21fc678febcdfbb9dabd72235be23cd392044e9a954f6580d15b530e1f69dcc1', # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch
|
||||
'9a04f4285b800dad8a00c3014af0a9713d40d5dd35d10931c7c0da4e89c558e9', # PyTorch-1.11.0_fix_sharded_imports.patch
|
||||
# PyTorch-1.11.0_increase_test_tolerances_TF32.patch
|
||||
'26e179a4f6f57e49209092612ae5f5cd8c03fd2ca84566ba0244eabefc3736ba',
|
||||
# PyTorch-1.11.0_increase_c10d_gloo_timeout.patch
|
||||
'20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953',
|
||||
# PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch
|
||||
'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51',
|
||||
]
|
||||
|
||||
osdependencies = [OS_PKG_IBVERBS_DEV]
|
||||
|
||||
builddependencies = [
|
||||
('CMake', '3.20.1'),
|
||||
('hypothesis', '6.13.1'),
|
||||
]
|
||||
|
||||
dependencies = [
|
||||
('CUDA', '11.3.1', '', True),
|
||||
('Ninja', '1.10.2'), # Required for JIT compilation of C++ extensions
|
||||
('Python', '3.9.5'),
|
||||
('protobuf', '3.17.3'),
|
||||
('protobuf-python', '3.17.3'),
|
||||
('pybind11', '2.6.2'),
|
||||
('SciPy-bundle', '2021.05'),
|
||||
('typing-extensions', '3.10.0.0'),
|
||||
('PyYAML', '5.4.1'),
|
||||
('MPFR', '4.1.0'),
|
||||
('GMP', '6.2.1'),
|
||||
('numactl', '2.0.14'),
|
||||
('FFmpeg', '4.3.2'),
|
||||
('Pillow', '8.2.0'),
|
||||
('cuDNN', '8.2.1.32', '-CUDA-%(cudaver)s', True),
|
||||
('magma', '2.6.1', '-CUDA-%(cudaver)s'),
|
||||
('NCCL', '2.10.3', '-CUDA-%(cudaver)s'),
|
||||
('expecttest', '0.1.3'),
|
||||
]
|
||||
|
||||
# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
|
||||
cuda_compute_capabilities = ['3.5', '3.7', '5.2', '6.0', '6.1', '7.0', '7.2', '7.5', '8.0', '8.6']
|
||||
|
||||
custom_opts = ["USE_CUPTI_SO=1"]
|
||||
|
||||
excluded_tests = {
|
||||
'': [
|
||||
# Bad tests: https://github.com/pytorch/pytorch/issues/60260
|
||||
'distributed/elastic/utils/distributed_test',
|
||||
'distributed/elastic/multiprocessing/api_test',
|
||||
# These tests fail on A10s at the very least, they time out forever no matter how long the timeout is.
|
||||
# Possibly related to NCCL 2.8.3: https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-8-3.html
|
||||
# 'distributed/test_distributed_fork',
|
||||
'distributed/test_distributed_spawn',
|
||||
# Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
|
||||
'test_optim',
|
||||
# Tests from this suite time out often. The process group backend is deprecated anyway
|
||||
# 'distributed/rpc/test_process_group_agent',
|
||||
# This test fails consistently when run as part of the test suite, but succeeds when run interactively
|
||||
'test_model_dump',
|
||||
]
|
||||
}
|
||||
|
||||
runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'
|
||||
|
||||
# The readelf sanity check command can be taken out once the TestRPATH test from
|
||||
# https://github.com/pytorch/pytorch/pull/68912 is accepted, since it is then checked as part of the PyTorch test suite
|
||||
local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
|
||||
sanity_check_commands = [
|
||||
"readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
|
||||
]
|
||||
tests = ['PyTorch-check-cpp-extension.py']
|
||||
|
||||
moduleclass = 'devel'
|
@ -1,4 +1,4 @@
|
||||
# IT4Innovations
|
||||
# it4Innovations
|
||||
# LK 2022
|
||||
|
||||
name = 'PyTorch'
|
||||
@ -22,55 +22,45 @@ sources = [{
|
||||
patches = [
|
||||
'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
|
||||
'PyTorch-1.7.0_disable-dev-shm-test.patch',
|
||||
# 'PyTorch-1.7.1_correctly-pass-jit_opt_level.patch',
|
||||
'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch',
|
||||
'PyTorch-1.8.1_increase-distributed-test-timeout.patch',
|
||||
'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch',
|
||||
'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch',
|
||||
'PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch',
|
||||
'PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch',
|
||||
'PyTorch-1.10.0_fix-test-cond-cpu.patch',
|
||||
'PyTorch-1.10.0_fix-vnni-detection.patch',
|
||||
'PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch',
|
||||
'PyTorch-1.10.0_skip_failing_ops_tests.patch',
|
||||
'PyTorch-1.10.0_skip_nan_tests_openblas.patch',
|
||||
'PyTorch-1.10.0_skip_cmake_rpath.patch',
|
||||
'PyTorch-1.11.0_increase-distributed-test-timeout.patch',
|
||||
'PyTorch-1.11.0_skip_failing_ops_tests.patch',
|
||||
'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch',
|
||||
'PyTorch-1.11.0_fix_sharded_imports.patch',
|
||||
'PyTorch-1.11.0_increase_test_tolerances_TF32.patch',
|
||||
'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch',
|
||||
'PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch',
|
||||
]
|
||||
checksums = [
|
||||
None, # can't add proper SHA256 checksum, because source tarball is created locally after recursive 'git clone'
|
||||
'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18', # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
|
||||
'622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a', # PyTorch-1.7.0_disable-dev-shm-test.patch
|
||||
# PyTorch-1.7.1_correctly-pass-jit_opt_level.patch
|
||||
'd4d967d47f8a6172fcbf57f0a61835482968850967c4fdb01108b720696a988d',
|
||||
'89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6', # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
|
||||
# PyTorch-1.8.1_increase-distributed-test-timeout.patch
|
||||
'7a6e512274f0b8673f4f207a5bc53387d88be7e79833f42d20365668b2118071',
|
||||
# PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
|
||||
'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea',
|
||||
# PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
|
||||
'313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707',
|
||||
# PyTorch-1.10.0_fix-alias-violation-in-bitwise-ops.patch
|
||||
'426c9ead1a74b656748d4c8bf8afd4303d8b9f2394ad22b21a845d07c8ca1d12',
|
||||
# PyTorch-1.10.0_fix-faulty-asserts-and-skip-test.patch
|
||||
'67152215e4530a9b1d7349fb20864445fd815288f04ab9e96e45c73b2d87827a',
|
||||
# PyTorch-1.10.0_fix-test-cond-cpu.patch
|
||||
'51f83f5d5ef69656ef35b73f17e0671e70113798421be11ea4c7b56ffcc4da03',
|
||||
# PyTorch-1.10.0_fix-vnni-detection.patch
|
||||
'1f3664c0febfa2a3fc4c0cd3bae185f289716ac0b6c3d7e8fa1cee19ba62b7cc',
|
||||
# PyTorch-1.10.0_increase_zero_optimizer_test_tolerance.patch
|
||||
'e65afb01786f7f030ccb5faada1eb474bb0c418bcadcf1baaa71a4fa2f3f4240',
|
||||
# PyTorch-1.10.0_skip_failing_ops_tests.patch
|
||||
'399af94ffcef4a6db5226552c46f11e9b0f0f371b2d7924b9e5764d2281581ab',
|
||||
# PyTorch-1.10.0_skip_nan_tests_openblas.patch
|
||||
'7d3f83e3056d9e47a460790313238f28708beb596cafaa7ae55e374d368bbedf',
|
||||
# PyTorch-1.10.0_skip_cmake_rpath.patch
|
||||
'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448',
|
||||
'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448', # PyTorch-1.10.0_skip_cmake_rpath.patch
|
||||
# PyTorch-1.11.0_increase-distributed-test-timeout.patch
|
||||
'087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f',
|
||||
'8eaca92d64fcadb0552d28e9c7ea5c4bc669d2fe33004e45a3519ce8d0d136a2', # PyTorch-1.11.0_skip_failing_ops_tests.patch
|
||||
'21fc678febcdfbb9dabd72235be23cd392044e9a954f6580d15b530e1f69dcc1', # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch
|
||||
'9a04f4285b800dad8a00c3014af0a9713d40d5dd35d10931c7c0da4e89c558e9', # PyTorch-1.11.0_fix_sharded_imports.patch
|
||||
# PyTorch-1.11.0_increase_test_tolerances_TF32.patch
|
||||
'26e179a4f6f57e49209092612ae5f5cd8c03fd2ca84566ba0244eabefc3736ba',
|
||||
# PyTorch-1.11.0_increase_c10d_gloo_timeout.patch
|
||||
'20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953',
|
||||
# PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch
|
||||
'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51',
|
||||
]
|
||||
|
||||
osdependencies = [OS_PKG_IBVERBS_DEV]
|
||||
|
||||
builddependencies = [
|
||||
('CMake', '3.20.1'), # Needs 3.20 or newer.
|
||||
('CMake', '3.20.1'),
|
||||
('hypothesis', '5.41.5'),
|
||||
]
|
||||
|
||||
@ -88,7 +78,6 @@ dependencies = [
|
||||
('numactl', '2.0.13'),
|
||||
('FFmpeg', '4.3.1'),
|
||||
('Pillow', '8.0.1'),
|
||||
('expecttest', '0.1.3'),
|
||||
('cuDNN', '8.0.4.30', '-CUDA-%(cudaver)s', True),
|
||||
('magma', '2.5.4'),
|
||||
('NCCL', '2.8.3', '-CUDA-%(cudaver)s'),
|
||||
@ -112,16 +101,17 @@ excluded_tests = {
|
||||
'test_optim',
|
||||
# Tests from this suite time out often. The process group backend is deprecated anyway
|
||||
# 'distributed/rpc/test_process_group_agent',
|
||||
# This test fails consistently when run as part of the test suite, but succeeds when run interactively
|
||||
'test_model_dump',
|
||||
]
|
||||
}
|
||||
|
||||
runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'
|
||||
|
||||
# The readelf sanity check can be taken out once the TestRPATH test from https://github.com/pytorch/pytorch/pull/68912
|
||||
# is accepted, since it is then checked as part of the PyTorch test suite
|
||||
# The readelf sanity check command can be taken out once the TestRPATH test from
|
||||
# https://github.com/pytorch/pytorch/pull/68912 is accepted, since it is then checked as part of the PyTorch test suite
|
||||
local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
|
||||
sanity_check_commands = [
|
||||
"python -c 'import caffe2.python'",
|
||||
"readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
|
||||
]
|
||||
tests = ['PyTorch-check-cpp-extension.py']
|
||||
|
@ -0,0 +1,50 @@
|
||||
# Author: Caspar van Leeuwen
|
||||
# Company: SURF
|
||||
# We've seen that these tests fail for version 1.11.0, see https://github.com/pytorch/pytorch/issues/76107
|
||||
# These failures probably point to underlying issues, but the PR that fixes them touches a ton of files
|
||||
# It's near-impossible to cherry pick that, without causing other issues. Moreover,
|
||||
# PyTorch devs have pointed out that nvfuser is not enabled by default in 1.11.0, so chances of anyone
|
||||
# hitting these issues are very small
|
||||
# We simply disable the tests and accept that in v 1.11.0 in PyTorch, this functionality is broken.
|
||||
diff -Nru pytorch_orig/test/test_jit_cuda_fuser.py pytorch/test/test_jit_cuda_fuser.py
|
||||
--- pytorch_orig/test/test_jit_cuda_fuser.py 2022-04-29 14:54:30.771378000 +0200
|
||||
+++ pytorch/test/test_jit_cuda_fuser.py 2022-04-29 14:05:54.067297000 +0200
|
||||
@@ -1313,6 +1313,12 @@
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
@unittest.skipIf(not TEST_BF16, "device does not support BFloat16")
|
||||
+ # Disable test, since it fails and nnfuser wasn't enabled by default in 1.11
|
||||
+ # Thus, even if this points to an underlying issue, it should be extremely rare that
|
||||
+ # anyone hits it.
|
||||
+ # See https://github.com/pytorch/pytorch/issues/76107
|
||||
+ # and https://github.com/easybuilders/easybuild-easyconfigs/pull/15137
|
||||
+ @unittest.skip("Skipping test that is known to fail, see PT #76107")
|
||||
def test_native_layer_norm_bfloat(self):
|
||||
dims = 4
|
||||
rnds = 3
|
||||
@@ -2828,6 +2834,12 @@
|
||||
@unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
+ # Disable test, since it fails and nnfuser wasn't enabled by default in 1.11
|
||||
+ # Thus, even if this points to an underlying issue, it should be extremely rare that
|
||||
+ # anyone hits it.
|
||||
+ # See https://github.com/pytorch/pytorch/issues/76107
|
||||
+ # and https://github.com/easybuilders/easybuild-easyconfigs/pull/15137
|
||||
+ @unittest.skip("Skipping test that is known to fail, see PT #76107")
|
||||
def test_batch_norm_half(self):
|
||||
with torch.backends.cudnn.flags(enabled=True):
|
||||
setups = [
|
||||
@@ -2843,6 +2855,12 @@
|
||||
@unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
+ # Disable test, since it fails and nnfuser wasn't enabled by default in 1.11
|
||||
+ # Thus, even if this points to an underlying issue, it should be extremely rare that
|
||||
+ # anyone hits it.
|
||||
+ # See https://github.com/pytorch/pytorch/issues/76107
|
||||
+ # and https://github.com/easybuilders/easybuild-easyconfigs/pull/15137
|
||||
+ @unittest.skip("Skipping test that is known to fail, see PT #76107")
|
||||
def test_batch_norm_impl_index_correctness(self):
|
||||
with torch.backends.cudnn.flags(enabled=True):
|
||||
batch = [2, 7, 16]
|
44
p/PyTorch/PyTorch-1.11.0_fix_sharded_imports.patch
Normal file
44
p/PyTorch/PyTorch-1.11.0_fix_sharded_imports.patch
Normal file
@ -0,0 +1,44 @@
|
||||
# Fixes a "NameError: name 'sharded_tensor' is not defined" error
|
||||
# for the test_named_params_with_sharded_tensor test
|
||||
# See https://github.com/pytorch/pytorch/pull/73309
|
||||
From 012d490ed76d8af8538d310a508b0e09a91b7632 Mon Sep 17 00:00:00 2001
|
||||
From: wanchaol <wanchaol@devvm3348.frc0.facebook.com>
|
||||
Date: Wed, 23 Feb 2022 12:10:39 -0800
|
||||
Subject: [PATCH] [shard] fix some imports in tests
|
||||
|
||||
This fix some imports in sharded optimizer tests
|
||||
|
||||
Differential Revision: [D34427252](https://our.internmc.facebook.com/intern/diff/D34427252/)
|
||||
|
||||
[ghstack-poisoned]
|
||||
---
|
||||
.../_shard/sharded_optim/test_sharded_optim.py | 9 ++++++---
|
||||
1 file changed, 6 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/test/distributed/_shard/sharded_optim/test_sharded_optim.py b/test/distributed/_shard/sharded_optim/test_sharded_optim.py
|
||||
index 085c928985eb..d3f1468aea3c 100644
|
||||
--- a/test/distributed/_shard/sharded_optim/test_sharded_optim.py
|
||||
+++ b/test/distributed/_shard/sharded_optim/test_sharded_optim.py
|
||||
@@ -2,7 +2,10 @@
|
||||
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
-import torch.distributed._shard.sharded_tensor
|
||||
+from torch.distributed._shard import (
|
||||
+ sharded_tensor,
|
||||
+ shard_parameter
|
||||
+)
|
||||
|
||||
from copy import deepcopy
|
||||
from torch.distributed._shard.sharding_spec import (
|
||||
@@ -77,8 +80,8 @@ def shard_parameter(self):
|
||||
],
|
||||
)
|
||||
|
||||
- sharded_tensor.shard_parameter(self.linear1, "weight", rowwise_sharding_spec)
|
||||
- sharded_tensor.shard_parameter(self.linear2, "weight", colwise_sharding_spec)
|
||||
+ shard_parameter(self.linear1, "weight", rowwise_sharding_spec)
|
||||
+ shard_parameter(self.linear2, "weight", colwise_sharding_spec)
|
||||
|
||||
def forward(self, inp):
|
||||
return self.linear2(self.gelu(self.linear1(inp)))
|
439
p/PyTorch/PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch
Normal file
439
p/PyTorch/PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch
Normal file
@ -0,0 +1,439 @@
|
||||
# Author: Caspar van Leeuwen
|
||||
# Company: SURF
|
||||
# The CudaFuser test suite checks CUDA capabilities, even if 'RUN_CUDA' is false.
|
||||
# That makes the test fail on non-GPU nodes.
|
||||
# In this patch, I wrapped the logic in 'if RUN_CUDA' blocks in order to make sure
|
||||
# no CUDA calls are made when RUN_CUDA=false
|
||||
# Furthermore, I swapped all occurences of @unittest.skipIf(not RUN_CUDA, ...) and @unittest.skipIf(is_pre_volta())
|
||||
# The latter check is a more specific 'skip' condition: you should only check if a GPU is pre-volta,
|
||||
# if there are CUDA devices present to begin with. Again, doing this in the wrong order would incur CUDA calls
|
||||
# on non-CUDA nodes.
|
||||
# Note that this has been fixed in master, so we probably don't need this patch beyond PT 1.11
|
||||
diff -Nru pytorch-1.11.0-rc3.orig/test/test_jit_cuda_fuser.py pytorch-1.11.0-rc3/test/test_jit_cuda_fuser.py
|
||||
--- pytorch-1.11.0-rc3.orig/test/test_jit_cuda_fuser.py 2022-02-24 18:06:55.180421593 +0100
|
||||
+++ pytorch-1.11.0-rc3/test/test_jit_cuda_fuser.py 2022-02-25 13:30:47.112845480 +0100
|
||||
@@ -57,18 +57,25 @@
|
||||
torch._C._jit_set_nvfuser_horizontal_mode(old_value)
|
||||
|
||||
def is_pre_volta():
|
||||
- prop = torch.cuda.get_device_properties(torch.cuda.current_device())
|
||||
- return prop.major < 7
|
||||
-
|
||||
-TEST_BF16 = torch.cuda.is_bf16_supported()
|
||||
+ if RUN_CUDA:
|
||||
+ prop = torch.cuda.get_device_properties(torch.cuda.current_device())
|
||||
+ return prop.major < 7
|
||||
+ else:
|
||||
+ return True
|
||||
+
|
||||
+if RUN_CUDA:
|
||||
+ TEST_BF16 = torch.cuda.is_bf16_supported()
|
||||
+else:
|
||||
+ TEST_BF16=False
|
||||
|
||||
class TestCudaFuser(JitTestCase):
|
||||
|
||||
- special_values = torch.tensor(
|
||||
- [float("-inf"), -10, -math.pi,
|
||||
- -1, -0.5, 0, 1, 0.5,
|
||||
- math.pi, 10, float("inf"),
|
||||
- float("nan")], dtype=torch.float, device='cuda')
|
||||
+ if RUN_CUDA:
|
||||
+ special_values = torch.tensor(
|
||||
+ [float("-inf"), -10, -math.pi,
|
||||
+ -1, -0.5, 0, 1, 0.5,
|
||||
+ math.pi, 10, float("inf"),
|
||||
+ float("nan")], dtype=torch.float, device='cuda')
|
||||
|
||||
int_types = [
|
||||
torch.int8,
|
||||
@@ -253,8 +260,8 @@
|
||||
self.assertEqual(o, jit_o)
|
||||
self.assertGraphContains(t_jit.graph_for(x, y, z, q), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_reduction_dtypes_axis(self):
|
||||
@@ -1120,8 +1127,8 @@
|
||||
self.assertTrue(self._compare("comparing output failed", o, jit_o, 1e-4))
|
||||
self.assertGraphContains(t_jit.graph_for(x, y), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_reduction(self):
|
||||
@@ -1170,8 +1177,8 @@
|
||||
FileCheck().check(FUSION_GUARD).run(g)
|
||||
FileCheck().check(FUSION_GUARD).run(v2.graph)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_layer_norm_autodiff(self):
|
||||
@@ -1212,8 +1219,8 @@
|
||||
args.append(torch.randn(shapes, dtype=torch.float32, device="cuda").requires_grad_())
|
||||
self._layer_norm_autodiff_helper(m, grad, shapes, args)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_layer_norm_parser(self):
|
||||
@@ -1273,8 +1280,8 @@
|
||||
self.assertGraphContains(t_jit.graph_for(x), FUSION_GUARD)
|
||||
|
||||
@unittest.skipIf(True, "codegen failure awaiting fix")
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_native_layer_norm(self):
|
||||
@@ -1288,8 +1295,8 @@
|
||||
self._native_layer_norm_helper(input_shape, norm_shape, torch.float32, "cuda", 1e-4, affine)
|
||||
|
||||
@unittest.skipIf(True, "codegen failure awaiting fix")
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_native_layer_norm_half(self):
|
||||
@@ -1301,8 +1308,8 @@
|
||||
norm_shape = [input_shape[idx] for idx in range(dims - offset, dims)]
|
||||
self._native_layer_norm_helper(input_shape, norm_shape, torch.float16, "cuda", 5e-3)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
@unittest.skipIf(not TEST_BF16, "device does not support BFloat16")
|
||||
@@ -1362,8 +1369,8 @@
|
||||
self.assertTrue(self._compare("comparing running_var failed", eager_running_var, jit_running_var, error))
|
||||
self.assertGraphContains(t_jit.graph_for(x, running_mean, running_var), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_norm_channels_last(self):
|
||||
@@ -1374,8 +1381,8 @@
|
||||
for mf in [torch.channels_last, torch.contiguous_format]:
|
||||
self._norm_helper(size, torch.float32, "cuda", 1e-4, is_batch_norm_else_instance_norm, memory_format=mf)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_norm(self):
|
||||
@@ -1391,8 +1398,8 @@
|
||||
x[1] = C
|
||||
self._norm_helper(x, torch.float32, "cuda", 1e-4, is_batch_norm_else_instance_norm)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_norm_large(self):
|
||||
@@ -1407,8 +1414,8 @@
|
||||
x[1] = C
|
||||
self._norm_helper(x, torch.float32, "cuda", 1e-4, is_batch_norm_else_instance_norm)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_norm_half(self):
|
||||
@@ -1424,8 +1431,8 @@
|
||||
x[1] = C
|
||||
self._norm_helper(x, torch.float16, "cuda", 5e-3, is_batch_norm_else_instance_norm)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
@unittest.skipIf(not TEST_BF16, "device does not support BFloat16")
|
||||
@@ -1469,8 +1476,8 @@
|
||||
self.assertTrue(self._compare("comparing output failed", o, jit_o, error))
|
||||
self.assertGraphContains(t_jit.graph_for(x, y), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_softmax_dtype(self):
|
||||
@@ -1511,8 +1518,8 @@
|
||||
)[0].graph
|
||||
FileCheck().check(FUSION_GUARD).run(bwd_graph)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test__softmax_function(self):
|
||||
@@ -1535,8 +1542,8 @@
|
||||
self.assertTrue(self._compare("comparing output failed", o, jit_o, 1e-3))
|
||||
self.assertGraphContainsExactly(t_jit.graph_for(x, y), FUSION_GUARD, 1, consider_subgraphs=True)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test__softmax_function_half_to_float(self):
|
||||
@@ -1559,8 +1566,8 @@
|
||||
self.assertTrue(self._compare("comparing output failed", o, jit_o, 1e-3))
|
||||
self.assertGraphContainsExactly(t_jit.graph_for(x, y), FUSION_GUARD, 1, consider_subgraphs=True)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_softmax(self):
|
||||
@@ -1575,8 +1582,8 @@
|
||||
x[reduction_dim] = reduction_size
|
||||
self._softmax_helper(x, reduction_dim, torch.float32, "cuda", 1e-4)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_softmax_half(self):
|
||||
@@ -1591,8 +1598,8 @@
|
||||
x[reduction_dim] = reduction_size
|
||||
self._softmax_helper(x, reduction_dim, torch.float16, "cuda", 5e-3)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
@unittest.skipIf(not TEST_BF16, "device does not support BFloat16")
|
||||
@@ -1608,8 +1615,8 @@
|
||||
x[reduction_dim] = reduction_size
|
||||
self._softmax_helper(x, reduction_dim, torch.bfloat16, "cuda", 1e-1)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_reduction_permutation(self):
|
||||
@@ -1622,8 +1629,8 @@
|
||||
for perm1 in itertools.permutations(range(len(x))):
|
||||
self._reduction_helper(x, axes, torch.float32, "cuda", perm0, perm1)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_reduction_multiple_output(self):
|
||||
@@ -1767,8 +1774,8 @@
|
||||
self.assertEqual(o, jit_o)
|
||||
'''
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_pw_single_reduction_partition(self):
|
||||
@@ -1792,8 +1799,8 @@
|
||||
self.assertEqual(o, jit_o)
|
||||
self.assertGraphContains(t_jit.graph_for(x, y, z), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_permutation_preservation(self):
|
||||
@@ -1830,8 +1837,8 @@
|
||||
self.assertGraphContains(t_jit.graph_for(x), FUSION_GUARD)
|
||||
self.assertTrue(jit_o.is_contiguous(memory_format=torch.channels_last))
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_normalization_partition(self):
|
||||
@@ -1858,8 +1865,8 @@
|
||||
self.assertEqual(o, jit_o)
|
||||
self.assertGraphContains(t_jit.graph_for(x, y, z, r_m, r_v), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_sum_to_one(self):
|
||||
@@ -1879,8 +1886,8 @@
|
||||
self.assertEqual(o, jit_o)
|
||||
self.assertGraphContains(t_jit.graph_for(x), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_single_reduction_broadcast(self):
|
||||
@@ -1903,8 +1910,8 @@
|
||||
self.assertEqual(o, jit_o)
|
||||
self.assertGraphContains(t_jit.graph_for(x, y, z), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_trivial_reduction(self):
|
||||
@@ -1940,8 +1947,8 @@
|
||||
repro_jit = torch.jit.script(repro)
|
||||
self._run_helper(repro_jit, repro, x, 0.6)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_reduction_sizes_op(self):
|
||||
@@ -1964,8 +1971,8 @@
|
||||
# have been optimized away
|
||||
self.assertGraphContainsExactly(t_jit.graph_for(x, y), FUSION_GUARD, 0)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_profile_ivalue(self):
|
||||
@@ -1987,8 +1994,8 @@
|
||||
self.assertEqual(o, jit_o)
|
||||
self.assertGraphContains(t_jit.graph_for(x, y, (0, 1), False), FUSION_GUARD)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_sum_to_size(self):
|
||||
@@ -2021,8 +2028,8 @@
|
||||
self.assertEqual(o.dtype, jit_o.dtype)
|
||||
self.assertEqual(o, jit_o)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_grad_sum_to_size(self):
|
||||
@@ -2145,8 +2152,8 @@
|
||||
self.assertTrue((percent_zeros >= (prob - 0.01)) and (percent_zeros <= (prob + 0.01)))
|
||||
self.assertGraphContainsExactly(t_jit.graph_for(x, prob, True), FUSION_GUARD, 1, consider_subgraphs=True)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_dropout_training_fusion(self):
|
||||
@@ -2294,8 +2301,8 @@
|
||||
self.assertEqual(x.grad.dtype, x.dtype)
|
||||
self.assertEqual(y.grad.dtype, y.dtype)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_autocast_1(self):
|
||||
@@ -2331,8 +2338,8 @@
|
||||
self.assertEqual(x.grad.dtype, x.dtype)
|
||||
self.assertEqual(y.grad.dtype, y.dtype)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_autocast_2(self):
|
||||
@@ -2367,8 +2374,8 @@
|
||||
self.assertEqual(jit_o.dtype, torch.float)
|
||||
self.assertEqual(x.grad.dtype, x.dtype)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
@unittest.skipIf(not TEST_BF16, "device does not support BFloat16")
|
||||
@@ -2405,8 +2412,8 @@
|
||||
self.assertEqual(x.grad.dtype, x.dtype)
|
||||
self.assertEqual(y.grad.dtype, y.dtype)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
@unittest.skipIf(not TEST_BF16, "device does not support BFloat16")
|
||||
@@ -2817,8 +2824,8 @@
|
||||
ref_module.bn.running_var,
|
||||
e0))
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_batch_norm_half(self):
|
||||
@@ -2832,8 +2839,8 @@
|
||||
training, track_running_stats = training_and_track
|
||||
self._test_batch_norm_impl_index_helper(4, 8, 5, affine, track_running_stats, training, torch.half)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_batch_norm_impl_index_correctness(self):
|
||||
@@ -2947,8 +2954,8 @@
|
||||
self.assertGraphContainsExactly(graph, FUSION_GROUP, 0)
|
||||
self.assertGraphContains(graph, 'prim::add_optional', True)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_remove_output_used_only_in_dtype(self):
|
||||
@@ -2980,8 +2987,8 @@
|
||||
graph = jitted.graph_for(x, y)
|
||||
self.assertGraphContains(graph, FUSION_GROUP, True)
|
||||
|
||||
- @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(not RUN_CUDA, "requires CUDA")
|
||||
+ @unittest.skipIf(is_pre_volta(), "reduction not supported in pre volta device")
|
||||
@unittest.skipIf(GRAPH_EXECUTOR != ProfilingMode.PROFILING,
|
||||
"Requires fusion optimization pass to be effective")
|
||||
def test_fix_shape_expression_bn(self):
|
@ -0,0 +1,17 @@
|
||||
It seems the timeout for the distributed tests is set too low and spurious failures can be seen
|
||||
Increase it by a factor of 6 similar to torch/testing/_internal/distributed/distributed_test.py
|
||||
|
||||
Original patch by Alexander Grund (TU Dresden), updated by Caspar van Leeuwen (SURF)
|
||||
|
||||
diff -Nru pytorch-1.11.0-rc3.orig/torch/testing/_internal/common_distributed.py pytorch-1.11.0-rc3/torch/testing/_internal/common_distributed.py
|
||||
--- pytorch-1.11.0-rc3.orig/torch/testing/_internal/common_distributed.py 2022-02-24 18:07:16.414274654 +0100
|
||||
+++ pytorch-1.11.0-rc3/torch/testing/_internal/common_distributed.py 2022-02-24 18:08:31.772851148 +0100
|
||||
@@ -321,7 +321,7 @@
|
||||
# TSAN runs much slower.
|
||||
TIMEOUT_DEFAULT = 500
|
||||
else:
|
||||
- TIMEOUT_DEFAULT = 100
|
||||
+ TIMEOUT_DEFAULT = 600
|
||||
TIMEOUT_OVERRIDE = {"test_ddp_uneven_inputs": 400}
|
||||
|
||||
|
16
p/PyTorch/PyTorch-1.11.0_increase_c10d_gloo_timeout.patch
Normal file
16
p/PyTorch/PyTorch-1.11.0_increase_c10d_gloo_timeout.patch
Normal file
@ -0,0 +1,16 @@
|
||||
# Author: Caspar van Leeuwen
|
||||
# Institute: SURF
|
||||
# Increase timeout for c10d gloo process group operations since test_allreduce_coalesced_basics
|
||||
# was failing with a timeout (see https://github.com/easybuilders/easybuild-easyconfigs/pull/15137)
|
||||
diff -Nru pytorch/test/distributed/test_c10d_gloo.py pytorch_orig/test/distributed/test_c10d_gloo.py
|
||||
--- pytorch/test/distributed/test_c10d_gloo.py 2022-04-19 15:27:48.540163735 +0200
|
||||
+++ pytorch_orig/test/distributed/test_c10d_gloo.py 2022-04-07 18:31:13.110755000 +0200
|
||||
@@ -216,7 +216,7 @@
|
||||
|
||||
def opts(self, threads=2):
|
||||
opts = c10d.ProcessGroupGloo._Options()
|
||||
- opts._timeout = 5.0
|
||||
+ opts._timeout = 50.0
|
||||
opts._devices = [create_device(interface=LOOPBACK)]
|
||||
opts._threads = threads
|
||||
return opts
|
143
p/PyTorch/PyTorch-1.11.0_increase_test_tolerances_TF32.patch
Normal file
143
p/PyTorch/PyTorch-1.11.0_increase_test_tolerances_TF32.patch
Normal file
@ -0,0 +1,143 @@
|
||||
# Author: Caspar van Leeuwen, SURF
|
||||
# Fixes failing tests due to use of TensorFloat32
|
||||
# Setting NVIDIA_TF32_OVERRIDE=0 makes these tests pass, proving that TensorFloat32 is the issue
|
||||
# We increase tolerances for the asserts to make these tests pass
|
||||
diff -Nru pytorch_orig/test/distributed/_shard/sharded_tensor/ops/test_linear.py pytorch/test/distributed/_shard/sharded_tensor/ops/test_linear.py
|
||||
--- pytorch_orig/test/distributed/_shard/sharded_tensor/ops/test_linear.py 2022-04-07 18:31:13.069599000 +0200
|
||||
+++ pytorch/test/distributed/_shard/sharded_tensor/ops/test_linear.py 2022-04-07 18:32:32.877406000 +0200
|
||||
@@ -77,7 +77,7 @@
|
||||
local_output = local_linear(inp)
|
||||
|
||||
# Verify
|
||||
- self.assertEqual(local_output, sharded_output)
|
||||
+ self.assertEqual(local_output, sharded_output, rtol=0.02, atol=1e-03)
|
||||
|
||||
# Validate for torch.nn.functional.linear version.
|
||||
local_output = torch.nn.functional.linear(
|
||||
@@ -91,7 +91,7 @@
|
||||
# for reshard. We need to squeeze the # of dimensions manually.
|
||||
if inp.dim() == 1:
|
||||
sharded_output = sharded_output.squeeze(reshard_spec.dim)
|
||||
- self.assertEqual(local_output, sharded_output)
|
||||
+ self.assertEqual(local_output, sharded_output, rtol=0.02, atol=1e-03)
|
||||
|
||||
# Compute loss and run backward pass.
|
||||
local_output.sum().backward()
|
||||
@@ -114,7 +114,7 @@
|
||||
|
||||
# Test backward gradient calculation.
|
||||
self.assertEqual(sharded_linear.bias.grad, local_bias_grad)
|
||||
- self.assertEqual(sharded_weight.grad, local_grad_narrowed)
|
||||
+ self.assertEqual(sharded_weight.grad, local_grad_narrowed, rtol=0.01, atol=1e-03)
|
||||
|
||||
# Test optimizer.
|
||||
previous = local_linear.weight.clone().detach()
|
||||
@@ -135,7 +135,7 @@
|
||||
)
|
||||
self.assertEqual(sharded_weight.size(), local_weight_narrowed.size())
|
||||
self.assertNotEqual(previous_sharded_weight, sharded_weight)
|
||||
- self.assertEqual(sharded_weight, local_weight_narrowed)
|
||||
+ self.assertEqual(sharded_weight, local_weight_narrowed, rtol=0.01, atol=1e-04)
|
||||
self.assertNotEqual(previous_sharded_bias, sharded_linear.bias)
|
||||
self.assertEqual(sharded_linear.bias, local_linear.bias)
|
||||
|
||||
diff -Nru pytorch_orig/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py pytorch/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py
|
||||
--- pytorch_orig/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py 2022-04-07 18:31:13.091710000 +0200
|
||||
+++ pytorch/test/distributed/_shard/sharded_tensor/test_megatron_prototype.py 2022-04-07 18:41:03.744644000 +0200
|
||||
@@ -113,7 +113,7 @@
|
||||
local_output = local_megatron_lm(inp)
|
||||
|
||||
# Verify
|
||||
- self.assertEqual(local_output, sharded_output)
|
||||
+ self.assertEqual(local_output, sharded_output, rtol=0.01, atol=1e-03)
|
||||
|
||||
# Compute loss and run backward pass.
|
||||
local_output.sum().backward()
|
||||
@@ -161,9 +161,9 @@
|
||||
)
|
||||
|
||||
# Test backward gradient calculation.
|
||||
- self.assertEqual(sharded_weight_fc1.grad, local_grad_narrowed_fc1)
|
||||
- self.assertEqual(sharded_weight_fc2.grad, local_grad_narrowed_fc2)
|
||||
- self.assertEqual(bias_grad_fc1, local_bias_grad_fc1)
|
||||
+ self.assertEqual(sharded_weight_fc1.grad, local_grad_narrowed_fc1, rtol=0.01, atol=2e-03)
|
||||
+ self.assertEqual(sharded_weight_fc2.grad, local_grad_narrowed_fc2, rtol=0.01, atol=1e-03)
|
||||
+ self.assertEqual(bias_grad_fc1, local_bias_grad_fc1, rtol=0.01, atol=2e-02)
|
||||
self.assertEqual(bias_grad_fc2, local_bias_grad_fc2)
|
||||
|
||||
# Test optimizer.
|
||||
@@ -171,7 +171,7 @@
|
||||
local_bias_fc1, local_bias_fc2 = _get_bias(local_megatron_lm)
|
||||
self.assertEqual(bias_fc1, local_bias_fc1)
|
||||
self.assertEqual(bias_fc2, local_bias_fc2)
|
||||
- self.assertEqual(bias_fc1.grad, local_bias_fc1.grad)
|
||||
+ self.assertEqual(bias_fc1.grad, local_bias_fc1.grad, rtol=0.01, atol=2e-02)
|
||||
self.assertEqual(bias_fc2.grad, local_bias_fc2.grad)
|
||||
previous_sharded_weight_fc1 = sharded_weight_fc1.clone()
|
||||
previous_sharded_weight_fc2 = sharded_weight_fc2.clone()
|
||||
@@ -197,13 +197,13 @@
|
||||
self.assertEqual(sharded_weight_fc2.size(), local_weight_fc2_narrowed.size())
|
||||
self.assertNotEqual(previous_sharded_weight_fc1, sharded_weight_fc1)
|
||||
self.assertNotEqual(previous_sharded_weight_fc2, sharded_weight_fc2)
|
||||
- self.assertEqual(sharded_weight_fc1, local_weight_fc1_narrowed)
|
||||
- self.assertEqual(sharded_weight_fc2, local_weight_fc2_narrowed)
|
||||
+ self.assertEqual(sharded_weight_fc1, local_weight_fc1_narrowed, rtol=0.01, atol=1e-03)
|
||||
+ self.assertEqual(sharded_weight_fc2, local_weight_fc2_narrowed, rtol=0.01, atol=1e-03)
|
||||
|
||||
# Test bias value after optimizer.
|
||||
local_bias_fc1, local_bias_fc2 = _get_bias(local_megatron_lm)
|
||||
self.assertNotEqual(previous_bias_fc1, bias_fc1)
|
||||
- self.assertEqual(bias_fc1, local_bias_fc1)
|
||||
+ self.assertEqual(bias_fc1, local_bias_fc1, rtol=0.01, atol=1e-03)
|
||||
self.assertNotEqual(previous_bias_fc2, bias_fc2)
|
||||
self.assertEqual(bias_fc2, local_bias_fc2)
|
||||
|
||||
diff -Nru pytorch_orig/test/test_stateless.py pytorch/test/test_stateless.py
|
||||
--- pytorch_orig/test/test_stateless.py 2022-04-07 18:31:13.029968000 +0200
|
||||
+++ pytorch/test/test_stateless.py 2022-04-07 18:43:46.723968000 +0200
|
||||
@@ -42,7 +42,7 @@
|
||||
# existing params in module. So here we expect the result to be the
|
||||
# same as the input if the weight swapping went well.
|
||||
res = _stateless.functional_call(module, parameters, x)
|
||||
- self.assertEqual(x, res)
|
||||
+ self.assertEqual(x, res, rtol=1e-04, atol=1e-04)
|
||||
# check that the weight remain unmodified
|
||||
cur_weight = to_check.l1.weight
|
||||
cur_buffer = to_check.buffer
|
||||
diff -Nru pytorch_orig/test/test_jit_fuser_te.py pytorch/test/test_jit_fuser_te.py
|
||||
--- pytorch_orig/test/test_jit_fuser_te.py 2022-04-07 18:31:13.046680000 +0200
|
||||
+++ pytorch/test/test_jit_fuser_te.py 2022-04-12 18:21:00.355114000 +0200
|
||||
@@ -956,7 +956,7 @@
|
||||
def test_lstm_traced(self):
|
||||
for device in self.devices:
|
||||
inputs = get_lstm_inputs(device)
|
||||
- ge = self.checkTrace(LSTMCellF, inputs)
|
||||
+ ge = self.checkTrace(LSTMCellF, inputs, atol=1e-4, rtol=1e-5)
|
||||
graph = ge.graph_for(*inputs)
|
||||
fusion_groups = self.findFusionGroups(graph)
|
||||
# TODO: chunk
|
||||
diff -Nru pytorch_orig/torch/testing/_internal/jit_utils.py pytorch/torch/testing/_internal/jit_utils.py
|
||||
--- pytorch_orig/torch/testing/_internal/jit_utils.py 2022-04-07 18:28:54.339477000 +0200
|
||||
+++ pytorch/torch/testing/_internal/jit_utils.py 2022-04-12 18:19:59.614272000 +0200
|
||||
@@ -525,7 +525,7 @@
|
||||
def checkTrace(self, func, reference_tensors, input_tensors=None,
|
||||
drop=None, allow_unused=False, verbose=False,
|
||||
inputs_require_grads=True, check_tolerance=1e-5, export_import=True,
|
||||
- _force_outplace=False):
|
||||
+ _force_outplace=False, rtol=None, atol=None):
|
||||
|
||||
# TODO: check gradients for parameters, not just inputs
|
||||
def allSum(vs):
|
||||
@@ -618,7 +618,10 @@
|
||||
|
||||
self.assertEqual(outputs, outputs_ge)
|
||||
if inputs_require_grads:
|
||||
- self.assertEqual(grads, grads_ge)
|
||||
+ if atol is not None and rtol is not None:
|
||||
+ self.assertEqual(grads, grads_ge, atol=atol, rtol=rtol)
|
||||
+ else:
|
||||
+ self.assertEqual(grads, grads_ge)
|
||||
for g2, g2_ge in zip(grads2, grads2_ge):
|
||||
if g2 is None and g2_ge is None:
|
||||
continue
|
35
p/PyTorch/PyTorch-1.11.0_skip_failing_ops_tests.patch
Normal file
35
p/PyTorch/PyTorch-1.11.0_skip_failing_ops_tests.patch
Normal file
@ -0,0 +1,35 @@
|
||||
# Author: Caspar van Leeuwen
|
||||
# Company: SURF
|
||||
# Tests 'test_fn_grad_linalg_det_singular_cpu_complex128' and 'test_variant_consistency_jit_contiguous_cpu_float32' fail
|
||||
# See https://github.com/pytorch/pytorch/issues/67767 and https://github.com/pytorch/pytorch/issues/67838
|
||||
# For the first one, devs recommended to switch it off while they revisit the code.
|
||||
# For the second: the test works interactively when run with
|
||||
# python -m unittest test_ops.TestJitCPU.test_variant_consistency_jit_contiguous_cpu_float32 -v
|
||||
# This shows there is no fundamental problem with the installation,
|
||||
# but something in the environment when run as 'python run_test.py' makes it fail.
|
||||
diff -Nru pytorch-1.11.0-rc3.orig/torch/testing/_internal/common_methods_invocations.py pytorch-1.11.0-rc3/torch/testing/_internal/common_methods_invocations.py
|
||||
--- pytorch-1.11.0-rc3.orig/torch/testing/_internal/common_methods_invocations.py 2022-02-24 18:07:16.430276050 +0100
|
||||
+++ pytorch-1.11.0-rc3/torch/testing/_internal/common_methods_invocations.py 2022-02-24 19:38:11.610293957 +0100
|
||||
@@ -8791,7 +8791,10 @@
|
||||
supports_fwgrad_bwgrad=True,
|
||||
autodiff_fusible_nodes=['aten::contiguous'],
|
||||
assert_jit_shape_analysis=True,
|
||||
- supports_out=False),
|
||||
+ supports_out=False,
|
||||
+ skips=(
|
||||
+ DecorateInfo(unittest.skip("Skipped!"), 'TestJit', 'test_variant_consistency_jit', device_type='cpu'),
|
||||
+ )),
|
||||
OpInfo('sum_to_size',
|
||||
op=lambda x, *args, **kwargs: x.sum_to_size(*args, **kwargs),
|
||||
dtypes=floating_and_complex_types_and(torch.float16, torch.bfloat16),
|
||||
@@ -9746,6 +9749,10 @@
|
||||
DecorateInfo(unittest.skip("Skipped!"), 'TestMathBits', 'test_neg_view', device_type='cuda'),
|
||||
DecorateInfo(unittest.skip("Skipped!"), 'TestCommon', 'test_dtypes'),
|
||||
DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_fn_gradgrad'),
|
||||
+ # It also breaks on CPU. We'll revisit this once `linalg.lu_solve` is a thing
|
||||
+ # See https://github.com/pytorch/pytorch/pull/64387 and https://github.com/pytorch/pytorch/issues/67767
|
||||
+ DecorateInfo(unittest.skip("Skipped!"), 'TestGradients', 'test_fn_grad',
|
||||
+ dtypes=(torch.complex128,)),
|
||||
)),
|
||||
OpInfo('linalg.cholesky',
|
||||
aten_name='linalg_cholesky',
|
Loading…
x
Reference in New Issue
Block a user