mirror of
https://code.it4i.cz/sccs/easyconfigs-it4i.git
synced 2025-04-07 23:42:12 +01:00

new file: p/PyTorch/PyTorch-1.11.0-foss-2021a-CUDA-11.3.1.eb modified: p/PyTorch/PyTorch-1.11.0-fosscuda-2020b.eb new file: p/PyTorch/PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch new file: p/PyTorch/PyTorch-1.11.0_fix_sharded_imports.patch new file: p/PyTorch/PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch new file: p/PyTorch/PyTorch-1.11.0_increase-distributed-test-timeout.patch new file: p/PyTorch/PyTorch-1.11.0_increase_c10d_gloo_timeout.patch new file: p/PyTorch/PyTorch-1.11.0_increase_test_tolerances_TF32.patch new file: p/PyTorch/PyTorch-1.11.0_skip_failing_ops_tests.patch
18 lines
832 B
Diff
18 lines
832 B
Diff
It seems the timeout for the distributed tests is set too low, and spurious failures can be seen.
|
|
Increase it by a factor of 6, similar to torch/testing/_internal/distributed/distributed_test.py.
|
|
|
|
Original patch by Alexander Grund (TU Dresden), updated by Caspar van Leeuwen (SURF)
|
|
|
|
diff -Nru pytorch-1.11.0-rc3.orig/torch/testing/_internal/common_distributed.py pytorch-1.11.0-rc3/torch/testing/_internal/common_distributed.py
|
|
--- pytorch-1.11.0-rc3.orig/torch/testing/_internal/common_distributed.py 2022-02-24 18:07:16.414274654 +0100
|
|
+++ pytorch-1.11.0-rc3/torch/testing/_internal/common_distributed.py 2022-02-24 18:08:31.772851148 +0100
|
|
@@ -321,7 +321,7 @@
|
|
# TSAN runs much slower.
|
|
TIMEOUT_DEFAULT = 500
|
|
else:
|
|
- TIMEOUT_DEFAULT = 100
|
|
+ TIMEOUT_DEFAULT = 600
|
|
TIMEOUT_OVERRIDE = {"test_ddp_uneven_inputs": 400}
|
|
|
|
|