# IT4Innovations
# LK 2022

# Package identity; referenced further down through the %(name)s and %(version)s templates
name = 'PyTorch'
version = '1.11.0'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'fosscuda', 'version': '2020b'}

# The source tarball is not fetched as a release archive: it is produced from a recursive git clone
# of the 'pytorch' repository under https://github.com/pytorch, at the tag that matches the version.
sources = [{
    'filename': '%(name)s-%(version)s.tar.gz',
    'git_config': {
        'url': 'https://github.com/pytorch',
        'repo_name': 'pytorch',
        'tag': 'v%(version)s',
        'recursive': True,
    },
}]
# Patch files to apply to the sources; judging by their names, most of them adjust, relax or skip tests
patches = [
    'PyTorch-1.7.0_avoid-nan-in-test-torch.patch',
    'PyTorch-1.7.0_disable-dev-shm-test.patch',
    'PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch',
    'PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch',
    'PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch',
    'PyTorch-1.10.0_skip_cmake_rpath.patch',
    'PyTorch-1.11.0_increase-distributed-test-timeout.patch',
    'PyTorch-1.11.0_skip_failing_ops_tests.patch',
    'PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch',
    'PyTorch-1.11.0_fix_sharded_imports.patch',
    'PyTorch-1.11.0_increase_test_tolerances_TF32.patch',
    'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch',
    'PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch',
]
# Checksums, one per file: the source tarball first, then every patch in the order used in 'patches'
checksums = [
    # %(name)s-%(version)s.tar.gz: a proper SHA256 checksum can't be added for it, because the source
    # tarball is created locally after a recursive 'git clone'.
    # NOTE(review): a checksum is pinned regardless - presumably it only matches the locally cached tarball;
    # confirm, or use None instead.
    'c5bf5b9e24f3884335c9e30878a763ec31e1ed429904d966c07265b86a16ba7b',
    'b899aa94d9e60f11ee75a706563312ccefa9cf432756c470caa8e623991c8f18',  # PyTorch-1.7.0_avoid-nan-in-test-torch.patch
    '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a',  # PyTorch-1.7.0_disable-dev-shm-test.patch
    '89ac7a8e9e7df2e64cf8404fe3a279f5e9b759fee41c9de3aaff9c22f385c2c6',  # PyTorch-1.8.1_dont-use-gpu-ccc-in-test.patch
    # PyTorch-1.9.0_limit-world-size-for-zero-redundancy-opt-test.patch
    'ff573660913ce055e24cfd194ce747ba5685091c631cfd443eae2a99d56b57ea',
    # PyTorch-1.10.0_fix-test-dataloader-fixed-affinity.patch
    '313dca681f45ce3bc7c4557fdcdcbe0b77216d2c708fa30a2ec0e22c44876707',
    'ac05943bb205623f91ef140aa00869efc5fe844184bd666bebf5405808610448',  # PyTorch-1.10.0_skip_cmake_rpath.patch
    # PyTorch-1.11.0_increase-distributed-test-timeout.patch
    '087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f',
    '8eaca92d64fcadb0552d28e9c7ea5c4bc669d2fe33004e45a3519ce8d0d136a2',  # PyTorch-1.11.0_skip_failing_ops_tests.patch
    '21fc678febcdfbb9dabd72235be23cd392044e9a954f6580d15b530e1f69dcc1',  # PyTorch-1.11.0_fix_skip_jit_cuda_fuser.patch
    '9a04f4285b800dad8a00c3014af0a9713d40d5dd35d10931c7c0da4e89c558e9',  # PyTorch-1.11.0_fix_sharded_imports.patch
    # PyTorch-1.11.0_increase_test_tolerances_TF32.patch
    '26e179a4f6f57e49209092612ae5f5cd8c03fd2ca84566ba0244eabefc3736ba',
    # PyTorch-1.11.0_increase_c10d_gloo_timeout.patch
    '20cd4a8663f74ab326fdb032b926bf5c7e94d9750c515ab9050927ba00cf1953',
    # PyTorch-1.11.0_disable_failing_jit_cuda_fuser_tests.patch
    'e7bfe120a8b3fe2b40dac6839852a5fbab3cb3429fbe44a0fc3a1800adaaee51',
]

# Required from the operating system; going by the constant's name, the InfiniBand verbs development package
osdependencies = [OS_PKG_IBVERBS_DEV]

# Only needed while building (and testing), not when the installed software is used
builddependencies = [
    ('CMake', '3.20.1'),
    ('hypothesis', '5.41.5'),
]

dependencies = [
    ('Ninja', '1.10.1'),  # Required for JIT compilation of C++ extensions
    ('Python', '3.8.6'),
    ('protobuf', '3.14.0'),
    ('protobuf-python', '3.14.0'),
    ('pybind11', '2.6.0'),
    # NOTE(review): SciPy-bundle is disabled here - confirm that leaving it out is intended
    # ('SciPy-bundle', '2020.11'),
    ('typing-extensions', '3.7.4.3'),
    ('PyYAML', '5.3.1'),
    ('MPFR', '4.1.0'),
    ('GMP', '6.2.0'),
    ('numactl', '2.0.13'),
    ('FFmpeg', '4.3.1'),
    ('Pillow', '8.0.1'),
    # NOTE(review): the trailing True presumably makes cuDNN resolve against the system toolchain - confirm
    ('cuDNN', '8.0.4.30', '-CUDA-%(cudaver)s', True),
    ('magma', '2.5.4'),
    ('NCCL', '2.8.3', '-CUDA-%(cudaver)s'),
]

# default CUDA compute capabilities to use (override via --cuda-compute-capabilities)
cuda_compute_capabilities = ['3.5', '3.7', '5.2', '6.0', '6.1', '7.0', '7.2', '7.5', '8.0', '8.6']

# NOTE(review): presumably makes the build use the shared CUPTI library rather than a static one - confirm
custom_opts = ["USE_CUPTI_SO=1"]

# Test suites to leave out of the run; the reason for each exclusion is given next to it.
# NOTE(review): the '' key presumably applies the list to every architecture - confirm.
excluded_tests = {
    '': [
        # Bad tests: https://github.com/pytorch/pytorch/issues/60260
        'distributed/elastic/utils/distributed_test',
        'distributed/elastic/multiprocessing/api_test',
        # These tests fail on A10s at the very least, they time out forever no matter how long the timeout is.
        # Possibly related to NCCL 2.8.3: https://docs.nvidia.com/deeplearning/nccl/release-notes/rel_2-8-3.html
        # 'distributed/test_distributed_fork',
        'distributed/test_distributed_spawn',
        # Fails on A10s: https://github.com/pytorch/pytorch/issues/63079
        'test_optim',
        # Tests from this suite time out often. The process group backend is deprecated anyway
        # 'distributed/rpc/test_process_group_agent',
        # This test fails consistently when run as part of the test suite, but succeeds when run interactively
        'test_model_dump',
    ]
}

# Runs run_test.py from the 'test' directory with unbuffered Python output, carrying on after failing tests
runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error  --verbose %(excluded_tests)s'

# The readelf sanity check command can be taken out once the TestRPATH test from
# https://github.com/pytorch/pytorch/pull/68912 is accepted, since it is then checked as part of the PyTorch test suite
# '%%' keeps %(pyshortver)s intact as a template while '%s' is filled in with the shared library extension here
local_libcaffe2 = "$EBROOTPYTORCH/lib/python%%(pyshortver)s/site-packages/torch/lib/libcaffe2_nvrtc.%s" % SHLIB_EXT
sanity_check_commands = [
    # succeeds only if the library has an RPATH/RUNPATH entry left after dropping lines that mention 'stubs'
    "readelf -d %s | egrep 'RPATH|RUNPATH' | grep -v stubs" % local_libcaffe2,
]
# NOTE(review): presumably an extra test script that checks building a C++ extension - confirm
tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'devel'