From 7542a244a8a5fd1dfa7a116c9275bd02a7a1cc76 Mon Sep 17 00:00:00 2001 From: Lukas Krupcik Date: Tue, 8 Aug 2023 11:59:23 +0200 Subject: [PATCH] modified: n/NCCL/NCCL-2.16.2-GCCcore-12.2.0-CUDA-12.2.0.eb modified: o/OpenMPI/OpenMPI-4.1.5-NVHPC-23.5-CUDA-12.2.0.eb modified: u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.2.0.eb new file: u/UCC/UCC-1.1.0-GCCcore-12.2.0.eb new file: u/UCX-CUDA/UCX-CUDA-1.13.1-GCCcore-12.2.0-CUDA-12.2.0.eb new file: u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb new file: u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb deleted: u/UCX-CUDA/UCX-CUDA-1.14.0-GCCcore-12.2.0-CUDA-12.2.0.eb --- .../NCCL-2.16.2-GCCcore-12.2.0-CUDA-12.2.0.eb | 2 +- .../OpenMPI-4.1.5-NVHPC-23.5-CUDA-12.2.0.eb | 32 ++++++----- ...C-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.2.0.eb | 2 +- u/UCC/UCC-1.1.0-GCCcore-12.2.0.eb | 44 +++++++++++++++ ...CUDA-1.13.1-GCCcore-12.2.0-CUDA-12.2.0.eb} | 2 +- ...-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb | 48 ++++++++++++++++ u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb | 56 +++++++++++++++++++ 7 files changed, 168 insertions(+), 18 deletions(-) create mode 100644 u/UCC/UCC-1.1.0-GCCcore-12.2.0.eb rename u/UCX-CUDA/{UCX-CUDA-1.14.0-GCCcore-12.2.0-CUDA-12.2.0.eb => UCX-CUDA-1.13.1-GCCcore-12.2.0-CUDA-12.2.0.eb} (98%) create mode 100644 u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb create mode 100644 u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb diff --git a/n/NCCL/NCCL-2.16.2-GCCcore-12.2.0-CUDA-12.2.0.eb b/n/NCCL/NCCL-2.16.2-GCCcore-12.2.0-CUDA-12.2.0.eb index cc45ea79..d232297c 100644 --- a/n/NCCL/NCCL-2.16.2-GCCcore-12.2.0-CUDA-12.2.0.eb +++ b/n/NCCL/NCCL-2.16.2-GCCcore-12.2.0-CUDA-12.2.0.eb @@ -20,7 +20,7 @@ builddependencies = [('binutils', '2.39')] dependencies = [ ('CUDA', '12.2.0', '', SYSTEM), - ('UCX-CUDA', '1.13.1', versionsuffix), + ('UCX-CUDA', '1.14.1', versionsuffix), ] # default CUDA compute capabilities to use (override via --cuda-compute-capabilities) diff --git a/o/OpenMPI/OpenMPI-4.1.5-NVHPC-23.5-CUDA-12.2.0.eb b/o/OpenMPI/OpenMPI-4.1.5-NVHPC-23.5-CUDA-12.2.0.eb index 44c4e88e..a5cf30bd 100644 --- a/o/OpenMPI/OpenMPI-4.1.5-NVHPC-23.5-CUDA-12.2.0.eb +++ b/o/OpenMPI/OpenMPI-4.1.5-NVHPC-23.5-CUDA-12.2.0.eb @@ -23,23 +23,23 @@ checksums = [ 'b767c7166cf0b32906132d58de5439c735193c9fd09ec3c5c11db8d5fa68750e', ] -#builddependencies = [ -# ('pkgconf', '1.9.3'), -# ('Perl', '5.36.0'), -# ('Autotools', '20220317'), -#] +builddependencies = [ + ('pkgconf', '1.9.3'), + ('Perl', '5.36.0'), + ('Autotools', '20220317'), +] dependencies = [ - # ('zlib', '1.2.12'), + ('zlib', '1.2.12'), ('CUDA', '12.2.0', '', True), - # ('hwloc', '2.8.0'), - # ('libevent', '2.1.12'), - ('UCX', '1.14.0'), - # ('UCX-CUDA', '1.14.0', '-CUDA-%(cudaver)s'), - # ('libfabric', '1.16.1'), - # ('PMIx', '4.2.2'), - # ('UCC', '1.1.0'), - # ('UCC-CUDA', '1.1.0', '-CUDA-%(cudaver)s'), + ('hwloc', '2.8.0'), + ('libevent', '2.1.12'), + ('UCX', '1.14.1'), + ('UCX-CUDA', '1.14.1', '-CUDA-%(cudaver)s'), + ('libfabric', '1.16.1'), + ('PMIx', '4.2.2'), + ('UCC', '1.1.0'), + ('UCC-CUDA', '1.1.0', '-CUDA-%(cudaver)s'), ] # Update configure to include changes from the "internal-cuda" patch @@ -68,9 +68,11 @@ configopts += ' --enable-mpi-thread-multiple' configopts += ' --with-verbs' configopts += ' --enable-mpirun-prefix-by-default' configopts += ' --with-hwloc=$EBROOTHWLOC' # hwloc support -configopts = '--with-slurm ' # Enable slurm +configopts += ' --with-tm=/opt/pbs ' # Enable PBS +#configopts += ' --with-slurm ' # Enable slurm configopts += ' --enable-mpi-cxx' # Enable building the C++ MPI bindings configopts += ' --with-ucx=$EBROOTUCX' +configopts += ' --with-knem=/opt/knem-1.1.4.90mlnx1' osdependencies = [('libibverbs-dev', 'libibverbs-devel', 'rdma-core-devel')] diff --git a/u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.2.0.eb b/u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.2.0.eb index 54b70e3a..36518e8e 100644 --- a/u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.2.0.eb +++ b/u/UCC-CUDA/UCC-CUDA-1.1.0-GCCcore-12.2.0-CUDA-12.2.0.eb @@ -39,7 +39,7 @@ builddependencies = [ dependencies = [ ('UCC', '1.1.0'), ('CUDA', '12.2.0', '', SYSTEM), - ('UCX-CUDA', '1.13.1', '-CUDA-%(cudaver)s'), + ('UCX-CUDA', '1.14.1', '-CUDA-%(cudaver)s'), ('NCCL', '2.16.2', '-CUDA-%(cudaver)s'), ] diff --git a/u/UCC/UCC-1.1.0-GCCcore-12.2.0.eb b/u/UCC/UCC-1.1.0-GCCcore-12.2.0.eb new file mode 100644 index 00000000..4598a685 --- /dev/null +++ b/u/UCC/UCC-1.1.0-GCCcore-12.2.0.eb @@ -0,0 +1,44 @@ +# IT4Innovations +# LK 2023 + +easyblock = 'ConfigureMake' + +name = 'UCC' +version = '1.1.0' + +homepage = 'https://www.openucx.org/' +description = """UCC (Unified Collective Communication) is a collective +communication operations API and library that is flexible, complete, and +feature-rich for current and emerging programming models and runtimes. +""" + +toolchain = {'name': 'GCCcore', 'version': '12.2.0'} +toolchainopts = {'pic': True} + +source_urls = ['https://github.com/openucx/ucc/archive/refs/tags'] +sources = ['v%(version)s.tar.gz'] +patches = ['UCC-%(version)s-multiple_component_paths.patch'] +checksums = [ + {'v1.1.0.tar.gz': '74c8ba75037b5bd88cb703e8c8ae55639af3fecfd4428912a433c010c97b4df7'}, + {'UCC-1.1.0-multiple_component_paths.patch': '3081d0f694331daa4a88a0fa3fb54b9a918015248ae5eb7b3157b924abd31bee'}, +] + +builddependencies = [ + ('binutils', '2.39'), + ('Autotools', '20220317'), +] + +dependencies = [ + ('UCX', '1.14.1'), +] + +preconfigopts = "./autogen.sh && " + +sanity_check_paths = { + 'files': ['bin/ucc_info'], + 'dirs': ['include', 'lib'] +} + +sanity_check_commands = ["ucc_info -c"] + +moduleclass = 'lib' diff --git a/u/UCX-CUDA/UCX-CUDA-1.14.0-GCCcore-12.2.0-CUDA-12.2.0.eb b/u/UCX-CUDA/UCX-CUDA-1.13.1-GCCcore-12.2.0-CUDA-12.2.0.eb similarity index 98% rename from u/UCX-CUDA/UCX-CUDA-1.14.0-GCCcore-12.2.0-CUDA-12.2.0.eb rename to u/UCX-CUDA/UCX-CUDA-1.13.1-GCCcore-12.2.0-CUDA-12.2.0.eb index 82adad89..a8d66dc9 100644 --- a/u/UCX-CUDA/UCX-CUDA-1.14.0-GCCcore-12.2.0-CUDA-12.2.0.eb +++ b/u/UCX-CUDA/UCX-CUDA-1.13.1-GCCcore-12.2.0-CUDA-12.2.0.eb @@ -4,7 +4,7 @@ easyblock = 'EB_UCX_Plugins' name = 'UCX-CUDA' -version = '1.14.0' +version = '1.13.1' versionsuffix = '-CUDA-%(cudaver)s' homepage = 'http://www.openucx.org/' diff --git a/u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb b/u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb new file mode 100644 index 00000000..23b1dff5 --- /dev/null +++ b/u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb @@ -0,0 +1,48 @@ +# IT4Innovations +# LK 2023 + +easyblock = 'EB_UCX_Plugins' + +name = 'UCX-CUDA' +version = '1.14.1' +versionsuffix = '-CUDA-%(cudaver)s' + +homepage = 'http://www.openucx.org/' +description = """Unified Communication X +An open-source production grade communication framework for data centric +and high-performance applications + +This module adds the UCX CUDA support. +""" + +toolchain = {'name': 'GCCcore', 'version': '12.2.0'} +toolchainopts = {'pic': True} + +source_urls = ['https://github.com/openucx/ucx/releases/download/v%(version)s'] +sources = [{'filename': 'ucx-%(version)s.tar.gz', 'alt_location': 'UCX'}] +patches = ['%(name)s-1.11.0_link_against_existing_UCX_libs.patch'] +checksums = [ + ('457187fa020e526609ba91e7750c9941d57bd57d60d6eed317b40ad8824aca93', + 'baa0634cafb269a3112f626eb226bcd2ca8c9fcf0fec3b8e2a3553baad5f77aa'), # ucx-1.14.1.tar.gz + {'UCX-CUDA-1.11.0_link_against_existing_UCX_libs.patch': + '457187fa020e526609ba91e7750c9941d57bd57d60d6eed317b40ad8824aca93'}, +] + +builddependencies = [ + ('binutils', '2.39'), + ('Autotools', '20220317'), + ('pkgconf', '1.9.3'), +] + +dependencies = [ + ('zlib', '1.2.12'), + ('UCX', '1.14.1'), + ('CUDA', '12.2.0', '', SYSTEM), + ('GDRCopy', '2.3'), +] + +configopts = '--enable-optimizations --enable-cma --enable-mt --with-verbs ' +configopts += '--without-java --without-go --disable-doxygen-doc ' +configopts += '--with-xpmem --with-knem=/opt/knem-1.1.4.90mlnx1' + +moduleclass = 'lib' diff --git a/u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb b/u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb new file mode 100644 index 00000000..8259b70a --- /dev/null +++ b/u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb @@ -0,0 +1,56 @@ +# IT4Innovations +# LK 2023 + +easyblock = 'ConfigureMake' + +name = 'UCX' +version = '1.14.1' + +homepage = 'https://www.openucx.org/' +description = """Unified Communication X +An open-source production grade communication framework for data centric +and high-performance applications +""" + +toolchain = {'name': 'GCCcore', 'version': '12.2.0'} +toolchainopts = {'pic': True} + +source_urls = ['https://github.com/openucx/ucx/releases/download/v%(version)s'] +sources = ['%(namelower)s-%(version)s.tar.gz'] +patches = [ + 'UCX-1.13.1-dynamic_modules.patch', +] +checksums = [ + {'ucx-1.14.1.tar.gz': 'baa0634cafb269a3112f626eb226bcd2ca8c9fcf0fec3b8e2a3553baad5f77aa'}, + {'UCX-1.13.1-dynamic_modules.patch': '00874687bd90b795fff61aaa183f6c6bea2210aa1003b28f23d9ebf7066f8782'}, +] + +builddependencies = [ + ('binutils', '2.39'), + ('Autotools', '20220317'), + ('pkgconf', '1.9.3'), +] + +osdependencies = [OS_PKG_IBVERBS_DEV] + +dependencies = [ + ('zlib', '1.2.12'), + ('numactl', '2.0.16'), +] + +configure_cmd = "contrib/configure-release" + +configopts = '--enable-optimizations --enable-cma --enable-mt --with-verbs ' +configopts += '--without-java --without-go --disable-doxygen-doc ' +configopts += '--with-xpmem --with-knem=/opt/knem-1.1.4.90mlnx1' + +buildopts = 'V=1' + +sanity_check_paths = { + 'files': ['bin/ucx_info', 'bin/ucx_perftest', 'bin/ucx_read_profile'], + 'dirs': ['include', 'lib', 'share'] +} + +sanity_check_commands = ["ucx_info -d"] + +moduleclass = 'lib'