easyconfigs-it4i/u/UCC-CUDA/UCC-CUDA-1.3.0_cuda_12_mem_ops.patch
Lukas Krupcik 2f1e8aefc7 new file: a/AMD-uProf/AMD-uProf-4.2.850.eb
new file:   a/AOCC/AOCC-4.2.0-GCCcore-12.2.0.eb
	new file:   a/AOCL/AOCL-4.2.0-CCcore-12.2.0.eb
	new file:   a/ASE/ASE-3.22.1-gfbf-2023b.eb
	new file:   a/apptainer/apptainer-1.3.1.eb
	modified:   b/BEEF/BEEF-0.1.1-intel-2020a.eb
	modified:   d/DFT-D4/DFT-D4-3.6.0-intel-2022b-Python-3.10.8.eb
	new file:   d/DFT-D4/DFT-D4-3.6.0-intel-2023b-Python-3.11.5.eb
	new file:   e/ELPA/ELPA-2023.11.001-intel-2023b.eb
	new file:   f/FFTW.MPI/FFTW.MPI-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb
	new file:   f/FFTW.MPI/FFTW.MPI-3.3.10-NVHPC-24.3-CUDA-12.3.0.eb
	new file:   f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb
	new file:   f/FFTW/FFTW-3.3.10-NVHPC-24.3-CUDA-12.3.0.eb
	modified:   f/FFTW/FFTW-3.3.8.eb
	new file:   f/Forge/Forge-23.1.2.eb
	new file:   f/ffnvcodec/ffnvcodec-11.1.5.2.eb
	new file:   f/ffnvcodec/ffnvcodec-12.0.16.0.eb
	new file:   g/GDRCopy/GDRCopy-2.4.1-GCCcore-12.2.0.eb
	modified:   g/GROMACS/GROMACS-2024-foss-2022a-CUDA-12.0.0-v2.eb
	modified:   g/GROMACS/GROMACS-4.5.5-ORCA-5.0.1-OpenMPI-4.1.1.eb
	modified:   g/gettext/gettext-0.19.4.eb
	new file:   h/HDF5/HDF5-1.14.0-iimpi-2022b.eb
	new file:   h/HDF5/HDF5-1.14.3-NVHPC-24.1-CUDA-12.4.0.eb
	new file:   h/HDF5/HDF5-1.14.3-NVHPC-24.3-CUDA-12.3.0.eb
	new file:   h/HDF5/HDF5-1.14.3-iimpi-2023b.eb
	new file:   i/impi/impi-2021.8.0-intel-compilers-2023.0.0.eb
	new file:   l/libxc/libxc-5.2.3-GCC-11.3.0.eb
	new file:   m/METIS/METIS-5.1.0-GCCcore-13.2.0.eb
	new file:   n/NCCL/NCCL-2.21.5-GCCcore-12.2.0-CUDA-12.3.0.eb
	new file:   n/NCCL/NCCL-2.21.5-GCCcore-12.2.0-CUDA-12.4.0.eb
	new file:   n/NVHPC/NVHPC-24.1-CUDA-12.3.0.eb
	new file:   n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb
	new file:   n/NVHPC/NVHPC-24.3-CUDA-12.3.0.eb
	modified:   n/ncurses/ncurses-6.1.eb
	new file:   n/nvompi/nvompi-2024.1.eb
	new file:   n/nvompi/nvompi-2024.3.eb
	modified:   o/OpenMPI/OpenMPI-4.1.6-NVHPC-23.11-CUDA-12.2.0.eb
	new file:   o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0.eb
	new file:   o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.3-CUDA-12.3.0.eb
	deleted:    o/OpenSSL/OpenSSL-1.0.2n.eb
	deleted:    o/OpenSSL/OpenSSL-1.1-test.eb
	deleted:    o/OpenSSL/OpenSSL-1.1.eb
	new file:   p/Python/Python-3.11.5-GCCcore-13.2.0-TB2J.eb
	new file:   p/Python/Python-3.11.5-GCCcore-13.2.0.eb
	new file:   q/QD/QD-2.3.17-NVHPC-24.1-CUDA-12.4.0.eb
	new file:   q/QD/QD-2.3.17-NVHPC-24.3-CUDA-12.3.0.eb
	new file:   s/ScaLAPACK/ScaLAPACK-3.0-NVHPC-24.1-CUDA-12.4.0.eb
	new file:   s/ScaLAPACK/ScaLAPACK-3.0-NVHPC-24.3-CUDA-12.3.0.eb
	new file:   s/Siesta/Siesta-5.0.0-beta1-foss-2023b.eb
	new file:   s/scikit-build-core/scikit-build-core-0.5.0-GCCcore-13.2.0.eb
	new file:   s/spglib-python/spglib-python-2.1.0-gfbf-2023b.eb
	modified:   s/squashfs-tools/squashfs-tools-4.3.eb
	deleted:    t/TB2J/TB2J-7.1.1-Python-3.8.6-GCCcore-10.2.0.eb
	new file:   u/UCC-CUDA/UCC-CUDA-1.3.0-GCCcore-12.2.0-CUDA-12.3.0.eb
	new file:   u/UCC-CUDA/UCC-CUDA-1.3.0-GCCcore-12.2.0-CUDA-12.4.0.eb
	new file:   u/UCC-CUDA/UCC-CUDA-1.3.0_cuda_12_mem_ops.patch
	new file:   u/UCC-CUDA/UCC-CUDA-1.3.0_link_against_existing_UCC_libs.patch
	new file:   u/UCC/UCC-1.3.0-GCCcore-12.2.0.eb
	modified:   u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-11.3.0-CUDA-12.2.0.eb
	modified:   u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb
	new file:   u/UCX-CUDA/UCX-CUDA-1.16.0-GCCcore-12.2.0-CUDA-12.3.0.eb
	new file:   u/UCX-CUDA/UCX-CUDA-1.16.0-GCCcore-12.2.0-CUDA-12.4.0.eb
	modified:   u/UCX/UCX-1.14.1-GCCcore-11.3.0.eb
	modified:   u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb
	modified:   u/UCX/UCX-1.14.1-GCCcore-12.3.0-test.eb
	new file:   u/UCX/UCX-1.15.0-GCCcore-13.2.0.eb
	new file:   u/UCX/UCX-1.16.0-GCCcore-12.2.0.eb
	modified:   v/VASP/VASP-5.4.1-24Jun15-intel-2020b.eb
	modified:   v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-BEEF-karolina.eb
	modified:   v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-VASPsol-karolina.eb
	modified:   v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-karolina.eb
	modified:   v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-vtst-karolina.eb
	new file:   v/VASP/VASP-5.4.4-intel-2022b.eb
	modified:   v/VASP/VASP-6.3.0-intel-2020b-mkl=sequential-vtst-karolina.eb
	new file:   v/VASP/VASP-6.4.2-NVHPC-23.11-CUDA-12.4.0.eb
	new file:   v/VASP/VASP-6.4.2-NVHPC-24.3-CUDA-12.3.0.eb
	new file:   v/VASP/VASP-6.4.2-intel-2022b.eb
	new file:   v/VASP/VASP-6.4.2-intel-2023b-DFT-D4.eb
	new file:   v/VSCode/VSCode-1.85.0.eb
	new file:   w/Wannier90/Wannier90-3.1.0-intel-2023b-serial.eb
	new file:   x/XALT/XALT-3.0.2.eb
	modified:   x/XZ/XZ-5.2.5-GCCcore-9.3.0.eb
2024-05-09 11:03:21 +02:00

100 lines
4.3 KiB
Diff

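Backported fix for CUDA 12, see https://github.com/openucx/ucc/pull/700
Essentially just compiles out the deprecated capability checks for CUDA MEM OPS when
CUDA_VERSION >= 12000, as MEM OPS are required in CUDA 12. Also extends the default
nvcc -gencode list in config/m4/cuda.m4 with compute_86 and compute_90.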
author: micketeer@gmail.com
--- src/components/ec/cuda/ec_cuda.c.orig 2023-02-02 18:44:36.085221084 +0000
+++ src/components/ec/cuda/ec_cuda.c 2023-02-02 18:47:23.726819030 +0000
@@ -205,11 +205,10 @@
{
ucc_ec_cuda_config_t *cfg = EC_CUDA_CONFIG;
ucc_status_t status;
- int device, num_devices, attr;
+ int device, num_devices;
CUdevice cu_dev;
CUresult cu_st;
cudaError_t cuda_st;
- const char *cu_err_st_str;
ucc_ec_cuda.stream = NULL;
ucc_ec_cuda.stream_initialized = 0;
@@ -272,9 +271,14 @@
} else {
ucc_ec_cuda.strm_task_mode = UCC_EC_CUDA_TASK_MEM_OPS;
ucc_ec_cuda.post_strm_task = ucc_ec_cuda_post_driver_stream_task;
+#if CUDA_VERSION < 12000
+ CUresult cu_st;
+ CUdevice cu_dev;
+ int attr;
cu_st = cuCtxGetDevice(&cu_dev);
if (cu_st != CUDA_SUCCESS){
+ const char *cu_err_st_str;
cuGetErrorString(cu_st, &cu_err_st_str);
ec_debug(&ucc_ec_cuda.super, "cuCtxGetDevice() failed: %s",
cu_err_st_str);
@@ -297,6 +301,7 @@
"CUDA MEM OPS are not supported or disabled");
return UCC_ERR_NOT_SUPPORTED;
}
+#endif
}
ucc_ec_cuda.task_strm_type = cfg->task_strm_type;
ucc_spinlock_init(&ucc_ec_cuda.init_spinlock, 0);
--- src/components/tl/nccl/tl_nccl_context.c.orig 2023-02-03 15:17:09.358881676 +0000
+++ src/components/tl/nccl/tl_nccl_context.c 2023-02-03 17:04:31.680185749 +0000
@@ -101,13 +101,14 @@
ucc_derived_of(config, ucc_tl_nccl_context_config_t);
int mem_ops_attr = 0;
ucc_status_t status;
- CUresult cu_st;
- CUdevice cu_dev;
UCC_CLASS_CALL_SUPER_INIT(ucc_tl_context_t, &tl_nccl_config->super,
params->context);
memcpy(&self->cfg, tl_nccl_config, sizeof(*tl_nccl_config));
if (self->cfg.sync_type != UCC_TL_NCCL_COMPLETION_SYNC_TYPE_EVENT) {
+#if CUDA_VERSION < 12000
+ CUresult cu_st;
+ CUdevice cu_dev;
cu_st = cuCtxGetDevice(&cu_dev);
if (cu_st == CUDA_SUCCESS) {
cu_st = cuDeviceGetAttribute(&mem_ops_attr,
@@ -116,6 +117,9 @@
} else {
tl_info(self->super.super.lib, "failed to get cuda device");
}
+#else
+ mem_ops_attr = 1;
+#endif
if (mem_ops_attr == 0) {
if (self->cfg.sync_type == UCC_TL_NCCL_COMPLETION_SYNC_TYPE_MEMOPS) {
tl_error(self->super.super.lib, "memops not supported");
Note: the default -gencode lists added below use ${ARCH11} for compute_80. ARCH11 is
the variable defined in the context of the first hunk; no ARCH110 definition is
visible in this patch, so ${ARCH110} would expand to nothing and drop sm_80.
--- config/m4/cuda.m4.orig 2023-02-03 17:04:44.367155175 +0000
+++ config/m4/cuda.m4 2023-02-03 17:06:26.110909987 +0000
@@ -15,6 +15,11 @@
ARCH10="-gencode=arch=compute_75,code=sm_75"
ARCH11="-gencode=arch=compute_80,code=sm_80 \
-gencode=arch=compute_80,code=compute_80"
+ARCH111="-gencode=arch=compute_86,code=sm_86 \
+-gencode=arch=compute_86,code=compute_86"
+ARCH120="-gencode=arch=compute_90,code=sm_90 \
+-gencode=arch=compute_90,code=compute_90"
+
AC_DEFUN([CHECK_CUDA],[
AS_IF([test "x$cuda_checked" != "xyes"],
@@ -104,8 +109,12 @@
[NVCC_CFLAGS="$NVCC_CFLAGS -O3 -g -DNDEBUG"])
AS_IF([test "x$cuda_happy" = "xyes"],
[AS_IF([test "x$with_nvcc_gencode" = "xdefault"],
- [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
- [NVCC_ARCH="${ARCH8} ${ARCH9} ${ARCH10} ${ARCH11}"])],
+ [AS_IF([test $CUDA_MAJOR_VERSION -eq 12],
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH11} ${ARCH111} ${ARCH120}"],
+ [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
+ [AS_IF([test $CUDA_MINOR_VERSION -lt 1],
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH11}"],
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH11} ${ARCH111}"])])])],
[NVCC_ARCH="$with_nvcc_gencode"])
AC_SUBST([NVCC_ARCH], ["$NVCC_ARCH"])])
LDFLAGS="$save_LDFLAGS"