mirror of
https://code.it4i.cz/sccs/easyconfigs-it4i.git
synced 2025-04-07 23:42:12 +01:00

new file: a/AOCC/AOCC-4.2.0-GCCcore-12.2.0.eb new file: a/AOCL/AOCL-4.2.0-CCcore-12.2.0.eb new file: a/ASE/ASE-3.22.1-gfbf-2023b.eb new file: a/apptainer/apptainer-1.3.1.eb modified: b/BEEF/BEEF-0.1.1-intel-2020a.eb modified: d/DFT-D4/DFT-D4-3.6.0-intel-2022b-Python-3.10.8.eb new file: d/DFT-D4/DFT-D4-3.6.0-intel-2023b-Python-3.11.5.eb new file: e/ELPA/ELPA-2023.11.001-intel-2023b.eb new file: f/FFTW.MPI/FFTW.MPI-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb new file: f/FFTW.MPI/FFTW.MPI-3.3.10-NVHPC-24.3-CUDA-12.3.0.eb new file: f/FFTW/FFTW-3.3.10-NVHPC-24.1-CUDA-12.4.0.eb new file: f/FFTW/FFTW-3.3.10-NVHPC-24.3-CUDA-12.3.0.eb modified: f/FFTW/FFTW-3.3.8.eb new file: f/Forge/Forge-23.1.2.eb new file: f/ffnvcodec/ffnvcodec-11.1.5.2.eb new file: f/ffnvcodec/ffnvcodec-12.0.16.0.eb new file: g/GDRCopy/GDRCopy-2.4.1-GCCcore-12.2.0.eb modified: g/GROMACS/GROMACS-2024-foss-2022a-CUDA-12.0.0-v2.eb modified: g/GROMACS/GROMACS-4.5.5-ORCA-5.0.1-OpenMPI-4.1.1.eb modified: g/gettext/gettext-0.19.4.eb new file: h/HDF5/HDF5-1.14.0-iimpi-2022b.eb new file: h/HDF5/HDF5-1.14.3-NVHPC-24.1-CUDA-12.4.0.eb new file: h/HDF5/HDF5-1.14.3-NVHPC-24.3-CUDA-12.3.0.eb new file: h/HDF5/HDF5-1.14.3-iimpi-2023b.eb new file: i/impi/impi-2021.8.0-intel-compilers-2023.0.0.eb new file: l/libxc/libxc-5.2.3-GCC-11.3.0.eb new file: m/METIS/METIS-5.1.0-GCCcore-13.2.0.eb new file: n/NCCL/NCCL-2.21.5-GCCcore-12.2.0-CUDA-12.3.0.eb new file: n/NCCL/NCCL-2.21.5-GCCcore-12.2.0-CUDA-12.4.0.eb new file: n/NVHPC/NVHPC-24.1-CUDA-12.3.0.eb new file: n/NVHPC/NVHPC-24.1-CUDA-12.4.0.eb new file: n/NVHPC/NVHPC-24.3-CUDA-12.3.0.eb modified: n/ncurses/ncurses-6.1.eb new file: n/nvompi/nvompi-2024.1.eb new file: n/nvompi/nvompi-2024.3.eb modified: o/OpenMPI/OpenMPI-4.1.6-NVHPC-23.11-CUDA-12.2.0.eb new file: o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.1-CUDA-12.4.0.eb new file: o/OpenMPI/OpenMPI-4.1.6-NVHPC-24.3-CUDA-12.3.0.eb deleted: o/OpenSSL/OpenSSL-1.0.2n.eb deleted: o/OpenSSL/OpenSSL-1.1-test.eb deleted: o/OpenSSL/OpenSSL-1.1.eb new file: 
p/Python/Python-3.11.5-GCCcore-13.2.0-TB2J.eb new file: p/Python/Python-3.11.5-GCCcore-13.2.0.eb new file: q/QD/QD-2.3.17-NVHPC-24.1-CUDA-12.4.0.eb new file: q/QD/QD-2.3.17-NVHPC-24.3-CUDA-12.3.0.eb new file: s/ScaLAPACK/ScaLAPACK-3.0-NVHPC-24.1-CUDA-12.4.0.eb new file: s/ScaLAPACK/ScaLAPACK-3.0-NVHPC-24.3-CUDA-12.3.0.eb new file: s/Siesta/Siesta-5.0.0-beta1-foss-2023b.eb new file: s/scikit-build-core/scikit-build-core-0.5.0-GCCcore-13.2.0.eb new file: s/spglib-python/spglib-python-2.1.0-gfbf-2023b.eb modified: s/squashfs-tools/squashfs-tools-4.3.eb deleted: t/TB2J/TB2J-7.1.1-Python-3.8.6-GCCcore-10.2.0.eb new file: u/UCC-CUDA/UCC-CUDA-1.3.0-GCCcore-12.2.0-CUDA-12.3.0.eb new file: u/UCC-CUDA/UCC-CUDA-1.3.0-GCCcore-12.2.0-CUDA-12.4.0.eb new file: u/UCC-CUDA/UCC-CUDA-1.3.0_cuda_12_mem_ops.patch new file: u/UCC-CUDA/UCC-CUDA-1.3.0_link_against_existing_UCC_libs.patch new file: u/UCC/UCC-1.3.0-GCCcore-12.2.0.eb modified: u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-11.3.0-CUDA-12.2.0.eb modified: u/UCX-CUDA/UCX-CUDA-1.14.1-GCCcore-12.2.0-CUDA-12.2.0.eb new file: u/UCX-CUDA/UCX-CUDA-1.16.0-GCCcore-12.2.0-CUDA-12.3.0.eb new file: u/UCX-CUDA/UCX-CUDA-1.16.0-GCCcore-12.2.0-CUDA-12.4.0.eb modified: u/UCX/UCX-1.14.1-GCCcore-11.3.0.eb modified: u/UCX/UCX-1.14.1-GCCcore-12.2.0.eb modified: u/UCX/UCX-1.14.1-GCCcore-12.3.0-test.eb new file: u/UCX/UCX-1.15.0-GCCcore-13.2.0.eb new file: u/UCX/UCX-1.16.0-GCCcore-12.2.0.eb modified: v/VASP/VASP-5.4.1-24Jun15-intel-2020b.eb modified: v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-BEEF-karolina.eb modified: v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-VASPsol-karolina.eb modified: v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-karolina.eb modified: v/VASP/VASP-5.4.4-intel-2020a-mkl=sequential-vtst-karolina.eb new file: v/VASP/VASP-5.4.4-intel-2022b.eb modified: v/VASP/VASP-6.3.0-intel-2020b-mkl=sequential-vtst-karolina.eb new file: v/VASP/VASP-6.4.2-NVHPC-23.11-CUDA-12.4.0.eb new file: v/VASP/VASP-6.4.2-NVHPC-24.3-CUDA-12.3.0.eb new file: 
v/VASP/VASP-6.4.2-intel-2022b.eb new file: v/VASP/VASP-6.4.2-intel-2023b-DFT-D4.eb new file: v/VSCode/VSCode-1.85.0.eb new file: w/Wannier90/Wannier90-3.1.0-intel-2023b-serial.eb new file: x/XALT/XALT-3.0.2.eb modified: x/XZ/XZ-5.2.5-GCCcore-9.3.0.eb
100 lines
4.3 KiB
Diff
100 lines
4.3 KiB
Diff
Backported fix for CUDA 12; see https://github.com/openucx/ucc/pull/700
|
|
Essentially just compiles out the deprecated CUDA MEM OPS support checks when building with CUDA 12 or newer (CUDA_VERSION >= 12000), where MEM OPS are treated as always available
|
|
author: micketeer@gmail.com
|
|
--- src/components/ec/cuda/ec_cuda.c.orig 2023-02-02 18:44:36.085221084 +0000
|
|
+++ src/components/ec/cuda/ec_cuda.c 2023-02-02 18:47:23.726819030 +0000
|
|
@@ -205,11 +205,10 @@
|
|
{
|
|
ucc_ec_cuda_config_t *cfg = EC_CUDA_CONFIG;
|
|
ucc_status_t status;
|
|
- int device, num_devices, attr;
|
|
+ int device, num_devices;
|
|
CUdevice cu_dev;
|
|
CUresult cu_st;
|
|
cudaError_t cuda_st;
|
|
- const char *cu_err_st_str;
|
|
|
|
ucc_ec_cuda.stream = NULL;
|
|
ucc_ec_cuda.stream_initialized = 0;
|
|
@@ -272,9 +271,14 @@
|
|
} else {
|
|
ucc_ec_cuda.strm_task_mode = UCC_EC_CUDA_TASK_MEM_OPS;
|
|
ucc_ec_cuda.post_strm_task = ucc_ec_cuda_post_driver_stream_task;
|
|
+#if CUDA_VERSION < 12000
|
|
+ CUresult cu_st;
|
|
+ CUdevice cu_dev;
|
|
+ int attr;
|
|
|
|
cu_st = cuCtxGetDevice(&cu_dev);
|
|
if (cu_st != CUDA_SUCCESS){
|
|
+ const char *cu_err_st_str;
|
|
cuGetErrorString(cu_st, &cu_err_st_str);
|
|
ec_debug(&ucc_ec_cuda.super, "cuCtxGetDevice() failed: %s",
|
|
cu_err_st_str);
|
|
@@ -297,6 +301,7 @@
|
|
"CUDA MEM OPS are not supported or disabled");
|
|
return UCC_ERR_NOT_SUPPORTED;
|
|
}
|
|
+#endif
|
|
}
|
|
ucc_ec_cuda.task_strm_type = cfg->task_strm_type;
|
|
ucc_spinlock_init(&ucc_ec_cuda.init_spinlock, 0);
|
|
--- src/components/tl/nccl/tl_nccl_context.c.orig 2023-02-03 15:17:09.358881676 +0000
|
|
+++ src/components/tl/nccl/tl_nccl_context.c 2023-02-03 17:04:31.680185749 +0000
|
|
@@ -101,13 +101,14 @@
|
|
ucc_derived_of(config, ucc_tl_nccl_context_config_t);
|
|
int mem_ops_attr = 0;
|
|
ucc_status_t status;
|
|
- CUresult cu_st;
|
|
- CUdevice cu_dev;
|
|
|
|
UCC_CLASS_CALL_SUPER_INIT(ucc_tl_context_t, &tl_nccl_config->super,
|
|
params->context);
|
|
memcpy(&self->cfg, tl_nccl_config, sizeof(*tl_nccl_config));
|
|
if (self->cfg.sync_type != UCC_TL_NCCL_COMPLETION_SYNC_TYPE_EVENT) {
|
|
+#if CUDA_VERSION < 12000
|
|
+ CUresult cu_st;
|
|
+ CUdevice cu_dev;
|
|
cu_st = cuCtxGetDevice(&cu_dev);
|
|
if (cu_st == CUDA_SUCCESS) {
|
|
cu_st = cuDeviceGetAttribute(&mem_ops_attr,
|
|
@@ -116,6 +117,9 @@
|
|
} else {
|
|
tl_info(self->super.super.lib, "failed to get cuda device");
|
|
}
|
|
+#else
|
|
+ mem_ops_attr = 1;
|
|
+#endif
|
|
if (mem_ops_attr == 0) {
|
|
if (self->cfg.sync_type == UCC_TL_NCCL_COMPLETION_SYNC_TYPE_MEMOPS) {
|
|
tl_error(self->super.super.lib, "memops not supported");
|
|
--- config/m4/cuda.m4.orig 2023-02-03 17:04:44.367155175 +0000
|
|
+++ config/m4/cuda.m4 2023-02-03 17:06:26.110909987 +0000
|
|
@@ -15,6 +15,11 @@
|
|
ARCH10="-gencode=arch=compute_75,code=sm_75"
|
|
ARCH11="-gencode=arch=compute_80,code=sm_80 \
|
|
-gencode=arch=compute_80,code=compute_80"
|
|
+ARCH111="-gencode=arch=compute_86,code=sm_86 \
|
|
+-gencode=arch=compute_86,code=compute_86"
|
|
+ARCH120="-gencode=arch=compute_90,code=sm_90 \
|
|
+-gencode=arch=compute_90,code=compute_90"
|
|
+
|
|
|
|
AC_DEFUN([CHECK_CUDA],[
|
|
AS_IF([test "x$cuda_checked" != "xyes"],
|
|
@@ -104,8 +109,12 @@
|
|
[NVCC_CFLAGS="$NVCC_CFLAGS -O3 -g -DNDEBUG"])
|
|
AS_IF([test "x$cuda_happy" = "xyes"],
|
|
[AS_IF([test "x$with_nvcc_gencode" = "xdefault"],
|
|
- [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
|
|
- [NVCC_ARCH="${ARCH8} ${ARCH9} ${ARCH10} ${ARCH11}"])],
|
|
+ [AS_IF([test $CUDA_MAJOR_VERSION -eq 12],
|
|
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111} ${ARCH120}"],
|
|
+ [AS_IF([test $CUDA_MAJOR_VERSION -eq 11],
|
|
+ [AS_IF([test $CUDA_MINOR_VERSION -lt 1],
|
|
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110}"],
|
|
+ [NVCC_ARCH="${ARCH7} ${ARCH8} ${ARCH9} ${ARCH10} ${ARCH110} ${ARCH111}"])])])],
|
|
[NVCC_ARCH="$with_nvcc_gencode"])
|
|
AC_SUBST([NVCC_ARCH], ["$NVCC_ARCH"])])
|
|
LDFLAGS="$save_LDFLAGS"
|