diff --git a/a/aria2/aria2-1.35.0-GCCcore-10.3.0.eb b/a/aria2/aria2-1.35.0-GCCcore-10.3.0.eb new file mode 100644 index 00000000..896c7c98 --- /dev/null +++ b/a/aria2/aria2-1.35.0-GCCcore-10.3.0.eb @@ -0,0 +1,44 @@ +# IT4Innovations +# JK 2022 + +easyblock = 'ConfigureMake' + +name = 'aria2' +version = '1.35.0' + +homepage = 'https://aria2.github.io' +description = "aria2 is a lightweight multi-protocol & multi-source command-line download utility." + +toolchain = {'name': 'GCCcore', 'version': '10.3.0'} + +source_urls = ['https://github.com/aria2/aria2/releases/download/release-%(version)s'] +sources = [SOURCE_TAR_GZ] +checksums = ['fd85589416f8246cefc4e6ba2fa52da54fdf11fd5602a2db4b6749f7c33b5b2d'] + +builddependencies = [ + ('binutils', '2.36.1'), + ('Autotools', '20210128'), + ('CppUnit', '1.15.1'), +] + +dependencies = [ + ('zlib', '1.2.11'), + ('libxml2', '2.9.10'), + ('SQLite', '3.35.4'), + ('c-ares', '1.17.2'), + ('OpenSSL', '1.1', '', True), +] + +# add certificates' path to use https +configopts = "--without-gnutls --with-openssl --enable-libaria2 --enable-static --with-ca-bundle='/etc/ssl/certs/ca-bundle.crt'" + +#runtest = 'check' + +sanity_check_paths = { + 'files': ['bin/aria2c'], + 'dirs': ['share'], +} + +sanity_check_commands = ["aria2c --help"] + +moduleclass = 'tools' diff --git a/b/BLIS/BLIS-0.8.1_fix_dgemm-fpe-signalling-on-broadwell.patch b/b/BLIS/BLIS-0.8.1_fix_dgemm-fpe-signalling-on-broadwell.patch new file mode 100644 index 00000000..ad6dee6c --- /dev/null +++ b/b/BLIS/BLIS-0.8.1_fix_dgemm-fpe-signalling-on-broadwell.patch @@ -0,0 +1,2219 @@ +Taken from https://github.com/flame/blis/pull/544 +Fixes a problem with DGEMM causing FPR signalling on Broadwell +See https://github.com/flame/blis/issues/486 + +Åke Sandgren, 20210916 + +commit 5191c43faccf45975f577c60b9089abee25722c9 +Author: Devin Matthews +Date: Thu Sep 16 10:16:17 2021 -0500 + + Fix more copy-paste errors in the haswell gemmsup code. + + Fixes #486. + +diff --git a/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c b/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c +index 4c6094b1..21dd3b89 100644 +--- a/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c ++++ b/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c +@@ -101,7 +101,7 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + begin_asm() + + //vzeroall() // zero all xmm/ymm registers. +- ++ + mov(var(a), r14) // load address of a. + mov(var(rs_a), r8) // load rs_a + //mov(var(cs_a), r9) // load cs_a +@@ -119,7 +119,7 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + + lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b + lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a +- ++ + + mov(var(c), r12) // load address of c + mov(var(rs_c), rdi) // load rs_c +@@ -172,19 +172,19 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + prefetch(0, mem(rcx, rdi, 2, 3*8)) // prefetch c + 2*rs_c + #endif + lea(mem(r8, r8, 4), rbp) // rbp = 5*rs_a +- + +- +- ++ ++ ++ + mov(var(k_iter16), rsi) // i = k_iter16; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKITER4) // if i == 0, jump to code that + // contains the k_iter4 loop. 
+- +- ++ ++ + label(.DLOOPKITER16) // MAIN LOOP +- +- ++ ++ + // ---------------------------------- iteration 0 + + #if 0 +@@ -219,7 +219,7 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + vfmadd231pd(ymm1, ymm3, ymm14) + vfmadd231pd(ymm2, ymm3, ymm15) + +- ++ + // ---------------------------------- iteration 1 + + vmovupd(mem(rax ), ymm0) +@@ -250,7 +250,7 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + + + // ---------------------------------- iteration 2 +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -312,27 +312,27 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + vfmadd231pd(ymm1, ymm3, ymm14) + vfmadd231pd(ymm2, ymm3, ymm15) + +- ++ + + dec(rsi) // i -= 1; + jne(.DLOOPKITER16) // iterate again if i != 0. +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + label(.DCONSIDKITER4) +- ++ + mov(var(k_iter4), rsi) // i = k_iter4; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKLEFT1) // if i == 0, jump to code that + // considers k_left1 loop. + // else, we prepare to enter k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER4) // EDGE LOOP (ymm) +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -343,7 +343,7 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + vmovupd(mem(rax, r8, 1), ymm1) + vmovupd(mem(rax, r8, 2), ymm2) + add(imm(4*8), rax) // a += 4*cs_b = 4*8; +- ++ + vmovupd(mem(rbx ), ymm3) + vfmadd231pd(ymm0, ymm3, ymm4) + vfmadd231pd(ymm1, ymm3, ymm5) +@@ -365,21 +365,21 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + vfmadd231pd(ymm1, ymm3, ymm14) + vfmadd231pd(ymm2, ymm3, ymm15) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKITER4) // iterate again if i != 0. +- +- +- ++ ++ ++ + + label(.DCONSIDKLEFT1) +- ++ + mov(var(k_left1), rsi) // i = k_left1; + test(rsi, rsi) // check i via logical AND. + je(.DPOSTACCUM) // if i == 0, we're done; jump to end. + // else, we prepare to enter k_left1 loop. +- +- ++ ++ + + + label(.DLOOPKLEFT1) // EDGE LOOP (scalar) +@@ -387,12 +387,12 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + // using the xmm registers would zero out the + // high bits of the destination registers, + // which would destory intermediate results. +- ++ + vmovsd(mem(rax ), xmm0) + vmovsd(mem(rax, r8, 1), xmm1) + vmovsd(mem(rax, r8, 2), xmm2) + add(imm(1*8), rax) // a += 1*cs_a = 1*8; +- ++ + vmovsd(mem(rbx ), xmm3) + vfmadd231pd(ymm0, ymm3, ymm4) + vfmadd231pd(ymm1, ymm3, ymm5) +@@ -414,12 +414,12 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + vfmadd231pd(ymm1, ymm3, ymm14) + vfmadd231pd(ymm2, ymm3, ymm15) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKLEFT1) // iterate again if i != 0. 
+- +- +- ++ ++ ++ + + + +@@ -427,11 +427,11 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + label(.DPOSTACCUM) + + +- +- // ymm4 ymm7 ymm10 ymm13 ++ ++ // ymm4 ymm7 ymm10 ymm13 + // ymm5 ymm8 ymm11 ymm14 + // ymm6 ymm9 ymm12 ymm15 +- ++ + vhaddpd( ymm7, ymm4, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm0 ) +@@ -469,7 +469,7 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + // xmm6[0:3] = sum(ymm6) sum(ymm9) sum(ymm12) sum(ymm15) + + +- ++ + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) + +@@ -477,73 +477,73 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + mov(var(beta), rbx) // load address of beta + vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate + vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate +- ++ + vmulpd(ymm0, ymm4, ymm4) // scale by alpha + vmulpd(ymm0, ymm5, ymm5) + vmulpd(ymm0, ymm6, ymm6) +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + //mov(var(cs_c), rsi) // load cs_c + //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) +- +- +- ++ ++ ++ + // now avoid loading C if beta == 0 +- ++ + vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. + vucomisd(xmm0, xmm3) // set ZF if beta == 0. + je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case +- + +- ++ ++ + label(.DROWSTORED) +- +- ++ ++ + vfmadd231pd(mem(rcx), ymm3, ymm4) + vmovupd(ymm4, mem(rcx)) + add(rdi, rcx) +- ++ + vfmadd231pd(mem(rcx), ymm3, ymm5) + vmovupd(ymm5, mem(rcx)) + add(rdi, rcx) +- ++ + vfmadd231pd(mem(rcx), ymm3, ymm6) + vmovupd(ymm6, mem(rcx)) + //add(rdi, rcx) +- +- +- ++ ++ ++ + jmp(.DDONE) // jump to end. +- +- +- +- ++ ++ ++ ++ + label(.DBETAZERO) +- + +- ++ ++ + label(.DROWSTORBZ) +- +- ++ ++ + vmovupd(ymm4, mem(rcx)) + add(rdi, rcx) +- ++ + vmovupd(ymm5, mem(rcx)) + add(rdi, rcx) +- ++ + vmovupd(ymm6, mem(rcx)) + //add(rdi, rcx) +- +- +- +- ++ ++ ++ ++ + label(.DDONE) +- +- ++ ++ + + + lea(mem(r12, rdi, 2), r12) // +@@ -560,7 +560,7 @@ void bli_dgemmsup_rd_haswell_asm_6x4 + + label(.DRETURN) + +- ++ + + end_asm( + : // output operands (none) +@@ -629,7 +629,7 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + // ------------------------------------------------------------------------- + + begin_asm() +- ++ + //vzeroall() // zero all xmm/ymm registers. + + mov(var(a), rax) // load address of a. +@@ -649,7 +649,7 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + + lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b + //lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a +- ++ + + mov(var(c), rcx) // load address of c + mov(var(rs_c), rdi) // load rs_c +@@ -682,7 +682,7 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + //lea(mem(r14), rax) // rax = a; + //lea(mem(rdx), rbx) // rbx = b; + +- ++ + #if 1 + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) +@@ -690,18 +690,18 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c + #endif + +- +- +- ++ ++ ++ + mov(var(k_iter16), rsi) // i = k_iter16; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKITER4) // if i == 0, jump to code that + // contains the k_iter4 loop. 
+- +- ++ ++ + label(.DLOOPKITER16) // MAIN LOOP +- +- ++ ++ + // ---------------------------------- iteration 0 + + #if 0 +@@ -730,7 +730,7 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + vfmadd231pd(ymm0, ymm3, ymm13) + vfmadd231pd(ymm1, ymm3, ymm14) + +- ++ + // ---------------------------------- iteration 1 + + vmovupd(mem(rax ), ymm0) +@@ -756,7 +756,7 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + + + // ---------------------------------- iteration 2 +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -807,27 +807,27 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + vfmadd231pd(ymm0, ymm3, ymm13) + vfmadd231pd(ymm1, ymm3, ymm14) + +- ++ + + dec(rsi) // i -= 1; + jne(.DLOOPKITER16) // iterate again if i != 0. +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + label(.DCONSIDKITER4) +- ++ + mov(var(k_iter4), rsi) // i = k_iter4; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKLEFT1) // if i == 0, jump to code that + // considers k_left1 loop. + // else, we prepare to enter k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER4) // EDGE LOOP (ymm) +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -836,7 +836,7 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + vmovupd(mem(rax ), ymm0) + vmovupd(mem(rax, r8, 1), ymm1) + add(imm(4*8), rax) // a += 4*cs_b = 4*8; +- ++ + vmovupd(mem(rbx ), ymm3) + vfmadd231pd(ymm0, ymm3, ymm4) + vfmadd231pd(ymm1, ymm3, ymm5) +@@ -854,21 +854,21 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + vfmadd231pd(ymm0, ymm3, ymm13) + vfmadd231pd(ymm1, ymm3, ymm14) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKITER4) // iterate again if i != 0. +- +- +- ++ ++ ++ + + label(.DCONSIDKLEFT1) +- ++ + mov(var(k_left1), rsi) // i = k_left1; + test(rsi, rsi) // check i via logical AND. + je(.DPOSTACCUM) // if i == 0, we're done; jump to end. + // else, we prepare to enter k_left1 loop. +- +- ++ ++ + + + label(.DLOOPKLEFT1) // EDGE LOOP (scalar) +@@ -876,11 +876,11 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + // using the xmm registers would zero out the + // high bits of the destination registers, + // which would destory intermediate results. +- ++ + vmovsd(mem(rax ), xmm0) + vmovsd(mem(rax, r8, 1), xmm1) + add(imm(1*8), rax) // a += 1*cs_a = 1*8; +- ++ + vmovsd(mem(rbx ), xmm3) + vfmadd231pd(ymm0, ymm3, ymm4) + vfmadd231pd(ymm1, ymm3, ymm5) +@@ -898,12 +898,12 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + vfmadd231pd(ymm0, ymm3, ymm13) + vfmadd231pd(ymm1, ymm3, ymm14) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKLEFT1) // iterate again if i != 0. 
+- +- +- ++ ++ ++ + + + +@@ -911,10 +911,10 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + label(.DPOSTACCUM) + + +- +- // ymm4 ymm7 ymm10 ymm13 ++ ++ // ymm4 ymm7 ymm10 ymm13 + // ymm5 ymm8 ymm11 ymm14 +- ++ + vhaddpd( ymm7, ymm4, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm0 ) +@@ -943,75 +943,75 @@ void bli_dgemmsup_rd_haswell_asm_2x4 + + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(float) +- ++ + mov(var(alpha), rax) // load address of alpha + mov(var(beta), rbx) // load address of beta + vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate + vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate +- ++ + vmulpd(ymm0, ymm4, ymm4) // scale by alpha + vmulpd(ymm0, ymm5, ymm5) +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + //mov(var(cs_c), rsi) // load cs_c + //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) +- +- +- ++ ++ ++ + // now avoid loading C if beta == 0 +- ++ + vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. + vucomisd(xmm0, xmm3) // set ZF if beta == 0. + je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case +- + +- ++ ++ + label(.DROWSTORED) +- +- ++ ++ + vfmadd231pd(mem(rcx), ymm3, ymm4) + vmovupd(ymm4, mem(rcx)) + add(rdi, rcx) +- ++ + vfmadd231pd(mem(rcx), ymm3, ymm5) + vmovupd(ymm5, mem(rcx)) + //add(rdi, rcx) +- +- +- ++ ++ ++ + jmp(.DDONE) // jump to end. +- +- +- +- ++ ++ ++ ++ + label(.DBETAZERO) +- + +- ++ ++ + label(.DROWSTORBZ) +- +- ++ ++ + vmovupd(ymm4, mem(rcx)) + add(rdi, rcx) +- ++ + vmovupd(ymm5, mem(rcx)) + //add(rdi, rcx) +- +- +- +- ++ ++ ++ ++ + label(.DDONE) + + + + + label(.DRETURN) +- +- ++ ++ + + end_asm( + : // output operands (none) +@@ -1079,7 +1079,7 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + // ------------------------------------------------------------------------- + + begin_asm() +- ++ + //vzeroall() // zero all xmm/ymm registers. + + mov(var(a), rax) // load address of a. +@@ -1099,7 +1099,7 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + + lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b + //lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a +- ++ + + mov(var(c), rcx) // load address of c + mov(var(rs_c), rdi) // load rs_c +@@ -1128,26 +1128,26 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + //lea(mem(r14), rax) // rax = a; + //lea(mem(rdx), rbx) // rbx = b; + +- ++ + #if 1 + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 8), rdi) // rs_c *= sizeof(double) + prefetch(0, mem(rcx, 3*8)) // prefetch c + 0*rs_c +- prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c ++ //prefetch(0, mem(rcx, rdi, 1, 3*8)) // prefetch c + 1*rs_c + #endif + +- +- +- ++ ++ ++ + mov(var(k_iter16), rsi) // i = k_iter16; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKITER4) // if i == 0, jump to code that + // contains the k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER16) // MAIN LOOP +- +- ++ ++ + // ---------------------------------- iteration 0 + + #if 0 +@@ -1170,7 +1170,7 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + add(imm(4*8), rbx) // b += 4*rs_b = 4*8; + vfmadd231pd(ymm0, ymm3, ymm13) + +- ++ + // ---------------------------------- iteration 1 + + vmovupd(mem(rax ), ymm0) +@@ -1191,7 +1191,7 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + + + // ---------------------------------- iteration 2 +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + #endif +@@ -1231,27 +1231,27 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + add(imm(4*8), rbx) // b += 4*rs_b = 4*8; + vfmadd231pd(ymm0, ymm3, ymm13) + +- ++ + + dec(rsi) // i -= 1; + jne(.DLOOPKITER16) // iterate again if i != 0. 
+- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + label(.DCONSIDKITER4) +- ++ + mov(var(k_iter4), rsi) // i = k_iter4; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKLEFT1) // if i == 0, jump to code that + // considers k_left1 loop. + // else, we prepare to enter k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER4) // EDGE LOOP (ymm) +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -1259,7 +1259,7 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + + vmovupd(mem(rax ), ymm0) + add(imm(4*8), rax) // a += 4*cs_b = 4*8; +- ++ + vmovupd(mem(rbx ), ymm3) + vfmadd231pd(ymm0, ymm3, ymm4) + +@@ -1273,21 +1273,21 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + add(imm(4*8), rbx) // b += 4*rs_b = 4*8; + vfmadd231pd(ymm0, ymm3, ymm13) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKITER4) // iterate again if i != 0. +- +- +- ++ ++ ++ + + label(.DCONSIDKLEFT1) +- ++ + mov(var(k_left1), rsi) // i = k_left1; + test(rsi, rsi) // check i via logical AND. + je(.DPOSTACCUM) // if i == 0, we're done; jump to end. + // else, we prepare to enter k_left1 loop. +- +- ++ ++ + + + label(.DLOOPKLEFT1) // EDGE LOOP (scalar) +@@ -1295,10 +1295,10 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + // using the xmm registers would zero out the + // high bits of the destination registers, + // which would destory intermediate results. +- ++ + vmovsd(mem(rax ), xmm0) + add(imm(1*8), rax) // a += 1*cs_a = 1*8; +- ++ + vmovsd(mem(rbx ), xmm3) + vfmadd231pd(ymm0, ymm3, ymm4) + +@@ -1312,12 +1312,12 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + add(imm(1*8), rbx) // b += 1*rs_b = 1*8; + vfmadd231pd(ymm0, ymm3, ymm13) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKLEFT1) // iterate again if i != 0. +- +- +- ++ ++ ++ + + + +@@ -1325,9 +1325,9 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + label(.DPOSTACCUM) + + +- +- // ymm4 ymm7 ymm10 ymm13 +- ++ ++ // ymm4 ymm7 ymm10 ymm13 ++ + vhaddpd( ymm7, ymm4, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm0 ) +@@ -1339,15 +1339,15 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) + + +- vhaddpd( ymm8, ymm5, ymm0 ) +- vextractf128(imm(1), ymm0, xmm1 ) +- vaddpd( xmm0, xmm1, xmm0 ) ++ //vhaddpd( ymm8, ymm5, ymm0 ) ++ //vextractf128(imm(1), ymm0, xmm1 ) ++ //vaddpd( xmm0, xmm1, xmm0 ) + +- vhaddpd( ymm14, ymm11, ymm2 ) +- vextractf128(imm(1), ymm2, xmm1 ) +- vaddpd( xmm2, xmm1, xmm2 ) ++ //vhaddpd( ymm14, ymm11, ymm2 ) ++ //vextractf128(imm(1), ymm2, xmm1 ) ++ //vaddpd( xmm2, xmm1, xmm2 ) + +- vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) ++ //vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) + + // xmm4[0:3] = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) + +@@ -1355,67 +1355,67 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(float) +- ++ + mov(var(alpha), rax) // load address of alpha + mov(var(beta), rbx) // load address of beta + vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate + vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate +- ++ + vmulpd(ymm0, ymm4, ymm4) // scale by alpha +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + //mov(var(cs_c), rsi) // load cs_c + //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) +- +- +- ++ ++ ++ + // now avoid loading C if beta == 0 +- ++ + vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. + vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
+ je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case +- + +- ++ ++ + label(.DROWSTORED) +- +- ++ ++ + vfmadd231pd(mem(rcx), ymm3, ymm4) + vmovupd(ymm4, mem(rcx)) + //add(rdi, rcx) +- +- +- ++ ++ ++ + jmp(.DDONE) // jump to end. +- +- +- +- ++ ++ ++ ++ + label(.DBETAZERO) +- + +- ++ ++ + label(.DROWSTORBZ) +- +- ++ ++ + vmovupd(ymm4, mem(rcx)) + //add(rdi, rcx) +- +- +- +- ++ ++ ++ ++ + label(.DDONE) + + + + + label(.DRETURN) +- +- ++ ++ + + end_asm( + : // output operands (none) +commit e3dc1954ffb5eee2a8b41fce85ba589f75770eea +Author: Devin Matthews +Date: Thu Sep 16 10:59:37 2021 -0500 + + Fix problem where uninitialized registers are included in vhaddpd in the Mx1 gemmsup kernels for haswell. + + The fix is to use the same (valid) source register twice in the horizontal addition. + +diff --git a/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx1.c b/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx1.c +index 6e3c1a0e..457ef9f2 100644 +--- a/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx1.c ++++ b/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx1.c +@@ -99,9 +99,9 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + // ------------------------------------------------------------------------- + + begin_asm() +- ++ + //vzeroall() // zero all xmm/ymm registers. +- ++ + mov(var(a), rax) // load address of a. + mov(var(rs_a), r8) // load rs_a + //mov(var(cs_a), r9) // load cs_a +@@ -119,7 +119,7 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + + //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b + //lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a +- ++ + + mov(var(c), rcx) // load address of c + mov(var(rs_c), rdi) // load rs_c +@@ -163,19 +163,19 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + prefetch(0, mem(r10, rdi, 1, 1*8)) // prefetch c + 4*rs_c + prefetch(0, mem(r10, rdi, 2, 1*8)) // prefetch c + 5*rs_c + #endif +- + +- +- ++ ++ ++ + mov(var(k_iter16), rsi) // i = k_iter16; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKITER4) // if i == 0, jump to code that + // contains the k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER16) // MAIN LOOP +- +- ++ ++ + // ---------------------------------- iteration 0 + + #if 0 +@@ -206,7 +206,7 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm14) + +- ++ + // ---------------------------------- iteration 1 + + vmovupd(mem(rbx ), ymm0) +@@ -233,7 +233,7 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + + + // ---------------------------------- iteration 2 +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -287,27 +287,27 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm14) + +- ++ + + dec(rsi) // i -= 1; + jne(.DLOOPKITER16) // iterate again if i != 0. +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + label(.DCONSIDKITER4) +- ++ + mov(var(k_iter4), rsi) // i = k_iter4; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKLEFT1) // if i == 0, jump to code that + // considers k_left1 loop. + // else, we prepare to enter k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER4) // EDGE LOOP (ymm) +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -336,21 +336,21 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm14) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKITER4) // iterate again if i != 0. 
+- +- +- ++ ++ ++ + + label(.DCONSIDKLEFT1) +- ++ + mov(var(k_left1), rsi) // i = k_left1; + test(rsi, rsi) // check i via logical AND. + je(.DPOSTACCUM) // if i == 0, we're done; jump to end. + // else, we prepare to enter k_left1 loop. +- +- ++ ++ + + + label(.DLOOPKLEFT1) // EDGE LOOP (scalar) +@@ -358,7 +358,7 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + // using the xmm registers would zero out the + // high bits of the destination registers, + // which would destory intermediate results. +- ++ + vmovsd(mem(rbx ), xmm0) + add(imm(1*8), rbx) // b += 1*rs_b = 1*8; + +@@ -381,12 +381,12 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + add(imm(1*8), rax) // a += 1*cs_a = 1*8; + vfmadd231pd(ymm0, ymm3, ymm14) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKLEFT1) // iterate again if i != 0. +- +- +- ++ ++ ++ + + + +@@ -399,28 +399,28 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + // ymm10 + // ymm12 + // ymm14 +- +- vhaddpd( ymm5, ymm4, ymm0 ) ++ ++ vhaddpd( ymm4, ymm4, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm4 ) + +- vhaddpd( ymm7, ymm6, ymm0 ) ++ vhaddpd( ymm6, ymm6, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm6 ) + +- vhaddpd( ymm9, ymm8, ymm0 ) ++ vhaddpd( ymm8, ymm8, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm8 ) + +- vhaddpd( ymm11, ymm10, ymm0 ) ++ vhaddpd( ymm10, ymm10, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm10 ) + +- vhaddpd( ymm13, ymm12, ymm0 ) ++ vhaddpd( ymm12, ymm12, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm12 ) + +- vhaddpd( ymm15, ymm14, ymm0 ) ++ vhaddpd( ymm14, ymm14, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm14 ) + +@@ -435,114 +435,114 @@ void bli_dgemmsup_rd_haswell_asm_6x1 + + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(double) +- ++ + mov(var(alpha), rax) // load address of alpha + mov(var(beta), rbx) // load address of beta + vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate + vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate +- ++ + vmulpd(xmm0, xmm4, xmm4) // scale by alpha + vmulpd(xmm0, xmm6, xmm6) + vmulpd(xmm0, xmm8, xmm8) + vmulpd(xmm0, xmm10, xmm10) + vmulpd(xmm0, xmm12, xmm12) + vmulpd(xmm0, xmm14, xmm14) +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + //mov(var(cs_c), rsi) // load cs_c + //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) +- +- +- ++ ++ ++ + // now avoid loading C if beta == 0 +- ++ + vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. + vucomisd(xmm0, xmm3) // set ZF if beta == 0. + je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case +- + +- ++ ++ + label(.DROWSTORED) +- + +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm4) + vmovsd(xmm4, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm6) + vmovsd(xmm6, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm8) + vmovsd(xmm8, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm10) + vmovsd(xmm10, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm12) + vmovsd(xmm12, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm14) + vmovsd(xmm14, mem(rcx)) + //add(rdi, rcx) +- +- +- ++ ++ ++ + jmp(.DDONE) // jump to end. 
+- +- +- +- ++ ++ ++ ++ + label(.DBETAZERO) +- + +- ++ ++ + label(.DROWSTORBZ) +- +- ++ ++ + vmovsd(xmm4, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm6, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm8, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm10, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm12, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm14, mem(rcx)) + //add(rdi, rcx) +- + +- +- +- ++ ++ ++ ++ + label(.DDONE) +- ++ + + + + label(.DRETURN) + +- ++ + + end_asm( + : // output operands (none) +@@ -613,9 +613,9 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + // ------------------------------------------------------------------------- + + begin_asm() +- ++ + //vzeroall() // zero all xmm/ymm registers. +- ++ + mov(var(a), rax) // load address of a. + mov(var(rs_a), r8) // load rs_a + //mov(var(cs_a), r9) // load cs_a +@@ -633,7 +633,7 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + + //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b + //lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a +- ++ + + mov(var(c), rcx) // load address of c + mov(var(rs_c), rdi) // load rs_c +@@ -671,19 +671,19 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c + prefetch(0, mem(rcx, rdi, 2, 1*8)) // prefetch c + 2*rs_c + #endif +- + +- +- ++ ++ ++ + mov(var(k_iter16), rsi) // i = k_iter16; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKITER4) // if i == 0, jump to code that + // contains the k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER16) // MAIN LOOP +- +- ++ ++ + // ---------------------------------- iteration 0 + + #if 0 +@@ -705,7 +705,7 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm8) + +- ++ + // ---------------------------------- iteration 1 + + vmovupd(mem(rbx ), ymm0) +@@ -723,7 +723,7 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + + + // ---------------------------------- iteration 2 +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -759,27 +759,27 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm8) + +- ++ + + dec(rsi) // i -= 1; + jne(.DLOOPKITER16) // iterate again if i != 0. +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + label(.DCONSIDKITER4) +- ++ + mov(var(k_iter4), rsi) // i = k_iter4; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKLEFT1) // if i == 0, jump to code that + // considers k_left1 loop. + // else, we prepare to enter k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER4) // EDGE LOOP (ymm) +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -799,21 +799,21 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm8) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKITER4) // iterate again if i != 0. +- +- +- ++ ++ ++ + + label(.DCONSIDKLEFT1) +- ++ + mov(var(k_left1), rsi) // i = k_left1; + test(rsi, rsi) // check i via logical AND. + je(.DPOSTACCUM) // if i == 0, we're done; jump to end. + // else, we prepare to enter k_left1 loop. +- +- ++ ++ + + + label(.DLOOPKLEFT1) // EDGE LOOP (scalar) +@@ -821,7 +821,7 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + // using the xmm registers would zero out the + // high bits of the destination registers, + // which would destory intermediate results. 
+- ++ + vmovsd(mem(rbx ), xmm0) + add(imm(1*8), rbx) // b += 1*rs_b = 1*8; + +@@ -835,12 +835,12 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + add(imm(1*8), rax) // a += 1*cs_a = 1*8; + vfmadd231pd(ymm0, ymm3, ymm8) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKLEFT1) // iterate again if i != 0. +- +- +- ++ ++ ++ + + + +@@ -850,16 +850,16 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + // ymm4 + // ymm6 + // ymm8 +- +- vhaddpd( ymm5, ymm4, ymm0 ) ++ ++ vhaddpd( ymm4, ymm4, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm4 ) + +- vhaddpd( ymm7, ymm6, ymm0 ) ++ vhaddpd( ymm6, ymm6, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm6 ) + +- vhaddpd( ymm9, ymm8, ymm0 ) ++ vhaddpd( ymm8, ymm8, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm8 ) + +@@ -871,87 +871,87 @@ void bli_dgemmsup_rd_haswell_asm_3x1 + + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(double) +- ++ + mov(var(alpha), rax) // load address of alpha + mov(var(beta), rbx) // load address of beta + vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate + vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate +- ++ + vmulpd(xmm0, xmm4, xmm4) // scale by alpha + vmulpd(xmm0, xmm6, xmm6) + vmulpd(xmm0, xmm8, xmm8) +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + //mov(var(cs_c), rsi) // load cs_c + //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) +- +- +- ++ ++ ++ + // now avoid loading C if beta == 0 +- ++ + vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. + vucomisd(xmm0, xmm3) // set ZF if beta == 0. + je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case +- + +- ++ ++ + label(.DROWSTORED) +- + +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm4) + vmovsd(xmm4, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm6) + vmovsd(xmm6, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm8) + vmovsd(xmm8, mem(rcx)) + //add(rdi, rcx) +- +- +- ++ ++ ++ + jmp(.DDONE) // jump to end. +- +- +- +- ++ ++ ++ ++ + label(.DBETAZERO) +- + +- ++ ++ + label(.DROWSTORBZ) +- +- ++ ++ + vmovsd(xmm4, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm6, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm8, mem(rcx)) + //add(rdi, rcx) +- + +- +- +- ++ ++ ++ ++ + label(.DDONE) +- ++ + + + + label(.DRETURN) + +- ++ + + end_asm( + : // output operands (none) +@@ -1022,9 +1022,9 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + // ------------------------------------------------------------------------- + + begin_asm() +- ++ + //vzeroall() // zero all xmm/ymm registers. +- ++ + mov(var(a), rax) // load address of a. + mov(var(rs_a), r8) // load rs_a + //mov(var(cs_a), r9) // load cs_a +@@ -1042,7 +1042,7 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + + //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b + //lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a +- ++ + + mov(var(c), rcx) // load address of c + mov(var(rs_c), rdi) // load rs_c +@@ -1078,19 +1078,19 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c + prefetch(0, mem(rcx, rdi, 1, 1*8)) // prefetch c + 1*rs_c + #endif +- + +- +- ++ ++ ++ + mov(var(k_iter16), rsi) // i = k_iter16; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKITER4) // if i == 0, jump to code that + // contains the k_iter4 loop. 
+- +- ++ ++ + label(.DLOOPKITER16) // MAIN LOOP +- +- ++ ++ + // ---------------------------------- iteration 0 + + #if 0 +@@ -1109,7 +1109,7 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm6) + +- ++ + // ---------------------------------- iteration 1 + + vmovupd(mem(rbx ), ymm0) +@@ -1124,7 +1124,7 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + + + // ---------------------------------- iteration 2 +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -1154,27 +1154,27 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm6) + +- ++ + + dec(rsi) // i -= 1; + jne(.DLOOPKITER16) // iterate again if i != 0. +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + label(.DCONSIDKITER4) +- ++ + mov(var(k_iter4), rsi) // i = k_iter4; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKLEFT1) // if i == 0, jump to code that + // considers k_left1 loop. + // else, we prepare to enter k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER4) // EDGE LOOP (ymm) +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -1191,21 +1191,21 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm6) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKITER4) // iterate again if i != 0. +- +- +- ++ ++ ++ + + label(.DCONSIDKLEFT1) +- ++ + mov(var(k_left1), rsi) // i = k_left1; + test(rsi, rsi) // check i via logical AND. + je(.DPOSTACCUM) // if i == 0, we're done; jump to end. + // else, we prepare to enter k_left1 loop. +- +- ++ ++ + + + label(.DLOOPKLEFT1) // EDGE LOOP (scalar) +@@ -1213,7 +1213,7 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + // using the xmm registers would zero out the + // high bits of the destination registers, + // which would destory intermediate results. +- ++ + vmovsd(mem(rbx ), xmm0) + add(imm(1*8), rbx) // b += 1*rs_b = 1*8; + +@@ -1224,12 +1224,12 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + add(imm(1*8), rax) // a += 1*cs_a = 1*8; + vfmadd231pd(ymm0, ymm3, ymm6) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKLEFT1) // iterate again if i != 0. +- +- +- ++ ++ ++ + + + +@@ -1238,12 +1238,12 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + + // ymm4 + // ymm6 +- +- vhaddpd( ymm5, ymm4, ymm0 ) ++ ++ vhaddpd( ymm4, ymm4, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm4 ) + +- vhaddpd( ymm7, ymm6, ymm0 ) ++ vhaddpd( ymm6, ymm6, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm6 ) + +@@ -1254,78 +1254,78 @@ void bli_dgemmsup_rd_haswell_asm_2x1 + + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(double) +- ++ + mov(var(alpha), rax) // load address of alpha + mov(var(beta), rbx) // load address of beta + vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate + vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate +- ++ + vmulpd(xmm0, xmm4, xmm4) // scale by alpha + vmulpd(xmm0, xmm6, xmm6) +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + //mov(var(cs_c), rsi) // load cs_c + //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) +- +- +- ++ ++ ++ + // now avoid loading C if beta == 0 +- ++ + vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. + vucomisd(xmm0, xmm3) // set ZF if beta == 0. 
+ je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case +- + +- ++ ++ + label(.DROWSTORED) +- + +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm4) + vmovsd(xmm4, mem(rcx)) + add(rdi, rcx) +- +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm6) + vmovsd(xmm6, mem(rcx)) + //add(rdi, rcx) +- +- +- ++ ++ ++ + jmp(.DDONE) // jump to end. +- +- +- +- ++ ++ ++ ++ + label(.DBETAZERO) +- + +- ++ ++ + label(.DROWSTORBZ) +- +- ++ ++ + vmovsd(xmm4, mem(rcx)) + add(rdi, rcx) +- ++ + vmovsd(xmm6, mem(rcx)) + //add(rdi, rcx) +- + +- +- +- ++ ++ ++ ++ + label(.DDONE) +- ++ + + + + label(.DRETURN) + +- ++ + + end_asm( + : // output operands (none) +@@ -1396,9 +1396,9 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + // ------------------------------------------------------------------------- + + begin_asm() +- ++ + //vzeroall() // zero all xmm/ymm registers. +- ++ + mov(var(a), rax) // load address of a. + mov(var(rs_a), r8) // load rs_a + //mov(var(cs_a), r9) // load cs_a +@@ -1416,7 +1416,7 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + + //lea(mem(r11, r11, 2), r13) // r13 = 3*cs_b + //lea(mem(r8, r8, 2), r10) // r10 = 3*rs_a +- ++ + + mov(var(c), rcx) // load address of c + mov(var(rs_c), rdi) // load rs_c +@@ -1450,19 +1450,19 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + //lea(mem(r10, rdi, 1), r10) // rdx = c + 3*rs_c; + prefetch(0, mem(rcx, 1*8)) // prefetch c + 0*rs_c + #endif +- + +- +- ++ ++ ++ + mov(var(k_iter16), rsi) // i = k_iter16; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKITER4) // if i == 0, jump to code that + // contains the k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER16) // MAIN LOOP +- +- ++ ++ + // ---------------------------------- iteration 0 + + #if 0 +@@ -1478,7 +1478,7 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm4) + +- ++ + // ---------------------------------- iteration 1 + + vmovupd(mem(rbx ), ymm0) +@@ -1490,7 +1490,7 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + + + // ---------------------------------- iteration 2 +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -1514,27 +1514,27 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm4) + +- ++ + + dec(rsi) // i -= 1; + jne(.DLOOPKITER16) // iterate again if i != 0. +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + label(.DCONSIDKITER4) +- ++ + mov(var(k_iter4), rsi) // i = k_iter4; + test(rsi, rsi) // check i via logical AND. + je(.DCONSIDKLEFT1) // if i == 0, jump to code that + // considers k_left1 loop. + // else, we prepare to enter k_iter4 loop. +- +- ++ ++ + label(.DLOOPKITER4) // EDGE LOOP (ymm) +- ++ + #if 0 + prefetch(0, mem(rax, r10, 1, 0*8)) // prefetch rax + 3*cs_a + prefetch(0, mem(rax, r8, 4, 0*8)) // prefetch rax + 4*cs_a +@@ -1548,21 +1548,21 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + add(imm(4*8), rax) // a += 4*cs_a = 4*8; + vfmadd231pd(ymm0, ymm3, ymm4) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKITER4) // iterate again if i != 0. +- +- +- ++ ++ ++ + + label(.DCONSIDKLEFT1) +- ++ + mov(var(k_left1), rsi) // i = k_left1; + test(rsi, rsi) // check i via logical AND. + je(.DPOSTACCUM) // if i == 0, we're done; jump to end. + // else, we prepare to enter k_left1 loop. 
+- +- ++ ++ + + + label(.DLOOPKLEFT1) // EDGE LOOP (scalar) +@@ -1570,7 +1570,7 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + // using the xmm registers would zero out the + // high bits of the destination registers, + // which would destory intermediate results. +- ++ + vmovsd(mem(rbx ), xmm0) + add(imm(1*8), rbx) // b += 1*rs_b = 1*8; + +@@ -1578,12 +1578,12 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + add(imm(1*8), rax) // a += 1*cs_a = 1*8; + vfmadd231pd(ymm0, ymm3, ymm4) + +- ++ + dec(rsi) // i -= 1; + jne(.DLOOPKLEFT1) // iterate again if i != 0. +- +- +- ++ ++ ++ + + + +@@ -1591,8 +1591,8 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + label(.DPOSTACCUM) + + // ymm4 +- +- vhaddpd( ymm5, ymm4, ymm0 ) ++ ++ vhaddpd( ymm4, ymm4, ymm0 ) + vextractf128(imm(1), ymm0, xmm1 ) + vaddpd( xmm0, xmm1, xmm4 ) + +@@ -1602,69 +1602,69 @@ void bli_dgemmsup_rd_haswell_asm_1x1 + + //mov(var(rs_c), rdi) // load rs_c + //lea(mem(, rdi, 4), rdi) // rs_c *= sizeof(double) +- ++ + mov(var(alpha), rax) // load address of alpha + mov(var(beta), rbx) // load address of beta + vbroadcastsd(mem(rax), ymm0) // load alpha and duplicate + vbroadcastsd(mem(rbx), ymm3) // load beta and duplicate +- ++ + vmulpd(xmm0, xmm4, xmm4) // scale by alpha +- +- +- +- +- +- ++ ++ ++ ++ ++ ++ + //mov(var(cs_c), rsi) // load cs_c + //lea(mem(, rsi, 8), rsi) // rsi = cs_c * sizeof(double) +- +- +- ++ ++ ++ + // now avoid loading C if beta == 0 +- ++ + vxorpd(ymm0, ymm0, ymm0) // set ymm0 to zero. + vucomisd(xmm0, xmm3) // set ZF if beta == 0. + je(.DBETAZERO) // if ZF = 1, jump to beta == 0 case +- + +- ++ ++ + label(.DROWSTORED) +- + +- vmovsd(mem(rcx), xmm0) ++ ++ vmovsd(mem(rcx), xmm0) + vfmadd231pd(xmm0, xmm3, xmm4) + vmovsd(xmm4, mem(rcx)) + //add(rdi, rcx) +- +- +- ++ ++ ++ + jmp(.DDONE) // jump to end. +- +- +- +- ++ ++ ++ ++ + label(.DBETAZERO) +- + +- ++ ++ + label(.DROWSTORBZ) +- +- ++ ++ + vmovsd(xmm4, mem(rcx)) + //add(rdi, rcx) +- + +- +- +- ++ ++ ++ ++ + label(.DDONE) +- ++ + + + + label(.DRETURN) + +- ++ + + end_asm( + : // output operands (none) +diff --git a/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c b/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c +index 21dd3b89..516bfced 100644 +--- a/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c ++++ b/kernels/haswell/3/sup/d6x8/bli_gemmsup_rd_haswell_asm_dMx4.c +@@ -1338,17 +1338,6 @@ void bli_dgemmsup_rd_haswell_asm_1x4 + + vperm2f128(imm(0x20), ymm2, ymm0, ymm4 ) + +- +- //vhaddpd( ymm8, ymm5, ymm0 ) +- //vextractf128(imm(1), ymm0, xmm1 ) +- //vaddpd( xmm0, xmm1, xmm0 ) +- +- //vhaddpd( ymm14, ymm11, ymm2 ) +- //vextractf128(imm(1), ymm2, xmm1 ) +- //vaddpd( xmm2, xmm1, xmm2 ) +- +- //vperm2f128(imm(0x20), ymm2, ymm0, ymm5 ) +- + // xmm4[0:3] = sum(ymm4) sum(ymm7) sum(ymm10) sum(ymm13) + + diff --git a/b/BLIS/BLIS-2.2-GCCcore-10.2.0.eb b/b/BLIS/BLIS-2.2-GCCcore-10.2.0.eb new file mode 100644 index 00000000..d02f1937 --- /dev/null +++ b/b/BLIS/BLIS-2.2-GCCcore-10.2.0.eb @@ -0,0 +1,45 @@ +# IT4Innovations +# LK 2022 + +easyblock = 'ConfigureMake' + +name = 'BLIS' +version = '2.2' + +homepage = 'https://developer.amd.com/amd-cpu-libraries/blas-library/' +description = """AMD's fork of BLIS. 
BLIS is a portable software framework for instantiating high-performance +BLAS-like dense linear algebra libraries.""" + +toolchain = {'name': 'GCCcore', 'version': '10.2.0'} + +source_urls = ['https://github.com/amd/blis/archive/'] +sources = ['%(version)s.tar.gz'] +patches = ['BLIS-2.2-amd_fix-undefined-reference-blist-abort.patch'] +checksums = [ + 'e1feb60ac919cf6d233c43c424f6a8a11eab2c62c2c6e3f2652c15ee9063c0c9', # 2.2.tar.gz + # BLIS-2.2-amd_fix-undefined-reference-blist-abort.patch + 'e879bd79e4438f7e6905461af1d483d27d14945eb9e75509b22c7584b8ba93c4', +] + +builddependencies = [ + ('binutils', '2.35'), + ('Python', '3.8.6'), + ('Perl', '5.32.0'), +] + +# Build Serial and multithreaded library +configopts = ['--enable-cblas --enable-shared CC="$CC" auto', + '--enable-cblas --enable-threading=openmp --enable-shared CC="$CC" auto'] + +runtest = 'check' + +sanity_check_paths = { + 'files': ['include/blis/cblas.h', 'include/blis/blis.h', + 'lib/libblis.a', 'lib/libblis.%s' % SHLIB_EXT, + 'lib/libblis-mt.a', 'lib/libblis-mt.%s' % SHLIB_EXT], + 'dirs': [], +} + +modextrapaths = {'CPATH': 'include/blis'} + +moduleclass = 'numlib' diff --git a/b/BLIS/BLIS-2.2-amd_fix-undefined-reference-blist-abort.patch b/b/BLIS/BLIS-2.2-amd_fix-undefined-reference-blist-abort.patch new file mode 100644 index 00000000..18ebc44d --- /dev/null +++ b/b/BLIS/BLIS-2.2-amd_fix-undefined-reference-blist-abort.patch @@ -0,0 +1,14 @@ +fix undefined reference to 'blis_abort' + +see https://github.com/flame/blis/issues/428 + https://github.com/flame/blis/pull/429 + +--- blis-2.2.orig/frame/base/bli_error.h 2020-12-07 19:40:33.936990613 +0100 ++++ blis-2.2/frame/base/bli_error.h 2020-12-07 19:45:35.079406108 +0100 +@@ -40,6 +40,7 @@ + + void bli_print_msg( char* str, char* file, guint_t line ); + void bli_abort( void ); ++BLIS_EXPORT_BLIS void bli_abort( void ); + + char* bli_error_string_for_code( gint_t code ); + diff --git a/b/BLIS/BLIS-3.0.1-GCCcore-10.2.0.eb b/b/BLIS/BLIS-3.0.1-GCCcore-10.2.0.eb new file mode 100644 index 00000000..30bf4d2a --- /dev/null +++ b/b/BLIS/BLIS-3.0.1-GCCcore-10.2.0.eb @@ -0,0 +1,39 @@ +# IT4Innovations +# LK 2022 +easyblock = 'ConfigureMake' + +name = 'BLIS' +version = '3.0.1' + +homepage = 'https://developer.amd.com/amd-cpu-libraries/blas-library/' +description = """AMD's fork of BLIS. 
BLIS is a portable software framework for instantiating high-performance +BLAS-like dense linear algebra libraries.""" + +toolchain = {'name': 'GCCcore', 'version': '10.2.0'} + +source_urls = ['https://github.com/amd/blis/archive/'] +sources = ['%(version)s.tar.gz'] +checksums = ['dff643e6ef946846e91e8f81b75ff8fe21f1f2d227599aecd654d184d9beff3e'] + +builddependencies = [ + ('binutils', '2.35'), + ('Python', '3.8.6'), + ('Perl', '5.32.0'), +] + +# Build Serial and multithreaded library +configopts = ['--enable-cblas --enable-shared CC="$CC" auto', + '--enable-cblas --enable-threading=openmp --enable-shared CC="$CC" auto'] + +runtest = 'check' + +sanity_check_paths = { + 'files': ['include/blis/cblas.h', 'include/blis/blis.h', + 'lib/libblis.a', 'lib/libblis.%s' % SHLIB_EXT, + 'lib/libblis-mt.a', 'lib/libblis-mt.%s' % SHLIB_EXT], + 'dirs': [], +} + +modextrapaths = {'CPATH': 'include/blis'} + +moduleclass = 'numlib' diff --git a/b/Biopython/Biopython-1.72-foss-2020b-Python-2.7.18.eb b/b/Biopython/Biopython-1.72-foss-2020b-Python-2.7.18.eb new file mode 100644 index 00000000..3c0bce6d --- /dev/null +++ b/b/Biopython/Biopython-1.72-foss-2020b-Python-2.7.18.eb @@ -0,0 +1,40 @@ +# IT4Innovations 2022 +# JK + +easyblock = 'PythonPackage' + +name = 'Biopython' +version = '1.72' +versionsuffix = '-Python-%(pyver)s' + +homepage = 'http://www.biopython.org' +description = """Biopython is a set of freely available tools for biological + computation written in Python by an international team of developers. It is + a distributed collaborative effort to develop Python libraries and + applications which address the needs of current and future work in + bioinformatics. """ + + +toolchain = {'name': 'foss', 'version': '2020b'} + +source_urls = ['http://biopython.org/DIST'] +sources = [SOURCELOWER_TAR_GZ] +checksums = ['ab6b492443adb90c66267b3d24d602ae69a93c68f4b9f135ba01cb06d36ce5a2'] + +dependencies = [ + ('Python', '2.7.18'), +] + +#download_dep_fail = True +#use_pip = True +#sanity_pip_check = False + +skipsteps = ['sanitycheck'] + +sanity_check_paths = { + 'files': [], + 'dirs': ['lib/python%(pyshortver)s/site-packages/Bio', + 'lib/python%(pyshortver)s/site-packages/BioSQL'], +} + +moduleclass = 'bio' diff --git a/c/Cordax/Cordax-1.0-Python-2.7.18.eb b/c/Cordax/Cordax-1.0-Python-2.7.18.eb new file mode 100644 index 00000000..2172fa73 --- /dev/null +++ b/c/Cordax/Cordax-1.0-Python-2.7.18.eb @@ -0,0 +1,26 @@ +easyblock = 'Binary' + +name = 'Cordax' +version = '1.0' +versionsuffix = '-Python-%(pyver)s' + +homepage = 'N/A' +description = """CORDAX is an aggregation propensity predictor based on predicted packing energies.""" + +toolchain = {'name': 'foss', 'version': '2020b'} + +sources = ['%(namelower)s-%(version)s.tar.gz'] + +dependencies = [ + ('Python', '2.7.18'), + ('matplotlib', '2.2.5', '-Python-%(pyver)s'), + ('SciPy-bundle', '2020.11', '-Python-%(pyver)s'), + ('scikit-learn', '0.20.4', '-Python-%(pyver)s'), + ('Biopython', '1.72', '-Python-%(pyver)s'), + ('FoldX', '5.0', '', True), +] + +extract_sources = True +skipsteps = ['sanitycheck'] + +moduleclass = 'bio' diff --git a/d/DFTB+/DFTB+-21.2-intel-2020b-Python-3.8.6.eb b/d/DFTB+/DFTB+-21.2-intel-2020b-Python-3.8.6.eb new file mode 100644 index 00000000..456007dc --- /dev/null +++ b/d/DFTB+/DFTB+-21.2-intel-2020b-Python-3.8.6.eb @@ -0,0 +1,71 @@ +# IT4Innovations +# LK 2022 + +easyblock = 'CMakeMake' + +name = 'DFTB+' +version = '21.2' +versionsuffix = '-Python-3.8.6' + +homepage = 'https://www.dftb-plus.info' +description = """DFTB+ is a fast and 
efficient versatile quantum mechanical simulation package. +It is based on the Density Functional Tight Binding (DFTB) method, containing +almost all of the useful extensions which have been developed for the DFTB +framework so far. Using DFTB+ you can carry out quantum mechanical simulations +like with ab-initio density functional theory based packages, but in an +approximate way gaining typically around two orders of magnitude in speed.""" + +toolchain = {'name': 'intel', 'version': '2020b'} +# AMD/intel cpu +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'lowopt': True, 'optarch': 'march=core-avx2'} +else: + toolchainopts = {'lowopt': True, 'optarch': False} + + +local_external_dir = '%%(builddir)s/dftbplus-%%(version)s/external/%s/origin/' +local_external_extract = 'mkdir -p %s && tar -C %s' % (local_external_dir, local_external_dir) +local_external_extract += ' --strip-components=1 -xzf %%s' + +sources = [ + { + # DFTB+ source code + 'source_urls': ['https://github.com/dftbplus/dftbplus/archive'], + 'download_filename': '%(version)s.tar.gz', + 'filename': SOURCE_TAR_GZ, + }, + { + # Slater-Koster (slakos) data for testing + 'source_urls': ['https://github.com/dftbplus/testparams/archive'], + 'download_filename': 'd0ea16df2b56d14c7c3dc9329a8d3bac9fea50a0.tar.gz', + 'filename': 'slakos-data-%(version)s.tar.gz', + 'extract_cmd': local_external_extract % ('slakos', 'slakos'), + }, +] + +builddependencies = [ + ('CMake', '3.18.4'), +] + +dependencies = [ + ('Python', '3.8.6'), + ('SciPy-bundle', '2020.11'), + ('arpack-ng', '3.8.0'), + ('dftd3-lib', '0.9.2', '', ('GCC', '10.2.0')), +] + +configopts = ['-DWITH_TBLITE=TRUE -DWITH_MPI=TRUE -DWITH_DFTD3=TRUE COMPILE_DFTD3=FALSE DFTD3_INCS="-I$EBROOTDFTD3MINLIB/include" -DFTD3_LIBS="-L$EBROOTDFTD3MINLIB/lib -ldftd3" -DWITH_PYTHON=TRUE'] + +installopts = 'INSTALLDIR="%(installdir)s"' + +sanity_check_paths = { + 'files': ['bin/' + x for x in ['dftb+', 'modes', 'waveplot']], + 'dirs': [] +} + +sanity_check_commands = [('python', '-c "import dptools"')] + +modextrapaths = {'PYTHONPATH': 'lib/python%(pyshortver)s/site-packages'} + +moduleclass = 'phys' diff --git a/d/DFTB+/DFTB+-21.2-intel-2020b-TB.eb b/d/DFTB+/DFTB+-21.2-intel-2020b-TB.eb new file mode 100644 index 00000000..d39235d5 --- /dev/null +++ b/d/DFTB+/DFTB+-21.2-intel-2020b-TB.eb @@ -0,0 +1,81 @@ +# IT4Innovations +# LK 2022 + +easyblock = 'CMakeMake' + +name = 'DFTB+' +version = '21.2' +versionsuffix = '-TB' + +homepage = 'https://www.dftb-plus.info' +description = """DFTB+ is a fast and efficient versatile quantum mechanical simulation package. +It is based on the Density Functional Tight Binding (DFTB) method, containing +almost all of the useful extensions which have been developed for the DFTB +framework so far.
Using DFTB+ you can carry out quantum mechanical simulations +like with ab-initio density functional theory based packages, but in an +approximate way gaining typically around two orders of magnitude in speed.""" + +toolchain = {'name': 'intel', 'version': '2020b'} +# AMD/intel cpu +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'lowopt': True, 'optarch': 'march=core-avx2'} +else: + toolchainopts = {'lowopt': True, 'optarch': False} + + +local_external_dir = '%%(builddir)s/dftbplus-%%(version)s/external/%s/origin/' +local_external_extract = 'mkdir -p %s && tar -C %s' % (local_external_dir, local_external_dir) +local_external_extract += ' --strip-components=1 -xzf %%s' + +sources = [ + { + # DFTB+ source code + 'source_urls': ['https://github.com/dftbplus/dftbplus/archive'], + 'download_filename': '%(version)s.tar.gz', + 'filename': SOURCE_TAR_GZ, + }, + { + # Slater-Koster (slakos) data for testing + 'source_urls': ['https://github.com/dftbplus/testparams/archive'], + 'download_filename': 'd0ea16df2b56d14c7c3dc9329a8d3bac9fea50a0.tar.gz', + 'filename': 'slakos-data-%(version)s.tar.gz', + 'extract_cmd': local_external_extract % ('slakos', 'slakos'), + }, +] + +builddependencies = [ + ('CMake', '3.18.4'), +] + +dependencies = [ +# ('Python', '3.8.6'), +# ('SciPy-bundle', '2020.11'), + ('arpack-ng', '3.8.0'), + ('dftd3-lib', '0.9.2', '', ('GCC', '10.2.0')), +] + +configopts = ['-DWITH_TBLITE=TRUE -DWITH_MPI=TRUE -DWITH_DFTD3=TRUE COMPILE_DFTD3=FALSE DFTD3_INCS="-I$EBROOTDFTD3MINLIB/include" -DFTD3_LIBS="-L$EBROOTDFTD3MINLIB/lib -ldftd3"'] + +# Link to Arpack +#local_makeopts = ' WITH_TBLITE=1 WITH_MPI=1 WITH_ARPACK=1 ARPACK_LIBS="-L$EBROOTARPACKMINNG/lib -larpack" ARPACK_NEEDS_LAPACK=1' +# Use DFTD3 from EB +#local_makeopts += ' WITH_DFTD3=1 COMPILE_DFTD3=0 DFTD3_INCS="-I$EBROOTDFTD3MINLIB/include"' +#local_makeopts += ' DFTD3_LIBS="-L$EBROOTDFTD3MINLIB/lib -ldftd3"' + +#buildopts = local_makeopts + +#runtest = 'test' + local_makeopts + +installopts = 'INSTALLDIR="%(installdir)s"' + +sanity_check_paths = { + 'files': ['bin/' + x for x in ['dftb+', 'modes', 'waveplot']], + 'dirs': [] +} + +#sanity_check_commands = [('python', '-c "import dptools"')] + +#modextrapaths = {'PYTHONPATH': 'lib/python%(pyshortver)s/site-packages'} + +moduleclass = 'phys' diff --git a/d/Dakota/Dakota-6.15.0-fix_lapack_detection.patch b/d/Dakota/Dakota-6.15.0-fix_lapack_detection.patch new file mode 100644 index 00000000..cd09fa60 --- /dev/null +++ b/d/Dakota/Dakota-6.15.0-fix_lapack_detection.patch @@ -0,0 +1,18 @@ +# Fix LAPACK detection with CMake +# IT4Innovations +# JK 2022 +--- dakota-6.15.0-public-src-cli-ORIG/cmake/DakotaFindSystemTPLs.cmake 2022-01-27 10:41:04.173529574 +0100 ++++ dakota-6.15.0-public-src-cli/cmake/DakotaFindSystemTPLs.cmake 2022-01-27 10:45:08.772867478 +0100 +@@ -64,9 +64,9 @@ + if(NOT BLAS_LIBS OR NOT LAPACK_LIBS) + # if not a system blas and lapack, then look for a cmake built LAPACK + # with find_package +- find_package(LAPACK REQUIRED NO_MODULE) +- set(BLAS_LIBS blas) +- set(LAPACK_LIBS lapack) ++ find_package(LAPACK REQUIRED MODULE) ++ set(BLAS_LIBS ${BLAS_LIBRARIES}) ++ set(LAPACK_LIBS ${LAPACK_LIBRARIES}) + endif() + endif() + endif() diff --git a/d/Dakota/Dakota-6.15.0-foss-2021b.eb b/d/Dakota/Dakota-6.15.0-foss-2021b.eb new file mode 100644 index 00000000..b6d438da --- /dev/null +++ b/d/Dakota/Dakota-6.15.0-foss-2021b.eb @@ -0,0 +1,48 @@ +#
https://github.com/easybuilders/easybuild-easyconfigs/pull/12275/commits/4ee39c881caa7638c4c9ddd08b967608f844e90c +# JK 2022 modified + +easyblock = 'CMakeMake' + +name = 'Dakota' +version = '6.15.0' + +homepage = 'https://dakota.sandia.gov' +description = """The Dakota project delivers both state-of-the-art research and robust, usable software for optimization and UQ. Broadly, the Dakota software's advanced parametric analyses enable design exploration, model calibration, risk analysis, and quantification of margins and uncertainty with computational models.""" + +toolchain = {'name': 'foss', 'version': '2021b'} +toolchainopts = {'pic': True, 'usempi': True, 'optarch': False} + +sources = ['%(namelower)s-%(version)s-public-src-cli.tar.gz'] +source_urls = ['https://dakota.sandia.gov/sites/default/files/distributions/public/'] +checksums = ['47136b14a86143d0038735638da4578e'] +patches = ['Dakota-6.15.0-fix_lapack_detection.patch'] + +builddependencies = [('CMake', '3.21.1')] + +dependencies = [ + ('HDF5', '1.12.1', '-parallel'), + ('Python', '3.9.6'), + ('Perl', '5.34.0'), + ('GSL', '2.7'), + ('Boost', '1.77.0'), +] + +# build shared libraries +configopts = "-DBUILD_SHARED_LIBS=ON " +# set other dependencies +configopts += "-DDAKOTA_HAVE_MPI=ON " +configopts += "-DBoost_NO_SYSTEM_PATHS=ON " +configopts += "-DDAKOTA_HAVE_HDF5=ON " +configopts += "-DDAKOTA_HAVE_GSL=ON " + +runtest = ' test ARGS="-L AcceptanceTest -j %(parallel)s"' + +# Run install step in parallel +installopts = ' -j %(parallel)s' + +sanity_check_paths = { + 'files': ["bin/dakota"], + 'dirs': [] +} + +moduleclass = 'math' diff --git a/d/Dakota/Dakota-6.15.0-intel-2021b.eb b/d/Dakota/Dakota-6.15.0-intel-2021b.eb new file mode 100644 index 00000000..8ea7d788 --- /dev/null +++ b/d/Dakota/Dakota-6.15.0-intel-2021b.eb @@ -0,0 +1,48 @@ +# https://github.com/easybuilders/easybuild-easyconfigs/pull/12275/commits/4ee39c881caa7638c4c9ddd08b967608f844e90c +# JK 2022 modified + +easyblock = 'CMakeMake' + +name = 'Dakota' +version = '6.15.0' + +homepage = 'https://dakota.sandia.gov' +description = """The Dakota project delivers both state-of-the-art research and robust, usable software for optimization and UQ.
Broadly, the Dakota software's advanced parametric analyses enable design exploration, model calibration, risk analysis, and quantification of margins and uncertainty with computational models.""" + +toolchain = {'name': 'intel', 'version': '2021b'} +toolchainopts = {'pic': True, 'usempi': True, 'opt': False} + +sources = ['%(namelower)s-%(version)s-public-src-cli.tar.gz'] +source_urls = ['https://dakota.sandia.gov/sites/default/files/distributions/public/'] +checksums = ['47136b14a86143d0038735638da4578e'] +patches = ['Dakota-6.15.0-fix_lapack_detection.patch'] + +builddependencies = [('CMake', '3.21.1')] + +dependencies = [ + ('HDF5', '1.12.1', '-parallel'), + ('Python', '3.9.6'), + ('Perl', '5.34.0'), + ('GSL', '2.7'), + ('Boost', '1.77.0'), +] + +# build shared libraries +configopts = "-DBUILD_SHARED_LIBS=ON " +# set other dependencies +configopts += "-DDAKOTA_HAVE_MPI=ON " +configopts += "-DBoost_NO_SYSTEM_PATHS=ON " +configopts += "-DDAKOTA_HAVE_HDF5=ON " +configopts += "-DDAKOTA_HAVE_GSL=ON " + +runtest = ' test ARGS="-L AcceptanceTest -j %(parallel)s"' + +# Run install step in parallel +installopts = ' -j %(parallel)s' + +sanity_check_paths = { + 'files': ["bin/dakota"], + 'dirs': [] +} + +moduleclass = 'math' diff --git a/e/ELPA/ELPA-2020.11.001-fosscuda-2020b.eb b/e/ELPA/ELPA-2020.11.001-fosscuda-2020b.eb new file mode 100644 index 00000000..6a8b430f --- /dev/null +++ b/e/ELPA/ELPA-2020.11.001-fosscuda-2020b.eb @@ -0,0 +1,33 @@ +## +# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild +# +# Authors:: Inge Gutheil, Alan O'Cais +# License:: MIT/GPL +# +## + +name = 'ELPA' +version = '2020.11.001' + +homepage = 'https://elpa.rzg.mpg.de' +description = """Eigenvalue SoLvers for Petaflop-Applications.""" + +toolchain = {'name': 'fosscuda', 'version': '2020b'} +toolchainopts = {'openmp': True, 'usempi': True} + +source_urls = ['https://elpa.rzg.mpg.de/software/tarball-archive/Releases/%(version)s/'] +sources = [SOURCELOWER_TAR_GZ] +checksums = ['15591f142eeaa98ab3201d27ca9ac328e21beabf0803b011a04183fcaf6efdde'] + +builddependencies = [ + ('Autotools', '20200321'), + # remove_xcompiler script requires 'python' command, + ('Python', '3.8.6'), +] + +# When building in parallel, the file test_setup_mpi.mod is sometimes +# used before it is built, leading to an error. This must be a bug in +# the makefile affecting parallel builds. +maxparallel = 1 + +moduleclass = 'math' diff --git a/f/FoldX/FoldX-5.0.eb b/f/FoldX/FoldX-5.0.eb new file mode 100644 index 00000000..06d564f7 --- /dev/null +++ b/f/FoldX/FoldX-5.0.eb @@ -0,0 +1,24 @@ +# IT4Innovations 2022 +# JK + +easyblock = 'Binary' + +name = 'FoldX' +version = '5.0' + +homepage = 'http://foldxsuite.crg.eu/' +description = """FoldX is used to provide a fast and quantitative estimation of the importance of the interactions + contributing to the stability of proteins and protein complexes.""" + +toolchain = SYSTEM + +sources = ['%(namelower)s%(version_major)sLinux64.tar.gz'] + +extract_sources = True + +sanity_check_paths = { + 'files': ['foldx_20221231', 'yasaraPlugin.zip'], + 'dirs': ["molecules/"] +} + +moduleclass = 'bio' diff --git a/f/Forge/Forge-21.1.3.eb b/f/Forge/Forge-21.1.3.eb new file mode 100644 index 00000000..fbd775a2 --- /dev/null +++ b/f/Forge/Forge-21.1.3.eb @@ -0,0 +1,48 @@ +# IT4Innovations 2020 +# !!! --include-easyblocks /apps/easybuild/it4i-easyblocks/easyblocks/a/allineabase.py !!!
+# LK + +easyblock = 'AllineaBase' + +name = 'Forge' +version = "21.1.3" + +homepage = 'http://www.allinea.com/products/develop-allinea-forge' +description = """Allinea Forge is the local_complete toolsuite for software development +- with everything needed to debug, profile, optimize, edit and build C, C++ +and FORTRAN applications on Linux for high performance - from single threads through +to local_complex parallel HPC codes with MPI, OpenMP, threads or CUDA. +""" + +toolchain = SYSTEM + +source_urls = [ + # Use manually downloaded sources + # http://content.allinea.com/downloads/allinea-reports-latest-Redhat-6.0-x86_64.tar + # and rename it to format %(namelower)s-%(version)s.tar, so + # forge-7.0.4.tar. +] +sources = ['arm-forge-21.1.3-linux-x86_64.tar'] + +skipsteps = ['configure', 'build'] + +postinstallcmds = [ + 'ln -s /apps/licenses/Arm/Licence %(installdir)s/licences/Licence.16312', + 'ln -s /apps/licenses/PerformanceReports/Licence %(installdir)s/licences/Licence.16313', +] + +sanity_check_paths = { + 'files': [ + 'bin/ddt-client', + 'bin/map', + 'bin/ddt', + #'bin/ddt-debugger', + #'bin/ddt-debugger-ll', + #'bin/ddt-debugger-mps', + 'bin/ddt-mpirun', + 'bin/forge', + 'bin/make-profiler-libraries'], + 'dirs': ['lib'], +} + +moduleclass = 'debugger' diff --git a/g/GROMACS/GROMACS-2018.8-fosscuda-2020b-PLUMED-2.5.6-switch.eb b/g/GROMACS/GROMACS-2018.8-fosscuda-2020b-PLUMED-2.5.6-switch.eb new file mode 100644 index 00000000..2acd1a75 --- /dev/null +++ b/g/GROMACS/GROMACS-2018.8-fosscuda-2020b-PLUMED-2.5.6-switch.eb @@ -0,0 +1,53 @@ +# IT4Innovations +# LK 2022 + +name = 'GROMACS' +version = '2018.8' +versionsuffix = '-PLUMED-2.5.6-switch' + +homepage = 'https://www.gromacs.org' +description = """ +GROMACS is a versatile package to perform molecular dynamics, i.e. simulate the +Newtonian equations of motion for systems with hundreds to millions of +particles. + +This is a GPU enabled build, containing both MPI and threadMPI builds. + +It also contains the gmxapi extension for the single precision MPI build. +""" + +toolchain = {'name': 'fosscuda', 'version': '2020b'} +toolchainopts = {'openmp': True, 'usempi': True} + +source_urls = [ + 'https://ftp.gromacs.org/pub/gromacs/', + 'ftp://ftp.gromacs.org/pub/gromacs/', +] + +patches = [ + 'GROMACS-2018_fix_search_for_nvml_include.patch', + 'GROMACS-2018_amend_search_for_nvml_lib.patch', +] + +sources = ['gromacs-%(version)s.tar.gz'] + +builddependencies = [ + ('CMake', '3.18.4'), + ('scikit-build', '0.11.1'), +] + +dependencies = [ + ('Python', '3.8.6'), + ('SciPy-bundle', '2020.11'), + ('networkx', '2.5'), + ('PLUMED', '2.5.6', '-switch'), + ('Boost', '1.74.0'), +] + +exts_defaultclass = 'PythonPackage' + +modextrapaths = { + 'PYTHONPATH': 'lib/python%(pyshortver)s/site-packages', +} + +moduleclass = 'bio' diff --git a/g/GROMACS/GROMACS-2021.4-foss-2020b-PLUMED-2.7.3.eb b/g/GROMACS/GROMACS-2021.4-foss-2020b-PLUMED-2.7.3.eb new file mode 100644 index 00000000..ec0dcec1 --- /dev/null +++ b/g/GROMACS/GROMACS-2021.4-foss-2020b-PLUMED-2.7.3.eb @@ -0,0 +1,72 @@ +# IT4Innovations +# LK 2022 + +name = 'GROMACS' +version = '2021.4' +local_plum_ver = '2.7.3' +versionsuffix = '-PLUMED-%s' % local_plum_ver + +homepage = 'https://www.gromacs.org' +description = """ +GROMACS is a versatile package to perform molecular dynamics, i.e. simulate the +Newtonian equations of motion for systems with hundreds to millions of +particles. + +This is a GPU enabled build, containing both MPI and threadMPI builds. 
+ +It also contains the gmxapi extension for the single precision MPI build. +""" + +toolchain = {'name': 'foss', 'version': '2020b'} +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'openmp': True, 'usempi': True, 'optarch': 'march=core-avx2', 'pic': True} +else: + toolchainopts = {'openmp': True, 'usempi': True, 'pic': True} + +source_urls = [ + 'https://ftp.gromacs.org/pub/gromacs/', + 'ftp://ftp.gromacs.org/pub/gromacs/', +] +sources = [SOURCELOWER_TAR_GZ] + +patches = [ + 'GROMACS-2019_fix_omp_num_threads_and_google_test_death_style_in_tests.patch', +# 'GROMACS-2019_increase_test_timeout_for_GPU.patch', + 'GROMACS-2021_fix_gmxapi_gmx_allowed_cmd_name.patch', +# 'GROMACS-2020.5_fix_threads_gpu_Gmxapitests.patch', +] + +builddependencies = [ + ('CMake', '3.18.4'), + ('scikit-build', '0.11.1'), +] + +dependencies = [ + ('Python', '3.8.6'), + ('SciPy-bundle', '2020.11'), + ('networkx', '2.5'), + ('PLUMED', local_plum_ver), +] + +exts_defaultclass = 'PythonPackage' + +exts_default_options = { + 'source_urls': [PYPI_SOURCE], + 'use_pip': True, + 'download_dep_fail': True, + 'sanity_pip_check': True, +} + +exts_list = [ + ('gmxapi', '0.2.0', { + 'preinstallopts': "export GMXTOOLCHAINDIR=%(installdir)s/share/cmake/gromacs_mpi && ", + 'checksums': ['3954bf123da12fc60bcfaeed8263f5e2d3e16e5136c2bb5c8207b20fa7406788'], + }), +] + +modextrapaths = { + 'PYTHONPATH': 'lib/python%(pyshortver)s/site-packages', +} + +moduleclass = 'bio' diff --git a/h/HDF5/HDF5-1.12.1-NVHPC-21.11.eb b/h/HDF5/HDF5-1.12.1-NVHPC-21.11.eb new file mode 100644 index 00000000..238d7ae9 --- /dev/null +++ b/h/HDF5/HDF5-1.12.1-NVHPC-21.11.eb @@ -0,0 +1,27 @@ +# IT4Innovations +# LK 2022 + +name = 'HDF5' +version = '1.12.1' + +homepage = 'https://portal.hdfgroup.org/display/support' +description = """HDF5 is a data model, library, and file format for storing and managing data. 
+ It supports an unlimited variety of datatypes, and is designed for flexible + and efficient I/O and for high volume and complex data.""" + +toolchain = {'name': 'NVHPC', 'version': '21.11'} +toolchainopts = {'pic': True} + +source_urls = ['https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-%(version_major_minor)s/hdf5-%(version)s/src'] +sources = [SOURCELOWER_TAR_GZ] +checksums = ['79c66ff67e666665369396e9c90b32e238e501f345afd2234186bfb8331081ca'] + +configopts = '--enable-fortran --enable-fortran 2003 --enable-cxx --enable-parallel --enable-shared' + +dependencies = [ + ('OpenMPI', '4.0.6', '-CUDA-11.4.1-v2'), + ('zlib', '1.2.11'), + ('Szip', '2.1.1'), +] + +moduleclass = 'data' diff --git a/h/HDF5/HDF5-1.12.1-foss-2021b-parallel.eb b/h/HDF5/HDF5-1.12.1-foss-2021b-parallel.eb new file mode 100644 index 00000000..da83ed4e --- /dev/null +++ b/h/HDF5/HDF5-1.12.1-foss-2021b-parallel.eb @@ -0,0 +1,31 @@ +# IT4Innovations 2021 +# LK + +name = 'HDF5' +version = '1.12.1' +versionsuffix = '-parallel' + +homepage = 'http://www.hdfgroup.org/HDF5/' +description = """HDF5 is a unique technology suite that makes possible the management of + extremely large and local_complex data collections.""" + +toolchain = {'name': 'foss', 'version': '2021b'} +toolchainopts = {'pic': True, 'usempi': True} + +source_urls = [ + 'https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-%(version_major_minor)s/hdf5-%(version)s/src'] +sources = [SOURCELOWER_TAR_GZ] + +# AMD/intel cpu +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + prebuildopts = "echo %(builddir)s && while read i; do echo $i; sed 's|-xHost|-avx2|g' -i $i; done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) && " + +configopts = '--enable-fortran --enable-fortran 2003 --enable-cxx' + +dependencies = [ + ('zlib', '1.2.11'), + ('Szip', '2.1.1'), +] + +moduleclass = 'data' diff --git a/h/HDF5/HDF5-1.12.1-iimpi-2021b.eb b/h/HDF5/HDF5-1.12.1-iimpi-2021b.eb new file mode 100644 index 00000000..5cc5946f --- /dev/null +++ b/h/HDF5/HDF5-1.12.1-iimpi-2021b.eb @@ -0,0 +1,30 @@ +# IT4Innovations +# JK 2022 + +name = 'HDF5' +version = '1.12.1' + +homepage = 'https://portal.hdfgroup.org/display/support' +description = """HDF5 is a data model, library, and file format for storing and managing data. 
+ It supports an unlimited variety of datatypes, and is designed for flexible + and efficient I/O and for high volume and complex data.""" + +toolchain = {'name': 'iimpi', 'version': '2021b'} +toolchainopts = {'pic': True, 'usempi': True} + +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'pic': True, 'usempi': True, 'optarch': 'march=core-avx2'} +else: + toolchainopts = {'pic': True, 'usempi': True} + +source_urls = ['https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-%(version_major_minor)s/hdf5-%(version)s/src'] +sources = [SOURCELOWER_TAR_GZ] +checksums = ['79c66ff67e666665369396e9c90b32e238e501f345afd2234186bfb8331081ca'] + +dependencies = [ + ('zlib', '1.2.11'), + ('Szip', '2.1.1'), +] + +moduleclass = 'data' diff --git a/h/HDF5/HDF5-1.12.1-intel-2021b-parallel.eb b/h/HDF5/HDF5-1.12.1-intel-2021b-parallel.eb new file mode 100644 index 00000000..b88e8dfe --- /dev/null +++ b/h/HDF5/HDF5-1.12.1-intel-2021b-parallel.eb @@ -0,0 +1,31 @@ +# IT4Innovations 2021 +# LK + +name = 'HDF5' +version = '1.12.1' +versionsuffix = '-parallel' + +homepage = 'http://www.hdfgroup.org/HDF5/' +description = """HDF5 is a unique technology suite that makes possible the management of + extremely large and local_complex data collections.""" + +toolchain = {'name': 'intel', 'version': '2021b'} +toolchainopts = {'pic': True, 'usempi': True} + +source_urls = [ + 'https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-%(version_major_minor)s/hdf5-%(version)s/src'] +sources = [SOURCELOWER_TAR_GZ] + +# AMD/intel cpu +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + prebuildopts = "echo %(builddir)s && while read i; do echo $i; sed 's|-xHost|-avx2|g' -i $i; done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) && " + +configopts = '--enable-fortran --enable-fortran 2003 --enable-cxx' + +dependencies = [ + ('zlib', '1.2.11'), + ('Szip', '2.1.1'), +] + +moduleclass = 'data' diff --git a/h/HyperQueue/HyperQueue-0.8.0.eb b/h/HyperQueue/HyperQueue-0.8.0.eb new file mode 100644 index 00000000..1c93440a --- /dev/null +++ b/h/HyperQueue/HyperQueue-0.8.0.eb @@ -0,0 +1,24 @@ +# IT4Innovations +# JK 2021 + +easyblock = 'PackedBinary' + +name = 'HyperQueue' +version = '0.8.0' + +homepage = 'https://it4innovations.github.io/hyperqueue/' +description = """HyperQueue lets you build a computation plan consisting of a large amount of tasks and then execute it transparently over a system like SLURM/PBS. It dynamically groups jobs into SLURM/PBS jobs and distributes them to fully utilize allocated notes. You thus do not have to manually aggregate your tasks into SLURM/PBS jobs.""" + +toolchain = SYSTEM + +source_urls = ['https://github.com/It4innovations/hyperqueue/releases/download/v%(version)s/'] +sources = ['hq-v%(version)s-linux-x64.tar.gz'] +checksums = ['868b858510ef2abf4da61a66a7408283'] + +sanity_check_paths = { + 'files': ['hq'], + 'dirs': [], +} + + +moduleclass = 'devel' diff --git a/h/Hypre/Hypre-2.23.0-intel-2020b.eb b/h/Hypre/Hypre-2.23.0-intel-2020b.eb new file mode 100644 index 00000000..d38bee84 --- /dev/null +++ b/h/Hypre/Hypre-2.23.0-intel-2020b.eb @@ -0,0 +1,18 @@ +name = 'Hypre' +version = '2.23.0' + +homepage = 'https://computation.llnl.gov/projects/hypre-scalable-linear-solvers-multigrid-methods' +description = """Hypre is a library for solving large, sparse linear systems of equations on massively + parallel computers. 
The problems of interest arise in the simulation codes being developed at LLNL + and elsewhere to study physical phenomena in the defense, environmental, energy, and biological sciences.""" + +toolchain = {'name': 'intel', 'version': '2020b'} +toolchainopts = {'pic': True} + +source_urls = ['https://github.com/hypre-space/hypre/archive/'] +sources = ['v%(version)s.tar.gz'] +checksums = ['8a9f9fb6f65531b77e4c319bf35bfc9d34bf529c36afe08837f56b635ac052e2'] + +start_dir = 'src' + +moduleclass = 'numlib' diff --git a/h/h5py/h5py-3.6.0-intel-2021b.eb b/h/h5py/h5py-3.6.0-intel-2021b.eb new file mode 100644 index 00000000..1bdc03e8 --- /dev/null +++ b/h/h5py/h5py-3.6.0-intel-2021b.eb @@ -0,0 +1,34 @@ +easyblock = 'PythonPackage' + +name = 'h5py' +version = '3.6.0' + +homepage = 'https://www.h5py.org/' +description = """HDF5 for Python (h5py) is a general-purpose Python interface to the Hierarchical Data Format library, + version 5. HDF5 is a versatile, mature scientific software library designed for the fast, flexible storage of enormous + amounts of data.""" + +toolchain = {'name': 'intel', 'version': '2021b'} +toolchainopts = {'usempi': True} + +sources = [SOURCE_TAR_GZ] +checksums = ['8752d2814a92aba4e2b2a5922d2782d0029102d99caaf3c201a566bc0b40db29'] + +builddependencies = [('pkgconfig', '1.5.5', '-python')] + +dependencies = [ + ('Python', '3.9.6'), + ('SciPy-bundle', '2021.05', '', ('foss', '2021a')), + ('HDF5', '1.12.1', '', ('iimpi', '2021b')), +] + +use_pip = True +sanity_pip_check = True +download_dep_fail = True + +# h5py's setup.py will disable setup_requires if H5PY_SETUP_REQUIRES is set to 0 +# without this environment variable, pip will fetch the minimum numpy version h5py supports during install, +# even though SciPy-bundle provides a newer version that satisfies h5py's install_requires dependency. 
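+# HDF5_MPI=ON below asks h5py to build its MPI-enabled bindings, and HDF5_DIR points the
+# build at the EasyBuild-provided HDF5 prefix ($EBROOTHDF5) rather than any system copy.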
+preinstallopts = 'HDF5_MPI=ON HDF5_DIR="$EBROOTHDF5" H5PY_SETUP_REQUIRES=0 ' + +moduleclass = 'data' diff --git a/l/LAPACK/LAPACK-3.10.0-GCC-11.2.0.eb b/l/LAPACK/LAPACK-3.10.0-GCC-11.2.0.eb new file mode 100644 index 00000000..995e1f59 --- /dev/null +++ b/l/LAPACK/LAPACK-3.10.0-GCC-11.2.0.eb @@ -0,0 +1,16 @@ +name = 'LAPACK' +version = '3.10.0' + +homepage = 'https://www.netlib.org/lapack/' +description = """LAPACK is written in Fortran90 and provides routines for solving systems of + simultaneous linear equations, least-squares solutions of linear systems of equations, eigenvalue + problems, and singular value problems.""" + +toolchain = {'name': 'GCC', 'version': '11.2.0'} +toolchainopts = {'pic': True} + +source_urls = ['https://github.com/Reference-LAPACK/lapack/archive/'] +sources = ['v%(version)s.tar.gz'] +checksums = ['328c1bea493a32cac5257d84157dc686cc3ab0b004e2bea22044e0a59f6f8a19'] + +moduleclass = 'numlib' diff --git a/l/libFLAME/libFLAME-5.2.0-GCCcore-10.2.0.eb b/l/libFLAME/libFLAME-5.2.0-GCCcore-10.2.0.eb new file mode 100644 index 00000000..20d17f46 --- /dev/null +++ b/l/libFLAME/libFLAME-5.2.0-GCCcore-10.2.0.eb @@ -0,0 +1,56 @@ +# IT4Innovations +# LK 2022 + +easyblock = 'ConfigureMake' + +name = 'libFLAME' +version = '5.2.0' + +homepage = 'https://developer.amd.com/amd-cpu-libraries/blas-library/#libflame' +description = """libFLAME is a portable library for dense matrix computations, +providing much of the functionality present in LAPACK.""" + +toolchain = {'name': 'GCCcore', 'version': '10.2.0'} +toolchainopts = {'pic': True} + +source_urls = ['https://github.com/flame/libflame/archive/'] +sources = ['%(version)s.tar.gz'] +checksums = ['997c860f351a5c7aaed8deec00f502167599288fd0559c92d5bfd77d0b4d475c'] + +# '--enable-max-arg-list-hack --enable-dynamic-build' requires 'file' function from GNU Make 4.x +builddependencies = [ + ('binutils', '2.35'), + ('Python', '3.8.6'), + ('make', '4.3'), # needed on Cent OS 7 where make 3 is installed +] + +dependencies = [('BLIS', '3.0.1')] + +# Use unset FLIBS to let configure pick up LDFLAGS +preconfigopts = 'unset FLIBS && ' +preconfigopts += 'LIBS="-lblis $LIBS" ' +preconfigopts += 'LDFLAGS="$LDFLAGS -L$EBROOTBLIS/lib -fopenmp -lm -lpthread" ' +preconfigopts += 'CFLAGS="$CFLAGS -I$EBROOTBLIS/include/blis" ' + +configopts = '--enable-max-arg-list-hack ' +configopts += '--enable-lapack2flame ' +configopts += '--enable-external-lapack-interfaces ' +configopts += '--enable-cblas-interfaces ' +configopts += '--enable-dynamic-build ' +configopts += '--enable-multithreading=openmp ' + +# libFLAME C++ Template API tests +# runtest = 'checkcpp LIBBLAS=$EBROOTBLIS/lib/libblis.a' + +# sanity_check_commands = [ +# 'cd %(builddir)s/%(namelower)s-%(version)s/test ' +# '&& make LIBBLAS=$EBROOTBLIS/lib/libblis-mt.so LDFLAGS="-fopenmp -lm -lpthread" ' +# '&& ./test_libfame.x' +# ] + +sanity_check_paths = { + 'files': ['include/FLAME.h', 'lib/libflame.a', 'lib/libflame.%s' % SHLIB_EXT], + 'dirs': ['lib'], +} + +moduleclass = 'numlib' diff --git a/m/MaSuRCA/MaSuRCA-4.0.7-foss-2020a-Perl-5.30.2.eb b/m/MaSuRCA/MaSuRCA-4.0.7-foss-2020a-Perl-5.30.2.eb new file mode 100644 index 00000000..09817112 --- /dev/null +++ b/m/MaSuRCA/MaSuRCA-4.0.7-foss-2020a-Perl-5.30.2.eb @@ -0,0 +1,66 @@ +## +# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild +# +# Copyright:: Copyright 2017 University of Geneva +# Authors:: Yann Sagon +# License:: MIT/GPL +# $Id$ +# +## + +easyblock = 'ConfigureMake' + +name = 'MaSuRCA' +version = '4.0.7' 
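+# note: '%(perlver)s' in the version suffix below is an EasyBuild template that is
+# resolved from the Perl dependency listed further down (5.30.2 here)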
+versionsuffix = '-Perl-%(perlver)s' + +homepage = 'http://www.genome.umd.edu/masurca.html' + +description = '''MaSuRCA is whole genome assembly software. It combines the efficiency of the de Bruijn graph + and Overlap-Layout-Consensus (OLC) approaches. MaSuRCA can assemble data sets containing + only short reads from Illumina sequencing or a mixture of short reads and long reads + (Sanger, 454, Pacbio and Nanopore).''' + +toolchain = {'name': 'foss', 'version': '2020a'} + +# need a temporary url to download it. Do it manually here: +# http://www.genome.umd.edu/masurca_form.html + +source_urls = ['https://github.com/alekseyzimin/masurca/releases/download/v%(version)s'] +sources = ['%(name)s-%(version)s.tar.gz'] +checksums = ['6ad1c06396cc1bd6025d37f0fcce617633c57d414954cdc7dd7c70eec8a09154'] + +dependencies = [ + ('libreadline', '8.0'), + ('Tcl', '8.6.10'), + ('Boost', '1.72.0'), + ('zlib', '1.2.11'), + ('Perl', '5.30.2'), + ('bzip2', '1.0.8'), +] + +buildopts = "install-special" +start_dir = "global-1" + +postinstallcmds = [ + # fix location of 'bin' in install prefix in runCA and runCA-dedupe scripts + # escaping single quotes within single quotes is tricky, so we use $'...' to use ANSI C-like escaping + "sed -i $'s|^$bin =.*|$bin = \"$ENV{\'EBROOTMASURCA\'}/bin\";|g' %(installdir)s/bin/runCA", + "sed -i $'s|^$bin =.*|$bin = \"$ENV{\'EBROOTMASURCA\'}/bin\";|g' %(installdir)s/bin/runCA-dedupe", + # fix hardcoded path in masurca script, just point back to 'bin' subdirectory instead + "sed -i 's@../CA8/Linux-amd64/bin@../bin@g' %(installdir)s/bin/masurca", + # commands to install built-in version of Flye + "cd ../Flye && make && cp -a ../Flye %(installdir)s", +] + +sanity_check_paths = { + 'files': ['bin/masurca', 'Flye/bin/flye'], + 'dirs': ['include', 'lib'], +} + +sanity_check_commands = [ + "masurca --help", + "runCA --help", +] + +moduleclass = 'bio' diff --git a/m/Molpro/Molpro-mpp-2022.1.1.linux_x86_64_mpipr.eb b/m/Molpro/Molpro-mpp-2022.1.1.linux_x86_64_mpipr.eb new file mode 100644 index 00000000..dbe7d50e --- /dev/null +++ b/m/Molpro/Molpro-mpp-2022.1.1.linux_x86_64_mpipr.eb @@ -0,0 +1,23 @@ +# IT4Innovations 2021 +# LK + +name = 'Molpro' +version = '2022.1.1' +versionprefix = 'mpp-' +versionsuffix = '.linux_x86_64_mpipr' + +homepage = 'https://www.molpro.net' +description = """Molpro is a local_complete system of ab initio programs for molecular electronic structure calculations.""" + +toolchain = SYSTEM + +# no source URL available, requires registration to download +sources = ['%(namelower)s-%(versionprefix)s%(version)s%(versionsuffix)s.sh'] + +precompiled_binaries = True + +# license file - uncomment if a licence file is supplied by your site and +# is valid for all users - the value of license_file may have to be changed +# license_file = HOME + '/licenses/%(name)s/license.lic' + +moduleclass = 'chem' diff --git a/m/Molpro/Molpro-mpp-2022.1.1.linux_x86_64_sockets.eb b/m/Molpro/Molpro-mpp-2022.1.1.linux_x86_64_sockets.eb new file mode 100644 index 00000000..a5edc884 --- /dev/null +++ b/m/Molpro/Molpro-mpp-2022.1.1.linux_x86_64_sockets.eb @@ -0,0 +1,23 @@ +# IT4Innovations 2021 +# LK + +name = 'Molpro' +version = '2022.1.1' +versionprefix = 'mpp-' +versionsuffix = '.linux_x86_64_sockets' + +homepage = 'https://www.molpro.net' +description = """Molpro is a local_complete system of ab initio programs for molecular electronic structure calculations.""" + +toolchain = SYSTEM + +# no source URL available, requires registration to download +sources = 
['%(namelower)s-%(versionprefix)s%(version)s%(versionsuffix)s.sh'] + +precompiled_binaries = True + +# license file - uncomment if a licence file is supplied by your site and +# is valid for all users - the value of license_file may have to be changed +# license_file = HOME + '/licenses/%(name)s/license.lic' + +moduleclass = 'chem' diff --git a/n/nompi/nompi-2022a.eb b/n/nompi/nompi-2022a.eb new file mode 100644 index 00000000..b58f6112 --- /dev/null +++ b/n/nompi/nompi-2022a.eb @@ -0,0 +1,20 @@ +# IT4Innovations +# LK 2022 + +easyblock = "Toolchain" + +name = 'nompi' +version = '2022a' + +homepage = '(none)' +description = """NVHPC compiler, including OpenMPI for MPI support.""" + +toolchain = SYSTEM + +dependencies = [ + ('NVHPC', '21.11'), + ('OpenMPI', '4.0.6', '-NVHPC-21.11-CUDA-11.4.1'), + ('CUDAcore', '11.4.1'), +] + +moduleclass = 'toolchain' diff --git a/o/ORCA/ORCA-5.0.3-OpenMPI-4.1.1.eb b/o/ORCA/ORCA-5.0.3-OpenMPI-4.1.1.eb new file mode 100644 index 00000000..80dcf115 --- /dev/null +++ b/o/ORCA/ORCA-5.0.3-OpenMPI-4.1.1.eb @@ -0,0 +1,36 @@ +# IT4Innovations 2021 +# LK JK + +easyblock = "PackedBinary" + +name = "ORCA" +version = '5.0.3' +versionsuffix = '-OpenMPI-4.1.1' + +homepage = 'http://cec.mpg.de/forum/' +description = """ORCA is a flexible, efficient and easy-to-use general purpose tool for quantum chemistry + with specific emphasis on spectroscopic properties of open-shell molecules. + It features a wide variety of standard quantum chemical methods ranging from semiempirical methods to DFT to single- + and multireference correlated ab initio methods. + It can also treat environmental and relativistic effects.""" + +toolchain = SYSTEM + +# Download from https://orcaforum.kofo.mpg.de +sources = ['orca_5_0_3_linux_x86-64_openmpi411.tar.gz'] +checksums = ['4d860698816e83793a0e5889cbdf17a24c81bd0ec9af358f7dc9369abf6fca59'] +dependencies = [('OpenMPI', '4.1.1', '-GCC-10.2.0')] + +sanity_check_paths = { + 'files': ['orca_%s%s' % (x, y) for x in ['anoint', 'casscf', 'cis', 'cpscf', + 'eprnmr', 'gtoint', 'mdci', 'mp2', 'mrci', 'pc', + 'rocis', 'scf', 'scfgrad', 'soc'] for y in ['', '_mpi']] + + ['orca_%s' % x for x in ['2mkl', 'asa', 'chelpg', 'ciprep', 'eca', 'ecplib', + 'euler', 'fci', 'fitpes', 'gstep', 'loc', 'mapspc', + 'md', 'mergefrag', 'ndoint', 'numfreq', 'plot', + 'pltvib', 'pop', 'rel', 'vib', 'vpot']] + + ['orca'], + 'dirs': [], +} + +moduleclass = 'chem' diff --git a/o/Octopus/Octopus-11.3-intel-2020b-mpi.eb b/o/Octopus/Octopus-11.3-intel-2020b-mpi.eb index 64db6325..24301920 100644 --- a/o/Octopus/Octopus-11.3-intel-2020b-mpi.eb +++ b/o/Octopus/Octopus-11.3-intel-2020b-mpi.eb @@ -51,6 +51,7 @@ configopts += '--with-netcdf-prefix=$EBROOTNETCDFMINFORTRAN ' #configopts += '--with-etsf-io-prefix=$EBROOTETSF_IO ' #configopts += '--with-pfft-prefix=$EBROOTPFFT --with-mpifftw-prefix=$EBROOTFFTW ' +# approx. 8/228 checks fail #runtest = 'MPIEXEC=`which mpirun` check' #sanity_check_paths = { diff --git a/o/OpenCV/OpenCV-4.5.3-foss-2021a-CUDA-11.3.1-contrib.eb b/o/OpenCV/OpenCV-4.5.3-foss-2021a-CUDA-11.3.1-contrib.eb new file mode 100644 index 00000000..bbcdc98b --- /dev/null +++ b/o/OpenCV/OpenCV-4.5.3-foss-2021a-CUDA-11.3.1-contrib.eb @@ -0,0 +1,89 @@ +# IT4Innovations +# LK 2022 + +name = 'OpenCV' +version = '4.5.3' +versionsuffix = '-CUDA-%(cudaver)s-contrib' + +# the hash is version dependent! 
see 3rdparty/ippicv/ippicv.cmake +local_ippicv_hash = 'a56b6ac6f030c312b2dce17430eef13aed9af274' + +homepage = 'https://opencv.org/' +description = """OpenCV (Open Source Computer Vision Library) is an open source computer vision + and machine learning software library. OpenCV was built to provide + a common infrastructure for computer vision applications and to accelerate + the use of machine perception in the commercial products. + Includes extra modules for OpenCV from the contrib repository.""" + +toolchain = {'name': 'foss', 'version': '2021a'} + +sources = [ + {'source_urls': ['https://github.com/opencv/opencv/archive/'], + 'download_filename': '%(version)s.zip', 'filename': SOURCELOWER_ZIP}, + {'source_urls': ['https://github.com/opencv/opencv_contrib/archive/'], + 'download_filename': '%(version)s.zip', 'filename': '%(namelower)s_contrib-%(version)s.zip'}, + {'source_urls': ['https://raw.githubusercontent.com/opencv/opencv_3rdparty/%s/ippicv' % local_ippicv_hash], + 'filename': 'ippicv_2020_lnx_intel64_20191018_general.tgz', 'extract_cmd': "cp %s %(builddir)s"}, +] +checksums = [ + 'a61e7a4618d353140c857f25843f39b2abe5f451b018aab1604ef0bc34cd23d5', # opencv-4.5.3.zip + 'dc3317950cf0d6cab6d24ec8df864d5e7c4efe39627dbd1c7c177dc12a8bcd78', # opencv_contrib-4.5.3.zip + '08627fa5660d52d59309a572dd7db5b9c8aea234cfa5aee0942a1dd903554246', # ippicv_2020_lnx_intel64_20191018_general.tgz +] + +builddependencies = [ + ('CMake', '3.20.1'), +] + +dependencies = [ + ('Python', '3.9.5'), + ('SciPy-bundle', '2021.05'), # for numpy + ('zlib', '1.2.11'), + ('FFmpeg', '4.3.2'), + ('freetype', '2.10.4'), + ('HarfBuzz', '2.8.1'), + ('libjpeg-turbo', '2.0.6'), + ('libpng', '1.6.37'), + ('LibTIFF', '4.2.0'), + ('libwebp', '1.2.0'), + ('OpenEXR', '3.0.1'), + ('JasPer', '2.0.28'), + ('Java', '11', '', True), + ('ant', '1.10.9', '-Java-%(javaver)s', True), + ('GLib', '2.68.2'), + ('GTK3', '3.24.29'), + ('HDF5', '1.10.7'), # needed by hdf from contrib + ('CUDA', '11.3.1', '', True), + ('cuDNN', '8.2.1.32', '-CUDA-%(cudaver)s', True), +] + +# XXXX in configurations is a bug fix in OpenCV because ocv_check_modules is not able to recognize freetype and harfbuzz +# ref: https://github.com/opencv/opencv/blob/6e8daaec0f46aaba9ea22e2afce47307b1dbff9f/cmake/OpenCVUtils.cmake#L861 +configopts = '-DOPENCV_EXTRA_MODULES_PATH=%(builddir)s/%(namelower)s_contrib-%(version)s/modules ' +configopts += '-DFREETYPE_FOUND=ON ' +configopts += '-DFREETYPE_INCLUDE_DIRS=$EBROOTFREETYPE/include/freetype2/ ' +configopts += '-DFREETYPE_LIBRARIES=$EBROOTFREETYPE/lib64/libfreetype.so ' +configopts += '-DFREETYPE_LINK_LIBRARIES=$EBROOTFREETYPE/lib64/libfreetype.so ' +configopts += '-DFREETYPE_LINK_LIBRARIES_XXXXX=ON ' +configopts += '-DHARFBUZZ_FOUND=ON ' +configopts += '-DHARFBUZZ_INCLUDE_DIRS=$EBROOTHARFBUZZ/include/harfbuzz ' +configopts += '-DHARFBUZZ_LIBRARIES=$EBROOTHARFBUZZ/lib64/libharfbuzz.so ' +configopts += '-DHARFBUZZ_LINK_LIBRARIES=$EBROOTHARFBUZZ/lib64/libharfbuzz.so ' +configopts += '-DHARFBUZZ_LINK_LIBRARIES_XXXXX=ON ' +configopts += '-DBUILD_opencv_python2=OFF ' + +enhance_sanity_check = True + +local_contrib_libs = [ + 'aruco', 'bgsegm', 'bioinspired', 'ccalib', 'datasets', 'dnn_objdetect', 'dnn_superres', 'dpm', 'face', 'freetype', + 'fuzzy', 'hdf', 'hfs', 'img_hash', 'line_descriptor', 'optflow', 'phase_unwrapping', 'plot', 'quality', 'reg', + 'rgbd', 'saliency', 'shape', 'stereo', 'structured_light', 'superres', 'surface_matching', 'text', 'tracking', + 'videostab', 'xfeatures2d', 'ximgproc', 'xobjdetect', 'xphoto' +] 
+ +sanity_check_paths = { + 'files': ['lib64/libopencv_%s.%s' % (l, SHLIB_EXT) for l in local_contrib_libs], + 'dirs': [], +} + +moduleclass = 'vis' diff --git a/o/OpenCV/OpenCV-4.5.5-foss-2021a-CUDA-11.4.1-contrib.eb b/o/OpenCV/OpenCV-4.5.5-foss-2021a-CUDA-11.4.1-contrib.eb new file mode 100644 index 00000000..affb6981 --- /dev/null +++ b/o/OpenCV/OpenCV-4.5.5-foss-2021a-CUDA-11.4.1-contrib.eb @@ -0,0 +1,84 @@ +# IT4Innovations +# LK 2022 + +name = 'OpenCV' +version = '4.5.5' +versionsuffix = '-CUDA-%(cudaver)s-contrib' + +# the hash is version dependent! see 3rdparty/ippicv/ippicv.cmake +local_ippicv_hash = 'a56b6ac6f030c312b2dce17430eef13aed9af274' + +homepage = 'https://opencv.org/' +description = """OpenCV (Open Source Computer Vision Library) is an open source computer vision + and machine learning software library. OpenCV was built to provide + a common infrastructure for computer vision applications and to accelerate + the use of machine perception in the commercial products. + Includes extra modules for OpenCV from the contrib repository.""" + +toolchain = {'name': 'foss', 'version': '2021a'} + +sources = [ + {'source_urls': ['https://github.com/opencv/opencv/archive/'], + 'download_filename': '%(version)s.zip', 'filename': SOURCELOWER_ZIP}, + {'source_urls': ['https://github.com/opencv/opencv_contrib/archive/'], + 'download_filename': '%(version)s.zip', 'filename': '%(namelower)s_contrib-%(version)s.zip'}, + {'source_urls': ['https://raw.githubusercontent.com/opencv/opencv_3rdparty/%s/ippicv' % local_ippicv_hash], + 'filename': 'ippicv_2020_lnx_intel64_20191018_general.tgz', 'extract_cmd': "cp %s %(builddir)s"}, +] + +builddependencies = [ + ('CMake', '3.20.1'), +] + +dependencies = [ + ('Python', '3.9.5'), + ('SciPy-bundle', '2021.05'), # for numpy + ('zlib', '1.2.11'), + ('FFmpeg', '4.3.2'), + ('freetype', '2.10.4'), + ('HarfBuzz', '2.8.1'), + ('libjpeg-turbo', '2.0.6'), + ('libpng', '1.6.37'), + ('LibTIFF', '4.2.0'), + ('libwebp', '1.2.0'), + ('OpenEXR', '3.0.1'), + ('JasPer', '2.0.28'), + ('Java', '11', '', True), + ('ant', '1.10.9', '-Java-%(javaver)s', True), + ('GLib', '2.68.2'), + ('GTK3', '3.24.29'), + ('HDF5', '1.10.7'), # needed by hdf from contrib + ('CUDA', '11.4.1', '', True), + ('cuDNN', '8.2.2.26', '-CUDA-%(cudaver)s', True), +] + +# XXXX in configurations is a bug fix in OpenCV because ocv_check_modules is not able to recognize freetype and harfbuzz +# ref: https://github.com/opencv/opencv/blob/6e8daaec0f46aaba9ea22e2afce47307b1dbff9f/cmake/OpenCVUtils.cmake#L861 +configopts = '-DOPENCV_EXTRA_MODULES_PATH=%(builddir)s/%(namelower)s_contrib-%(version)s/modules ' +configopts += '-DFREETYPE_FOUND=ON ' +configopts += '-DFREETYPE_INCLUDE_DIRS=$EBROOTFREETYPE/include/freetype2/ ' +configopts += '-DFREETYPE_LIBRARIES=$EBROOTFREETYPE/lib64/libfreetype.so ' +configopts += '-DFREETYPE_LINK_LIBRARIES=$EBROOTFREETYPE/lib64/libfreetype.so ' +configopts += '-DFREETYPE_LINK_LIBRARIES_XXXXX=ON ' +configopts += '-DHARFBUZZ_FOUND=ON ' +configopts += '-DHARFBUZZ_INCLUDE_DIRS=$EBROOTHARFBUZZ/include/harfbuzz ' +configopts += '-DHARFBUZZ_LIBRARIES=$EBROOTHARFBUZZ/lib64/libharfbuzz.so ' +configopts += '-DHARFBUZZ_LINK_LIBRARIES=$EBROOTHARFBUZZ/lib64/libharfbuzz.so ' +configopts += '-DHARFBUZZ_LINK_LIBRARIES_XXXXX=ON ' +configopts += '-DBUILD_opencv_python2=OFF ' + +enhance_sanity_check = True + +local_contrib_libs = [ + 'aruco', 'bgsegm', 'bioinspired', 'ccalib', 'datasets', 'dnn_objdetect', 'dnn_superres', 'dpm', 'face', 'freetype', + 'fuzzy', 'hdf', 'hfs', 'img_hash', 
'line_descriptor', 'optflow', 'phase_unwrapping', 'plot', 'quality', 'reg', + 'rgbd', 'saliency', 'shape', 'stereo', 'structured_light', 'superres', 'surface_matching', 'text', 'tracking', + 'videostab', 'xfeatures2d', 'ximgproc', 'xobjdetect', 'xphoto' +] + +sanity_check_paths = { + 'files': ['lib64/libopencv_%s.%s' % (l, SHLIB_EXT) for l in local_contrib_libs], + 'dirs': [], +} + +moduleclass = 'vis' diff --git a/o/OpenMPI/OpenMPI-4.1.1-GCC-11.2.0.eb b/o/OpenMPI/OpenMPI-4.1.1-GCC-11.2.0.eb new file mode 100644 index 00000000..a150076e --- /dev/null +++ b/o/OpenMPI/OpenMPI-4.1.1-GCC-11.2.0.eb @@ -0,0 +1,87 @@ +# IT4Innovations +# LK 2022 + +name = 'OpenMPI' +version = '4.1.1' + +homepage = 'https://www.open-mpi.org/' +description = """The Open MPI Project is an open source MPI-3 implementation.""" + +toolchain = {'name': 'GCC', 'version': '11.2.0'} + +source_urls = ['https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads'] +sources = [SOURCELOWER_TAR_BZ2] +patches = [ + 'OpenMPI-4.1.1_fix-bufferoverflow-in-common_ofi.patch', + 'OpenMPI-4.0.6_remove-pmix-check-in-pmi-switch.patch', + 'OpenMPI-4.1.1_opal-pmix-package-rank.patch', + 'OpenMPI-4.1.1_pmix3x-protection.patch', + 'OpenMPI-4.1.0-1-pml-ucx-datatype-memleak.patch', +] +checksums = [ + 'e24f7a778bd11a71ad0c14587a7f5b00e68a71aa5623e2157bafee3d44c07cda', # openmpi-4.1.1.tar.bz2 + # OpenMPI-4.1.1_fix-bufferoverflow-in-common_ofi.patch + 'a189d834506f3d7c31eda6aa184598a3631ea24a94bc551d5ed1f053772ca49e', + # OpenMPI-4.0.6_remove-pmix-check-in-pmi-switch.patch + '8acee6c9b2b4bf12873a39b85a58ca669de78e90d26186e52f221bb4853abc4d', + '04353672cf7be031e5306c94068d7012d99e6cd94b69d93230797ffcd7f31903', # OpenMPI-4.1.1_opal-pmix-package-rank.patch + '384ef9f1fa803b0d71dae2ec0748d0f20295992437532afedf21478bda164ff8', # OpenMPI-4.1.1_pmix3x-protection.patch + # OpenMPI-4.1.0-1-pml-ucx-datatype-memleak.patch + 'a94a74b174ce783328abfd3656ff5196b89ef4c819fe4c8b8a0f1277123e76ea', +] + +builddependencies = [ + ('pkg-config', '0.29.2'), +] + +dependencies = [ + ('zlib', '1.2.11'), + ('hwloc', '2.5.0'), + ('libevent', '2.1.12'), + ('UCX', '1.11.2'), + ('libfabric', '1.13.2'), + ('PMIx', '4.1.0'), +] + +configopts = '--enable-shared --enable-mpi-thread-multiple --with-verbs ' +configopts += '--enable-mpirun-prefix-by-default ' +configopts += '--with-hwloc=$EBROOTHWLOC ' # hwloc support +configopts += '--with-tm=/opt/pbs ' # Enable PBS +configopts += '--enable-mpi-cxx ' # Enable building the C++ MPI bindings +configopts += '--with-ucx=$EBROOTUCX ' + +osdependencies = [('libibverbs-dev', 'libibverbs-devel', 'rdma-core-devel')] + +postinstallcmds = [ + 'echo "# By default, for Open MPI 4.0 and later, infiniband ports on a device are not used by default." 
>> %(installdir)s/etc/openmpi-mca-params.conf', + 'echo "btl_openib_allow_ib = true" >> %(installdir)s/etc/openmpi-mca-params.conf', +] + +libs = ["mpi_mpifh", "mpi", "ompitrace", "open-pal", "open-rte"] +sanity_check_paths = { + 'files': [ + "bin/%s" % + binfile for binfile in [ + "ompi_info", "opal_wrapper", "orterun"]] + [ + "lib/lib%s.%s" % + (libfile, SHLIB_EXT) for libfile in libs] + [ + "include/%s.h" % + x for x in [ + "mpi-ext", "mpif-config", "mpif", "mpi", "mpi_portable_platform"]], 'dirs': [], } + +import os +if os.environ.get("CLUSTERNAME") in ["BARBORA"]: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx5_0', + 'OMPI_MCA_btl_tcp_if_include': '10.33.4.0/24', + 'OMPI_MCA_orte_base_help_aggregate': '0', + } +elif os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx5_0', + 'OMPI_MCA_orte_base_help_aggregate': '0', + } +else: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx4_0', + 'OMPI_MCA_oob_tcp_if_include': '10.0.0.0/8', + } + +moduleclass = 'mpi' diff --git a/o/OpenMPI/OpenMPI-4.1.2-GCC-11.2.0-Java-1.8.0_221.eb b/o/OpenMPI/OpenMPI-4.1.2-GCC-11.2.0-Java-1.8.0_221.eb new file mode 100644 index 00000000..bd7560e4 --- /dev/null +++ b/o/OpenMPI/OpenMPI-4.1.2-GCC-11.2.0-Java-1.8.0_221.eb @@ -0,0 +1,73 @@ +# IT4Innovations +# LK 2022 + +name = 'OpenMPI' +version = '4.1.2' +versionsuffix= '-Java-1.8.0_221' + +homepage = 'https://www.open-mpi.org/' +description = """The Open MPI Project is an open source MPI-3 implementation.""" + +toolchain = {'name': 'GCC', 'version': '11.2.0'} + +source_urls = ['https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads'] +sources = [SOURCELOWER_TAR_BZ2] +checksums = ['9b78c7cf7fc32131c5cf43dd2ab9740149d9d87cadb2e2189f02685749a6b527'] + +builddependencies = [ + ('pkgconf', '1.8.0'), +] + +dependencies = [ + ('zlib', '1.2.11'), + ('hwloc', '2.5.0'), + ('libevent', '2.1.12'), + ('UCX', '1.11.2'), + ('libfabric', '1.13.2'), + ('PMIx', '4.1.0'), + ('Java', '1.8.0_221', '', True), +] + +configopts = '--enable-shared --enable-mpi-thread-multiple --with-verbs ' +configopts += '--enable-mpirun-prefix-by-default ' +configopts += '--with-hwloc=$EBROOTHWLOC ' # hwloc support +configopts += '--with-tm=/opt/pbs ' # Enable PBS +configopts += '--enable-mpi-cxx ' # Enable building the C++ MPI bindings +configopts += '--with-ucx=$EBROOTUCX ' +configopts += '--enable-mpi-java ' # Java support RT#28536 + +osdependencies = [('libibverbs-dev', 'libibverbs-devel', 'rdma-core-devel')] + +postinstallcmds = [ + 'echo "# By default, for Open MPI 4.0 and later, infiniband ports on a device are not used by default." 
>> %(installdir)s/etc/openmpi-mca-params.conf', + 'echo "btl_openib_allow_ib = true" >> %(installdir)s/etc/openmpi-mca-params.conf', +] + +libs = ["mpi_mpifh", "mpi", "ompitrace", "open-pal", "open-rte"] +sanity_check_paths = { + 'files': [ + "bin/%s" % + binfile for binfile in [ + "ompi_info", "opal_wrapper", "orterun"]] + [ + "lib/lib%s.%s" % + (libfile, SHLIB_EXT) for libfile in libs] + [ + "include/%s.h" % + x for x in [ + "mpi-ext", "mpif-config", "mpif", "mpi", "mpi_portable_platform"]], 'dirs': [], } + +import os +if os.environ.get("CLUSTERNAME") in ["BARBORA"]: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx5_0', + 'OMPI_MCA_btl_tcp_if_include': '10.33.4.0/24', + 'OMPI_MCA_orte_base_help_aggregate': '0', + } +elif os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx5_0', + 'OMPI_MCA_orte_base_help_aggregate': '0', + } +else: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx4_0', + 'OMPI_MCA_oob_tcp_if_include': '10.0.0.0/8', + } + +moduleclass = 'mpi' diff --git a/o/OpenMPI/OpenMPI-4.1.2-GCC-11.2.0.eb b/o/OpenMPI/OpenMPI-4.1.2-GCC-11.2.0.eb new file mode 100644 index 00000000..33499ad7 --- /dev/null +++ b/o/OpenMPI/OpenMPI-4.1.2-GCC-11.2.0.eb @@ -0,0 +1,70 @@ +# IT4Innovations +# LK 2022 + +name = 'OpenMPI' +version = '4.1.2' + +homepage = 'https://www.open-mpi.org/' +description = """The Open MPI Project is an open source MPI-3 implementation.""" + +toolchain = {'name': 'GCC', 'version': '11.2.0'} + +source_urls = ['https://www.open-mpi.org/software/ompi/v%(version_major_minor)s/downloads'] +sources = [SOURCELOWER_TAR_BZ2] +checksums = ['9b78c7cf7fc32131c5cf43dd2ab9740149d9d87cadb2e2189f02685749a6b527'] + +builddependencies = [ + ('pkgconf', '1.8.0'), +] + +dependencies = [ + ('zlib', '1.2.11'), + ('hwloc', '2.5.0'), + ('libevent', '2.1.12'), + ('UCX', '1.11.2'), + ('libfabric', '1.13.2'), + ('PMIx', '4.1.0'), +] + +configopts = '--enable-shared --enable-mpi-thread-multiple --with-verbs ' +configopts += '--enable-mpirun-prefix-by-default ' +configopts += '--with-hwloc=$EBROOTHWLOC ' # hwloc support +configopts += '--with-tm=/opt/pbs ' # Enable PBS +configopts += '--enable-mpi-cxx ' # Enable building the C++ MPI bindings +configopts += '--with-ucx=$EBROOTUCX ' + +osdependencies = [('libibverbs-dev', 'libibverbs-devel', 'rdma-core-devel')] + +postinstallcmds = [ + 'echo "# By default, for Open MPI 4.0 and later, infiniband ports on a device are not used by default." 
>> %(installdir)s/etc/openmpi-mca-params.conf', + 'echo "btl_openib_allow_ib = true" >> %(installdir)s/etc/openmpi-mca-params.conf', +] + +libs = ["mpi_mpifh", "mpi", "ompitrace", "open-pal", "open-rte"] +sanity_check_paths = { + 'files': [ + "bin/%s" % + binfile for binfile in [ + "ompi_info", "opal_wrapper", "orterun"]] + [ + "lib/lib%s.%s" % + (libfile, SHLIB_EXT) for libfile in libs] + [ + "include/%s.h" % + x for x in [ + "mpi-ext", "mpif-config", "mpif", "mpi", "mpi_portable_platform"]], 'dirs': [], } + +import os +if os.environ.get("CLUSTERNAME") in ["BARBORA"]: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx5_0', + 'OMPI_MCA_btl_tcp_if_include': '10.33.4.0/24', + 'OMPI_MCA_orte_base_help_aggregate': '0', + } +elif os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx5_0', + 'OMPI_MCA_orte_base_help_aggregate': '0', + } +else: + modextravars = {'OMPI_MCA_btl_openib_if_include': 'mlx4_0', + 'OMPI_MCA_oob_tcp_if_include': '10.0.0.0/8', + } + +moduleclass = 'mpi' diff --git a/p/PETSc/PETSc-3.14.4-intel-2020b.eb b/p/PETSc/PETSc-3.14.4-intel-2020b.eb index 5e792a3e..741ac522 100644 --- a/p/PETSc/PETSc-3.14.4-intel-2020b.eb +++ b/p/PETSc/PETSc-3.14.4-intel-2020b.eb @@ -6,7 +6,11 @@ description = """PETSc, pronounced PET-see (the S is silent), is a suite of data scalable (parallel) solution of scientific applications modeled by partial differential equations.""" toolchain = {'name': 'intel', 'version': '2020b'} -toolchainopts = {'openmp': True, 'usempi': True, 'pic': True} +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'openmp': True, 'usempi': True, 'optarch': 'march=core-avx2', 'pic': True} +else: + toolchainopts = {'openmp': True, 'usempi': True, 'pic': True} source_urls = [ 'https://ftp.mcs.anl.gov/pub/petsc/release-snapshots/', diff --git a/p/PLUMED/PLUMED-2.5.6-fosscuda-2020b-patch.eb b/p/PLUMED/PLUMED-2.5.6-fosscuda-2020b-patch.eb index 510dfd99..e866be08 100644 --- a/p/PLUMED/PLUMED-2.5.6-fosscuda-2020b-patch.eb +++ b/p/PLUMED/PLUMED-2.5.6-fosscuda-2020b-patch.eb @@ -16,7 +16,12 @@ description = """PLUMED is an open source library for free energy calculations i """ toolchain = {'name': 'fosscuda', 'version': '2020b'} -toolchainopts = {'usempi': 'True'} +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'usempi': True, 'optarch': 'march=core-avx2'} +else: + toolchainopts = {'usempi': True} + #source_urls = ['https://github.com/plumed/plumed2/releases/download/v%(version)s/'] sources = ['plumed-2.5.6-patch.tar.gz'] diff --git a/p/PLUMED/PLUMED-2.5.6-fosscuda-2020b-switch.eb b/p/PLUMED/PLUMED-2.5.6-fosscuda-2020b-switch.eb new file mode 100644 index 00000000..b7b78986 --- /dev/null +++ b/p/PLUMED/PLUMED-2.5.6-fosscuda-2020b-switch.eb @@ -0,0 +1,61 @@ +# IT4Innovations +# LK 2022 + +easyblock = 'ConfigureMake' + +name = 'PLUMED' +version = '2.5.6' +versionsuffix = '-switch' + +homepage = 'https://www.plumed.org' +description = """PLUMED is an open source library for free energy calculations in molecular systems which + works together with some of the most popular molecular dynamics engines. Free energy calculations can be + performed as a function of many order parameters with a particular focus on biological problems, using + state of the art methods such as metadynamics, umbrella sampling and Jarzynski-equation based steered MD. + The software, written in C++, can be easily interfaced with both fortran and C/C++ codes. 
+""" + +toolchain = {'name': 'fosscuda', 'version': '2020b'} +toolchainopts = {'usempi': 'True'} + +#source_urls = ['https://github.com/plumed/plumed2/releases/download/v%(version)s/'] +sources = ['plumed-2.5.6-switch.tar.gz'] +checksums = ['a44b29011e0f5544470bebd3ce788e88'] + +dependencies = [ + ('zlib', '1.2.11'), + ('GSL', '2.6'), + ('Python', '3.8.6'), + ('SciPy-bundle', '2020.11'), + ('Boost', '1.74.0'), +] + +preconfigopts = 'env FC=$MPIF90 LIBS="$LIBLAPACK $LIBS" ' +configopts = '--exec-prefix=%(installdir)s --enable-gsl --enable-modules=all --enable-python ' +configopts += '--enable-boost_graph --enable-boost_serialization ' +configopts += '--enable-asmjit ' +prebuildopts = 'source sourceme.sh && ' +prebuildopts += 'sed "s|native|core-avx2|g" Makefile.conf -i && ' + +# make sure that ld.gold linker is used +# required to work around problems like "ld: BFD (GNU Binutils) 2.30 assertion fail elf.c:3564" +# (problem with intel build but maintain consistency between easyconfigs) +buildopts = 'LD_RO="ld.gold -r -o"' + +# install path for PLUMED libraries must be included in $LD_LIBRARY_PATH when Python bindings get built/installed +preinstallopts = 'LD_LIBRARY_PATH="%(installdir)s/lib:$LD_LIBRARY_PATH" ' + +sanity_check_paths = { + 'files': ['bin/plumed', 'lib/libplumedKernel.%s' % SHLIB_EXT, 'lib/libplumed.%s' % SHLIB_EXT], + 'dirs': [], +} + +sanity_check_commands = ["python -c 'import plumed'"] + +modextrapaths = { + 'PLUMED_KERNEL': 'lib/libplumedKernel.%s' % SHLIB_EXT, + 'PLUMED_ROOT': 'lib/plumed', + 'PYTHONPATH': 'lib/plumed/python', +} + +moduleclass = 'chem' diff --git a/p/PLUMED/PLUMED-2.7.3-foss-2020b.eb b/p/PLUMED/PLUMED-2.7.3-foss-2020b.eb new file mode 100644 index 00000000..29fb5f93 --- /dev/null +++ b/p/PLUMED/PLUMED-2.7.3-foss-2020b.eb @@ -0,0 +1,59 @@ +# IT4Innovations +# LK 2022 + +easyblock = 'ConfigureMake' + +name = 'PLUMED' +version = '2.7.3' + +homepage = 'https://www.plumed.org' +description = """PLUMED is an open source library for free energy calculations in molecular systems which + works together with some of the most popular molecular dynamics engines. Free energy calculations can be + performed as a function of many order parameters with a particular focus on biological problems, using + state of the art methods such as metadynamics, umbrella sampling and Jarzynski-equation based steered MD. + The software, written in C++, can be easily interfaced with both fortran and C/C++ codes. 
+""" + +toolchain = {'name': 'foss', 'version': '2020b'} +toolchainopts = {'usempi': 'True'} + +source_urls = ['https://github.com/plumed/plumed2/releases/download/v%(version)s/'] +sources = [SOURCE_TGZ] +checksums = ['02899545d9d83a1391b80a202f243fde'] + +dependencies = [ + ('zlib', '1.2.11'), + ('GSL', '2.6'), + ('Python', '3.8.6'), + ('SciPy-bundle', '2020.11'), + ('Boost', '1.74.0'), +] + +preconfigopts = 'env FC=$MPIF90 LIBS="$LIBLAPACK $LIBS" ' +configopts = '--exec-prefix=%(installdir)s --enable-gsl --enable-modules=all --enable-python ' +configopts += '--enable-boost_graph --enable-boost_serialization ' +configopts += '--enable-asmjit ' +prebuildopts = 'source sourceme.sh && ' + +# make sure that ld.gold linker is used +# required to work around problems like "ld: BFD (GNU Binutils) 2.30 assertion fail elf.c:3564" +# (problem with intel build but maintain consistency between easyconfigs) +buildopts = 'LD_RO="ld.gold -r -o"' + +# install path for PLUMED libraries must be included in $LD_LIBRARY_PATH when Python bindings get built/installed +preinstallopts = 'LD_LIBRARY_PATH="%(installdir)s/lib:$LD_LIBRARY_PATH" ' + +sanity_check_paths = { + 'files': ['bin/plumed', 'lib/libplumedKernel.%s' % SHLIB_EXT, 'lib/libplumed.%s' % SHLIB_EXT], + 'dirs': [], +} + +sanity_check_commands = ["python -c 'import plumed'"] + +modextrapaths = { + 'PLUMED_KERNEL': 'lib/libplumedKernel.%s' % SHLIB_EXT, + 'PLUMED_ROOT': 'lib/plumed', + 'PYTHONPATH': 'lib/plumed/python', +} + +moduleclass = 'chem' diff --git a/p/phonopy/phonopy-2.12.0-conda.eb b/p/phonopy/phonopy-2.12.0-conda.eb index 68311537..8297d28a 100644 --- a/p/phonopy/phonopy-2.12.0-conda.eb +++ b/p/phonopy/phonopy-2.12.0-conda.eb @@ -1,4 +1,5 @@ # IT4Innovations 2021 +# JK easyblock = "Conda" diff --git a/q/QMCPACK/QMCPACK-3.11.0-intel-2020b-Python-3.8.6.eb b/q/QMCPACK/QMCPACK-3.11.0-intel-2020b-Python-3.8.6.eb index 8143fc15..914e0ced 100644 --- a/q/QMCPACK/QMCPACK-3.11.0-intel-2020b-Python-3.8.6.eb +++ b/q/QMCPACK/QMCPACK-3.11.0-intel-2020b-Python-3.8.6.eb @@ -28,7 +28,7 @@ builddependencies = [('CMake', '3.20.1', '', True)] dependencies = [ ('libxml2', '2.9.10'), ('Boost', '1.74.0'), - ('HDF5', '1.10.6', '-parallel'), + ('HDF5', '1.10.7', '', ('iimpi', '2020b')), ('Python', '3.8.6'), ('h5py', '3.1.0'), ('SciPy-bundle', '2020.11'), diff --git a/q/QMCPACK/QMCPACK-3.12.0-intel-2020b-Python-3.8.6.eb b/q/QMCPACK/QMCPACK-3.12.0-intel-2020b-Python-3.8.6.eb new file mode 100644 index 00000000..e6af6564 --- /dev/null +++ b/q/QMCPACK/QMCPACK-3.12.0-intel-2020b-Python-3.8.6.eb @@ -0,0 +1,64 @@ +# IT4Innovations 2021 +# JK2021 + +easyblock = 'CMakeMake' + +name = 'QMCPACK' +version = '3.12.0' +versionsuffix = "-Python-%(pyver)s" + +homepage = "https://qmcpack.org/" +description = """QMCPACK, is a modern high-performance open-source Quantum Monte Carlo (QMC) simulation code. Its main applications are electronic structure calculations of molecular, quasi-2D and solid-state systems. Variational Monte Carlo (VMC), diffusion Monte Carlo (DMC) and a number of other advanced QMC algorithms are implemented. Orbital space auxiliary field QMC (AFQMC) has recently been added. By directly solving the Schrodinger equation, QMC methods offer greater accuracy than methods such as density functional theory, but at a trade-off of much greater local_computational expense. +""" + +# vypada to, ze od 3.12.0 uz optimalizace na avx2 nedela trable? 
+toolchain = {'name': 'intel', 'version': '2020b'} +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'opt': True, 'pic': True, 'usempi': True, 'optarch': 'march=core-avx2'} + prebuildopts = "echo %(builddir)s && while read i; do echo $i; sed 's|-xHost|-march=core-avx2|g' -i $i; done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) &&" +else: + toolchainopts = {'opt': True, 'pic': True, 'usempi': True} + +source_urls = ['https://github.com/QMCPACK/qmcpack/archive/'] +sources = ['v%(version)s.tar.gz'] + +builddependencies = [('CMake', '3.20.1', '', True),] + +# odstran z module file GCC 9.3.0 - rovnak na intel +dependencies = [ + ('libxml2', '2.9.10'), + ('Boost', '1.74.0'), + ('HDF5', '1.10.7', '', ('iimpi', '2020b')), + ('Python', '3.8.6'), + ('h5py', '3.1.0'), + ('SciPy-bundle', '2020.11'), + ('GCC', '9.3.0', '', True), +] + +separate_build_dir = True + +configopts = ' -DENABLE_SOA=1 ' +configopts += ' -DCMAKE_C_COMPILER=mpiicc -DCMAKE_CXX_COMPILER=mpiicpc ' +configopts += ' -DHDF5_PREFER_PARALLEL=1 -DENABLE_PHDF5=1 ' +configopts += ' -DQMC_SYMLINK_TEST_FILES=0 ' +configopts += ' -DCMAKE_BUILD_TYPE=Release ' +configopts += ' -DQMC_OMP=ON ' + +# prekopiruje nexus knihovny o kterych install file tvrdi, ze nejsou potreba +# ale evidentne to bez nich nejede +preinstallopts = [ + ' mkdir -p %(installdir)s/nexus && ', + ' cp -r %(builddir)s/qmcpack-%(version)s/nexus/lib %(installdir)s/nexus/lib && ', +] + + +# prida nexus knihovny do PYTHONPATH +modextrapaths = {'PYTHONPATH': 'nexus/lib'} + +sanity_check_paths = { + 'files': ['bin/qmcpack'], + 'dirs': ['bin'], +} + +moduleclass = 'phys' diff --git a/q/QMCPACK/QMCPACK-3.12.0-intel-2021b-Python-3.9.6-lowopt.eb b/q/QMCPACK/QMCPACK-3.12.0-intel-2021b-Python-3.9.6-lowopt.eb new file mode 100644 index 00000000..db93107d --- /dev/null +++ b/q/QMCPACK/QMCPACK-3.12.0-intel-2021b-Python-3.9.6-lowopt.eb @@ -0,0 +1,59 @@ +# IT4Innovations 2021 +# JK2021 + +easyblock = 'CMakeMake' + +name = 'QMCPACK' +version = '3.12.0' +versionsuffix = "-Python-%(pyver)s-lowopt" + +homepage = "https://qmcpack.org/" +description = """QMCPACK, is a modern high-performance open-source Quantum Monte Carlo (QMC) simulation code. Its main applications are electronic structure calculations of molecular, quasi-2D and solid-state systems. Variational Monte Carlo (VMC), diffusion Monte Carlo (DMC) and a number of other advanced QMC algorithms are implemented. Orbital space auxiliary field QMC (AFQMC) has recently been added. By directly solving the Schrodinger equation, QMC methods offer greater accuracy than methods such as density functional theory, but at a trade-off of much greater local_computational expense. 
+""" + +# 3.12.0 update - už se zkompiluje bez 9.3.0 +# vyžaduje GCC 9.3.0, které nemá podporu pro naše AMD procesory => kompilace s druhým AMD hackem je broken +toolchain = {'name': 'intel', 'version': '2021b'} +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'opt': True, 'pic': True, 'usempi': True, 'optarch': 'march=core-avx2'} +# prebuildopts = "echo %(builddir)s && while read i; do echo $i; sed 's|-xHost|-march=core-avx2|g' -i $i; done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) &&" +else: + toolchainopts = {'opt': True, 'pic': True, 'usempi': True} + +source_urls = ['https://github.com/QMCPACK/qmcpack/archive/'] +sources = ['v%(version)s.tar.gz'] + +builddependencies = [('CMake', '3.20.1', '', True)] + +dependencies = [ + ('libxml2', '2.9.10'), + ('Boost', '1.77.0'), + ('HDF5', '1.12.1', '', ('iimpi', '2021b')), + ('Python', '3.9.6'), + ('h5py', '3.6.0'), + ('SciPy-bundle', '2021.05', '-foss-2021a', True), +# ('GCC', '9.3.0', '', True), # obejití podmínky GCC 9.3.0 +] + +separate_build_dir = True + +configopts = ' -DENABLE_SOA=1 ' + +# prekopiruje nexus knihovny o kterych install file tvrdi, ze nejsou potreba +# ale evidentne to bez nich nejede +preinstallopts = [ + ' mkdir -p %(installdir)s/nexus && ', + ' cp -r %(builddir)s/qmcpack-%(version)s/nexus/lib %(installdir)s/nexus/lib && ', +] + + +# prida nexus knihovny do PYTHONPATH +modextrapaths = {'PYTHONPATH': 'nexus/lib'} + +sanity_check_paths = { + 'files': ['bin/qmcpack'], + 'dirs': ['bin'], +} + +moduleclass = 'phys' diff --git a/q/QMCPACK/QMCPACK-3.13.0-intel-2020b-Python-3.8.6.eb b/q/QMCPACK/QMCPACK-3.13.0-intel-2020b-Python-3.8.6.eb new file mode 100644 index 00000000..c7257254 --- /dev/null +++ b/q/QMCPACK/QMCPACK-3.13.0-intel-2020b-Python-3.8.6.eb @@ -0,0 +1,61 @@ +# IT4Innovations 2022 +# JK + +easyblock = 'CMakeMake' + +name = 'QMCPACK' +version = '3.13.0' +versionsuffix = "-Python-%(pyver)s" + +homepage = "https://qmcpack.org/" +description = """QMCPACK, is a modern high-performance open-source Quantum Monte Carlo (QMC) simulation code. Its main applications are electronic structure calculations of molecular, quasi-2D and solid-state systems. Variational Monte Carlo (VMC), diffusion Monte Carlo (DMC) and a number of other advanced QMC algorithms are implemented. Orbital space auxiliary field QMC (AFQMC) has recently been added. By directly solving the Schrodinger equation, QMC methods offer greater accuracy than methods such as density functional theory, but at a trade-off of much greater local_computational expense. 
+""" + +toolchain = {'name': 'intel', 'version': '2020b'} +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'opt': True, 'pic': True, 'usempi': True, 'optarch': 'march=core-avx2'} + prebuildopts = "echo %(builddir)s && while read i; do echo $i; sed 's|-xHost|-march=core-avx2|g' -i $i; done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) &&" +else: + toolchainopts = {'opt': True, 'pic': True, 'usempi': True} + +source_urls = ['https://github.com/QMCPACK/qmcpack/archive/'] +sources = ['v%(version)s.tar.gz'] + +builddependencies = [('CMake', '3.20.1', '', True),] + +# odstran z module file GCC 9.3.0 - rovnak na intel +dependencies = [ + ('libxml2', '2.9.10'), + ('Boost', '1.74.0'), + ('HDF5', '1.10.7', '', ('iimpi', '2020b')), + ('Python', '3.8.6'), + ('h5py', '3.1.0'), + ('SciPy-bundle', '2020.11'), + ('GCC', '9.3.0', '', True), +] + +separate_build_dir = True + +configopts = ' -DENABLE_SOA=1 ' +configopts += ' -DCMAKE_C_COMPILER=mpiicc -DCMAKE_CXX_COMPILER=mpiicpc ' +configopts += ' -DHDF5_PREFER_PARALLEL=1 -DENABLE_PHDF5=1 ' +configopts += ' -DQMC_SYMLINK_TEST_FILES=0 ' +configopts += ' -DCMAKE_BUILD_TYPE=Release ' +configopts += ' -DQMC_OMP=ON ' + +# prekopiruje nexus knihovny o kterych install file tvrdi, ze nejsou potreba +# ale evidentne to bez nich nejede +preinstallopts = [ + ' mkdir -p %(installdir)s/nexus && ', + ' cp -r %(builddir)s/qmcpack-%(version)s/nexus/lib %(installdir)s/nexus/lib && ', +] + +modextrapaths = {'PYTHONPATH': 'nexus/lib'} + +sanity_check_paths = { + 'files': ['bin/qmcpack'], + 'dirs': ['bin'], +} + +moduleclass = 'phys' diff --git a/q/QuantumESPRESSO/QuantumESPRESSO-6.7-intel-2021a.eb b/q/QuantumESPRESSO/QuantumESPRESSO-6.7-intel-2021a.eb new file mode 100644 index 00000000..b9441481 --- /dev/null +++ b/q/QuantumESPRESSO/QuantumESPRESSO-6.7-intel-2021a.eb @@ -0,0 +1,54 @@ +# JK 2022 + +name = 'QuantumESPRESSO' +version = '6.7' + +homepage = 'https://www.quantum-espresso.org' +description = """Quantum ESPRESSO is an integrated suite of computer codes +for electronic-structure calculations and materials modeling at the nanoscale. +It is based on density-functional theory, plane waves, and pseudopotentials +(both norm-conserving and ultrasoft). 
+""" + +toolchain = {'name': 'intel', 'version': '2021a'} + +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'usempi': True, 'openmp': True, 'optarch': 'march=core-avx2'} +else: + toolchainopts = {'usempi': True, 'openmp': True} + +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + prebuildopts = "echo %(builddir)s && while read i; do echo $i; sed 's|-xHost|-march=core-avx2|g' -i $i; done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) && " + +source_urls = [ + 'https://github.com/QEF/q-e/releases/download/qe-%(version)s.0/', + 'https://github.com/dceresoli/qe-gipaw/archive/', + 'https://github.com/wannier-developers/wannier90/archive/' +] +sources = [ + 'qe-%(version)s-ReleasePack.tgz', + {'filename': 'qe-gipaw-%(version)s.tar.gz', 'download_filename': '%(version)sMaX.tar.gz'}, + {'filename': 'wannier90-3.1.0.tar.gz', 'download_filename': 'v3.1.0.tar.gz'}, +] +checksums = [ + '8f06ea31ae52ad54e900a2f51afd5c70f78096d9dcf39c86c2b17dccb1ec9c87', # qe-6.7-ReleasePack.tgz + '95d2ed2f4d27f044dba171bdf8c1913a67ebc8846ed3463462828f2d414a2a61', # qe-gipaw-%(version)s.tar.gz + '40651a9832eb93dec20a8360dd535262c261c34e13c41b6755fa6915c936b254', # wannier90-3.1.0.tar.gz +] + +dependencies = [ + ('HDF5', '1.10.7'), + ('ELPA', '2021.05.001'), + ('libxc', '5.1.5'), +] + +# The third party packages should be installed separately and added as +# dependencies. The exception is w90, which is force built, and gipaw +# which depends on qe source +buildopts = 'all gwl xspectra couple epw gipaw w90' + +# parallel build tends to fail +parallel = 1 + +moduleclass = 'chem' diff --git a/q/QuantumESPRESSO/QuantumESPRESSO-7.0-NVHPC-21.9.eb b/q/QuantumESPRESSO/QuantumESPRESSO-7.0-NVHPC-21.9.eb new file mode 100644 index 00000000..0fc08f70 --- /dev/null +++ b/q/QuantumESPRESSO/QuantumESPRESSO-7.0-NVHPC-21.9.eb @@ -0,0 +1,54 @@ +# JK 2022 + +easyblock = 'ConfigureMake' +name = 'QuantumESPRESSO' +version = '7.0' + +homepage = 'https://www.quantum-espresso.org' +description = """Quantum ESPRESSO is an integrated suite of computer codes +for electronic-structure calculations and materials modeling at the nanoscale. +It is based on density-functional theory, plane waves, and pseudopotentials +(both norm-conserving and ultrasoft). 
+""" + +toolchain = {'name': 'NVHPC', 'version': '21.9'} + +import os +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + toolchainopts = {'pic': True, 'optarch': 'march=core-avx2'} +else: + toolchainopts = {'pic': True} + +if os.environ.get("CLUSTERNAME") in ["KAROLINA"]: + prebuildopts = "echo %(builddir)s && while read i; do echo $i; sed 's|-xHost|-march=core-avx2|g' -i $i; done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) && " + +source_urls = ['https://github.com/QEF/q-e/releases/download/qe-%(version)s/'] +sources = ['qe-%(version)s-ReleasePack.tgz'] +checksums = ['268ec506f88c56ba4e9b691c1e81e33a6ad7949f857f1c6c32197f9c2af2a957'] # qe 7.0 release pack + +dependencies = [ +# ('ELPA', '2021.05.001', '', ('intel', '2021a')), +# ('libxc', '5.1.3', '', ('GCC', '10.2.0')), +# ('HDF5', '1.10.7', '', ('iimpi', '2021a')), + ('OpenMPI', '4.0.7', '-CUDA-11.4.1'), +] + +preconfigopts = " export MPIF90=mpif90 && " +preconfigopts += " export MPIFC=mpif90 && " +preconfigopts += " export MPIF77=mpif90 && " +preconfigopts += " export MPICC=mpicc && " +preconfigopts += " export MPICXX=mpicxx && " + +configopts = 'FC=pgfortran F77=pgfortran F90=pgfortran CC=pgcc CXX=pgc++ --with-cuda=$CUDA_HOME --with-cuda-cc=80 --with-cuda-runtime=11.4 --enable-openmp' + +prebuildopts = "sed -i 's/-D__MPI\\b/& -D__GPU_MPI /' %(builddir)s/qe-%(version)s/make.inc && " + +# only pw is available for GPU +buildopts = 'pw' + +# parallel build tends to fail +parallel = 1 + +skipsteps = ['sanitycheck'] + +moduleclass = 'chem' diff --git a/r/rocm-cuda2hip/rocm-cuda2hip-4.3.1-gcccuda-2020b.eb b/r/rocm-cuda2hip/rocm-cuda2hip-4.3.1-gcccuda-2020b.eb new file mode 100644 index 00000000..05303d0b --- /dev/null +++ b/r/rocm-cuda2hip/rocm-cuda2hip-4.3.1-gcccuda-2020b.eb @@ -0,0 +1,38 @@ +# IT$innovations +# LK 2022 + +easyblock = 'CMakeMake' + +name = 'rocm-cuda2hip' +version = '4.3.1' + +homepage = 'https://github.com/ROCm-Developer-Tools/HIPIFY' +description = "ROCm-HIPIFY : Tools to translate CUDA source code into portable HIP C++ automatically." 
+ +toolchain = {'name': 'gcccuda', 'version': '2020b'} + +sources = ['https://github.com/ROCm-Developer-Tools/HIPIFY/archive/refs/tags/rocm-4.3.1.tar.gz'] + +builddependencies = [ + ('CMake', '3.20.1') +] + +dependencies = [ + ('Clang', '11.0.1'), + ('cuDNN', '8.2.1.32', '-CUDA-11.3.1', True), +] + +postinstallcmds = [ + 'cp -a %(installdir)s/hipify-clang %(installdir)s/bin/rocm-cuda2hip', + 'ln %(installdir)s/bin/rocm-cuda2hip %(installdir)s/bin/hipify-clang', + 'ln %(installdir)s/bin/rocm-cuda2hip %(installdir)s/bin/ROCm-HIPIFY' +] + +sanity_check_commands = [('hipify-clang', '--version')] + +sanity_check_paths = { + 'files': ['bin/%(namelower)s'], + 'dirs': ['include', 'lib'] +} + +moduleclass = 'devel' diff --git a/s/ScaLAPACK/ScaLAPACK-2.2-NVHPC-21.11.eb b/s/ScaLAPACK/ScaLAPACK-2.2-NVHPC-21.11.eb new file mode 100644 index 00000000..32ec71c3 --- /dev/null +++ b/s/ScaLAPACK/ScaLAPACK-2.2-NVHPC-21.11.eb @@ -0,0 +1,49 @@ +# IT4Innovations +# LK 2022 + +easyblock = 'CMakeMake' + +name = 'ScaLAPACK' +version = '2.2' + +homepage = 'https://www.netlib.org/scalapack/' +description = """The ScaLAPACK (or Scalable LAPACK) library includes a subset of LAPACK routines +redesigned for distributed memory MIMD parallel computers.""" + +toolchain = {'name': 'NVHPC', 'version': '21.11'} +toolchainopts = {'pic': True} + +# https://github.com/amd/scalapack/archive/2.2.tar.gz +source_urls = ['https://github.com/amd/scalapack/archive/'] +sources = ['%(version)s.tar.gz'] +checksums = ['2d64926864fc6d12157b86e3f88eb1a5205e7fc157bf67e7577d0f18b9a7484c'] + +builddependencies = [ + ('CMake', '3.18.4', '', True), +] + +dependencies = [ + ('OpenMPI', '4.0.6', '-CUDA-11.4.1-v2'), + ('BLIS', '3.0.1'), + ('libFLAME', '5.2.0'), +] + +# Config Opts based on AOCL User Guide: +# https://developer.amd.com/wp-content/resources/AOCL_User%20Guide_2.2.pdf + +configopts = '-DBUILD_SHARED_LIBS=ON ' +configopts += '-DBLAS_LIBRARIES="$EBROOTBLIS/lib/libblis-mt.a" ' +configopts += '-DLAPACK_LIBRARIES="$EBROOTLIBFLAME/lib/libflame.a" ' +configopts += '-DCMAKE_C_COMPILER=mpicc ' +configopts += '-DCMAKE_Fortran_COMPILER=mpif90 ' +configopts += '-DUSE_OPTIMIZED_LAPACK_BLAS=ON ' +configopts += '-DUSE_F2C=ON ' +configopts += '-DCMAKE_Fortran_FLAGS="-lpthread -fopenmp $DCMAKE_Fortran_FLAGS" ' + +sanity_check_paths = { + 'files': ['lib/libscalapack.%s' % SHLIB_EXT, 'lib64/libscalapack.%s' % SHLIB_EXT], + 'dirs': ["lib", "lib64"], +} + + +moduleclass = 'numlib' diff --git a/t/Tango/Tango.eb b/t/Tango/Tango.eb new file mode 100644 index 00000000..b778abb2 --- /dev/null +++ b/t/Tango/Tango.eb @@ -0,0 +1,24 @@ +# IT4Innovations 2022 +# JK + +easyblock = 'Binary' + +name = 'Tango' +version = '1.0' + +homepage = "N/A" +description = """N/A""" + +toolchain = SYSTEM + +sources = ["%(namelower)s.tar.gz"] + +extract_sources = True + +sanity_check_paths = { + 'files': ['agadirwrapper'], + 'dirs': [], +} + +moduleclass = 'bio' + diff --git a/t/Tensorflow/TensorFlow-2.5.0-fosscuda-2020b.eb b/t/Tensorflow/TensorFlow-2.5.0-fosscuda-2020b.eb new file mode 100644 index 00000000..5c9f105b --- /dev/null +++ b/t/Tensorflow/TensorFlow-2.5.0-fosscuda-2020b.eb @@ -0,0 +1,233 @@ +easyblock = 'PythonBundle' + +name = 'TensorFlow' +version = '2.5.0' + +homepage = 'https://www.tensorflow.org/' +description = "An open-source software library for Machine Intelligence" + +toolchain = {'name': 'fosscuda', 'version': '2020b'} +toolchainopts = {'pic': True} + +builddependencies = [ + ('Bazel', '3.7.2'), + ('protobuf', '3.14.0'), + # git 2.x required, see also 
https://github.com/tensorflow/tensorflow/issues/29053 + ('git', '2.28.0', '-nodocs'), + ('pybind11', '2.6.0'), + ('pkgconfig', '1.5.1', '-python'), # For h5py + ('UnZip', '6.0'), +] +dependencies = [ + ('cuDNN', '8.0.4.30', '-CUDA-%(cudaver)s', True), + ('NCCL', '2.8.3', '-CUDA-%(cudaver)s'), + ('Python', '3.8.6'), + ('h5py', '3.1.0'), + ('cURL', '7.72.0'), + ('double-conversion', '3.1.5'), + ('flatbuffers', '1.12.0'), + ('giflib', '5.2.1'), + ('hwloc', '2.2.0'), + ('ICU', '67.1'), + ('JsonCpp', '1.9.4'), + ('libjpeg-turbo', '2.0.5'), + ('LMDB', '0.9.24'), + ('NASM', '2.15.05'), + ('nsync', '1.24.0'), + ('SQLite', '3.33.0'), + ('PCRE', '8.44'), + ('protobuf-python', '3.14.0'), + ('flatbuffers-python', '1.12'), + ('typing-extensions', '3.7.4.3'), + ('libpng', '1.6.37'), + ('snappy', '1.1.8'), + ('zlib', '1.2.11'), +] + +use_pip = True +sanity_pip_check = True +# Dependencies created and updated using findPythonDeps.sh: +# https://gist.github.com/Flamefire/49426e502cd8983757bd01a08a10ae0d +exts_list = [ + ('Markdown', '3.3.4', { + 'checksums': ['31b5b491868dcc87d6c24b7e3d19a0d730d59d3e46f4eea6430a321bed387a49'], + }), + ('pyasn1-modules', '0.2.8', { + 'checksums': ['905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e'], + }), + ('rsa', '4.7.2', { + 'checksums': ['9d689e6ca1b3038bc82bf8d23e944b6b6037bc02301a574935b2dd946e0353b9'], + }), + ('cachetools', '4.2.2', { + 'checksums': ['61b5ed1e22a0924aed1d23b478f37e8d52549ff8a961de2909c69bf950020cff'], + }), + ('google-auth', '1.30.0', { + 'modulename': 'google.auth', + 'checksums': ['9ad25fba07f46a628ad4d0ca09f38dcb262830df2ac95b217f9b0129c9e42206'], + }), + ('oauthlib', '3.1.0', { + 'checksums': ['bee41cc35fcca6e988463cacc3bcb8a96224f470ca547e697b604cc697b2f889'], + }), + ('requests-oauthlib', '1.3.0', { + 'checksums': ['b4261601a71fd721a8bd6d7aa1cc1d6a8a93b4a9f5e96626f8e4d91e8beeaa6a'], + }), + ('google-auth-oauthlib', '0.4.4', { + 'checksums': ['09832c6e75032f93818edf1affe4746121d640c625a5bef9b5c96af676e98eee'], + }), + ('Werkzeug', '2.0.0', { + 'checksums': ['3389bbfe6d40c6dd25e6d3f974155163c8b3de5bbda6a89342d4ab93fae80ba0'], + }), + ('absl-py', '0.12.0', { + 'modulename': 'absl', + 'checksums': ['b44f68984a5ceb2607d135a615999b93924c771238a63920d17d3387b0d229d5'], + }), + ('astunparse', '1.6.3', { + 'checksums': ['5ad93a8456f0d084c3456d059fd9a92cce667963232cbf763eac3bc5b7940872'], + }), + ('grpcio', '1.34.1', { + 'modulename': 'grpc', + 'checksums': ['1c746a3cd8a830d8d916a9d0476a786aaa98c5cc2a096344af2be955e439f8ac'], + 'preinstallopts': "export GRPC_PYTHON_BUILD_EXT_COMPILER_JOBS=%(parallel)s && ", + }), + ('gviz-api', '1.9.0', { + 'source_tmpl': 'gviz_api-%(version)s.tar.gz', + 'checksums': ['43d13ccc21834d0501b33a291ef3265e933dbb4bbdca3d34b1ed0a048c0ef640'], + }), + ('tensorboard_data_server', '0.6.1', { + 'source_tmpl': SOURCE_PY3_WHL, # Requires Rust to build, take the dummy for now + 'checksums': ['809fe9887682d35c1f7d1f54f0f40f98bb1f771b14265b453ca051e2ce58fca7'], + }), + ('tensorboard', version, { + 'source_tmpl': SOURCE_PY3_WHL, + 'checksums': ['e167460085b6528956b33bab1c970c989cdce47a6616273880733f5e7bde452e'], + }), + ('tensorboard_plugin_wit', '1.8.0', { + 'source_tmpl': SOURCE_PY3_WHL, + 'checksums': ['2a80d1c551d741e99b2f197bb915d8a133e24adb8da1732b840041860f91183a'], + }), + ('tensorboard_plugin_profile', '2.4.0', { + 'checksums': ['dfbf254ee960440e3b2518324f876a6d6704c60b936887d99214fa36988a206a'], + }), + ('google-pasta', '0.2.0', { + 'modulename': 'pasta', + 'checksums': 
['c9f2c8dfc8f96d0d5808299920721be30c9eec37f2389f28904f454565c8a16e'], + }), + ('termcolor', '1.1.0', { + 'checksums': ['1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b'], + }), + ('tensorflow_estimator', version, { + 'source_tmpl': SOURCE_WHL, + 'checksums': ['d1fe76dee8b1dcab865d807a0246da0a9c4a635b1eba6e9545bf216c3aad6955'], + }), + ('astor', '0.8.1', { + 'checksums': ['6a6effda93f4e1ce9f618779b2dd1d9d84f1e32812c23a29b3fff6fd7f63fa5e'], + }), + ('gast', '0.4.0', { + 'checksums': ['40feb7b8b8434785585ab224d1568b857edb18297e5a3047f1ba012bc83b42c1'], + }), + ('opt_einsum', '3.3.0', { + 'checksums': ['59f6475f77bbc37dcf7cd748519c0ec60722e91e63ca114e68821c0c54a46549'], + }), + ('wrapt', '1.12.1', { + 'checksums': ['b62ffa81fb85f4332a4f609cab4ac40709470da05643a082ec1eb88e6d9b97d7'], + }), + ('Keras_Preprocessing', '1.1.2', { + 'checksums': ['add82567c50c8bc648c14195bf544a5ce7c1f76761536956c3d2978970179ef3'], + }), + ('dill', '0.3.4', { + 'source_tmpl': '%(name)s-%(version)s.zip', + 'checksums': ['9f9734205146b2b353ab3fec9af0070237b6ddae78452af83d2fca84d739e675'], + }), + ('tblib', '1.7.0', { + 'checksums': ['059bd77306ea7b419d4f76016aef6d7027cc8a0785579b5aad198803435f882c'], + }), + ('portpicker', '1.3.1', { + 'checksums': ['d2cdc776873635ed421315c4d22e63280042456bbfa07397817e687b142b9667'], + }), + ('keras_nightly', '2.5.0.dev2021032900', { + 'modulename': 'keras', + 'source_tmpl': SOURCE_WHL, + 'checksums': ['6ba70f738f4008222de7e7fdd5b2b18c48c49b897a9fca54c844854e25964011'], + }), + (name, version, { + 'source_tmpl': 'v%(version)s.tar.gz', + 'source_urls': ['https://github.com/tensorflow/tensorflow/archive/'], + 'patches': [ + 'TensorFlow-2.1.0_fix-cuda-build.patch', + 'TensorFlow-2.4.0_add-ldl.patch', + 'TensorFlow-2.4.0_dont-use-var-lock.patch', + 'TensorFlow-2.4.1_fix-min-priority-test.patch', + 'TensorFlow-2.5.0_add-default-shell-env.patch', + 'TensorFlow-2.5.0_add-protobuf-deps.patch', + 'TensorFlow-2.5.0_add-support-for-large-core-systems.patch', + 'TensorFlow-2.5.0_disable-avx512-extensions.patch', + 'TensorFlow-2.5.0-fix-alias-violation-in-absl.patch', + 'TensorFlow-2.5.0_fix-alignment-in-matmul-test.patch', + 'TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch', + 'TensorFlow-2.5.0_fix-crash-on-shutdown.patch', + 'TensorFlow-2.5.0_remove-duplicate-gpu-tests.patch', + ], + 'checksums': [ + '233875ea27fc357f6b714b2a0de5f6ff124b50c1ee9b3b41f9e726e9e677b86c', # v2.5.0.tar.gz + '78c20aeaa7784b8ceb46238a81e8c2461137d28e0b576deeba8357d23fbe1f5a', # TensorFlow-2.1.0_fix-cuda-build.patch + '917ee7282e782e48673596d8917c3207e60e0851bb9acf230a2a439b067af2e3', # TensorFlow-2.4.0_add-ldl.patch + # TensorFlow-2.4.0_dont-use-var-lock.patch + 'b14f2493fd2edf79abd1c4f2dde6c98a3e7d5cb9c25ab9386df874d5f072d6b5', + # TensorFlow-2.4.1_fix-min-priority-test.patch + '389febce9a0612fd457daf4cb18c02f77fc7311bacae0963602a3198d9f2737f', + # TensorFlow-2.5.0_add-default-shell-env.patch + '09b0c5d4ff04f56a9657875471ed78001d4201cac795aeff62019d582115b468', + # TensorFlow-2.5.0_add-protobuf-deps.patch + '2aa79b89cff13e81f83e385761917d5d6dbdffd6b0366d90580761b958f14363', + # TensorFlow-2.5.0_add-support-for-large-core-systems.patch + '915f3477d6407fafd48269fe1e684a05ce361d9b9b85e58686682df87760f636', + # TensorFlow-2.5.0_disable-avx512-extensions.patch + '3655ce24c97569ac9738c07cac85347ba6f5c815ada95b19b606ffa46d4dda03', + # TensorFlow-2.5.0-fix-alias-violation-in-absl.patch + '12454fda3330fb45cd380377e283f04488b40e0b8ae7378e786ddf731a581f75', + # TensorFlow-2.5.0_fix-alignment-in-matmul-test.patch 
+            '6a4d6cbf45a622b8a2c3ea0b1c0171f01f595684d9c57d415bb39b1b27e1180f',
+            # TensorFlow-2.5.0_fix-arm-vector-intrinsics.patch
+            '6abfadc0f67ff3b510d70430843201cb46d7bd65db045ec9b482af70e0c8c0c8',
+            # TensorFlow-2.5.0_fix-crash-on-shutdown.patch
+            '578c7493221ebd3dc25ca43d63a72cbb28fdf4112b1e2baa7390f25781bd78fd',
+            # TensorFlow-2.5.0_remove-duplicate-gpu-tests.patch
+            'b940d438e036faac24453bff2cf1834c5e1359e87e84d1f1999fa7a30b278fec',
+        ],
+        'test_script': 'TensorFlow-2.x_mnist-test.py',
+        'test_tag_filters_cpu': '-gpu,-tpu,-no_cuda_on_cpu_tap,-no_pip,-no_oss,-oss_serial,-benchmark-test,-v1only',
+        'test_tag_filters_gpu': ('gpu,-no_gpu,-nogpu,-gpu_cupti,-no_cuda11,-no_pip,-no_oss,-oss_serial,'
+                                 '-benchmark-test,-v1only'),
+        'test_targets': [
+            '//tensorflow/core/...',
+            '-//tensorflow/core:example_java_proto',
+            '-//tensorflow/core/example:example_protos_closure',
+            '//tensorflow/cc/...',
+            '//tensorflow/c/...',
+            '//tensorflow/python/...',
+            # Fails on some nodes but C API isn't installed anyway
+            '-//tensorflow/c/eager:c_api_test_gpu',
+            '-//tensorflow/c/eager:c_api_distributed_test',
+            '-//tensorflow/c/eager:c_api_distributed_test_gpu',
+            # Race condition with port picker: https://github.com/tensorflow/tensorflow/issues/46602
+            '-//tensorflow/c/eager:c_api_cluster_test_gpu',
+            '-//tensorflow/c/eager:c_api_remote_function_test_gpu',
+            '-//tensorflow/c/eager:c_api_remote_test_gpu',
+            # Fails to open its own test.xml(?)
+            '-//tensorflow/core/common_runtime:collective_param_resolver_local_test',
+            # Fails on non-AVX-512 systems: https://github.com/tensorflow/tensorflow/issues/46532
+            '-//tensorflow/core/common_runtime:mkl_layout_pass_test',
+            '-//tensorflow/core/kernels/mkl:mkl_fused_ops_test',
+            # Fails on AMD EPYC systems: https://github.com/tensorflow/tensorflow/issues/52151
+            '-//tensorflow/core/kernels/mkl:mkl_fused_batch_norm_op_test',
+        ],
+        'testopts': "--test_timeout=3600 --test_size_filters=small",
+        'testopts_gpu': "--test_timeout=3600 --test_size_filters=small " +
+                        "--run_under=//tensorflow/tools/ci_build/gpu_build:parallel_gpu_execute",
+        'with_xla': True,
+    }),
+]
+
+cuda_compute_capabilities = ['8.0']
+
+moduleclass = 'lib'
diff --git a/v/VASP/VASP-5.4.1-24Jun15-intel-2020b.eb b/v/VASP/VASP-5.4.1-24Jun15-intel-2020b.eb
new file mode 100644
index 00000000..5dddd20d
--- /dev/null
+++ b/v/VASP/VASP-5.4.1-24Jun15-intel-2020b.eb
@@ -0,0 +1,63 @@
+# IT4Innovations
+# LK 2022
+
+easyblock = 'MakeCp'
+
+name = 'VASP'
+version = '5.4.1'
+versionsuffix = '-24Jun15'
+
+homepage = 'http://www.vasp.at'
+description = """The Vienna Ab initio Simulation Package (VASP) is a computer program for atomic scale
+materials modelling, e.g. electronic structure calculations and quantum-mechanical molecular dynamics,
+from first principles.
+
+To use VASP, you need an academic license from the University of Vienna. Follow the instructions at https://www.vasp.at/index.php/faqs.
+
+Then please send us a list of authorized users and their IDs for which you need this access. Please use only http://support.it4i.cz/rt. We are responsible for verifying your licenses.
After successful verification, you will be granted access to VASP in our environment."""
+
+toolchain = {'name': 'intel', 'version': '2020b'}
+import os
+if os.environ.get("CLUSTERNAME") in ["KAROLINA"]:
+    toolchainopts = {'pic': True, 'usempi': True, 'optarch': 'march=core-avx2'}
+else:
+    toolchainopts = {'pic': True, 'usempi': True}
+
+
+# VASP is proprietary software, see http://www.vasp.at/index.php/faqs on
+# how to get access to the code
+sources = ['vasp.5.4.1.tar.gz']
+
+dependencies = [
+    ('FFTW', '3.3.8'),
+    ('zlib', '1.2.11'),
+]
+
+prebuildopts = 'cp arch/makefile.include.linux_intel ./makefile.include && '
+
+# AMD/Intel CPU
+if os.environ.get("CLUSTERNAME") in ["KAROLINA"]:
+    prebuildopts += 'sed -i "s|-xHOST|-march=core-avx2|" makefile.include && '
+
+# path to libfftw3xf_intel.a is hardcoded in makefile.include
+prebuildopts += 'sed -i "s|\$(MKLROOT)/interfaces/fftw3xf|\$(FFTW_LIB_DIR)|" makefile.include && '
+
+# remove mkl flag to prevent mixing dynamic libs with the static libs in
+# LIBBLACS/SCALAPACK
+prebuildopts += 'sed -i "s|-mkl||" makefile.include && '
+
+# VASP uses LIBS as a list of folders
+prebuildopts += 'unset LIBS && '
+
+buildopts = 'all BLACS="$LIBBLACS" SCALAPACK="$LIBSCALAPACK"'
+
+parallel = 1
+
+files_to_copy = [(['bin/vasp_std', 'bin/vasp_gam', 'bin/vasp_ncl'], 'bin')]
+
+sanity_check_paths = {
+    'files': ['bin/vasp_std', 'bin/vasp_gam', 'bin/vasp_ncl'],
+    'dirs': []
+}
+
+moduleclass = 'phys'
diff --git a/w/Waltz/Waltz.eb b/w/Waltz/Waltz.eb
new file mode 100644
index 00000000..50f4acda
--- /dev/null
+++ b/w/Waltz/Waltz.eb
@@ -0,0 +1,23 @@
+# IT4Innovations 2022
+# JK
+
+easyblock = 'Binary'
+
+name = 'Waltz'
+version = '1.0'
+
+homepage = "N/A"
+description = """N/A"""
+
+toolchain = SYSTEM
+
+sources = ["%(namelower)s.tar.gz"]
+
+extract_sources = True
+
+sanity_check_paths = {
+    'files': ['616.mat', 'scoreMatrixGT.pl', 'waltz616seb_nmeth2010_regions.pl'],
+    'dirs': [],
+}
+
+moduleclass = 'bio'
diff --git a/y/Yambo/Yambo-5.0.4-intel-2020a.eb b/y/Yambo/Yambo-5.0.4-intel-2020a.eb
new file mode 100644
index 00000000..55fc83b4
--- /dev/null
+++ b/y/Yambo/Yambo-5.0.4-intel-2020a.eb
@@ -0,0 +1,59 @@
+# IT4Innovations 2021
+# LK
+
+easyblock = 'MakeCp'
+
+name = 'Yambo'
+version = '5.0.4'
+
+homepage = 'http://www.yambo-code.org'
+description = """Yambo is a FORTRAN/C code for Many-Body calculations in solid state and molecular physics.
+ Yambo relies on the Kohn-Sham wavefunctions generated by two public DFT codes: abinit and PWscf."""
+
+toolchain = {'name': 'intel', 'version': '2020a'}
+import os
+if os.environ.get("CLUSTERNAME") in ["KAROLINA"]:
+    toolchainopts = {'usempi': True, 'optarch': 'march=core-avx2'}
+else:
+    toolchainopts = {'usempi': True}
+
+
+source_urls = ['https://github.com/yambo-code/yambo/archive']
+sources = ["%(version)s.tar.gz"]
+
+dependencies = [
+    ('netCDF-Fortran', '4.5.2'),
+    ('libxc', '2.2.3'),
+    ('IOTK', '1.2.2'),
+]
+
+#with_configure = True
+
+#configopts = 'CPPFLAGS="" FCFLAGS="-nofor_main" --with-blas-libs="$LIBBLAS" '
+#configopts += '--with-lapack-libs="$LIBLAPACK" --with-blacs-libs="$LIBBLACS" '
+#configopts += '--with-scalapack-libs="$LIBSCALAPACK" --with-fft-libs="$LIBFFT" '
+#configopts += '--with-netcdf-libs="-lnetcdff -lnetcdf" '
+#configopts += '--with-hdf5-path=$EBROOTHDF5 '
+#configopts += '--with-libxc-path=$EBROOTLIBXC '
+#configopts += '--enable-iotk '
+#configopts += '--with-iotk-path=$EBROOTIOTK '
+#configopts += '--enable-dp --enable-memory-profile --disable-open-mp '
+
+prebuildopts = './configure --build=x86_64-pc-linux-gnu --host=x86_64-pc-linux-gnu CPPFLAGS="" FCFLAGS="-nofor_main" --with-blas-libs="$LIBBLAS" --with-lapack-libs="$LIBLAPACK" --with-blacs-libs="$LIBBLACS" --with-scalapack-libs="$LIBSCALAPACK" --with-fft-libs="$LIBFFT" --with-netcdf-libs="-lnetcdff -lnetcdf" --with-hdf5-path=$EBROOTHDF5 --with-libxc-path=$EBROOTLIBXC --enable-iotk --with-iotk-path=$EBROOTIOTK --enable-dp --enable-memory-profile --disable-open-mp && '
+
+buildopts = 'all'
+
+parallel = 1
+
+files_to_copy = [
+    (['bin/*'], 'bin'),
+    (['lib/*.a'], 'lib'),
+    (['include/*'], 'include'),
+]
+
+sanity_check_paths = {
+    'files': ['bin/' + x for x in ['a2y', 'p2y', 'yambo', 'ypp']],
+    'dirs': []
+}
+
+moduleclass = 'phys'
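Editor's note on the recurring cluster-conditional pattern: several of the easyconfigs added above (QMCPACK, QuantumESPRESSO, VASP, Yambo) switch optarch to march=core-avx2 and rewrite any leftover -xHost compiler flags whenever the CLUSTERNAME environment variable reports the AMD-based Karolina system. The fragment below is a minimal, self-contained sketch of that pattern only; the 'ExampleApp' name, version, and toolchain are placeholders and not part of this commit, while the toolchainopts/prebuildopts keys and the grep/sed pipeline are taken directly from the files above.

# Illustrative sketch only: cluster-conditional tuning as used in this commit.
# 'ExampleApp' and '1.0' are hypothetical placeholders.
import os

name = 'ExampleApp'
version = '1.0'
toolchain = {'name': 'intel', 'version': '2020b'}

if os.environ.get("CLUSTERNAME") in ["KAROLINA"]:
    # Karolina has AMD CPUs, so pin AVX2 instead of relying on Intel's -xHost
    toolchainopts = {'opt': True, 'pic': True, 'usempi': True, 'optarch': 'march=core-avx2'}
    # before building, rewrite any -xHost hard-coded in the unpacked sources;
    # %(builddir)s is expanded by EasyBuild at build time
    prebuildopts = (
        "while read i; do "
        "sed 's|-xHost|-march=core-avx2|g' -i $i; "
        "done < <(grep xHost %(builddir)s -R | cut -d ':' -f 1 | sort -u) && "
    )
else:
    toolchainopts = {'opt': True, 'pic': True, 'usempi': True}

Whether the rewrite goes into prebuildopts over the whole build tree (as in the QMCPACK and QuantumESPRESSO files) or targets a single generated file such as makefile.include (as in the VASP file) is a per-package choice; both variants achieve the same -xHost replacement on the AMD nodes.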