diff --git a/CHANGELOG.md b/CHANGELOG.md
index b4e6d9a3..3645503f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,9 @@
         * `simpleVoteIntrinsics_nvrtc` demonstrating NVRTC usage for `simpleVoteIntrinsics` sample (reason: redundant)
     * `2_Concepts_and_Techniques`
         * `cuHook` demonstrating dlsym hooks. (reason: incompatible with modern `glibc`)
+    * `4_CUDA_Libraries`
+        * `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library)
+
 
 
 ### CUDA 12.5
diff --git a/Samples/4_CUDA_Libraries/CMakeLists.txt b/Samples/4_CUDA_Libraries/CMakeLists.txt
new file mode 100644
index 00000000..255d0446
--- /dev/null
+++ b/Samples/4_CUDA_Libraries/CMakeLists.txt
@@ -0,0 +1,40 @@
+#add_subdirectory(FilterBorderControlNPP)
+#add_subdirectory(MersenneTwisterGP11213)
+add_subdirectory(batchCUBLAS)
+add_subdirectory(boxFilterNPP)
+#add_subdirectory(cannyEdgeDetectorNPP)
+#add_subdirectory(conjugateGradient)
+#add_subdirectory(conjugateGradientCudaGraphs)
+#add_subdirectory(conjugateGradientMultiBlockCG)
+#add_subdirectory(conjugateGradientMultiDeviceCG)
+#add_subdirectory(conjugateGradientPrecond)
+#add_subdirectory(conjugateGradientUM)
+#add_subdirectory(cuDLAErrorReporting)
+#add_subdirectory(cuDLAHybridMode)
+#add_subdirectory(cuDLALayerwiseStatsHybrid)
+#add_subdirectory(cuDLALayerwiseStatsStandalone)
+#add_subdirectory(cuDLAStandaloneMode)
+#add_subdirectory(cuSolverDn_LinearSolver)
+#add_subdirectory(cuSolverRf)
+#add_subdirectory(cuSolverSp_LinearSolver)
+#add_subdirectory(cuSolverSp_LowlevelCholesky)
+#add_subdirectory(cuSolverSp_LowlevelQR)
+#add_subdirectory(cudaNvSci)
+#add_subdirectory(cudaNvSciNvMedia)
+#add_subdirectory(freeImageInteropNPP)
+#add_subdirectory(histEqualizationNPP)
+#add_subdirectory(jitLto)
+#add_subdirectory(lineOfSight)
+#add_subdirectory(matrixMulCUBLAS)
+#add_subdirectory(nvJPEG)
+#add_subdirectory(nvJPEG_encoder)
+#add_subdirectory(oceanFFT)
+#add_subdirectory(randomFog)
+#add_subdirectory(simpleCUBLAS)
+#add_subdirectory(simpleCUBLASXT)
+#add_subdirectory(simpleCUBLAS_LU)
+#add_subdirectory(simpleCUFFT)
+#add_subdirectory(simpleCUFFT_2d_MGPU)
+#add_subdirectory(simpleCUFFT_MGPU)
+#add_subdirectory(simpleCUFFT_callback)
+#add_subdirectory(watershedSegmentationNPP)
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt
new file mode 100644
index 00000000..3351e24c
--- /dev/null
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Include directories and libraries
+include_directories(../../../Common)
+
+# Source file
+set(SRC_FILES
+    batchCUBLAS.cpp
+)
+
+# Add target for batchCUBLAS
+add_executable(batchCUBLAS ${SRC_FILES})
+set_target_properties(batchCUBLAS PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+target_include_directories(batchCUBLAS PRIVATE
+    ${CUDAToolkit_INCLUDE_DIRS}
+)
+
+target_link_libraries(batchCUBLAS PRIVATE
+    CUDA::cublas
+    CUDA::cudart
+)
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile b/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile
deleted file mode 100644
index 1e813cf0..00000000
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile
+++ /dev/null
@@ -1,347 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
-    $(info WARNING - x86_64 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=x86_64 instead)
-    TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
-    $(info WARNING - ARMv7 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=armv7l instead)
-    TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
-    $(info WARNING - aarch64 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=aarch64 instead)
-    TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
-    $(info WARNING - ppc64le variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=ppc64le instead)
-    TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
-    $(info WARNING - GCC variable has been deprecated)
-    $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
-    HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
-    $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH   := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
-    ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-        ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
-            TARGET_SIZE := 64
-        else ifneq (,$(filter $(TARGET_ARCH),armv7l))
-            TARGET_SIZE := 32
-        endif
-    else
-        TARGET_SIZE := $(shell getconf LONG_BIT)
-    endif
-else
-    $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
-    ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
-        HOST_ARCH := sbsa
-        TARGET_ARCH := sbsa
-    endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
-        $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
-    endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
-    TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS   := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
-    $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
-    ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
-        HOST_COMPILER ?= clang++
-    endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
-        ifeq ($(TARGET_OS),linux)
-            HOST_COMPILER ?= arm-linux-gnueabihf-g++
-        else ifeq ($(TARGET_OS),qnx)
-            ifeq ($(QNX_HOST),)
-                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
-            endif
-            ifeq ($(QNX_TARGET),)
-                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
-            endif
-            export QNX_HOST
-            export QNX_TARGET
-            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
-        else ifeq ($(TARGET_OS),android)
-            HOST_COMPILER ?= arm-linux-androideabi-g++
-        endif
-    else ifeq ($(TARGET_ARCH),aarch64)
-        ifeq ($(TARGET_OS), linux)
-            HOST_COMPILER ?= aarch64-linux-gnu-g++
-        else ifeq ($(TARGET_OS),qnx)
-            ifeq ($(QNX_HOST),)
-                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
-            endif
-            ifeq ($(QNX_TARGET),)
-                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
-            endif
-            export QNX_HOST
-            export QNX_TARGET
-            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
-        else ifeq ($(TARGET_OS), android)
-            HOST_COMPILER ?= aarch64-linux-android-clang++
-        endif
-    else ifeq ($(TARGET_ARCH),sbsa)
-        HOST_COMPILER ?= aarch64-linux-gnu-g++
-    else ifeq ($(TARGET_ARCH),ppc64le)
-        HOST_COMPILER ?= powerpc64le-linux-gnu-g++
-    endif
-endif
-HOST_COMPILER ?= g++
-NVCC          := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS   := -m${TARGET_SIZE}
-CCFLAGS     :=
-LDFLAGS     :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
-    ifneq ($(filter /%,$(HOST_COMPILER)),)
-        ifneq ($(findstring gcc,$(HOST_COMPILER)),)
-            ifneq ($(GCC_PATH),$(HOST_COMPILER))
-                LDFLAGS += -lstdc++
-            endif
-        endif
-    endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
-    LDFLAGS += -rpath $(CUDA_PATH)/lib
-    CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
-    LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
-    CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
-    LDFLAGS += -pie
-    CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
-        ifneq ($(TARGET_FS),)
-            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
-            ifeq ($(GCCVERSIONLTEQ46),1)
-                CCFLAGS += --sysroot=$(TARGET_FS)
-            endif
-            LDFLAGS += --sysroot=$(TARGET_FS)
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
-        endif
-    endif
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
-        ifneq ($(TARGET_FS),)
-            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
-            ifeq ($(GCCVERSIONLTEQ46),1)
-                CCFLAGS += --sysroot=$(TARGET_FS)
-            endif
-            LDFLAGS += --sysroot=$(TARGET_FS)
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
-            LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
-            CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
-            CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
-        endif
-    endif
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
-        NVCCFLAGS += -D_QNX_SOURCE
-        NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
-        CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
-        LDFLAGS += -lsocket
-        LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
-        CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
-        ifdef TARGET_OVERRIDE
-            LDFLAGS += -lslog2
-        endif
-
-        ifneq ($(TARGET_FS),)
-            LDFLAGS += -L$(TARGET_FS)/usr/lib
-            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
-            LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
-            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
-            CCFLAGS += -I$(TARGET_FS)/../include
-        endif
-    endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
-    NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
-    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
-    CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
-    CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
-      NVCCFLAGS += -g -G
-      BUILD_TYPE := debug
-else
-      BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES  := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Gencode arguments
-SMS ?=
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-ifeq ($(SMS),)
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-# Generate PTX code from SM 53
-GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
-else
-# Generate PTX code from SM 50
-GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
-endif
-endif
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-LIBRARIES += -lcublas
-
-################################################################################
-
-# Target rules
-all: build
-
-build: batchCUBLAS
-
-batchCUBLAS.o:batchCUBLAS.cpp
-	$(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-batchCUBLAS: batchCUBLAS.o
-	$(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
-	mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-	cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
-	./batchCUBLAS
-
-testrun: build
-
-clean:
-	rm -f batchCUBLAS batchCUBLAS.o
-	rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchCUBLAS
-
-clobber: clean
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
deleted file mode 100644
index 25d2e453..00000000
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
+++ /dev/null
@@ -1,89 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?> 
-<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
-<entry>
-  <name>batchCUBLAS</name>
-  <cuda_api_list>
-    <driver>cuRand</driver>
-    <driver>cuEqual</driver>
-    <toolkit>cudaMemcpy</toolkit>
-    <toolkit>cudaGetErrorString</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaGetLastError</toolkit>
-    <toolkit>cudaDeviceSynchronize</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaStreamCreate</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-  </cuda_api_list>
-  <description><![CDATA[A CUDA Sample that demonstrates how using batched CUBLAS API calls to improve overall performance.]]></description>
-  <devicecompilation>whole</devicecompilation>
-  <fallback_min_ptx>true</fallback_min_ptx>
-  <includepaths>
-    <path>./</path>
-    <path>../</path>
-    <path>../../../Common</path>
-  </includepaths>
-  <keyconcepts>
-    <concept level="basic">Linear Algebra</concept>
-    <concept level="basic">CUBLAS Library</concept>
-  </keyconcepts>
-  <keywords>
-    <keyword>CUBLAS</keyword>
-    <keyword>Linear Algebra</keyword>
-  </keywords>
-  <libraries>
-    <library>cublas</library>
-  </libraries>
-  <librarypaths>
-  </librarypaths>
-  <nsight_eclipse>true</nsight_eclipse>
-  <primary_file>batchCUBLAS.cpp</primary_file>
-  <required_dependencies>
-    <dependency>CUBLAS</dependency>
-  </required_dependencies>
-  <scopes>
-    <scope>1:CUDA Basic Topics</scope>
-    <scope>3:Linear Algebra</scope>
-  </scopes>
-  <sm-arch>sm50</sm-arch>
-  <sm-arch>sm52</sm-arch>
-  <sm-arch>sm53</sm-arch>
-  <sm-arch>sm60</sm-arch>
-  <sm-arch>sm61</sm-arch>
-  <sm-arch>sm70</sm-arch>
-  <sm-arch>sm72</sm-arch>
-  <sm-arch>sm75</sm-arch>
-  <sm-arch>sm80</sm-arch>
-  <sm-arch>sm86</sm-arch>
-  <sm-arch>sm87</sm-arch>
-  <sm-arch>sm89</sm-arch>
-  <sm-arch>sm90</sm-arch>
-  <supported_envs>
-    <env>
-      <arch>x86_64</arch>
-      <platform>linux</platform>
-    </env>
-    <env>
-      <platform>windows7</platform>
-    </env>
-    <env>
-      <arch>x86_64</arch>
-      <platform>macosx</platform>
-    </env>
-    <env>
-      <arch>arm</arch>
-    </env>
-    <env>
-      <arch>sbsa</arch>
-    </env>
-    <env>
-      <arch>ppc64le</arch>
-      <platform>linux</platform>
-    </env>
-  </supported_envs>
-  <supported_sm_architectures>
-    <include>all</include>
-  </supported_sm_architectures>
-  <title>batchCUBLAS</title>
-  <type>exe</type>
-</entry>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json
deleted file mode 100644
index f0066b0f..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
-    "configurations": [
-        {
-            "name": "Linux",
-            "includePath": [
-                "${workspaceFolder}/**",
-                "${workspaceFolder}/../../../Common"
-            ],
-            "defines": [],
-            "compilerPath": "/usr/local/cuda/bin/nvcc",
-            "cStandard": "gnu17",
-            "cppStandard": "gnu++14",
-            "intelliSenseMode": "linux-gcc-x64",
-            "configurationProvider": "ms-vscode.makefile-tools"
-        }
-    ],
-    "version": 4
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json
deleted file mode 100644
index c7eb54dc..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
-    "recommendations": [
-        "nvidia.nsight-vscode-edition",
-        "ms-vscode.cpptools",
-        "ms-vscode.makefile-tools"
-    ]
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json
deleted file mode 100644
index 45180508..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
-    "configurations": [
-        {
-            "name": "CUDA C++: Launch",
-            "type": "cuda-gdb",
-            "request": "launch",
-            "program": "${workspaceFolder}/batchedLabelMarkersAndLabelCompressionNPP"
-        }
-    ]
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json
deleted file mode 100644
index 4509aeb1..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
-    "version": "2.0.0",
-    "tasks": [
-        {
-            "label": "sample",
-            "type": "shell",
-            "command": "make dbg=1",
-            "problemMatcher": ["$nvcc"],
-            "group": {
-                "kind": "build",
-                "isDefault": true
-            }
-        }
-    ]
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index a6fa444d..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw
deleted file mode 100644
index 526a6c4f..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index d1e73735..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw
deleted file mode 100644
index 767615ed..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile
deleted file mode 100644
index 27201be9..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile
+++ /dev/null
@@ -1,372 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
-    $(info WARNING - x86_64 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=x86_64 instead)
-    TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
-    $(info WARNING - ARMv7 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=armv7l instead)
-    TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
-    $(info WARNING - aarch64 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=aarch64 instead)
-    TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
-    $(info WARNING - ppc64le variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=ppc64le instead)
-    TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
-    $(info WARNING - GCC variable has been deprecated)
-    $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
-    HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
-    $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH   := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
-    ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-        ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
-            TARGET_SIZE := 64
-        else ifneq (,$(filter $(TARGET_ARCH),armv7l))
-            TARGET_SIZE := 32
-        endif
-    else
-        TARGET_SIZE := $(shell getconf LONG_BIT)
-    endif
-else
-    $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
-    ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
-        HOST_ARCH := sbsa
-        TARGET_ARCH := sbsa
-    endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
-        $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
-    endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
-    TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS   := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
-    $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
-    ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
-        HOST_COMPILER ?= clang++
-    endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
-        ifeq ($(TARGET_OS),linux)
-            HOST_COMPILER ?= arm-linux-gnueabihf-g++
-        else ifeq ($(TARGET_OS),qnx)
-            ifeq ($(QNX_HOST),)
-                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
-            endif
-            ifeq ($(QNX_TARGET),)
-                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
-            endif
-            export QNX_HOST
-            export QNX_TARGET
-            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
-        else ifeq ($(TARGET_OS),android)
-            HOST_COMPILER ?= arm-linux-androideabi-g++
-        endif
-    else ifeq ($(TARGET_ARCH),aarch64)
-        ifeq ($(TARGET_OS), linux)
-            HOST_COMPILER ?= aarch64-linux-gnu-g++
-        else ifeq ($(TARGET_OS),qnx)
-            ifeq ($(QNX_HOST),)
-                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
-            endif
-            ifeq ($(QNX_TARGET),)
-                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
-            endif
-            export QNX_HOST
-            export QNX_TARGET
-            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
-        else ifeq ($(TARGET_OS), android)
-            HOST_COMPILER ?= aarch64-linux-android-clang++
-        endif
-    else ifeq ($(TARGET_ARCH),sbsa)
-        HOST_COMPILER ?= aarch64-linux-gnu-g++
-    else ifeq ($(TARGET_ARCH),ppc64le)
-        HOST_COMPILER ?= powerpc64le-linux-gnu-g++
-    endif
-endif
-HOST_COMPILER ?= g++
-NVCC          := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS   := -m${TARGET_SIZE}
-CCFLAGS     :=
-LDFLAGS     :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
-    ifneq ($(filter /%,$(HOST_COMPILER)),)
-        ifneq ($(findstring gcc,$(HOST_COMPILER)),)
-            ifneq ($(GCC_PATH),$(HOST_COMPILER))
-                LDFLAGS += -lstdc++
-            endif
-        endif
-    endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
-    LDFLAGS += -rpath $(CUDA_PATH)/lib
-    CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
-    LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
-    CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
-    LDFLAGS += -pie
-    CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
-        ifneq ($(TARGET_FS),)
-            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
-            ifeq ($(GCCVERSIONLTEQ46),1)
-                CCFLAGS += --sysroot=$(TARGET_FS)
-            endif
-            LDFLAGS += --sysroot=$(TARGET_FS)
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
-        endif
-    endif
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
-        ifneq ($(TARGET_FS),)
-            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
-            ifeq ($(GCCVERSIONLTEQ46),1)
-                CCFLAGS += --sysroot=$(TARGET_FS)
-            endif
-            LDFLAGS += --sysroot=$(TARGET_FS)
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
-            LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
-            CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
-            CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
-        endif
-    endif
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
-        NVCCFLAGS += -D_QNX_SOURCE
-        NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
-        CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
-        LDFLAGS += -lsocket
-        LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
-        CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
-        ifdef TARGET_OVERRIDE
-            LDFLAGS += -lslog2
-        endif
-
-        ifneq ($(TARGET_FS),)
-            LDFLAGS += -L$(TARGET_FS)/usr/lib
-            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
-            LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
-            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
-            CCFLAGS += -I$(TARGET_FS)/../include
-        endif
-    endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
-    NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
-    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
-    CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
-    CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
-      NVCCFLAGS += -g -G
-      BUILD_TYPE := debug
-else
-      BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on Mac OSX
-ifeq ($(TARGET_OS),darwin)
-  $(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on Mac OSX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES  := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Gencode arguments
-SMS ?=
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-ifeq ($(SMS),)
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-# Generate PTX code from SM 53
-GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
-else
-# Generate PTX code from SM 50
-GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
-endif
-endif
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: batchedLabelMarkersAndLabelCompressionNPP
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
-	@echo "Sample will be waived due to the above missing dependencies"
-else
-	@echo "Sample is ready - all dependencies have been met"
-endif
-
-batchedLabelMarkersAndLabelCompressionNPP.o:batchedLabelMarkersAndLabelCompressionNPP.cpp
-	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-batchedLabelMarkersAndLabelCompressionNPP: batchedLabelMarkersAndLabelCompressionNPP.o
-	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
-	$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-	$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
-	$(EXEC) ./batchedLabelMarkersAndLabelCompressionNPP
-
-testrun: build
-
-clean:
-	rm -f batchedLabelMarkersAndLabelCompressionNPP batchedLabelMarkersAndLabelCompressionNPP.o
-	rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchedLabelMarkersAndLabelCompressionNPP
-
-clobber: clean
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
deleted file mode 100644
index 9e7b07b9..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
+++ /dev/null
@@ -1,95 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?> 
-<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
-<entry>
-  <name>batchedLabelMarkersAndLabelCompressionNPP</name>
-  <cuda_api_list>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
-    <toolkit>cudaMallocPitch</toolkit>
-    <toolkit>cudaFree</toolkit>
-    <toolkit>cudaDeviceGetAttribute</toolkit>
-    <toolkit>cudaMallocHost</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-    <toolkit>cudaFreeHost</toolkit>
-    <toolkit>cudaGetDevice</toolkit>
-    <toolkit>cudaStreamGetFlags</toolkit>
-    <toolkit>cudaStreamSynchronize</toolkit>
-    <toolkit>cudaMalloc</toolkit>
-    <toolkit>cudaMemcpyAsync</toolkit>
-    <toolkit>cudaGetDeviceProperties</toolkit>
-  </cuda_api_list>
-  <description><![CDATA[An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both single image and batched image versions.]]></description>
-  <devicecompilation>whole</devicecompilation>
-  <fallback_min_ptx>true</fallback_min_ptx>
-  <includepaths>
-    <path>./</path>
-    <path>../</path>
-    <path>../../../Common</path>
-  </includepaths>
-  <keyconcepts>
-    <concept level="basic">Performance Strategies</concept>
-    <concept level="basic">Image Processing</concept>
-    <concept level="basic">NPP Library</concept>
-    <concept level="basic">Using NPP Batch Functions</concept>
-  </keyconcepts>
-  <keywords>
-    <keyword>CUDA</keyword>
-    <keyword>NPP</keyword>
-    <keyword>Image Processing</keyword>
-  </keywords>
-  <libraries>
-    <library>nppisu_static</library>
-    <library>nppif_static</library>
-    <library>nppc_static</library>
-    <library>culibos</library>
-  </libraries>
-  <librarypaths>
-  </librarypaths>
-  <nsight_eclipse>true</nsight_eclipse>
-  <primary_file>batchedLabelMarkersAndLabelCompressionNPP.cpp</primary_file>
-  <required_dependencies>
-    <dependency>NPP</dependency>
-  </required_dependencies>
-  <scopes>
-    <scope>1:CUDA Basic Topics</scope>
-    <scope>1:Performance Strategies</scope>
-    <scope>2:Image Processing</scope>
-    <scope>2:Computer Vision</scope>
-  </scopes>
-  <sm-arch>sm50</sm-arch>
-  <sm-arch>sm52</sm-arch>
-  <sm-arch>sm53</sm-arch>
-  <sm-arch>sm60</sm-arch>
-  <sm-arch>sm61</sm-arch>
-  <sm-arch>sm70</sm-arch>
-  <sm-arch>sm72</sm-arch>
-  <sm-arch>sm75</sm-arch>
-  <sm-arch>sm80</sm-arch>
-  <sm-arch>sm86</sm-arch>
-  <sm-arch>sm87</sm-arch>
-  <sm-arch>sm89</sm-arch>
-  <sm-arch>sm90</sm-arch>
-  <supported_envs>
-    <env>
-      <arch>x86_64</arch>
-      <platform>linux</platform>
-    </env>
-    <env>
-      <platform>windows7</platform>
-    </env>
-    <env>
-      <arch>arm</arch>
-    </env>
-    <env>
-      <arch>sbsa</arch>
-    </env>
-    <env>
-      <arch>ppc64le</arch>
-      <platform>linux</platform>
-    </env>
-  </supported_envs>
-  <supported_sm_architectures>
-    <include>all</include>
-  </supported_sm_architectures>
-  <title>Batched Label Markers And Label Compression NPP</title>
-  <type>exe</type>
-</entry>
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw
deleted file mode 100644
index 741413fc..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw
deleted file mode 100644
index aa597100..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw
deleted file mode 100644
index 3a7a30d3..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw
deleted file mode 100644
index ae6f02a6..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw
deleted file mode 100644
index fffe0e9b..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw
deleted file mode 100644
index d96e95eb..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw
deleted file mode 100644
index be539bd4..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw
deleted file mode 100644
index de5e1789..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw
deleted file mode 100644
index 14a6c202..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw
deleted file mode 100644
index 79d53426..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw
deleted file mode 100644
index e4323bec..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw
deleted file mode 100644
index 237c057d..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
deleted file mode 100644
index 28b1b353..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
+++ /dev/null
@@ -1,74 +0,0 @@
-# batchedLabelMarkersAndLabelCompressionNPP - Batched Label Markers And Label Compression NPP
-
-## Description
-
-An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both single image and batched image versions.
-
-## Key Concepts
-
-Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions
-
-## Supported SM Architectures
-
-[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
-
-## Supported OSes
-
-Linux, Windows
-
-## Supported CPU Architecture
-
-x86_64, ppc64le, armv7l
-
-## CUDA APIs involved
-
-### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaRuntimeGetVersion, cudaMallocPitch, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaDriverGetVersion, cudaFreeHost, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaGetDeviceProperties
-
-## Dependencies needed to build/run
-[NPP](../../../README.md#npp)
-
-## Prerequisites
-
-Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
-Make sure the dependencies mentioned in [Dependencies]() section above are installed.
-
-## Build and Run
-
-### Windows
-The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
-```
-*_vs<version>.sln - for Visual Studio <version>
-```
-Each individual sample has its own set of solution files in its directory:
-
-To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
-> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
-
-### Linux
-The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
-```
-$ cd <sample_dir>
-$ make
-```
-The samples makefiles can take advantage of certain options:
-*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
-    By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
-`$ make TARGET_ARCH=x86_64` <br/> `$ make TARGET_ARCH=ppc64le` <br/> `$ make TARGET_ARCH=armv7l` <br/>
-    See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
-*   **dbg=1** - build with debug symbols
-    ```
-    $ make dbg=1
-    ```
-*   **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
-    ```
-    $ make SMS="50 60"
-    ```
-
-*  **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
-```
-    $ make HOST_COMPILER=g++
-```
-
-## References (for more details)
-
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp
deleted file mode 100644
index 0a1efcc9..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp
+++ /dev/null
@@ -1,805 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of NVIDIA CORPORATION nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#define WINDOWS_LEAN_AND_MEAN
-#define NOMINMAX
-#include <windows.h>
-#pragma warning(disable : 4819)
-#endif
-
-#include <stdio.h>
-#include <string.h>
-#include <fstream>
-
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-#include <helper_string.h>
-#include <npp.h>
-
-// Note:  If you want to view these images we HIGHLY recommend using imagej
-//        which is free on the internet and works on most platforms
-//        because it is one of the few image viewing apps that can display 32
-//        bit integer image data.  While it normalizes the data to floating
-//        point values for viewing it still provides a good representation of
-//        the relative brightness of each label value. Note that label
-//        compression output results in smaller differences between label values
-//        making it visually more difficult to detect differences in labeled
-//        regions.  If you have an editor that can display hex values you can
-//        see what the exact values of each label is, every 4 bytes represents 1
-//        32 bit integer label value.
-//
-//        The files read and written by this sample app use RAW image format,
-//        that is, only the image data itself exists in the files with no image
-//        format information.   When viewing RAW files with imagej just enter
-//        the image size and bit depth values that are part of the file name
-//        when requested by imagej.
-//
-//        This sample app works in 2 stages, first it processes all of the
-//        images individually then it processes them all again in 1 batch using
-//        the Batch_Advanced versions of the NPP batch functions which allow
-//        each image to have it's own ROI.  The 2 stages are completely
-//        separable but in this sample the second stage takes advantage of some
-//        of the data that has already been initialized.
-//
-//        Note that there is a small amount of variability in the number of
-//        unique label markers generated from one run to the next by the UF
-//        algorithm.
-//
-//        Performance of ALL NPP image batch functions is limited by the maximum
-//        ROI height in the list of images.
-
-// Batched label compression support is only available on NPP versions > 11.0,
-// comment out if using NPP 11.0
-#define USE_BATCHED_LABEL_COMPRESSION 1
-
-#define NUMBER_OF_IMAGES 5
-
-Npp8u *pInputImageDev[NUMBER_OF_IMAGES];
-Npp8u *pInputImageHost[NUMBER_OF_IMAGES];
-Npp8u *pUFGenerateLabelsScratchBufferDev[NUMBER_OF_IMAGES];
-Npp8u *pUFCompressedLabelsScratchBufferDev[NUMBER_OF_IMAGES];
-Npp32u *pUFLabelDev[NUMBER_OF_IMAGES];
-Npp32u *pUFLabelHost[NUMBER_OF_IMAGES];
-NppiImageDescriptor *pUFBatchSrcImageListDev = 0;
-NppiImageDescriptor *pUFBatchSrcDstImageListDev = 0;
-NppiImageDescriptor *pUFBatchSrcImageListHost = 0;
-NppiImageDescriptor *pUFBatchSrcDstImageListHost = 0;
-NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListDev =
-    0;  // from nppi_filtering_functions.h
-NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListHost = 0;
-Npp32u *pUFBatchPerImageCompressedCountListDev = 0;
-Npp32u *pUFBatchPerImageCompressedCountListHost = 0;
-
-void tearDown()  // Clean up and tear down
-{
-  if (pUFBatchPerImageCompressedCountListDev != 0)
-    cudaFree(pUFBatchPerImageCompressedCountListDev);
-  if (pUFBatchSrcDstScratchBufferListDev != 0)
-    cudaFree(pUFBatchSrcDstScratchBufferListDev);
-  if (pUFBatchSrcDstImageListDev != 0) cudaFree(pUFBatchSrcDstImageListDev);
-  if (pUFBatchSrcImageListDev != 0) cudaFree(pUFBatchSrcImageListDev);
-  if (pUFBatchPerImageCompressedCountListHost != 0)
-    cudaFreeHost(pUFBatchPerImageCompressedCountListHost);
-  if (pUFBatchSrcDstScratchBufferListHost != 0)
-    cudaFreeHost(pUFBatchSrcDstScratchBufferListHost);
-  if (pUFBatchSrcDstImageListHost != 0)
-    cudaFreeHost(pUFBatchSrcDstImageListHost);
-  if (pUFBatchSrcImageListHost != 0) cudaFreeHost(pUFBatchSrcImageListHost);
-
-  for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
-    if (pUFCompressedLabelsScratchBufferDev[j] != 0)
-      cudaFree(pUFCompressedLabelsScratchBufferDev[j]);
-    if (pUFGenerateLabelsScratchBufferDev[j] != 0)
-      cudaFree(pUFGenerateLabelsScratchBufferDev[j]);
-    if (pUFLabelDev[j] != 0) cudaFree(pUFLabelDev[j]);
-    if (pInputImageDev[j] != 0) cudaFree(pInputImageDev[j]);
-    if (pUFLabelHost[j] != 0) cudaFreeHost(pUFLabelHost[j]);
-    if (pInputImageHost[j] != 0) cudaFreeHost(pInputImageHost[j]);
-  }
-}
-
-const std::string &LabelMarkersOutputFile0 =
-    "teapot_LabelMarkersUF_8Way_512x512_32u.raw";
-const std::string &LabelMarkersOutputFile1 =
-    "CT_skull_LabelMarkersUF_8Way_512x512_32u.raw";
-const std::string &LabelMarkersOutputFile2 =
-    "PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw";
-const std::string &LabelMarkersOutputFile3 =
-    "PCB2_LabelMarkersUF_8Way_1024x683_32u.raw";
-const std::string &LabelMarkersOutputFile4 =
-    "PCB_LabelMarkersUF_8Way_1280x720_32u.raw";
-
-const std::string &CompressedMarkerLabelsOutputFile0 =
-    "teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile1 =
-    "CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile2 =
-    "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile3 =
-    "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile4 =
-    "PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw";
-
-const std::string &LabelMarkersBatchOutputFile0 =
-    "teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw";
-const std::string &LabelMarkersBatchOutputFile1 =
-    "CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw";
-const std::string &LabelMarkersBatchOutputFile2 =
-    "PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw";
-const std::string &LabelMarkersBatchOutputFile3 =
-    "PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw";
-const std::string &LabelMarkersBatchOutputFile4 =
-    "PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw";
-
-const std::string &CompressedMarkerLabelsBatchOutputFile0 =
-    "teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile1 =
-    "CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile2 =
-    "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile3 =
-    "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile4 =
-    "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw";
-
-int loadRaw8BitImage(Npp8u *pImage, int nWidth, int nHeight, int nImage) {
-  FILE *bmpFile;
-  size_t nSize;
-
-  if (nImage == 0) {
-    if (nWidth != 512 || nHeight != 512) return -1;
-    const char *fileName = "teapot_512x512_8u.raw";
-    const char *InputFile = sdkFindFilePath(fileName, ".");
-    if (InputFile == NULL) {
-      printf("%s file not found.. exiting\n", fileName);
-      exit(EXIT_WAIVED);
-    }
-
-    FOPEN(bmpFile, InputFile, "rb");
-  } else if (nImage == 1) {
-    if (nWidth != 512 || nHeight != 512) return -1;
-    const char *fileName = "CT_skull_512x512_8u.raw";
-    const char *InputFile = sdkFindFilePath(fileName, ".");
-    if (InputFile == NULL) {
-      printf("%s file not found.. exiting\n", fileName);
-      exit(EXIT_WAIVED);
-    }
-
-    FOPEN(bmpFile, InputFile, "rb");
-  } else if (nImage == 2) {
-    if (nWidth != 509 || nHeight != 335) return -1;
-    const char *fileName = "PCB_METAL_509x335_8u.raw";
-    const char *InputFile = sdkFindFilePath(fileName, ".");
-    if (InputFile == NULL) {
-      printf("%s file not found.. exiting\n", fileName);
-      exit(EXIT_WAIVED);
-    }
-
-    FOPEN(bmpFile, InputFile, "rb");
-  } else if (nImage == 3) {
-    if (nWidth != 1024 || nHeight != 683) return -1;
-    const char *fileName = "PCB2_1024x683_8u.raw";
-    const char *InputFile = sdkFindFilePath(fileName, ".");
-    if (InputFile == NULL) {
-      printf("%s file not found.. exiting\n", fileName);
-      exit(EXIT_WAIVED);
-    }
-
-    FOPEN(bmpFile, InputFile, "rb");
-  } else if (nImage == 4) {
-    if (nWidth != 1280 || nHeight != 720) return -1;
-    const char *fileName = "PCB_1280x720_8u.raw";
-    const char *InputFile = sdkFindFilePath(fileName, ".");
-    if (InputFile == NULL) {
-      printf("%s file not found.. exiting\n", fileName);
-      exit(EXIT_WAIVED);
-    }
-
-    FOPEN(bmpFile, InputFile, "rb");
-  } else {
-    printf("Input file load failed.\n");
-    return -1;
-  }
-
-  if (bmpFile == NULL) return -1;
-  nSize = fread(pImage, 1, nWidth * nHeight, bmpFile);
-  if (nSize < nWidth * nHeight) {
-    fclose(bmpFile);
-    return -1;
-  }
-  fclose(bmpFile);
-
-  printf("Input file load succeeded.\n");
-
-  return 0;
-}
-
-int main(int argc, char **argv) {
-  int aGenerateLabelsScratchBufferSize[NUMBER_OF_IMAGES];
-  int aCompressLabelsScratchBufferSize[NUMBER_OF_IMAGES];
-
-  int nCompressedLabelCount = 0;
-  cudaError_t cudaError;
-  NppStatus nppStatus;
-  NppStreamContext nppStreamCtx;
-  FILE *bmpFile;
-
-  for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
-    pInputImageDev[j] = 0;
-    pInputImageHost[j] = 0;
-    pUFGenerateLabelsScratchBufferDev[j] = 0;
-    pUFCompressedLabelsScratchBufferDev[j] = 0;
-    pUFLabelDev[j] = 0;
-    pUFLabelHost[j] = 0;
-  }
-
-  nppStreamCtx.hStream = 0;  // The NULL stream by default, set this to whatever
-                             // your stream ID is if not the NULL stream.
-
-  cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
-  if (cudaError != cudaSuccess) {
-    printf("CUDA error: no devices supporting CUDA.\n");
-    return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
-  }
-
-  const NppLibraryVersion *libVer = nppGetLibVersion();
-
-  printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
-         libVer->build);
-
-  int driverVersion, runtimeVersion;
-  cudaDriverGetVersion(&driverVersion);
-  cudaRuntimeGetVersion(&runtimeVersion);
-
-  printf("CUDA Driver  Version: %d.%d\n", driverVersion / 1000,
-         (driverVersion % 100) / 10);
-  printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion / 1000,
-         (runtimeVersion % 100) / 10);
-
-  cudaError = cudaDeviceGetAttribute(
-      &nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
-      cudaDevAttrComputeCapabilityMajor, nppStreamCtx.nCudaDeviceId);
-  if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
-
-  cudaError = cudaDeviceGetAttribute(
-      &nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
-      cudaDevAttrComputeCapabilityMinor, nppStreamCtx.nCudaDeviceId);
-  if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
-
-  cudaError =
-      cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
-
-  cudaDeviceProp oDeviceProperties;
-
-  cudaError =
-      cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
-
-  nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
-  nppStreamCtx.nMaxThreadsPerMultiProcessor =
-      oDeviceProperties.maxThreadsPerMultiProcessor;
-  nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
-  nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
-
-  NppiSize oSizeROI[NUMBER_OF_IMAGES];
-
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    if (nImage == 0) {
-      oSizeROI[nImage].width = 512;
-      oSizeROI[nImage].height = 512;
-    } else if (nImage == 1) {
-      oSizeROI[nImage].width = 512;
-      oSizeROI[nImage].height = 512;
-    } else if (nImage == 2) {
-      oSizeROI[nImage].width = 509;
-      oSizeROI[nImage].height = 335;
-    } else if (nImage == 3) {
-      oSizeROI[nImage].width = 1024;
-      oSizeROI[nImage].height = 683;
-    } else if (nImage == 4) {
-      oSizeROI[nImage].width = 1280;
-      oSizeROI[nImage].height = 720;
-    }
-
-    // NOTE: While using cudaMallocPitch() to allocate device memory for NPP can
-    // significantly improve the performance of many NPP functions, for UF
-    // function label markers generation or compression DO NOT USE
-    // cudaMallocPitch().  Doing so could result in incorrect output.
-
-    cudaError = cudaMalloc(
-        (void **)&pInputImageDev[nImage],
-        oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height);
-    if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-    // For images processed with UF label markers functions ROI width and height
-    // for label markers generation output AND marker compression functions MUST
-    // be the same AND line pitch MUST be equal to ROI.width * sizeof(Npp32u).
-    // Also the image pointer used for label markers generation output must
-    // start at the same position in the image as it does in the marker
-    // compression function.  Also note that actual input image size and ROI do
-    // not necessarily need to be related other than ROI being less than or
-    // equal to image size and image starting position does not necessarily have
-    // to be at pixel 0 in the input image.
-
-    cudaError = cudaMalloc(
-        (void **)&pUFLabelDev[nImage],
-        oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height);
-    if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-    checkCudaErrors(cudaMallocHost(
-        &(pInputImageHost[nImage]),
-        oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height));
-    checkCudaErrors(cudaMallocHost(
-        &(pUFLabelHost[nImage]),
-        oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height));
-
-    // Use UF functions throughout this sample.
-
-    nppStatus = nppiLabelMarkersUFGetBufferSize_32u_C1R(
-        oSizeROI[nImage], &aGenerateLabelsScratchBufferSize[nImage]);
-
-    // One at a time image processing
-
-    cudaError = cudaMalloc((void **)&pUFGenerateLabelsScratchBufferDev[nImage],
-                           aGenerateLabelsScratchBufferSize[nImage]);
-    if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-    if (loadRaw8BitImage(pInputImageHost[nImage],
-                         oSizeROI[nImage].width * sizeof(Npp8u),
-                         oSizeROI[nImage].height, nImage) == 0) {
-      cudaError = cudaMemcpy2DAsync(
-          pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
-          pInputImageHost[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
-          oSizeROI[nImage].width * sizeof(Npp8u), oSizeROI[nImage].height,
-          cudaMemcpyHostToDevice, nppStreamCtx.hStream);
-
-      nppStatus = nppiLabelMarkersUF_8u32u_C1R_Ctx(
-          pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
-          pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-          oSizeROI[nImage], nppiNormInf,
-          pUFGenerateLabelsScratchBufferDev[nImage], nppStreamCtx);
-
-      if (nppStatus != NPP_SUCCESS) {
-        if (nImage == 0)
-          printf("teapot_LabelMarkersUF_8Way_512x512_32u failed.\n");
-        else if (nImage == 1)
-          printf("CT_skull_LabelMarkersUF_8Way_512x512_32u failed.\n");
-        else if (nImage == 2)
-          printf("PCB_METAL_LabelMarkersUF_8Way_509x335_32u failed.\n");
-        else if (nImage == 3)
-          printf("PCB2_LabelMarkersUF_8Way_1024x683_32u failed.\n");
-        else if (nImage == 4)
-          printf("PCB_LabelMarkersUF_8Way_1280x720_32u failed.\n");
-        tearDown();
-        return -1;
-      }
-
-      cudaError = cudaMemcpy2DAsync(
-          pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-          pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-          oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
-          cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
-
-      // Wait host image read backs to complete, not necessary if no need to
-      // synchronize
-      if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
-          cudaSuccess) {
-        printf("Post label generation cudaStreamSynchronize failed\n");
-        tearDown();
-        return -1;
-      }
-
-      if (nImage == 0)
-        FOPEN(bmpFile, LabelMarkersOutputFile0.c_str(), "wb");
-      else if (nImage == 1)
-        FOPEN(bmpFile, LabelMarkersOutputFile1.c_str(), "wb");
-      else if (nImage == 2)
-        FOPEN(bmpFile, LabelMarkersOutputFile2.c_str(), "wb");
-      else if (nImage == 3)
-        FOPEN(bmpFile, LabelMarkersOutputFile3.c_str(), "wb");
-      else if (nImage == 4)
-        FOPEN(bmpFile, LabelMarkersOutputFile4.c_str(), "wb");
-
-      if (bmpFile == NULL) return -1;
-      size_t nSize = 0;
-      for (int j = 0; j < oSizeROI[nImage].height; j++) {
-        nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
-                        sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
-      }
-      fclose(bmpFile);
-
-      nppStatus = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(
-          oSizeROI[nImage].width * oSizeROI[nImage].height,
-          &aCompressLabelsScratchBufferSize[nImage]);
-      if (nppStatus != NPP_NO_ERROR) return nppStatus;
-
-      cudaError =
-          cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[nImage],
-                     aCompressLabelsScratchBufferSize[nImage]);
-      if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-      nCompressedLabelCount = 0;
-
-      nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(
-          pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-          oSizeROI[nImage], oSizeROI[nImage].width * oSizeROI[nImage].height,
-          &nCompressedLabelCount, pUFCompressedLabelsScratchBufferDev[nImage]);
-
-      if (nppStatus != NPP_SUCCESS) {
-        if (nImage == 0)
-          printf("teapot_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
-        else if (nImage == 1)
-          printf(
-              "CT_Skull_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
-        else if (nImage == 2)
-          printf(
-              "PCB_METAL_CompressedLabelMarkersUF_8Way_509x335_32u failed.\n");
-        else if (nImage == 3)
-          printf("PCB2_CompressedLabelMarkersUF_8Way_1024x683_32u failed.\n");
-        else if (nImage == 4)
-          printf("PCB_CompressedLabelMarkersUF_8Way_1280x720_32u failed.\n");
-        tearDown();
-        return -1;
-      }
-
-      cudaError = cudaMemcpy2DAsync(
-          pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-          pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-          oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
-          cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
-
-      // Wait for host image read backs to finish, not necessary if no need to
-      // synchronize
-      if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
-              cudaSuccess ||
-          nCompressedLabelCount == 0) {
-        printf("Post label compression cudaStreamSynchronize failed\n");
-        tearDown();
-        return -1;
-      }
-
-      if (nImage == 0)
-        FOPEN(bmpFile, CompressedMarkerLabelsOutputFile0.c_str(), "wb");
-      else if (nImage == 1)
-        FOPEN(bmpFile, CompressedMarkerLabelsOutputFile1.c_str(), "wb");
-      else if (nImage == 2)
-        FOPEN(bmpFile, CompressedMarkerLabelsOutputFile2.c_str(), "wb");
-      else if (nImage == 3)
-        FOPEN(bmpFile, CompressedMarkerLabelsOutputFile3.c_str(), "wb");
-      else if (nImage == 4)
-        FOPEN(bmpFile, CompressedMarkerLabelsOutputFile4.c_str(), "wb");
-
-      if (bmpFile == NULL) return -1;
-      nSize = 0;
-      for (int j = 0; j < oSizeROI[nImage].height; j++) {
-        nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
-                        sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
-      }
-      fclose(bmpFile);
-
-      if (nImage == 0)
-        printf(
-            "teapot_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
-            "compressed label count is %d.\n",
-            nCompressedLabelCount);
-      else if (nImage == 1)
-        printf(
-            "CT_Skull_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
-            "compressed label count is %d.\n",
-            nCompressedLabelCount);
-      else if (nImage == 2)
-        printf(
-            "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u succeeded, "
-            "compressed label count is %d.\n",
-            nCompressedLabelCount);
-      else if (nImage == 3)
-        printf(
-            "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u succeeded, "
-            "compressed label count is %d.\n",
-            nCompressedLabelCount);
-      else if (nImage == 4)
-        printf(
-            "PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u succeeded, "
-            "compressed label count is %d.\n",
-            nCompressedLabelCount);
-    }
-  }
-
-  // Batch image processing
-
-  // We want to allocate scratch buffers more efficiently for batch processing
-  // so first we free up the scratch buffers for image 0 and reallocate them.
-  // This is not required but helps cudaMalloc to work more efficiently.
-
-  cudaFree(pUFCompressedLabelsScratchBufferDev[0]);
-
-  int nTotalBatchedUFCompressLabelsScratchBufferDevSize = 0;
-
-  for (int k = 0; k < NUMBER_OF_IMAGES; k++)
-    nTotalBatchedUFCompressLabelsScratchBufferDevSize +=
-        aCompressLabelsScratchBufferSize[k];
-
-  cudaError = cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[0],
-                         nTotalBatchedUFCompressLabelsScratchBufferDevSize);
-  if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-  // Now allocate batch lists
-
-  int nBatchImageListBytes = NUMBER_OF_IMAGES * sizeof(NppiImageDescriptor);
-
-  cudaError =
-      cudaMalloc((void **)&pUFBatchSrcImageListDev, nBatchImageListBytes);
-  if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-  cudaError =
-      cudaMalloc((void **)&pUFBatchSrcDstImageListDev, nBatchImageListBytes);
-  if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-  checkCudaErrors(
-      cudaMallocHost((void **)&pUFBatchSrcImageListHost, nBatchImageListBytes));
-
-  checkCudaErrors(cudaMallocHost((void **)&pUFBatchSrcDstImageListHost,
-                                 nBatchImageListBytes));
-
-  NppiSize oMaxROISize = {0, 0};
-
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    pUFBatchSrcImageListHost[nImage].pData = pInputImageDev[nImage];
-    pUFBatchSrcImageListHost[nImage].nStep =
-        oSizeROI[nImage].width * sizeof(Npp8u);
-    // src image oSize parameter is ignored in these NPP functions
-    pUFBatchSrcDstImageListHost[nImage].pData = pUFLabelDev[nImage];
-    pUFBatchSrcDstImageListHost[nImage].nStep =
-        oSizeROI[nImage].width * sizeof(Npp32u);
-    pUFBatchSrcDstImageListHost[nImage].oSize = oSizeROI[nImage];
-    if (oSizeROI[nImage].width > oMaxROISize.width)
-      oMaxROISize.width = oSizeROI[nImage].width;
-    if (oSizeROI[nImage].height > oMaxROISize.height)
-      oMaxROISize.height = oSizeROI[nImage].height;
-  }
-
-  // Copy label generation batch lists from CPU to GPU
-  cudaError = cudaMemcpyAsync(pUFBatchSrcImageListDev, pUFBatchSrcImageListHost,
-                              nBatchImageListBytes, cudaMemcpyHostToDevice,
-                              nppStreamCtx.hStream);
-  if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
-
-  cudaError = cudaMemcpyAsync(pUFBatchSrcDstImageListDev,
-                              pUFBatchSrcDstImageListHost, nBatchImageListBytes,
-                              cudaMemcpyHostToDevice, nppStreamCtx.hStream);
-  if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
-
-  // We use 8-way neighbor search throughout this example
-  nppStatus = nppiLabelMarkersUFBatch_8u32u_C1R_Advanced_Ctx(
-      pUFBatchSrcImageListDev, pUFBatchSrcDstImageListDev, NUMBER_OF_IMAGES,
-      oMaxROISize, nppiNormInf, nppStreamCtx);
-
-  if (nppStatus != NPP_SUCCESS) {
-    printf("LabelMarkersUFBatch_8Way_8u32u failed.\n");
-    tearDown();
-    return -1;
-  }
-
-  // Now read back generated device images to the host
-
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    cudaError = cudaMemcpy2DAsync(
-        pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-        pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-        oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
-        cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
-  }
-
-  // Wait for host image read backs to complete, not necessary if no need to
-  // synchronize
-  if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
-      cudaSuccess) {
-    printf("Post label generation cudaStreamSynchronize failed\n");
-    tearDown();
-    return -1;
-  }
-
-  // Save output to files
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    if (nImage == 0)
-      FOPEN(bmpFile, LabelMarkersBatchOutputFile0.c_str(), "wb");
-    else if (nImage == 1)
-      FOPEN(bmpFile, LabelMarkersBatchOutputFile1.c_str(), "wb");
-    else if (nImage == 2)
-      FOPEN(bmpFile, LabelMarkersBatchOutputFile2.c_str(), "wb");
-    else if (nImage == 3)
-      FOPEN(bmpFile, LabelMarkersBatchOutputFile3.c_str(), "wb");
-    else if (nImage == 4)
-      FOPEN(bmpFile, LabelMarkersBatchOutputFile4.c_str(), "wb");
-
-    if (bmpFile == NULL) return -1;
-    size_t nSize = 0;
-    for (int j = 0; j < oSizeROI[nImage].height; j++) {
-      nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
-                      sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
-    }
-    fclose(bmpFile);
-  }
-
-#ifdef USE_BATCHED_LABEL_COMPRESSION
-
-  // Now allocate scratch buffer memory for batched label compression
-  cudaError = cudaMalloc((void **)&pUFBatchSrcDstScratchBufferListDev,
-                         NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor));
-  if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-  cudaError = cudaMalloc((void **)&pUFBatchPerImageCompressedCountListDev,
-                         NUMBER_OF_IMAGES * sizeof(Npp32u));
-  if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
-  // Allocate host side scratch buffer point and size list and initialize with
-  // device scratch buffer pointers
-  checkCudaErrors(
-      cudaMallocHost((void **)&pUFBatchSrcDstScratchBufferListHost,
-                     NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor)));
-
-  checkCudaErrors(
-      cudaMallocHost((void **)&pUFBatchPerImageCompressedCountListHost,
-                     +NUMBER_OF_IMAGES * sizeof(Npp32u)));
-
-  // Start buffer pointer at beginning of full per image buffer list sized
-  // pUFCompressedLabelsScratchBufferDev[0]
-  Npp32u *pCurUFCompressedLabelsScratchBufferDev =
-      reinterpret_cast<Npp32u *>(pUFCompressedLabelsScratchBufferDev[0]);
-
-  int nMaxUFCompressedLabelsScratchBufferSize = 0;
-
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    // This particular function works on in-place data and SrcDst image batch
-    // list has already been initialized in batched label generation function
-    // setup
-
-    //  Initialize each per image buffer descriptor
-    pUFBatchSrcDstScratchBufferListHost[nImage].pData =
-        reinterpret_cast<void *>(pCurUFCompressedLabelsScratchBufferDev);
-    pUFBatchSrcDstScratchBufferListHost[nImage].nBufferSize =
-        aCompressLabelsScratchBufferSize[nImage];
-
-    if (aCompressLabelsScratchBufferSize[nImage] >
-        nMaxUFCompressedLabelsScratchBufferSize)
-      nMaxUFCompressedLabelsScratchBufferSize =
-          aCompressLabelsScratchBufferSize[nImage];
-
-    // Offset buffer pointer to next per image buffer
-    Npp8u *pTempBuffer =
-        reinterpret_cast<Npp8u *>(pCurUFCompressedLabelsScratchBufferDev);
-    pTempBuffer += aCompressLabelsScratchBufferSize[nImage];
-    pCurUFCompressedLabelsScratchBufferDev =
-        reinterpret_cast<Npp32u *>((void *)(pTempBuffer));
-  }
-
-  // Copy compression batch scratch buffer list from CPU to GPU
-  cudaError = cudaMemcpyAsync(pUFBatchSrcDstScratchBufferListDev,
-                              pUFBatchSrcDstScratchBufferListHost,
-                              NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor),
-                              cudaMemcpyHostToDevice, nppStreamCtx.hStream);
-  if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
-
-  nppStatus = nppiCompressMarkerLabelsUFBatch_32u_C1IR_Advanced_Ctx(
-      pUFBatchSrcDstImageListDev, pUFBatchSrcDstScratchBufferListDev,
-      pUFBatchPerImageCompressedCountListDev, NUMBER_OF_IMAGES, oMaxROISize,
-      nMaxUFCompressedLabelsScratchBufferSize, nppStreamCtx);
-  if (nppStatus != NPP_SUCCESS) {
-    printf("BatchCompressedLabelMarkersUF_8Way_32u failed.\n");
-    tearDown();
-    return -1;
-  }
-
-  // Copy output compressed label images back to host
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    cudaError = cudaMemcpy2DAsync(
-        pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-        pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
-        oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
-        cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
-  }
-
-  // Wait for host image read backs to complete, not necessary if no need to
-  // synchronize
-  if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
-      cudaSuccess) {
-    printf("Post label compression cudaStreamSynchronize failed\n");
-    tearDown();
-    return -1;
-  }
-
-  // Save compressed label images into files
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    if (nImage == 0)
-      FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile0.c_str(), "wb");
-    else if (nImage == 1)
-      FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile1.c_str(), "wb");
-    else if (nImage == 2)
-      FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile2.c_str(), "wb");
-    else if (nImage == 3)
-      FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile3.c_str(), "wb");
-    else if (nImage == 4)
-      FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile4.c_str(), "wb");
-
-    if (bmpFile == NULL) return -1;
-    size_t nSize = 0;
-    for (int j = 0; j < oSizeROI[nImage].height; j++) {
-      nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
-                      sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
-    }
-    fclose(bmpFile);
-  }
-
-  // Read back per image compressed label count.
-  cudaError = cudaMemcpyAsync(pUFBatchPerImageCompressedCountListHost,
-                              pUFBatchPerImageCompressedCountListDev,
-                              NUMBER_OF_IMAGES * sizeof(Npp32u),
-                              cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
-  if (cudaError != cudaSuccess) {
-    tearDown();
-    return NPP_MEMCPY_ERROR;
-  }
-
-  // Wait for host read back to complete
-  cudaError = cudaStreamSynchronize(nppStreamCtx.hStream);
-
-  printf("\n\n");
-
-  for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
-    if (nImage == 0)
-      printf(
-          "teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
-          "compressed label count is %d.\n",
-          pUFBatchPerImageCompressedCountListHost[nImage]);
-    else if (nImage == 1)
-      printf(
-          "CT_Skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
-          "compressed label count is %d.\n",
-          pUFBatchPerImageCompressedCountListHost[nImage]);
-    else if (nImage == 2)
-      printf(
-          "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u succeeded, "
-          "compressed label count is %d.\n",
-          pUFBatchPerImageCompressedCountListHost[nImage]);
-    else if (nImage == 3)
-      printf(
-          "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u succeeded, "
-          "compressed label count is %d.\n",
-          pUFBatchPerImageCompressedCountListHost[nImage]);
-    else if (nImage == 4)
-      printf(
-          "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u succeeded, "
-          "compressed label count is %d.\n",
-          pUFBatchPerImageCompressedCountListHost[nImage]);
-  }
-
-#endif  // USE_BATCHED_LABEL_COMPRESSION
-
-  tearDown();
-
-  return 0;
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index 5c387eea..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw
deleted file mode 100644
index 5c387eea..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index d8964918..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw
deleted file mode 100644
index d8964918..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt
new file mode 100644
index 00000000..cba9c714
--- /dev/null
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt
@@ -0,0 +1,40 @@
+# Include directories and libraries
+include_directories(
+    ../../../Common
+    ../../../Common/UtilNPP
+)
+
+# Source file
+set(SRC_FILES
+    boxFilterNPP.cpp
+)
+
+find_package(FreeImage)
+
+if(${FreeImage_FOUND})
+    # Add target for boxFilterNPP
+    add_executable(boxFilterNPP ${SRC_FILES})
+    set_target_properties(boxFilterNPP PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+    target_include_directories(boxFilterNPP PRIVATE
+        ${CUDAToolkit_INCLUDE_DIRS}
+        ${FreeImage_INCLUDE_DIRS}
+    )
+
+    target_link_libraries(boxFilterNPP PRIVATE
+        CUDA::nppc
+        CUDA::nppisu
+        CUDA::nppif
+        CUDA::cudart
+        ${FreeImage_LIBRARIES}
+    )
+
+    # Copy data files to output directory
+    add_custom_command(TARGET boxFilterNPP POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.pgm
+        ${CMAKE_CURRENT_BINARY_DIR}
+    )
+else()
+    message(STATUS "FreeImage not found - will not build sample 'boxFilterNPP'")
+endif()
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile b/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile
deleted file mode 100644
index 8b531a63..00000000
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile
+++ /dev/null
@@ -1,378 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
-    $(info WARNING - x86_64 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=x86_64 instead)
-    TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
-    $(info WARNING - ARMv7 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=armv7l instead)
-    TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
-    $(info WARNING - aarch64 variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=aarch64 instead)
-    TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
-    $(info WARNING - ppc64le variable has been deprecated)
-    $(info WARNING - please use TARGET_ARCH=ppc64le instead)
-    TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
-    $(info WARNING - GCC variable has been deprecated)
-    $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
-    HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
-    $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH   := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
-    ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-        ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
-            TARGET_SIZE := 64
-        else ifneq (,$(filter $(TARGET_ARCH),armv7l))
-            TARGET_SIZE := 32
-        endif
-    else
-        TARGET_SIZE := $(shell getconf LONG_BIT)
-    endif
-else
-    $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
-    ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
-        HOST_ARCH := sbsa
-        TARGET_ARCH := sbsa
-    endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
-        $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
-    endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
-    TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS   := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
-    $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
-    ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
-        HOST_COMPILER ?= clang++
-    endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
-        ifeq ($(TARGET_OS),linux)
-            HOST_COMPILER ?= arm-linux-gnueabihf-g++
-        else ifeq ($(TARGET_OS),qnx)
-            ifeq ($(QNX_HOST),)
-                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
-            endif
-            ifeq ($(QNX_TARGET),)
-                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
-            endif
-            export QNX_HOST
-            export QNX_TARGET
-            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
-        else ifeq ($(TARGET_OS),android)
-            HOST_COMPILER ?= arm-linux-androideabi-g++
-        endif
-    else ifeq ($(TARGET_ARCH),aarch64)
-        ifeq ($(TARGET_OS), linux)
-            HOST_COMPILER ?= aarch64-linux-gnu-g++
-        else ifeq ($(TARGET_OS),qnx)
-            ifeq ($(QNX_HOST),)
-                $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
-            endif
-            ifeq ($(QNX_TARGET),)
-                $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
-            endif
-            export QNX_HOST
-            export QNX_TARGET
-            HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
-        else ifeq ($(TARGET_OS), android)
-            HOST_COMPILER ?= aarch64-linux-android-clang++
-        endif
-    else ifeq ($(TARGET_ARCH),sbsa)
-        HOST_COMPILER ?= aarch64-linux-gnu-g++
-    else ifeq ($(TARGET_ARCH),ppc64le)
-        HOST_COMPILER ?= powerpc64le-linux-gnu-g++
-    endif
-endif
-HOST_COMPILER ?= g++
-NVCC          := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS   := -m${TARGET_SIZE}
-CCFLAGS     :=
-LDFLAGS     :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
-    ifneq ($(filter /%,$(HOST_COMPILER)),)
-        ifneq ($(findstring gcc,$(HOST_COMPILER)),)
-            ifneq ($(GCC_PATH),$(HOST_COMPILER))
-                LDFLAGS += -lstdc++
-            endif
-        endif
-    endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
-    LDFLAGS += -rpath $(CUDA_PATH)/lib
-    CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
-    LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
-    CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
-    LDFLAGS += -pie
-    CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
-        ifneq ($(TARGET_FS),)
-            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
-            ifeq ($(GCCVERSIONLTEQ46),1)
-                CCFLAGS += --sysroot=$(TARGET_FS)
-            endif
-            LDFLAGS += --sysroot=$(TARGET_FS)
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
-        endif
-    endif
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
-        ifneq ($(TARGET_FS),)
-            GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
-            ifeq ($(GCCVERSIONLTEQ46),1)
-                CCFLAGS += --sysroot=$(TARGET_FS)
-            endif
-            LDFLAGS += --sysroot=$(TARGET_FS)
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
-            LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
-            LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
-            CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
-            CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
-        endif
-    endif
-    ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
-        NVCCFLAGS += -D_QNX_SOURCE
-        NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
-        CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
-        LDFLAGS += -lsocket
-        LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
-        CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
-        ifdef TARGET_OVERRIDE
-            LDFLAGS += -lslog2
-        endif
-
-        ifneq ($(TARGET_FS),)
-            LDFLAGS += -L$(TARGET_FS)/usr/lib
-            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
-            LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
-            CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
-            CCFLAGS += -I$(TARGET_FS)/../include
-        endif
-    endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
-    NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
-    CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
-    CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
-    CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
-    CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
-    CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
-      NVCCFLAGS += -g -G
-      BUILD_TYPE := debug
-else
-      BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
-  $(info >>> WARNING - boxFilterNPP is not supported on QNX - waiving sample <<<)
-  SAMPLE_ENABLED := 0
-endif
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES  := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Gencode arguments
-SMS ?=
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-ifeq ($(SMS),)
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-# Generate PTX code from SM 53
-GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
-else
-# Generate PTX code from SM 50
-GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
-endif
-endif
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-INCLUDES += -I../../../Common/UtilNPP
-
-LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos -lfreeimage
-
-# Attempt to compile a minimal application linked against FreeImage. If a.out exists, FreeImage is properly set up.
-$(shell echo "#include \"FreeImage.h\"" > test.c; echo "int main() { return 0; }" >> test.c ; $(NVCC) $(ALL_CCFLAGS) $(INCLUDES) $(ALL_LDFLAGS) $(LIBRARIES) -l freeimage test.c)
-FREEIMAGE := $(shell find a.out 2>/dev/null)
-$(shell rm a.out test.c 2>/dev/null)
-
-ifeq ("$(FREEIMAGE)","")
-$(info >>> WARNING - FreeImage is not set up correctly. Please ensure FreeImage is set up correctly. <<<)
-SAMPLE_ENABLED := 0
-endif
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: boxFilterNPP
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
-	@echo "Sample will be waived due to the above missing dependencies"
-else
-	@echo "Sample is ready - all dependencies have been met"
-endif
-
-boxFilterNPP.o:boxFilterNPP.cpp
-	$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-boxFilterNPP: boxFilterNPP.o
-	$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
-	$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-	$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
-	$(EXEC) ./boxFilterNPP
-
-testrun: build
-
-clean:
-	rm -f boxFilterNPP boxFilterNPP.o
-	rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/boxFilterNPP
-
-clobber: clean
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
deleted file mode 100644
index 9d8e2b3f..00000000
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
+++ /dev/null
@@ -1,95 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?> 
-<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
-<entry>
-  <name>boxFilterNPP</name>
-  <cuda_api_list>
-    <toolkit>cudaRuntimeGetVersion</toolkit>
-    <toolkit>cudaDriverGetVersion</toolkit>
-  </cuda_api_list>
-  <description><![CDATA[A NPP CUDA Sample that demonstrates how to use NPP FilterBox function to perform a Box Filter.]]></description>
-  <devicecompilation>whole</devicecompilation>
-  <fallback_min_ptx>true</fallback_min_ptx>
-  <files>
-    <file>./teapot512.pgm</file>
-  </files>
-  <includepaths>
-    <path>../../../Common/UtilNPP</path>
-    <path os="Windows">../../../Common/FreeImage/Dist/x64</path>
-    <path>./</path>
-    <path>../</path>
-    <path>../../../Common</path>
-  </includepaths>
-  <keyconcepts>
-    <concept level="basic">Performance Strategies</concept>
-    <concept level="basic">Image Processing</concept>
-    <concept level="basic">NPP Library</concept>
-  </keyconcepts>
-  <keywords>
-    <keyword>CUDA</keyword>
-    <keyword>NPP</keyword>
-    <keyword>Image Processing</keyword>
-    <keyword>box filter</keyword>
-  </keywords>
-  <libraries>
-    <library>nppisu_static</library>
-    <library>nppif_static</library>
-    <library>nppc_static</library>
-    <library>culibos</library>
-    <library>freeimage</library>
-  </libraries>
-  <librarypaths>
-  </librarypaths>
-  <nsight_eclipse>true</nsight_eclipse>
-  <primary_file>boxFilterNPP.cpp</primary_file>
-  <required_dependencies>
-    <dependency>FreeImage</dependency>
-    <dependency>NPP</dependency>
-  </required_dependencies>
-  <scopes>
-    <scope>1:CUDA Basic Topics</scope>
-    <scope>1:Performance Strategies</scope>
-    <scope>2:Image Processing</scope>
-    <scope>2:Computer Vision</scope>
-  </scopes>
-  <sm-arch>sm50</sm-arch>
-  <sm-arch>sm52</sm-arch>
-  <sm-arch>sm53</sm-arch>
-  <sm-arch>sm60</sm-arch>
-  <sm-arch>sm61</sm-arch>
-  <sm-arch>sm70</sm-arch>
-  <sm-arch>sm72</sm-arch>
-  <sm-arch>sm75</sm-arch>
-  <sm-arch>sm80</sm-arch>
-  <sm-arch>sm86</sm-arch>
-  <sm-arch>sm87</sm-arch>
-  <sm-arch>sm89</sm-arch>
-  <sm-arch>sm90</sm-arch>
-  <supported_envs>
-    <env>
-      <arch>x86_64</arch>
-      <platform>linux</platform>
-    </env>
-    <env>
-      <platform>windows7</platform>
-    </env>
-    <env>
-      <arch>x86_64</arch>
-      <platform>macosx</platform>
-    </env>
-    <env>
-      <arch>arm</arch>
-    </env>
-    <env>
-      <arch>sbsa</arch>
-    </env>
-    <env>
-      <arch>ppc64le</arch>
-      <platform>linux</platform>
-    </env>
-  </supported_envs>
-  <supported_sm_architectures>
-    <include>all</include>
-  </supported_sm_architectures>
-  <title>Box Filter with NPP</title>
-  <type>exe</type>
-</entry>
diff --git a/Samples/CMakeLists.txt b/Samples/CMakeLists.txt
index 34d905d3..f06523ae 100644
--- a/Samples/CMakeLists.txt
+++ b/Samples/CMakeLists.txt
@@ -2,5 +2,6 @@ add_subdirectory(0_Introduction)
 add_subdirectory(1_Utilities)
 add_subdirectory(2_Concepts_and_Techniques)
 add_subdirectory(3_CUDA_Features)
+add_subdirectory(4_CUDA_Libraries)
 add_subdirectory(6_Performance)
 add_subdirectory(7_libNVVM)
diff --git a/cmake/Modules/FindFreeImage.cmake b/cmake/Modules/FindFreeImage.cmake
new file mode 100644
index 00000000..b03e99eb
--- /dev/null
+++ b/cmake/Modules/FindFreeImage.cmake
@@ -0,0 +1,17 @@
+find_path(FreeImage_INCLUDE_DIR
+  NAMES freeimage.h FreeImage.h
+  PATHS /usr/include /usr/local/include
+)
+
+find_library(FreeImage_LIBRARY
+  NAMES freeimage
+  PATHS /usr/lib /usr/local/lib
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FreeImage DEFAULT_MSG FreeImage_LIBRARY FreeImage_INCLUDE_DIR)
+
+if(FreeImage_FOUND)
+  set(FreeImage_LIBRARIES ${FreeImage_LIBRARY})
+  set(FreeImage_INCLUDE_DIRS ${FreeImage_INCLUDE_DIR})
+endif()