diff --git a/CHANGELOG.md b/CHANGELOG.md
index b4e6d9a3..3645503f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,9 @@
* `simpleVoteIntrinsics_nvrtc` demonstrating NVRTC usage for `simpleVoteIntrinsics` sample (reason: redundant)
* `2_Concepts_and_Techniques`
* `cuHook` demonstrating dlsym hooks. (reason: incompatible with modern `glibc`)
+ * `4_CUDA_Libraries`
+ * `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library)
+
### CUDA 12.5
diff --git a/Samples/4_CUDA_Libraries/CMakeLists.txt b/Samples/4_CUDA_Libraries/CMakeLists.txt
new file mode 100644
index 00000000..255d0446
--- /dev/null
+++ b/Samples/4_CUDA_Libraries/CMakeLists.txt
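@@ -0,0 +1,42 @@
+# Only samples with an active add_subdirectory() line below are configured by
+# this directory's CMake build; commented-out entries are currently skipped.
+#add_subdirectory(FilterBorderControlNPP)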
+#add_subdirectory(MersenneTwisterGP11213)
+add_subdirectory(batchCUBLAS)
+add_subdirectory(boxFilterNPP)
+#add_subdirectory(cannyEdgeDetectorNPP)
+#add_subdirectory(conjugateGradient)
+#add_subdirectory(conjugateGradientCudaGraphs)
+#add_subdirectory(conjugateGradientMultiBlockCG)
+#add_subdirectory(conjugateGradientMultiDeviceCG)
+#add_subdirectory(conjugateGradientPrecond)
+#add_subdirectory(conjugateGradientUM)
+#add_subdirectory(cuDLAErrorReporting)
+#add_subdirectory(cuDLAHybridMode)
+#add_subdirectory(cuDLALayerwiseStatsHybrid)
+#add_subdirectory(cuDLALayerwiseStatsStandalone)
+#add_subdirectory(cuDLAStandaloneMode)
+#add_subdirectory(cuSolverDn_LinearSolver)
+#add_subdirectory(cuSolverRf)
+#add_subdirectory(cuSolverSp_LinearSolver)
+#add_subdirectory(cuSolverSp_LowlevelCholesky)
+#add_subdirectory(cuSolverSp_LowlevelQR)
+#add_subdirectory(cudaNvSci)
+#add_subdirectory(cudaNvSciNvMedia)
+#add_subdirectory(freeImageInteropNPP)
+#add_subdirectory(histEqualizationNPP)
+#add_subdirectory(jitLto)
+#add_subdirectory(lineOfSight)
+#add_subdirectory(matrixMulCUBLAS)
+#add_subdirectory(nvJPEG)
+#add_subdirectory(nvJPEG_encoder)
+#add_subdirectory(oceanFFT)
+#add_subdirectory(randomFog)
+#add_subdirectory(simpleCUBLAS)
+#add_subdirectory(simpleCUBLASXT)
+#add_subdirectory(simpleCUBLAS_LU)
+#add_subdirectory(simpleCUFFT)
+#add_subdirectory(simpleCUFFT_2d_MGPU)
+#add_subdirectory(simpleCUFFT_MGPU)
+#add_subdirectory(simpleCUFFT_callback)
+#add_subdirectory(watershedSegmentationNPP)
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt
new file mode 100644
index 00000000..3351e24c
--- /dev/null
+++ b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt
@@ -0,0 +1,20 @@
+# batchCUBLAS sample: a host-only C++ executable linked against cuBLAS.
+# NOTE(review): CUDAToolkit_INCLUDE_DIRS and the CUDA:: targets come from
+# find_package(CUDAToolkit), which is not called here - presumably a parent
+# CMakeLists.txt does so; confirm.
+add_executable(batchCUBLAS
+    batchCUBLAS.cpp
+)
+set_target_properties(batchCUBLAS PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+# Include paths are attached to the target rather than the directory so they
+# stay scoped to batchCUBLAS (same search order as before: Common, then CUDA).
+target_include_directories(batchCUBLAS PRIVATE
+    "${CMAKE_CURRENT_SOURCE_DIR}/../../../Common"
+    ${CUDAToolkit_INCLUDE_DIRS}
+)
+
+target_link_libraries(batchCUBLAS PRIVATE
+    CUDA::cublas
+    CUDA::cudart
+)
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile b/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile
deleted file mode 100644
index 1e813cf0..00000000
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile
+++ /dev/null
@@ -1,347 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
- $(info WARNING - x86_64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=x86_64 instead)
- TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
- $(info WARNING - ARMv7 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=armv7l instead)
- TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
- $(info WARNING - aarch64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=aarch64 instead)
- TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
- $(info WARNING - ppc64le variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=ppc64le instead)
- TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
- $(info WARNING - GCC variable has been deprecated)
- $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
- HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
- $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
- ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
- TARGET_SIZE := 64
- else ifneq (,$(filter $(TARGET_ARCH),armv7l))
- TARGET_SIZE := 32
- endif
- else
- TARGET_SIZE := $(shell getconf LONG_BIT)
- endif
-else
- $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
- ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
- HOST_ARCH := sbsa
- TARGET_ARCH := sbsa
- endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
- $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
- endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
- TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
- $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
- HOST_COMPILER ?= clang++
- endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- ifeq ($(TARGET_OS),linux)
- HOST_COMPILER ?= arm-linux-gnueabihf-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
- else ifeq ($(TARGET_OS),android)
- HOST_COMPILER ?= arm-linux-androideabi-g++
- endif
- else ifeq ($(TARGET_ARCH),aarch64)
- ifeq ($(TARGET_OS), linux)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
- else ifeq ($(TARGET_OS), android)
- HOST_COMPILER ?= aarch64-linux-android-clang++
- endif
- else ifeq ($(TARGET_ARCH),sbsa)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_ARCH),ppc64le)
- HOST_COMPILER ?= powerpc64le-linux-gnu-g++
- endif
-endif
-HOST_COMPILER ?= g++
-NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS := -m${TARGET_SIZE}
-CCFLAGS :=
-LDFLAGS :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
- ifneq ($(filter /%,$(HOST_COMPILER)),)
- ifneq ($(findstring gcc,$(HOST_COMPILER)),)
- ifneq ($(GCC_PATH),$(HOST_COMPILER))
- LDFLAGS += -lstdc++
- endif
- endif
- endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -rpath $(CUDA_PATH)/lib
- CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
- CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
- LDFLAGS += -pie
- CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- NVCCFLAGS += -D_QNX_SOURCE
- NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
- CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
- LDFLAGS += -lsocket
- LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
- CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
- ifdef TARGET_OVERRIDE
- LDFLAGS += -lslog2
- endif
-
- ifneq ($(TARGET_FS),)
- LDFLAGS += -L$(TARGET_FS)/usr/lib
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
- LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
- CCFLAGS += -I$(TARGET_FS)/../include
- endif
- endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
- NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
- NVCCFLAGS += -g -G
- BUILD_TYPE := debug
-else
- BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Gencode arguments
-SMS ?=
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-ifeq ($(SMS),)
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-# Generate PTX code from SM 53
-GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
-else
-# Generate PTX code from SM 50
-GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
-endif
-endif
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-LIBRARIES += -lcublas
-
-################################################################################
-
-# Target rules
-all: build
-
-build: batchCUBLAS
-
-batchCUBLAS.o:batchCUBLAS.cpp
- $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-batchCUBLAS: batchCUBLAS.o
- $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
- mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
- ./batchCUBLAS
-
-testrun: build
-
-clean:
- rm -f batchCUBLAS batchCUBLAS.o
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchCUBLAS
-
-clobber: clean
diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
deleted file mode 100644
index 25d2e453..00000000
--- a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml
+++ /dev/null
@@ -1,89 +0,0 @@
-
-
-
- batchCUBLAS
-
- cuRand
- cuEqual
- cudaMemcpy
- cudaGetErrorString
- cudaFree
- cudaGetLastError
- cudaDeviceSynchronize
- cudaGetDevice
- cudaMalloc
- cudaStreamCreate
- cudaGetDeviceProperties
-
-
- whole
- true
-
- ./
- ../
- ../../../Common
-
-
- Linear Algebra
- CUBLAS Library
-
-
- CUBLAS
- Linear Algebra
-
-
- cublas
-
-
-
- true
- batchCUBLAS.cpp
-
- CUBLAS
-
-
- 1:CUDA Basic Topics
- 3:Linear Algebra
-
- sm50
- sm52
- sm53
- sm60
- sm61
- sm70
- sm72
- sm75
- sm80
- sm86
- sm87
- sm89
- sm90
-
-
- x86_64
- linux
-
-
- windows7
-
-
- x86_64
- macosx
-
-
- arm
-
-
- sbsa
-
-
- ppc64le
- linux
-
-
-
- all
-
- batchCUBLAS
- exe
-
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json
deleted file mode 100644
index f0066b0f..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json
+++ /dev/null
@@ -1,18 +0,0 @@
-{
- "configurations": [
- {
- "name": "Linux",
- "includePath": [
- "${workspaceFolder}/**",
- "${workspaceFolder}/../../../Common"
- ],
- "defines": [],
- "compilerPath": "/usr/local/cuda/bin/nvcc",
- "cStandard": "gnu17",
- "cppStandard": "gnu++14",
- "intelliSenseMode": "linux-gcc-x64",
- "configurationProvider": "ms-vscode.makefile-tools"
- }
- ],
- "version": 4
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json
deleted file mode 100644
index c7eb54dc..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json
+++ /dev/null
@@ -1,7 +0,0 @@
-{
- "recommendations": [
- "nvidia.nsight-vscode-edition",
- "ms-vscode.cpptools",
- "ms-vscode.makefile-tools"
- ]
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json
deleted file mode 100644
index 45180508..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json
+++ /dev/null
@@ -1,10 +0,0 @@
-{
- "configurations": [
- {
- "name": "CUDA C++: Launch",
- "type": "cuda-gdb",
- "request": "launch",
- "program": "${workspaceFolder}/batchedLabelMarkersAndLabelCompressionNPP"
- }
- ]
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json
deleted file mode 100644
index 4509aeb1..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json
+++ /dev/null
@@ -1,15 +0,0 @@
-{
- "version": "2.0.0",
- "tasks": [
- {
- "label": "sample",
- "type": "shell",
- "command": "make dbg=1",
- "problemMatcher": ["$nvcc"],
- "group": {
- "kind": "build",
- "isDefault": true
- }
- }
- ]
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index a6fa444d..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw
deleted file mode 100644
index 526a6c4f..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index d1e73735..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw
deleted file mode 100644
index 767615ed..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile
deleted file mode 100644
index 27201be9..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile
+++ /dev/null
@@ -1,372 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
- $(info WARNING - x86_64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=x86_64 instead)
- TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
- $(info WARNING - ARMv7 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=armv7l instead)
- TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
- $(info WARNING - aarch64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=aarch64 instead)
- TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
- $(info WARNING - ppc64le variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=ppc64le instead)
- TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
- $(info WARNING - GCC variable has been deprecated)
- $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
- HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
- $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
- ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
- TARGET_SIZE := 64
- else ifneq (,$(filter $(TARGET_ARCH),armv7l))
- TARGET_SIZE := 32
- endif
- else
- TARGET_SIZE := $(shell getconf LONG_BIT)
- endif
-else
- $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
- ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
- HOST_ARCH := sbsa
- TARGET_ARCH := sbsa
- endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
- $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
- endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
- TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
- $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
- HOST_COMPILER ?= clang++
- endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- ifeq ($(TARGET_OS),linux)
- HOST_COMPILER ?= arm-linux-gnueabihf-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
- else ifeq ($(TARGET_OS),android)
- HOST_COMPILER ?= arm-linux-androideabi-g++
- endif
- else ifeq ($(TARGET_ARCH),aarch64)
- ifeq ($(TARGET_OS), linux)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
- else ifeq ($(TARGET_OS), android)
- HOST_COMPILER ?= aarch64-linux-android-clang++
- endif
- else ifeq ($(TARGET_ARCH),sbsa)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_ARCH),ppc64le)
- HOST_COMPILER ?= powerpc64le-linux-gnu-g++
- endif
-endif
-HOST_COMPILER ?= g++
-NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS := -m${TARGET_SIZE}
-CCFLAGS :=
-LDFLAGS :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
- ifneq ($(filter /%,$(HOST_COMPILER)),)
- ifneq ($(findstring gcc,$(HOST_COMPILER)),)
- ifneq ($(GCC_PATH),$(HOST_COMPILER))
- LDFLAGS += -lstdc++
- endif
- endif
- endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -rpath $(CUDA_PATH)/lib
- CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
- CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
- LDFLAGS += -pie
- CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- NVCCFLAGS += -D_QNX_SOURCE
- NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
- CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
- LDFLAGS += -lsocket
- LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
- CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
- ifdef TARGET_OVERRIDE
- LDFLAGS += -lslog2
- endif
-
- ifneq ($(TARGET_FS),)
- LDFLAGS += -L$(TARGET_FS)/usr/lib
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
- LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
- CCFLAGS += -I$(TARGET_FS)/../include
- endif
- endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
- NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
- NVCCFLAGS += -g -G
- BUILD_TYPE := debug
-else
- BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on Mac OSX
-ifeq ($(TARGET_OS),darwin)
- $(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on Mac OSX - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
- $(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on QNX - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Gencode arguments
-SMS ?=
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-ifeq ($(SMS),)
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-# Generate PTX code from SM 53
-GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
-else
-# Generate PTX code from SM 50
-GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
-endif
-endif
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: batchedLabelMarkersAndLabelCompressionNPP
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
- @echo "Sample will be waived due to the above missing dependencies"
-else
- @echo "Sample is ready - all dependencies have been met"
-endif
-
-batchedLabelMarkersAndLabelCompressionNPP.o:batchedLabelMarkersAndLabelCompressionNPP.cpp
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-batchedLabelMarkersAndLabelCompressionNPP: batchedLabelMarkersAndLabelCompressionNPP.o
- $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
- $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
- $(EXEC) ./batchedLabelMarkersAndLabelCompressionNPP
-
-testrun: build
-
-clean:
- rm -f batchedLabelMarkersAndLabelCompressionNPP batchedLabelMarkersAndLabelCompressionNPP.o
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchedLabelMarkersAndLabelCompressionNPP
-
-clobber: clean
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
deleted file mode 100644
index 9e7b07b9..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml
+++ /dev/null
@@ -1,95 +0,0 @@
-
-
-
- batchedLabelMarkersAndLabelCompressionNPP
-
- cudaRuntimeGetVersion
- cudaMallocPitch
- cudaFree
- cudaDeviceGetAttribute
- cudaMallocHost
- cudaDriverGetVersion
- cudaFreeHost
- cudaGetDevice
- cudaStreamGetFlags
- cudaStreamSynchronize
- cudaMalloc
- cudaMemcpyAsync
- cudaGetDeviceProperties
-
-
- whole
- true
-
- ./
- ../
- ../../../Common
-
-
- Performance Strategies
- Image Processing
- NPP Library
- Using NPP Batch Functions
-
-
- CUDA
- NPP
- Image Processing
-
-
- nppisu_static
- nppif_static
- nppc_static
- culibos
-
-
-
- true
- batchedLabelMarkersAndLabelCompressionNPP.cpp
-
- NPP
-
-
- 1:CUDA Basic Topics
- 1:Performance Strategies
- 2:Image Processing
- 2:Computer Vision
-
- sm50
- sm52
- sm53
- sm60
- sm61
- sm70
- sm72
- sm75
- sm80
- sm86
- sm87
- sm89
- sm90
-
-
- x86_64
- linux
-
-
- windows7
-
-
- arm
-
-
- sbsa
-
-
- ppc64le
- linux
-
-
-
- all
-
- Batched Label Markers And Label Compression NPP
- exe
-
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw
deleted file mode 100644
index 741413fc..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw
deleted file mode 100644
index aa597100..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw
deleted file mode 100644
index 3a7a30d3..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw
deleted file mode 100644
index ae6f02a6..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw
deleted file mode 100644
index fffe0e9b..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw
deleted file mode 100644
index d96e95eb..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw
deleted file mode 100644
index be539bd4..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw
deleted file mode 100644
index de5e1789..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw
deleted file mode 100644
index 14a6c202..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw
deleted file mode 100644
index 79d53426..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw
deleted file mode 100644
index e4323bec..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw
deleted file mode 100644
index 237c057d..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
deleted file mode 100644
index 28b1b353..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md
+++ /dev/null
@@ -1,74 +0,0 @@
-# batchedLabelMarkersAndLabelCompressionNPP - Batched Label Markers And Label Compression NPP
-
-## Description
-
-An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both single image and batched image versions.
-
-## Key Concepts
-
-Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions
-
-## Supported SM Architectures
-
-[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
-
-## Supported OSes
-
-Linux, Windows
-
-## Supported CPU Architecture
-
-x86_64, ppc64le, armv7l
-
-## CUDA APIs involved
-
-### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaRuntimeGetVersion, cudaMallocPitch, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaDriverGetVersion, cudaFreeHost, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaGetDeviceProperties
-
-## Dependencies needed to build/run
-[NPP](../../../README.md#npp)
-
-## Prerequisites
-
-Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
-Make sure the dependencies mentioned in [Dependencies]() section above are installed.
-
-## Build and Run
-
-### Windows
-The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
-```
-*_vs.sln - for Visual Studio
-```
-Each individual sample has its own set of solution files in its directory:
-
-To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
-> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
-
-### Linux
-The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
-```
-$ cd
-$ make
-```
-The samples makefiles can take advantage of certain options:
-* **TARGET_ARCH=** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l.
- By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.
-`$ make TARGET_ARCH=x86_64`
`$ make TARGET_ARCH=ppc64le`
`$ make TARGET_ARCH=armv7l`
- See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
-* **dbg=1** - build with debug symbols
- ```
- $ make dbg=1
- ```
-* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
- ```
- $ make SMS="50 60"
- ```
-
-* **HOST_COMPILER=** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
-```
- $ make HOST_COMPILER=g++
-```
-
-## References (for more details)
-
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp
deleted file mode 100644
index 0a1efcc9..00000000
--- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp
+++ /dev/null
@@ -1,805 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * * Neither the name of NVIDIA CORPORATION nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#define WINDOWS_LEAN_AND_MEAN
-#define NOMINMAX
-#include
-#pragma warning(disable : 4819)
-#endif
-
-#include
-#include
-#include
-
-#include
-#include
-#include
-#include
-
-// Note: If you want to view these images we HIGHLY recommend using imagej
-// which is free on the internet and works on most platforms
-// because it is one of the few image viewing apps that can display 32
-// bit integer image data. While it normalizes the data to floating
-// point values for viewing it still provides a good representation of
-// the relative brightness of each label value. Note that label
-// compression output results in smaller differences between label values
-// making it visually more difficult to detect differences in labeled
-// regions. If you have an editor that can display hex values you can
-// see what the exact values of each label is, every 4 bytes represents 1
-// 32 bit integer label value.
-//
-// The files read and written by this sample app use RAW image format,
-// that is, only the image data itself exists in the files with no image
-// format information. When viewing RAW files with imagej just enter
-// the image size and bit depth values that are part of the file name
-// when requested by imagej.
-//
-// This sample app works in 2 stages, first it processes all of the
-// images individually then it processes them all again in 1 batch using
-// the Batch_Advanced versions of the NPP batch functions which allow
-// each image to have it's own ROI. The 2 stages are completely
-// separable but in this sample the second stage takes advantage of some
-// of the data that has already been initialized.
-//
-// Note that there is a small amount of variability in the number of
-// unique label markers generated from one run to the next by the UF
-// algorithm.
-//
-// Performance of ALL NPP image batch functions is limited by the maximum
-// ROI height in the list of images.
-
-// Batched label compression support is only available on NPP versions > 11.0,
-// comment out if using NPP 11.0
-#define USE_BATCHED_LABEL_COMPRESSION 1
-
-#define NUMBER_OF_IMAGES 5
-
-Npp8u *pInputImageDev[NUMBER_OF_IMAGES];
-Npp8u *pInputImageHost[NUMBER_OF_IMAGES];
-Npp8u *pUFGenerateLabelsScratchBufferDev[NUMBER_OF_IMAGES];
-Npp8u *pUFCompressedLabelsScratchBufferDev[NUMBER_OF_IMAGES];
-Npp32u *pUFLabelDev[NUMBER_OF_IMAGES];
-Npp32u *pUFLabelHost[NUMBER_OF_IMAGES];
-NppiImageDescriptor *pUFBatchSrcImageListDev = 0;
-NppiImageDescriptor *pUFBatchSrcDstImageListDev = 0;
-NppiImageDescriptor *pUFBatchSrcImageListHost = 0;
-NppiImageDescriptor *pUFBatchSrcDstImageListHost = 0;
-NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListDev =
- 0; // from nppi_filtering_functions.h
-NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListHost = 0;
-Npp32u *pUFBatchPerImageCompressedCountListDev = 0;
-Npp32u *pUFBatchPerImageCompressedCountListHost = 0;
-
-void tearDown() // Clean up and tear down
-{
- if (pUFBatchPerImageCompressedCountListDev != 0)
- cudaFree(pUFBatchPerImageCompressedCountListDev);
- if (pUFBatchSrcDstScratchBufferListDev != 0)
- cudaFree(pUFBatchSrcDstScratchBufferListDev);
- if (pUFBatchSrcDstImageListDev != 0) cudaFree(pUFBatchSrcDstImageListDev);
- if (pUFBatchSrcImageListDev != 0) cudaFree(pUFBatchSrcImageListDev);
- if (pUFBatchPerImageCompressedCountListHost != 0)
- cudaFreeHost(pUFBatchPerImageCompressedCountListHost);
- if (pUFBatchSrcDstScratchBufferListHost != 0)
- cudaFreeHost(pUFBatchSrcDstScratchBufferListHost);
- if (pUFBatchSrcDstImageListHost != 0)
- cudaFreeHost(pUFBatchSrcDstImageListHost);
- if (pUFBatchSrcImageListHost != 0) cudaFreeHost(pUFBatchSrcImageListHost);
-
- for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
- if (pUFCompressedLabelsScratchBufferDev[j] != 0)
- cudaFree(pUFCompressedLabelsScratchBufferDev[j]);
- if (pUFGenerateLabelsScratchBufferDev[j] != 0)
- cudaFree(pUFGenerateLabelsScratchBufferDev[j]);
- if (pUFLabelDev[j] != 0) cudaFree(pUFLabelDev[j]);
- if (pInputImageDev[j] != 0) cudaFree(pInputImageDev[j]);
- if (pUFLabelHost[j] != 0) cudaFreeHost(pUFLabelHost[j]);
- if (pInputImageHost[j] != 0) cudaFreeHost(pInputImageHost[j]);
- }
-}
-
-const std::string &LabelMarkersOutputFile0 =
- "teapot_LabelMarkersUF_8Way_512x512_32u.raw";
-const std::string &LabelMarkersOutputFile1 =
- "CT_skull_LabelMarkersUF_8Way_512x512_32u.raw";
-const std::string &LabelMarkersOutputFile2 =
- "PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw";
-const std::string &LabelMarkersOutputFile3 =
- "PCB2_LabelMarkersUF_8Way_1024x683_32u.raw";
-const std::string &LabelMarkersOutputFile4 =
- "PCB_LabelMarkersUF_8Way_1280x720_32u.raw";
-
-const std::string &CompressedMarkerLabelsOutputFile0 =
- "teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile1 =
- "CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile2 =
- "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile3 =
- "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw";
-const std::string &CompressedMarkerLabelsOutputFile4 =
- "PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw";
-
-const std::string &LabelMarkersBatchOutputFile0 =
- "teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw";
-const std::string &LabelMarkersBatchOutputFile1 =
- "CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw";
-const std::string &LabelMarkersBatchOutputFile2 =
- "PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw";
-const std::string &LabelMarkersBatchOutputFile3 =
- "PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw";
-const std::string &LabelMarkersBatchOutputFile4 =
- "PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw";
-
-const std::string &CompressedMarkerLabelsBatchOutputFile0 =
- "teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile1 =
- "CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile2 =
- "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile3 =
- "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw";
-const std::string &CompressedMarkerLabelsBatchOutputFile4 =
- "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw";
-
-int loadRaw8BitImage(Npp8u *pImage, int nWidth, int nHeight, int nImage) {
- FILE *bmpFile;
- size_t nSize;
-
- if (nImage == 0) {
- if (nWidth != 512 || nHeight != 512) return -1;
- const char *fileName = "teapot_512x512_8u.raw";
- const char *InputFile = sdkFindFilePath(fileName, ".");
- if (InputFile == NULL) {
- printf("%s file not found.. exiting\n", fileName);
- exit(EXIT_WAIVED);
- }
-
- FOPEN(bmpFile, InputFile, "rb");
- } else if (nImage == 1) {
- if (nWidth != 512 || nHeight != 512) return -1;
- const char *fileName = "CT_skull_512x512_8u.raw";
- const char *InputFile = sdkFindFilePath(fileName, ".");
- if (InputFile == NULL) {
- printf("%s file not found.. exiting\n", fileName);
- exit(EXIT_WAIVED);
- }
-
- FOPEN(bmpFile, InputFile, "rb");
- } else if (nImage == 2) {
- if (nWidth != 509 || nHeight != 335) return -1;
- const char *fileName = "PCB_METAL_509x335_8u.raw";
- const char *InputFile = sdkFindFilePath(fileName, ".");
- if (InputFile == NULL) {
- printf("%s file not found.. exiting\n", fileName);
- exit(EXIT_WAIVED);
- }
-
- FOPEN(bmpFile, InputFile, "rb");
- } else if (nImage == 3) {
- if (nWidth != 1024 || nHeight != 683) return -1;
- const char *fileName = "PCB2_1024x683_8u.raw";
- const char *InputFile = sdkFindFilePath(fileName, ".");
- if (InputFile == NULL) {
- printf("%s file not found.. exiting\n", fileName);
- exit(EXIT_WAIVED);
- }
-
- FOPEN(bmpFile, InputFile, "rb");
- } else if (nImage == 4) {
- if (nWidth != 1280 || nHeight != 720) return -1;
- const char *fileName = "PCB_1280x720_8u.raw";
- const char *InputFile = sdkFindFilePath(fileName, ".");
- if (InputFile == NULL) {
- printf("%s file not found.. exiting\n", fileName);
- exit(EXIT_WAIVED);
- }
-
- FOPEN(bmpFile, InputFile, "rb");
- } else {
- printf("Input file load failed.\n");
- return -1;
- }
-
- if (bmpFile == NULL) return -1;
- nSize = fread(pImage, 1, nWidth * nHeight, bmpFile);
- if (nSize < nWidth * nHeight) {
- fclose(bmpFile);
- return -1;
- }
- fclose(bmpFile);
-
- printf("Input file load succeeded.\n");
-
- return 0;
-}
-
-int main(int argc, char **argv) {
- int aGenerateLabelsScratchBufferSize[NUMBER_OF_IMAGES];
- int aCompressLabelsScratchBufferSize[NUMBER_OF_IMAGES];
-
- int nCompressedLabelCount = 0;
- cudaError_t cudaError;
- NppStatus nppStatus;
- NppStreamContext nppStreamCtx;
- FILE *bmpFile;
-
- for (int j = 0; j < NUMBER_OF_IMAGES; j++) {
- pInputImageDev[j] = 0;
- pInputImageHost[j] = 0;
- pUFGenerateLabelsScratchBufferDev[j] = 0;
- pUFCompressedLabelsScratchBufferDev[j] = 0;
- pUFLabelDev[j] = 0;
- pUFLabelHost[j] = 0;
- }
-
- nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever
- // your stream ID is if not the NULL stream.
-
- cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId);
- if (cudaError != cudaSuccess) {
- printf("CUDA error: no devices supporting CUDA.\n");
- return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
- }
-
- const NppLibraryVersion *libVer = nppGetLibVersion();
-
- printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor,
- libVer->build);
-
- int driverVersion, runtimeVersion;
- cudaDriverGetVersion(&driverVersion);
- cudaRuntimeGetVersion(&runtimeVersion);
-
- printf("CUDA Driver Version: %d.%d\n", driverVersion / 1000,
- (driverVersion % 100) / 10);
- printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion / 1000,
- (runtimeVersion % 100) / 10);
-
- cudaError = cudaDeviceGetAttribute(
- &nppStreamCtx.nCudaDevAttrComputeCapabilityMajor,
- cudaDevAttrComputeCapabilityMajor, nppStreamCtx.nCudaDeviceId);
- if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
-
- cudaError = cudaDeviceGetAttribute(
- &nppStreamCtx.nCudaDevAttrComputeCapabilityMinor,
- cudaDevAttrComputeCapabilityMinor, nppStreamCtx.nCudaDeviceId);
- if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY;
-
- cudaError =
- cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags);
-
- cudaDeviceProp oDeviceProperties;
-
- cudaError =
- cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId);
-
- nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount;
- nppStreamCtx.nMaxThreadsPerMultiProcessor =
- oDeviceProperties.maxThreadsPerMultiProcessor;
- nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock;
- nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock;
-
- NppiSize oSizeROI[NUMBER_OF_IMAGES];
-
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- if (nImage == 0) {
- oSizeROI[nImage].width = 512;
- oSizeROI[nImage].height = 512;
- } else if (nImage == 1) {
- oSizeROI[nImage].width = 512;
- oSizeROI[nImage].height = 512;
- } else if (nImage == 2) {
- oSizeROI[nImage].width = 509;
- oSizeROI[nImage].height = 335;
- } else if (nImage == 3) {
- oSizeROI[nImage].width = 1024;
- oSizeROI[nImage].height = 683;
- } else if (nImage == 4) {
- oSizeROI[nImage].width = 1280;
- oSizeROI[nImage].height = 720;
- }
-
- // NOTE: While using cudaMallocPitch() to allocate device memory for NPP can
- // significantly improve the performance of many NPP functions, for UF
- // function label markers generation or compression DO NOT USE
- // cudaMallocPitch(). Doing so could result in incorrect output.
-
- cudaError = cudaMalloc(
- (void **)&pInputImageDev[nImage],
- oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height);
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- // For images processed with UF label markers functions ROI width and height
- // for label markers generation output AND marker compression functions MUST
- // be the same AND line pitch MUST be equal to ROI.width * sizeof(Npp32u).
- // Also the image pointer used for label markers generation output must
- // start at the same position in the image as it does in the marker
- // compression function. Also note that actual input image size and ROI do
- // not necessarily need to be related other than ROI being less than or
- // equal to image size and image starting position does not necessarily have
- // to be at pixel 0 in the input image.
-
- cudaError = cudaMalloc(
- (void **)&pUFLabelDev[nImage],
- oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height);
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- checkCudaErrors(cudaMallocHost(
- &(pInputImageHost[nImage]),
- oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height));
- checkCudaErrors(cudaMallocHost(
- &(pUFLabelHost[nImage]),
- oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height));
-
- // Use UF functions throughout this sample.
-
- nppStatus = nppiLabelMarkersUFGetBufferSize_32u_C1R(
- oSizeROI[nImage], &aGenerateLabelsScratchBufferSize[nImage]);
-
- // One at a time image processing
-
- cudaError = cudaMalloc((void **)&pUFGenerateLabelsScratchBufferDev[nImage],
- aGenerateLabelsScratchBufferSize[nImage]);
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- if (loadRaw8BitImage(pInputImageHost[nImage],
- oSizeROI[nImage].width * sizeof(Npp8u),
- oSizeROI[nImage].height, nImage) == 0) {
- cudaError = cudaMemcpy2DAsync(
- pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
- pInputImageHost[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
- oSizeROI[nImage].width * sizeof(Npp8u), oSizeROI[nImage].height,
- cudaMemcpyHostToDevice, nppStreamCtx.hStream);
-
- nppStatus = nppiLabelMarkersUF_8u32u_C1R_Ctx(
- pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u),
- pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- oSizeROI[nImage], nppiNormInf,
- pUFGenerateLabelsScratchBufferDev[nImage], nppStreamCtx);
-
- if (nppStatus != NPP_SUCCESS) {
- if (nImage == 0)
- printf("teapot_LabelMarkersUF_8Way_512x512_32u failed.\n");
- else if (nImage == 1)
- printf("CT_skull_LabelMarkersUF_8Way_512x512_32u failed.\n");
- else if (nImage == 2)
- printf("PCB_METAL_LabelMarkersUF_8Way_509x335_32u failed.\n");
- else if (nImage == 3)
- printf("PCB2_LabelMarkersUF_8Way_1024x683_32u failed.\n");
- else if (nImage == 4)
- printf("PCB_LabelMarkersUF_8Way_1280x720_32u failed.\n");
- tearDown();
- return -1;
- }
-
- cudaError = cudaMemcpy2DAsync(
- pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
- cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
-
- // Wait host image read backs to complete, not necessary if no need to
- // synchronize
- if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
- cudaSuccess) {
- printf("Post label generation cudaStreamSynchronize failed\n");
- tearDown();
- return -1;
- }
-
- if (nImage == 0)
- FOPEN(bmpFile, LabelMarkersOutputFile0.c_str(), "wb");
- else if (nImage == 1)
- FOPEN(bmpFile, LabelMarkersOutputFile1.c_str(), "wb");
- else if (nImage == 2)
- FOPEN(bmpFile, LabelMarkersOutputFile2.c_str(), "wb");
- else if (nImage == 3)
- FOPEN(bmpFile, LabelMarkersOutputFile3.c_str(), "wb");
- else if (nImage == 4)
- FOPEN(bmpFile, LabelMarkersOutputFile4.c_str(), "wb");
-
- if (bmpFile == NULL) return -1;
- size_t nSize = 0;
- for (int j = 0; j < oSizeROI[nImage].height; j++) {
- nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
- sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
- }
- fclose(bmpFile);
-
- nppStatus = nppiCompressMarkerLabelsGetBufferSize_32u_C1R(
- oSizeROI[nImage].width * oSizeROI[nImage].height,
- &aCompressLabelsScratchBufferSize[nImage]);
- if (nppStatus != NPP_NO_ERROR) return nppStatus;
-
- cudaError =
- cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[nImage],
- aCompressLabelsScratchBufferSize[nImage]);
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- nCompressedLabelCount = 0;
-
- nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR(
- pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- oSizeROI[nImage], oSizeROI[nImage].width * oSizeROI[nImage].height,
- &nCompressedLabelCount, pUFCompressedLabelsScratchBufferDev[nImage]);
-
- if (nppStatus != NPP_SUCCESS) {
- if (nImage == 0)
- printf("teapot_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
- else if (nImage == 1)
- printf(
- "CT_Skull_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n");
- else if (nImage == 2)
- printf(
- "PCB_METAL_CompressedLabelMarkersUF_8Way_509x335_32u failed.\n");
- else if (nImage == 3)
- printf("PCB2_CompressedLabelMarkersUF_8Way_1024x683_32u failed.\n");
- else if (nImage == 4)
- printf("PCB_CompressedLabelMarkersUF_8Way_1280x720_32u failed.\n");
- tearDown();
- return -1;
- }
-
- cudaError = cudaMemcpy2DAsync(
- pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
- cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
-
- // Wait for host image read backs to finish, not necessary if no need to
- // synchronize
- if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
- cudaSuccess ||
- nCompressedLabelCount == 0) {
- printf("Post label compression cudaStreamSynchronize failed\n");
- tearDown();
- return -1;
- }
-
- if (nImage == 0)
- FOPEN(bmpFile, CompressedMarkerLabelsOutputFile0.c_str(), "wb");
- else if (nImage == 1)
- FOPEN(bmpFile, CompressedMarkerLabelsOutputFile1.c_str(), "wb");
- else if (nImage == 2)
- FOPEN(bmpFile, CompressedMarkerLabelsOutputFile2.c_str(), "wb");
- else if (nImage == 3)
- FOPEN(bmpFile, CompressedMarkerLabelsOutputFile3.c_str(), "wb");
- else if (nImage == 4)
- FOPEN(bmpFile, CompressedMarkerLabelsOutputFile4.c_str(), "wb");
-
- if (bmpFile == NULL) return -1;
- nSize = 0;
- for (int j = 0; j < oSizeROI[nImage].height; j++) {
- nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
- sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
- }
- fclose(bmpFile);
-
- if (nImage == 0)
- printf(
- "teapot_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
- "compressed label count is %d.\n",
- nCompressedLabelCount);
- else if (nImage == 1)
- printf(
- "CT_Skull_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, "
- "compressed label count is %d.\n",
- nCompressedLabelCount);
- else if (nImage == 2)
- printf(
- "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u succeeded, "
- "compressed label count is %d.\n",
- nCompressedLabelCount);
- else if (nImage == 3)
- printf(
- "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u succeeded, "
- "compressed label count is %d.\n",
- nCompressedLabelCount);
- else if (nImage == 4)
- printf(
- "PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u succeeded, "
- "compressed label count is %d.\n",
- nCompressedLabelCount);
- }
- }
-
- // Batch image processing
-
- // We want to allocate scratch buffers more efficiently for batch processing
- // so first we free up the scratch buffers for image 0 and reallocate them.
- // This is not required but helps cudaMalloc to work more efficiently.
-
- cudaFree(pUFCompressedLabelsScratchBufferDev[0]);
-
- int nTotalBatchedUFCompressLabelsScratchBufferDevSize = 0;
-
- for (int k = 0; k < NUMBER_OF_IMAGES; k++)
- nTotalBatchedUFCompressLabelsScratchBufferDevSize +=
- aCompressLabelsScratchBufferSize[k];
-
- cudaError = cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[0],
- nTotalBatchedUFCompressLabelsScratchBufferDevSize);
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- // Now allocate batch lists
-
- int nBatchImageListBytes = NUMBER_OF_IMAGES * sizeof(NppiImageDescriptor);
-
- cudaError =
- cudaMalloc((void **)&pUFBatchSrcImageListDev, nBatchImageListBytes);
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- cudaError =
- cudaMalloc((void **)&pUFBatchSrcDstImageListDev, nBatchImageListBytes);
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- checkCudaErrors(
- cudaMallocHost((void **)&pUFBatchSrcImageListHost, nBatchImageListBytes));
-
- checkCudaErrors(cudaMallocHost((void **)&pUFBatchSrcDstImageListHost,
- nBatchImageListBytes));
-
- NppiSize oMaxROISize = {0, 0};
-
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- pUFBatchSrcImageListHost[nImage].pData = pInputImageDev[nImage];
- pUFBatchSrcImageListHost[nImage].nStep =
- oSizeROI[nImage].width * sizeof(Npp8u);
- // src image oSize parameter is ignored in these NPP functions
- pUFBatchSrcDstImageListHost[nImage].pData = pUFLabelDev[nImage];
- pUFBatchSrcDstImageListHost[nImage].nStep =
- oSizeROI[nImage].width * sizeof(Npp32u);
- pUFBatchSrcDstImageListHost[nImage].oSize = oSizeROI[nImage];
- if (oSizeROI[nImage].width > oMaxROISize.width)
- oMaxROISize.width = oSizeROI[nImage].width;
- if (oSizeROI[nImage].height > oMaxROISize.height)
- oMaxROISize.height = oSizeROI[nImage].height;
- }
-
- // Copy label generation batch lists from CPU to GPU
- cudaError = cudaMemcpyAsync(pUFBatchSrcImageListDev, pUFBatchSrcImageListHost,
- nBatchImageListBytes, cudaMemcpyHostToDevice,
- nppStreamCtx.hStream);
- if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
-
- cudaError = cudaMemcpyAsync(pUFBatchSrcDstImageListDev,
- pUFBatchSrcDstImageListHost, nBatchImageListBytes,
- cudaMemcpyHostToDevice, nppStreamCtx.hStream);
- if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
-
- // We use 8-way neighbor search throughout this example
- nppStatus = nppiLabelMarkersUFBatch_8u32u_C1R_Advanced_Ctx(
- pUFBatchSrcImageListDev, pUFBatchSrcDstImageListDev, NUMBER_OF_IMAGES,
- oMaxROISize, nppiNormInf, nppStreamCtx);
-
- if (nppStatus != NPP_SUCCESS) {
- printf("LabelMarkersUFBatch_8Way_8u32u failed.\n");
- tearDown();
- return -1;
- }
-
- // Now read back generated device images to the host
-
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- cudaError = cudaMemcpy2DAsync(
- pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
- cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
- }
-
- // Wait for host image read backs to complete, not necessary if no need to
- // synchronize
- if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
- cudaSuccess) {
- printf("Post label generation cudaStreamSynchronize failed\n");
- tearDown();
- return -1;
- }
-
- // Save output to files
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- if (nImage == 0)
- FOPEN(bmpFile, LabelMarkersBatchOutputFile0.c_str(), "wb");
- else if (nImage == 1)
- FOPEN(bmpFile, LabelMarkersBatchOutputFile1.c_str(), "wb");
- else if (nImage == 2)
- FOPEN(bmpFile, LabelMarkersBatchOutputFile2.c_str(), "wb");
- else if (nImage == 3)
- FOPEN(bmpFile, LabelMarkersBatchOutputFile3.c_str(), "wb");
- else if (nImage == 4)
- FOPEN(bmpFile, LabelMarkersBatchOutputFile4.c_str(), "wb");
-
- if (bmpFile == NULL) return -1;
- size_t nSize = 0;
- for (int j = 0; j < oSizeROI[nImage].height; j++) {
- nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
- sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
- }
- fclose(bmpFile);
- }
-
-#ifdef USE_BATCHED_LABEL_COMPRESSION
-
- // Now allocate scratch buffer memory for batched label compression
- cudaError = cudaMalloc((void **)&pUFBatchSrcDstScratchBufferListDev,
- NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor));
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- cudaError = cudaMalloc((void **)&pUFBatchPerImageCompressedCountListDev,
- NUMBER_OF_IMAGES * sizeof(Npp32u));
- if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR;
-
- // Allocate host side scratch buffer point and size list and initialize with
- // device scratch buffer pointers
- checkCudaErrors(
- cudaMallocHost((void **)&pUFBatchSrcDstScratchBufferListHost,
- NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor)));
-
- checkCudaErrors(
- cudaMallocHost((void **)&pUFBatchPerImageCompressedCountListHost,
- +NUMBER_OF_IMAGES * sizeof(Npp32u)));
-
- // Start buffer pointer at beginning of full per image buffer list sized
- // pUFCompressedLabelsScratchBufferDev[0]
- Npp32u *pCurUFCompressedLabelsScratchBufferDev =
- reinterpret_cast(pUFCompressedLabelsScratchBufferDev[0]);
-
- int nMaxUFCompressedLabelsScratchBufferSize = 0;
-
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- // This particular function works on in-place data and SrcDst image batch
- // list has already been initialized in batched label generation function
- // setup
-
- // Initialize each per image buffer descriptor
- pUFBatchSrcDstScratchBufferListHost[nImage].pData =
- reinterpret_cast(pCurUFCompressedLabelsScratchBufferDev);
- pUFBatchSrcDstScratchBufferListHost[nImage].nBufferSize =
- aCompressLabelsScratchBufferSize[nImage];
-
- if (aCompressLabelsScratchBufferSize[nImage] >
- nMaxUFCompressedLabelsScratchBufferSize)
- nMaxUFCompressedLabelsScratchBufferSize =
- aCompressLabelsScratchBufferSize[nImage];
-
- // Offset buffer pointer to next per image buffer
- Npp8u *pTempBuffer =
- reinterpret_cast(pCurUFCompressedLabelsScratchBufferDev);
- pTempBuffer += aCompressLabelsScratchBufferSize[nImage];
- pCurUFCompressedLabelsScratchBufferDev =
- reinterpret_cast((void *)(pTempBuffer));
- }
-
- // Copy compression batch scratch buffer list from CPU to GPU
- cudaError = cudaMemcpyAsync(pUFBatchSrcDstScratchBufferListDev,
- pUFBatchSrcDstScratchBufferListHost,
- NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor),
- cudaMemcpyHostToDevice, nppStreamCtx.hStream);
- if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR;
-
- nppStatus = nppiCompressMarkerLabelsUFBatch_32u_C1IR_Advanced_Ctx(
- pUFBatchSrcDstImageListDev, pUFBatchSrcDstScratchBufferListDev,
- pUFBatchPerImageCompressedCountListDev, NUMBER_OF_IMAGES, oMaxROISize,
- nMaxUFCompressedLabelsScratchBufferSize, nppStreamCtx);
- if (nppStatus != NPP_SUCCESS) {
- printf("BatchCompressedLabelMarkersUF_8Way_32u failed.\n");
- tearDown();
- return -1;
- }
-
- // Copy output compressed label images back to host
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- cudaError = cudaMemcpy2DAsync(
- pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u),
- oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height,
- cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
- }
-
- // Wait for host image read backs to complete, not necessary if no need to
- // synchronize
- if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) !=
- cudaSuccess) {
- printf("Post label compression cudaStreamSynchronize failed\n");
- tearDown();
- return -1;
- }
-
- // Save compressed label images into files
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- if (nImage == 0)
- FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile0.c_str(), "wb");
- else if (nImage == 1)
- FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile1.c_str(), "wb");
- else if (nImage == 2)
- FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile2.c_str(), "wb");
- else if (nImage == 3)
- FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile3.c_str(), "wb");
- else if (nImage == 4)
- FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile4.c_str(), "wb");
-
- if (bmpFile == NULL) return -1;
- size_t nSize = 0;
- for (int j = 0; j < oSizeROI[nImage].height; j++) {
- nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width],
- sizeof(Npp32u), oSizeROI[nImage].width, bmpFile);
- }
- fclose(bmpFile);
- }
-
- // Read back per image compressed label count.
- cudaError = cudaMemcpyAsync(pUFBatchPerImageCompressedCountListHost,
- pUFBatchPerImageCompressedCountListDev,
- NUMBER_OF_IMAGES * sizeof(Npp32u),
- cudaMemcpyDeviceToHost, nppStreamCtx.hStream);
- if (cudaError != cudaSuccess) {
- tearDown();
- return NPP_MEMCPY_ERROR;
- }
-
- // Wait for host read back to complete
- cudaError = cudaStreamSynchronize(nppStreamCtx.hStream);
-
- printf("\n\n");
-
- for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) {
- if (nImage == 0)
- printf(
- "teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
- "compressed label count is %d.\n",
- pUFBatchPerImageCompressedCountListHost[nImage]);
- else if (nImage == 1)
- printf(
- "CT_Skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, "
- "compressed label count is %d.\n",
- pUFBatchPerImageCompressedCountListHost[nImage]);
- else if (nImage == 2)
- printf(
- "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u succeeded, "
- "compressed label count is %d.\n",
- pUFBatchPerImageCompressedCountListHost[nImage]);
- else if (nImage == 3)
- printf(
- "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u succeeded, "
- "compressed label count is %d.\n",
- pUFBatchPerImageCompressedCountListHost[nImage]);
- else if (nImage == 4)
- printf(
- "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u succeeded, "
- "compressed label count is %d.\n",
- pUFBatchPerImageCompressedCountListHost[nImage]);
- }
-
-#endif // USE_BATCHED_LABEL_COMPRESSION
-
- tearDown();
-
- return 0;
-}
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index 5c387eea..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw
deleted file mode 100644
index 5c387eea..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw
deleted file mode 100644
index d8964918..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw
deleted file mode 100644
index d8964918..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt
new file mode 100644
index 00000000..cba9c714
--- /dev/null
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt
@@ -0,0 +1,40 @@
+# boxFilterNPP sample; the target is only created when FreeImage is available.
+set(SRC_FILES
+    boxFilterNPP.cpp
+)
+
+find_package(FreeImage)
+
+if(FreeImage_FOUND)
+    # Add target for boxFilterNPP
+    add_executable(boxFilterNPP ${SRC_FILES})
+    set_target_properties(boxFilterNPP PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+    # Header search paths, scoped to this target rather than the directory
+    target_include_directories(boxFilterNPP PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../../Common
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../../Common/UtilNPP
+        ${CUDAToolkit_INCLUDE_DIRS}
+        ${FreeImage_INCLUDE_DIRS}
+    )
+
+    target_link_libraries(boxFilterNPP PRIVATE
+        CUDA::nppc
+        CUDA::nppisu
+        CUDA::nppif
+        CUDA::cudart
+        ${FreeImage_LIBRARIES}
+    )
+
+    # Copy the input image into the build directory. The file is named
+    # explicitly because `cmake -E copy_if_different` does not expand wildcards
+    # itself; a `*.pgm` pattern only works where a shell happens to expand it.
+    add_custom_command(TARGET boxFilterNPP POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            ${CMAKE_CURRENT_SOURCE_DIR}/teapot512.pgm
+            ${CMAKE_CURRENT_BINARY_DIR}
+        VERBATIM
+    )
+else()
+    message(STATUS "FreeImage not found - will not build sample 'boxFilterNPP'")
+endif()
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile b/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile
deleted file mode 100644
index 8b531a63..00000000
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile
+++ /dev/null
@@ -1,378 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# * Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-# * Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in the
-# documentation and/or other materials provided with the distribution.
-# * Neither the name of NVIDIA CORPORATION nor the names of its
-# contributors may be used to endorse or promote products derived
-# from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Location of the CUDA Toolkit
-CUDA_PATH ?= /usr/local/cuda
-
-##############################
-# start deprecated interface #
-##############################
-ifeq ($(x86_64),1)
- $(info WARNING - x86_64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=x86_64 instead)
- TARGET_ARCH ?= x86_64
-endif
-ifeq ($(ARMv7),1)
- $(info WARNING - ARMv7 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=armv7l instead)
- TARGET_ARCH ?= armv7l
-endif
-ifeq ($(aarch64),1)
- $(info WARNING - aarch64 variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=aarch64 instead)
- TARGET_ARCH ?= aarch64
-endif
-ifeq ($(ppc64le),1)
- $(info WARNING - ppc64le variable has been deprecated)
- $(info WARNING - please use TARGET_ARCH=ppc64le instead)
- TARGET_ARCH ?= ppc64le
-endif
-ifneq ($(GCC),)
- $(info WARNING - GCC variable has been deprecated)
- $(info WARNING - please use HOST_COMPILER=$(GCC) instead)
- HOST_COMPILER ?= $(GCC)
-endif
-ifneq ($(abi),)
- $(error ERROR - abi variable has been removed)
-endif
-############################
-# end deprecated interface #
-############################
-
-# architecture
-HOST_ARCH := $(shell uname -m)
-TARGET_ARCH ?= $(HOST_ARCH)
-ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
- ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
- TARGET_SIZE := 64
- else ifneq (,$(filter $(TARGET_ARCH),armv7l))
- TARGET_SIZE := 32
- endif
- else
- TARGET_SIZE := $(shell getconf LONG_BIT)
- endif
-else
- $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
-endif
-
-# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
-ifeq ($(HOST_ARCH),aarch64)
- ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
- HOST_ARCH := sbsa
- TARGET_ARCH := sbsa
- endif
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
- $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
- endif
-endif
-
-# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
-ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
- TARGET_ARCH = armv7l
-endif
-
-# operating system
-HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
-TARGET_OS ?= $(HOST_OS)
-ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
- $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
-endif
-
-# host compiler
-ifdef HOST_COMPILER
- CUSTOM_HOST_COMPILER = 1
-endif
-
-ifeq ($(TARGET_OS),darwin)
- ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
- HOST_COMPILER ?= clang++
- endif
-else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
- ifeq ($(TARGET_OS),linux)
- HOST_COMPILER ?= arm-linux-gnueabihf-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
- else ifeq ($(TARGET_OS),android)
- HOST_COMPILER ?= arm-linux-androideabi-g++
- endif
- else ifeq ($(TARGET_ARCH),aarch64)
- ifeq ($(TARGET_OS), linux)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_OS),qnx)
- ifeq ($(QNX_HOST),)
- $(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
- endif
- ifeq ($(QNX_TARGET),)
- $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
- endif
- export QNX_HOST
- export QNX_TARGET
- HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
- else ifeq ($(TARGET_OS), android)
- HOST_COMPILER ?= aarch64-linux-android-clang++
- endif
- else ifeq ($(TARGET_ARCH),sbsa)
- HOST_COMPILER ?= aarch64-linux-gnu-g++
- else ifeq ($(TARGET_ARCH),ppc64le)
- HOST_COMPILER ?= powerpc64le-linux-gnu-g++
- endif
-endif
-HOST_COMPILER ?= g++
-NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
-
-# internal flags
-NVCCFLAGS := -m${TARGET_SIZE}
-CCFLAGS :=
-LDFLAGS :=
-
-# build flags
-
-# Link flag for customized HOST_COMPILER with gcc realpath
-GCC_PATH := $(shell which gcc)
-ifeq ($(CUSTOM_HOST_COMPILER),1)
- ifneq ($(filter /%,$(HOST_COMPILER)),)
- ifneq ($(findstring gcc,$(HOST_COMPILER)),)
- ifneq ($(GCC_PATH),$(HOST_COMPILER))
- LDFLAGS += -lstdc++
- endif
- endif
- endif
-endif
-
-ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -rpath $(CUDA_PATH)/lib
- CCFLAGS += -arch $(HOST_ARCH)
-else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
- LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
- CCFLAGS += -mfloat-abi=hard
-else ifeq ($(TARGET_OS),android)
- LDFLAGS += -pie
- CCFLAGS += -fpie -fpic -fexceptions
-endif
-
-ifneq ($(TARGET_ARCH),$(HOST_ARCH))
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- ifneq ($(TARGET_FS),)
- GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
- ifeq ($(GCCVERSIONLTEQ46),1)
- CCFLAGS += --sysroot=$(TARGET_FS)
- endif
- LDFLAGS += --sysroot=$(TARGET_FS)
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
- LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
- LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
- CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
- endif
- endif
- ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- NVCCFLAGS += -D_QNX_SOURCE
- NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
- CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
- LDFLAGS += -lsocket
- LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
- CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
- ifdef TARGET_OVERRIDE
- LDFLAGS += -lslog2
- endif
-
- ifneq ($(TARGET_FS),)
- LDFLAGS += -L$(TARGET_FS)/usr/lib
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
- LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
- CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
- CCFLAGS += -I$(TARGET_FS)/../include
- endif
- endif
-endif
-
-ifdef TARGET_OVERRIDE # cuda toolkit targets override
- NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
-endif
-
-# Install directory of different arch
-CUDA_INSTALL_TARGET_DIR :=
-ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
- CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
- CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
-else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
- CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
-else ifeq ($(TARGET_ARCH),ppc64le)
- CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
-endif
-
-# Debug build flags
-ifeq ($(dbg),1)
- NVCCFLAGS += -g -G
- BUILD_TYPE := debug
-else
- BUILD_TYPE := release
-endif
-
-ALL_CCFLAGS :=
-ALL_CCFLAGS += $(NVCCFLAGS)
-ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
-ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
-
-SAMPLE_ENABLED := 1
-
-# This sample is not supported on QNX
-ifeq ($(TARGET_OS),qnx)
- $(info >>> WARNING - boxFilterNPP is not supported on QNX - waiving sample <<<)
- SAMPLE_ENABLED := 0
-endif
-
-ALL_LDFLAGS :=
-ALL_LDFLAGS += $(ALL_CCFLAGS)
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
-ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
-
-# Common includes and paths for CUDA
-INCLUDES := -I../../../Common
-LIBRARIES :=
-
-################################################################################
-
-# Gencode arguments
-SMS ?=
-
-ifeq ($(GENCODE_FLAGS),)
-# Generate SASS code for each SM architecture listed in $(SMS)
-$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
-
-ifeq ($(SMS),)
-ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
-# Generate PTX code from SM 53
-GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
-else
-# Generate PTX code from SM 50
-GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
-endif
-endif
-
-# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
-HIGHEST_SM := $(lastword $(sort $(SMS)))
-ifneq ($(HIGHEST_SM),)
-GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
-endif
-endif
-
-ALL_CCFLAGS += --threads 0 --std=c++11
-
-INCLUDES += -I../../../Common/UtilNPP
-
-LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos -lfreeimage
-
-# Attempt to compile a minimal application linked against FreeImage. If a.out exists, FreeImage is properly set up.
-$(shell echo "#include \"FreeImage.h\"" > test.c; echo "int main() { return 0; }" >> test.c ; $(NVCC) $(ALL_CCFLAGS) $(INCLUDES) $(ALL_LDFLAGS) $(LIBRARIES) -l freeimage test.c)
-FREEIMAGE := $(shell find a.out 2>/dev/null)
-$(shell rm a.out test.c 2>/dev/null)
-
-ifeq ("$(FREEIMAGE)","")
-$(info >>> WARNING - FreeImage is not set up correctly. Please ensure FreeImage is set up correctly. <<<)
-SAMPLE_ENABLED := 0
-endif
-
-ifeq ($(SAMPLE_ENABLED),0)
-EXEC ?= @echo "[@]"
-endif
-
-################################################################################
-
-# Target rules
-all: build
-
-build: boxFilterNPP
-
-check.deps:
-ifeq ($(SAMPLE_ENABLED),0)
- @echo "Sample will be waived due to the above missing dependencies"
-else
- @echo "Sample is ready - all dependencies have been met"
-endif
-
-boxFilterNPP.o:boxFilterNPP.cpp
- $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
-
-boxFilterNPP: boxFilterNPP.o
- $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
- $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
- $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
-
-run: build
- $(EXEC) ./boxFilterNPP
-
-testrun: build
-
-clean:
- rm -f boxFilterNPP boxFilterNPP.o
- rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/boxFilterNPP
-
-clobber: clean
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
deleted file mode 100644
index 9d8e2b3f..00000000
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml
+++ /dev/null
@@ -1,95 +0,0 @@
-
-
-
- boxFilterNPP
-
- cudaRuntimeGetVersion
- cudaDriverGetVersion
-
-
- whole
- true
-
- ./teapot512.pgm
-
-
- ../../../Common/UtilNPP
- ../../../Common/FreeImage/Dist/x64
- ./
- ../
- ../../../Common
-
-
- Performance Strategies
- Image Processing
- NPP Library
-
-
- CUDA
- NPP
- Image Processing
- box filter
-
-
- nppisu_static
- nppif_static
- nppc_static
- culibos
- freeimage
-
-
-
- true
- boxFilterNPP.cpp
-
- FreeImage
- NPP
-
-
- 1:CUDA Basic Topics
- 1:Performance Strategies
- 2:Image Processing
- 2:Computer Vision
-
- sm50
- sm52
- sm53
- sm60
- sm61
- sm70
- sm72
- sm75
- sm80
- sm86
- sm87
- sm89
- sm90
-
-
- x86_64
- linux
-
-
- windows7
-
-
- x86_64
- macosx
-
-
- arm
-
-
- sbsa
-
-
- ppc64le
- linux
-
-
-
- all
-
- Box Filter with NPP
- exe
-
diff --git a/Samples/CMakeLists.txt b/Samples/CMakeLists.txt
index 34d905d3..f06523ae 100644
--- a/Samples/CMakeLists.txt
+++ b/Samples/CMakeLists.txt
@@ -2,5 +2,6 @@ add_subdirectory(0_Introduction)
add_subdirectory(1_Utilities)
add_subdirectory(2_Concepts_and_Techniques)
add_subdirectory(3_CUDA_Features)
+add_subdirectory(4_CUDA_Libraries)
add_subdirectory(6_Performance)
add_subdirectory(7_libNVVM)
diff --git a/cmake/Modules/FindFreeImage.cmake b/cmake/Modules/FindFreeImage.cmake
new file mode 100644
index 00000000..b03e99eb
--- /dev/null
+++ b/cmake/Modules/FindFreeImage.cmake
@@ -0,0 +1,17 @@
+# Find module for FreeImage: locates the header directory and the library.
+find_path(FreeImage_INCLUDE_DIR NAMES freeimage.h FreeImage.h
+    PATHS /usr/include /usr/local/include)
+
+find_library(FreeImage_LIBRARY NAMES freeimage
+    PATHS /usr/lib /usr/local/lib)
+
+# Report the outcome; FreeImage_FOUND is set only if both entries resolved.
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FreeImage
+    REQUIRED_VARS FreeImage_LIBRARY FreeImage_INCLUDE_DIR)
+
+# Publish the plural result variables that consumers of this module read.
+if(FreeImage_FOUND)
+    set(FreeImage_LIBRARIES "${FreeImage_LIBRARY}")
+    set(FreeImage_INCLUDE_DIRS "${FreeImage_INCLUDE_DIR}")
+endif()