diff --git a/CHANGELOG.md b/CHANGELOG.md index b4e6d9a3..3645503f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ * `simpleVoteIntrinsics_nvrtc` demonstrating NVRTC usage for `simpleVoteIntrinsics` sample (reason: redundant) * `2_Concepts_and_Techniques` * `cuHook` demonstrating dlsym hooks. (reason: incompatible with modern `glibc`) + * `4_CUDA_Libraries` + * `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library) + ### CUDA 12.5 diff --git a/Samples/4_CUDA_Libraries/CMakeLists.txt b/Samples/4_CUDA_Libraries/CMakeLists.txt new file mode 100644 index 00000000..255d0446 --- /dev/null +++ b/Samples/4_CUDA_Libraries/CMakeLists.txt @@ -0,0 +1,40 @@ +#add_subdirectory(FilterBorderControlNPP) +#add_subdirectory(MersenneTwisterGP11213) +add_subdirectory(batchCUBLAS) +add_subdirectory(boxFilterNPP) +#add_subdirectory(cannyEdgeDetectorNPP) +#add_subdirectory(conjugateGradient) +#add_subdirectory(conjugateGradientCudaGraphs) +#add_subdirectory(conjugateGradientMultiBlockCG) +#add_subdirectory(conjugateGradientMultiDeviceCG) +#add_subdirectory(conjugateGradientPrecond) +#add_subdirectory(conjugateGradientUM) +#add_subdirectory(cuDLAErrorReporting) +#add_subdirectory(cuDLAHybridMode) +#add_subdirectory(cuDLALayerwiseStatsHybrid) +#add_subdirectory(cuDLALayerwiseStatsStandalone) +#add_subdirectory(cuDLAStandaloneMode) +#add_subdirectory(cuSolverDn_LinearSolver) +#add_subdirectory(cuSolverRf) +#add_subdirectory(cuSolverSp_LinearSolver) +#add_subdirectory(cuSolverSp_LowlevelCholesky) +#add_subdirectory(cuSolverSp_LowlevelQR) +#add_subdirectory(cudaNvSci) +#add_subdirectory(cudaNvSciNvMedia) +#add_subdirectory(freeImageInteropNPP) +#add_subdirectory(histEqualizationNPP) +#add_subdirectory(jitLto) +#add_subdirectory(lineOfSight) +#add_subdirectory(matrixMulCUBLAS) +#add_subdirectory(nvJPEG) +#add_subdirectory(nvJPEG_encoder) +#add_subdirectory(oceanFFT) +#add_subdirectory(randomFog) 
+#add_subdirectory(simpleCUBLAS) +#add_subdirectory(simpleCUBLASXT) +#add_subdirectory(simpleCUBLAS_LU) +#add_subdirectory(simpleCUFFT) +#add_subdirectory(simpleCUFFT_2d_MGPU) +#add_subdirectory(simpleCUFFT_MGPU) +#add_subdirectory(simpleCUFFT_callback) +#add_subdirectory(watershedSegmentationNPP) diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt new file mode 100644 index 00000000..3351e24c --- /dev/null +++ b/Samples/4_CUDA_Libraries/batchCUBLAS/CMakeLists.txt @@ -0,0 +1,20 @@ +# Include directories and libraries +include_directories(../../../Common) + +# Source file +set(SRC_FILES + batchCUBLAS.cpp +) + +# Add target for batchCUBLAS +add_executable(batchCUBLAS ${SRC_FILES}) +set_target_properties(batchCUBLAS PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + +target_include_directories(batchCUBLAS PRIVATE + ${CUDAToolkit_INCLUDE_DIRS} +) + +target_link_libraries(batchCUBLAS PRIVATE + CUDA::cublas + CUDA::cudart +) diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile b/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile deleted file mode 100644 index 1e813cf0..00000000 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/Makefile +++ /dev/null @@ -1,347 +0,0 @@ -################################################################################ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. 
-# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -################################################################################ -# -# Makefile project only supported on Mac OS X and Linux Platforms) -# -################################################################################ - -# Location of the CUDA Toolkit -CUDA_PATH ?= /usr/local/cuda - -############################## -# start deprecated interface # -############################## -ifeq ($(x86_64),1) - $(info WARNING - x86_64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=x86_64 instead) - TARGET_ARCH ?= x86_64 -endif -ifeq ($(ARMv7),1) - $(info WARNING - ARMv7 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=armv7l instead) - TARGET_ARCH ?= armv7l -endif -ifeq ($(aarch64),1) - $(info WARNING - aarch64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=aarch64 instead) - TARGET_ARCH ?= aarch64 -endif -ifeq ($(ppc64le),1) - $(info WARNING - ppc64le variable has been deprecated) - $(info WARNING - please use 
TARGET_ARCH=ppc64le instead) - TARGET_ARCH ?= ppc64le -endif -ifneq ($(GCC),) - $(info WARNING - GCC variable has been deprecated) - $(info WARNING - please use HOST_COMPILER=$(GCC) instead) - HOST_COMPILER ?= $(GCC) -endif -ifneq ($(abi),) - $(error ERROR - abi variable has been removed) -endif -############################ -# end deprecated interface # -############################ - -# architecture -HOST_ARCH := $(shell uname -m) -TARGET_ARCH ?= $(HOST_ARCH) -ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l)) - ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le)) - TARGET_SIZE := 64 - else ifneq (,$(filter $(TARGET_ARCH),armv7l)) - TARGET_SIZE := 32 - endif - else - TARGET_SIZE := $(shell getconf LONG_BIT) - endif -else - $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!) -endif - -# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now. -ifeq ($(HOST_ARCH),aarch64) - ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null)) - HOST_ARCH := sbsa - TARGET_ARCH := sbsa - endif -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le)) - $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!) - endif -endif - -# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l -ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32) - TARGET_ARCH = armv7l -endif - -# operating system -HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") -TARGET_OS ?= $(HOST_OS) -ifeq (,$(filter $(TARGET_OS),linux darwin qnx android)) - $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!) 
-endif - -# host compiler -ifdef HOST_COMPILER - CUSTOM_HOST_COMPILER = 1 -endif - -ifeq ($(TARGET_OS),darwin) - ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) - HOST_COMPILER ?= clang++ - endif -else ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - ifeq ($(TARGET_OS),linux) - HOST_COMPILER ?= arm-linux-gnueabihf-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++ - else ifeq ($(TARGET_OS),android) - HOST_COMPILER ?= arm-linux-androideabi-g++ - endif - else ifeq ($(TARGET_ARCH),aarch64) - ifeq ($(TARGET_OS), linux) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++ - else ifeq ($(TARGET_OS), android) - HOST_COMPILER ?= aarch64-linux-android-clang++ - endif - else ifeq ($(TARGET_ARCH),sbsa) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_ARCH),ppc64le) - HOST_COMPILER ?= powerpc64le-linux-gnu-g++ - endif -endif -HOST_COMPILER ?= g++ -NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) - -# internal flags -NVCCFLAGS := -m${TARGET_SIZE} -CCFLAGS := -LDFLAGS := - -# build flags - -# Link flag for customized HOST_COMPILER with gcc realpath -GCC_PATH := $(shell which gcc) -ifeq ($(CUSTOM_HOST_COMPILER),1) - ifneq ($(filter /%,$(HOST_COMPILER)),) - ifneq ($(findstring gcc,$(HOST_COMPILER)),) - ifneq ($(GCC_PATH),$(HOST_COMPILER)) - LDFLAGS += -lstdc++ - endif - endif - 
endif -endif - -ifeq ($(TARGET_OS),darwin) - LDFLAGS += -rpath $(CUDA_PATH)/lib - CCFLAGS += -arch $(HOST_ARCH) -else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 - CCFLAGS += -mfloat-abi=hard -else ifeq ($(TARGET_OS),android) - LDFLAGS += -pie - CCFLAGS += -fpie -fpic -fexceptions -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu - LDFLAGS += --unresolved-symbols=ignore-in-shared-libs - CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm - CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - NVCCFLAGS += -D_QNX_SOURCE - NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le - CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu - LDFLAGS += -lsocket - LDFLAGS += -L/usr/lib/aarch64-qnx-gnu - CCFLAGS += 
"-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu" - ifdef TARGET_OVERRIDE - LDFLAGS += -lslog2 - endif - - ifneq ($(TARGET_FS),) - LDFLAGS += -L$(TARGET_FS)/usr/lib - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib" - LDFLAGS += -L$(TARGET_FS)/usr/libnvidia - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia" - CCFLAGS += -I$(TARGET_FS)/../include - endif - endif -endif - -ifdef TARGET_OVERRIDE # cuda toolkit targets override - NVCCFLAGS += -target-dir $(TARGET_OVERRIDE) -endif - -# Install directory of different arch -CUDA_INSTALL_TARGET_DIR := -ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux) - CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx) - CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/ -else ifeq ($(TARGET_ARCH),ppc64le) - CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/ -endif - -# Debug build flags -ifeq ($(dbg),1) - NVCCFLAGS += -g -G - BUILD_TYPE := debug -else - BUILD_TYPE := release -endif - -ALL_CCFLAGS := -ALL_CCFLAGS += $(NVCCFLAGS) -ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) - -ALL_LDFLAGS := -ALL_LDFLAGS += $(ALL_CCFLAGS) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) - -# Common includes and paths for CUDA -INCLUDES := -I../../../Common -LIBRARIES := - 
-################################################################################ - -# Gencode arguments -SMS ?= - -ifeq ($(GENCODE_FLAGS),) -# Generate SASS code for each SM architecture listed in $(SMS) -$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) - -ifeq ($(SMS),) -ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -# Generate PTX code from SM 53 -GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53 -else -# Generate PTX code from SM 50 -GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50 -endif -endif - -# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility -HIGHEST_SM := $(lastword $(sort $(SMS))) -ifneq ($(HIGHEST_SM),) -GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) -endif -endif - -ALL_CCFLAGS += --threads 0 --std=c++11 - -LIBRARIES += -lcublas - -################################################################################ - -# Target rules -all: build - -build: batchCUBLAS - -batchCUBLAS.o:batchCUBLAS.cpp - $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -batchCUBLAS: batchCUBLAS.o - $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES) - mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - -run: build - ./batchCUBLAS - -testrun: build - -clean: - rm -f batchCUBLAS batchCUBLAS.o - rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchCUBLAS - -clobber: clean diff --git a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml deleted file mode 100644 index 25d2e453..00000000 --- a/Samples/4_CUDA_Libraries/batchCUBLAS/NsightEclipse.xml +++ /dev/null @@ -1,89 +0,0 @@ - - - - batchCUBLAS - - cuRand - cuEqual - cudaMemcpy - cudaGetErrorString - cudaFree - cudaGetLastError - cudaDeviceSynchronize - cudaGetDevice - cudaMalloc - cudaStreamCreate 
- cudaGetDeviceProperties - - - whole - true - - ./ - ../ - ../../../Common - - - Linear Algebra - CUBLAS Library - - - CUBLAS - Linear Algebra - - - cublas - - - - true - batchCUBLAS.cpp - - CUBLAS - - - 1:CUDA Basic Topics - 3:Linear Algebra - - sm50 - sm52 - sm53 - sm60 - sm61 - sm70 - sm72 - sm75 - sm80 - sm86 - sm87 - sm89 - sm90 - - - x86_64 - linux - - - windows7 - - - x86_64 - macosx - - - arm - - - sbsa - - - ppc64le - linux - - - - all - - batchCUBLAS - exe - diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json deleted file mode 100644 index f0066b0f..00000000 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/c_cpp_properties.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "configurations": [ - { - "name": "Linux", - "includePath": [ - "${workspaceFolder}/**", - "${workspaceFolder}/../../../Common" - ], - "defines": [], - "compilerPath": "/usr/local/cuda/bin/nvcc", - "cStandard": "gnu17", - "cppStandard": "gnu++14", - "intelliSenseMode": "linux-gcc-x64", - "configurationProvider": "ms-vscode.makefile-tools" - } - ], - "version": 4 -} diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json deleted file mode 100644 index c7eb54dc..00000000 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/extensions.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "recommendations": [ - "nvidia.nsight-vscode-edition", - "ms-vscode.cpptools", - "ms-vscode.makefile-tools" - ] -} diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json deleted file mode 100644 index 45180508..00000000 --- 
a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/batchedLabelMarkersAndLabelCompressionNPP" - } - ] -} diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json deleted file mode 100644 index 4509aeb1..00000000 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/.vscode/tasks.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "version": "2.0.0", - "tasks": [ - { - "label": "sample", - "type": "shell", - "command": "make dbg=1", - "problemMatcher": ["$nvcc"], - "group": { - "kind": "build", - "isDefault": true - } - } - ] -} diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw deleted file mode 100644 index a6fa444d..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw deleted file mode 100644 index 526a6c4f..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw 
b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw deleted file mode 100644 index d1e73735..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw deleted file mode 100644 index 767615ed..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/CT_skull_LabelMarkersUF_8Way_512x512_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile deleted file mode 100644 index 27201be9..00000000 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/Makefile +++ /dev/null @@ -1,372 +0,0 @@ -################################################################################ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -################################################################################ -# -# Makefile project only supported on Mac OS X and Linux Platforms) -# -################################################################################ - -# Location of the CUDA Toolkit -CUDA_PATH ?= /usr/local/cuda - -############################## -# start deprecated interface # -############################## -ifeq ($(x86_64),1) - $(info WARNING - x86_64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=x86_64 instead) - TARGET_ARCH ?= x86_64 -endif -ifeq ($(ARMv7),1) - $(info WARNING - ARMv7 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=armv7l instead) - TARGET_ARCH ?= armv7l -endif -ifeq ($(aarch64),1) - $(info WARNING - aarch64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=aarch64 instead) - TARGET_ARCH ?= aarch64 -endif -ifeq ($(ppc64le),1) - $(info WARNING - ppc64le variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=ppc64le instead) - TARGET_ARCH ?= ppc64le -endif -ifneq ($(GCC),) - $(info WARNING - GCC variable has been deprecated) - $(info WARNING - please use HOST_COMPILER=$(GCC) instead) - HOST_COMPILER ?= 
$(GCC) -endif -ifneq ($(abi),) - $(error ERROR - abi variable has been removed) -endif -############################ -# end deprecated interface # -############################ - -# architecture -HOST_ARCH := $(shell uname -m) -TARGET_ARCH ?= $(HOST_ARCH) -ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l)) - ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le)) - TARGET_SIZE := 64 - else ifneq (,$(filter $(TARGET_ARCH),armv7l)) - TARGET_SIZE := 32 - endif - else - TARGET_SIZE := $(shell getconf LONG_BIT) - endif -else - $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!) -endif - -# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now. -ifeq ($(HOST_ARCH),aarch64) - ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null)) - HOST_ARCH := sbsa - TARGET_ARCH := sbsa - endif -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le)) - $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!) - endif -endif - -# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l -ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32) - TARGET_ARCH = armv7l -endif - -# operating system -HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") -TARGET_OS ?= $(HOST_OS) -ifeq (,$(filter $(TARGET_OS),linux darwin qnx android)) - $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!) -endif - -# host compiler -ifdef HOST_COMPILER - CUSTOM_HOST_COMPILER = 1 -endif - -ifeq ($(TARGET_OS),darwin) - ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' 
-f1` \>= 5),1) - HOST_COMPILER ?= clang++ - endif -else ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - ifeq ($(TARGET_OS),linux) - HOST_COMPILER ?= arm-linux-gnueabihf-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++ - else ifeq ($(TARGET_OS),android) - HOST_COMPILER ?= arm-linux-androideabi-g++ - endif - else ifeq ($(TARGET_ARCH),aarch64) - ifeq ($(TARGET_OS), linux) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++ - else ifeq ($(TARGET_OS), android) - HOST_COMPILER ?= aarch64-linux-android-clang++ - endif - else ifeq ($(TARGET_ARCH),sbsa) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_ARCH),ppc64le) - HOST_COMPILER ?= powerpc64le-linux-gnu-g++ - endif -endif -HOST_COMPILER ?= g++ -NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) - -# internal flags -NVCCFLAGS := -m${TARGET_SIZE} -CCFLAGS := -LDFLAGS := - -# build flags - -# Link flag for customized HOST_COMPILER with gcc realpath -GCC_PATH := $(shell which gcc) -ifeq ($(CUSTOM_HOST_COMPILER),1) - ifneq ($(filter /%,$(HOST_COMPILER)),) - ifneq ($(findstring gcc,$(HOST_COMPILER)),) - ifneq ($(GCC_PATH),$(HOST_COMPILER)) - LDFLAGS += -lstdc++ - endif - endif - endif -endif - -ifeq ($(TARGET_OS),darwin) - LDFLAGS += -rpath $(CUDA_PATH)/lib - CCFLAGS += -arch $(HOST_ARCH) -else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - LDFLAGS += 
--dynamic-linker=/lib/ld-linux-armhf.so.3 - CCFLAGS += -mfloat-abi=hard -else ifeq ($(TARGET_OS),android) - LDFLAGS += -pie - CCFLAGS += -fpie -fpic -fexceptions -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu - LDFLAGS += --unresolved-symbols=ignore-in-shared-libs - CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm - CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - NVCCFLAGS += -D_QNX_SOURCE - NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le - CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu - LDFLAGS += -lsocket - LDFLAGS += -L/usr/lib/aarch64-qnx-gnu - CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu" - ifdef TARGET_OVERRIDE - LDFLAGS += -lslog2 - endif - - ifneq ($(TARGET_FS),) - LDFLAGS += -L$(TARGET_FS)/usr/lib - CCFLAGS += 
"-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib" - LDFLAGS += -L$(TARGET_FS)/usr/libnvidia - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia" - CCFLAGS += -I$(TARGET_FS)/../include - endif - endif -endif - -ifdef TARGET_OVERRIDE # cuda toolkit targets override - NVCCFLAGS += -target-dir $(TARGET_OVERRIDE) -endif - -# Install directory of different arch -CUDA_INSTALL_TARGET_DIR := -ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux) - CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx) - CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/ -else ifeq ($(TARGET_ARCH),ppc64le) - CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/ -endif - -# Debug build flags -ifeq ($(dbg),1) - NVCCFLAGS += -g -G - BUILD_TYPE := debug -else - BUILD_TYPE := release -endif - -ALL_CCFLAGS := -ALL_CCFLAGS += $(NVCCFLAGS) -ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) - -SAMPLE_ENABLED := 1 - -# This sample is not supported on Mac OSX -ifeq ($(TARGET_OS),darwin) - $(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on Mac OSX - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on QNX -ifeq ($(TARGET_OS),qnx) - $(info >>> WARNING - batchedLabelMarkersAndLabelCompressionNPP is not supported on QNX - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - 
-ALL_LDFLAGS := -ALL_LDFLAGS += $(ALL_CCFLAGS) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) - -# Common includes and paths for CUDA -INCLUDES := -I../../../Common -LIBRARIES := - -################################################################################ - -# Gencode arguments -SMS ?= - -ifeq ($(GENCODE_FLAGS),) -# Generate SASS code for each SM architecture listed in $(SMS) -$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) - -ifeq ($(SMS),) -ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -# Generate PTX code from SM 53 -GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53 -else -# Generate PTX code from SM 50 -GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50 -endif -endif - -# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility -HIGHEST_SM := $(lastword $(sort $(SMS))) -ifneq ($(HIGHEST_SM),) -GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) -endif -endif - -ALL_CCFLAGS += --threads 0 --std=c++11 - -LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos - -ifeq ($(SAMPLE_ENABLED),0) -EXEC ?= @echo "[@]" -endif - -################################################################################ - -# Target rules -all: build - -build: batchedLabelMarkersAndLabelCompressionNPP - -check.deps: -ifeq ($(SAMPLE_ENABLED),0) - @echo "Sample will be waived due to the above missing dependencies" -else - @echo "Sample is ready - all dependencies have been met" -endif - -batchedLabelMarkersAndLabelCompressionNPP.o:batchedLabelMarkersAndLabelCompressionNPP.cpp - $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -batchedLabelMarkersAndLabelCompressionNPP: batchedLabelMarkersAndLabelCompressionNPP.o - $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES) - $(EXEC) mkdir -p 
../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - -run: build - $(EXEC) ./batchedLabelMarkersAndLabelCompressionNPP - -testrun: build - -clean: - rm -f batchedLabelMarkersAndLabelCompressionNPP batchedLabelMarkersAndLabelCompressionNPP.o - rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/batchedLabelMarkersAndLabelCompressionNPP - -clobber: clean diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml deleted file mode 100644 index 9e7b07b9..00000000 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/NsightEclipse.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - - batchedLabelMarkersAndLabelCompressionNPP - - cudaRuntimeGetVersion - cudaMallocPitch - cudaFree - cudaDeviceGetAttribute - cudaMallocHost - cudaDriverGetVersion - cudaFreeHost - cudaGetDevice - cudaStreamGetFlags - cudaStreamSynchronize - cudaMalloc - cudaMemcpyAsync - cudaGetDeviceProperties - - - whole - true - - ./ - ../ - ../../../Common - - - Performance Strategies - Image Processing - NPP Library - Using NPP Batch Functions - - - CUDA - NPP - Image Processing - - - nppisu_static - nppif_static - nppc_static - culibos - - - - true - batchedLabelMarkersAndLabelCompressionNPP.cpp - - NPP - - - 1:CUDA Basic Topics - 1:Performance Strategies - 2:Image Processing - 2:Computer Vision - - sm50 - sm52 - sm53 - sm60 - sm61 - sm70 - sm72 - sm75 - sm80 - sm86 - sm87 - sm89 - sm90 - - - x86_64 - linux - - - windows7 - - - arm - - - sbsa - - - ppc64le - linux - - - - all - - Batched Label Markers And Label Compression NPP - exe - diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw 
b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw deleted file mode 100644 index 741413fc..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw deleted file mode 100644 index aa597100..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw deleted file mode 100644 index 3a7a30d3..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw deleted file mode 100644 index ae6f02a6..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB2_LabelMarkersUF_8Way_1024x683_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw deleted file mode 100644 index fffe0e9b..00000000 Binary files 
a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw deleted file mode 100644 index d96e95eb..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw deleted file mode 100644 index be539bd4..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw deleted file mode 100644 index de5e1789..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_LabelMarkersUF_8Way_1280x720_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw deleted file mode 100644 index 14a6c202..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw and /dev/null differ diff --git 
a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw deleted file mode 100644 index 79d53426..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw deleted file mode 100644 index e4323bec..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw deleted file mode 100644 index 237c057d..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md deleted file mode 100644 index 28b1b353..00000000 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/README.md +++ /dev/null @@ -1,74 +0,0 @@ -# batchedLabelMarkersAndLabelCompressionNPP - Batched Label Markers And Label Compression NPP - -## Description - -An NPP CUDA Sample that demonstrates how to use the NPP label markers generation and label compression functions based on a Union Find (UF) algorithm including both single image and batched image 
versions. - -## Key Concepts - -Performance Strategies, Image Processing, NPP Library, Using NPP Batch Functions - -## Supported SM Architectures - -[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) - -## Supported OSes - -Linux, Windows - -## Supported CPU Architecture - -x86_64, ppc64le, armv7l - -## CUDA APIs involved - -### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaRuntimeGetVersion, cudaMallocPitch, cudaFree, cudaDeviceGetAttribute, cudaMallocHost, cudaDriverGetVersion, cudaFreeHost, cudaGetDevice, cudaStreamGetFlags, cudaStreamSynchronize, cudaMalloc, cudaMemcpyAsync, cudaGetDeviceProperties - -## Dependencies needed to build/run -[NPP](../../../README.md#npp) - -## Prerequisites - -Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. -Make sure the dependencies mentioned in [Dependencies]() section above are installed. - -## Build and Run - -### Windows -The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format: -``` -*_vs.sln - for Visual Studio -``` -Each individual sample has its own set of solution files in its directory: - -To build/examine all the samples at once, the complete solution files should be used. 
To build/examine a single sample, the individual sample solution files should be used. -> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details." - -### Linux -The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make: -``` -$ cd -$ make -``` -The samples makefiles can take advantage of certain options: -* **TARGET_ARCH=** - cross-compile targeting a specific architecture. Allowed architectures are x86_64, ppc64le, armv7l. - By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.
-`$ make TARGET_ARCH=x86_64`
`$ make TARGET_ARCH=ppc64le`
`$ make TARGET_ARCH=armv7l`
- See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details. -* **dbg=1** - build with debug symbols - ``` - $ make dbg=1 - ``` -* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`. - ``` - $ make SMS="50 60" - ``` - -* **HOST_COMPILER=** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers. -``` - $ make HOST_COMPILER=g++ -``` - -## References (for more details) - diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp deleted file mode 100644 index 0a1efcc9..00000000 --- a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/batchedLabelMarkersAndLabelCompressionNPP.cpp +++ /dev/null @@ -1,805 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) -#define WINDOWS_LEAN_AND_MEAN -#define NOMINMAX -#include -#pragma warning(disable : 4819) -#endif - -#include -#include -#include - -#include -#include -#include -#include - -// Note: If you want to view these images we HIGHLY recommend using imagej -// which is free on the internet and works on most platforms -// because it is one of the few image viewing apps that can display 32 -// bit integer image data. While it normalizes the data to floating -// point values for viewing it still provides a good representation of -// the relative brightness of each label value. Note that label -// compression output results in smaller differences between label values -// making it visually more difficult to detect differences in labeled -// regions. If you have an editor that can display hex values you can -// see what the exact values of each label is, every 4 bytes represents 1 -// 32 bit integer label value. -// -// The files read and written by this sample app use RAW image format, -// that is, only the image data itself exists in the files with no image -// format information. 
When viewing RAW files with imagej just enter -// the image size and bit depth values that are part of the file name -// when requested by imagej. -// -// This sample app works in 2 stages, first it processes all of the -// images individually then it processes them all again in 1 batch using -// the Batch_Advanced versions of the NPP batch functions which allow -// each image to have it's own ROI. The 2 stages are completely -// separable but in this sample the second stage takes advantage of some -// of the data that has already been initialized. -// -// Note that there is a small amount of variability in the number of -// unique label markers generated from one run to the next by the UF -// algorithm. -// -// Performance of ALL NPP image batch functions is limited by the maximum -// ROI height in the list of images. - -// Batched label compression support is only available on NPP versions > 11.0, -// comment out if using NPP 11.0 -#define USE_BATCHED_LABEL_COMPRESSION 1 - -#define NUMBER_OF_IMAGES 5 - -Npp8u *pInputImageDev[NUMBER_OF_IMAGES]; -Npp8u *pInputImageHost[NUMBER_OF_IMAGES]; -Npp8u *pUFGenerateLabelsScratchBufferDev[NUMBER_OF_IMAGES]; -Npp8u *pUFCompressedLabelsScratchBufferDev[NUMBER_OF_IMAGES]; -Npp32u *pUFLabelDev[NUMBER_OF_IMAGES]; -Npp32u *pUFLabelHost[NUMBER_OF_IMAGES]; -NppiImageDescriptor *pUFBatchSrcImageListDev = 0; -NppiImageDescriptor *pUFBatchSrcDstImageListDev = 0; -NppiImageDescriptor *pUFBatchSrcImageListHost = 0; -NppiImageDescriptor *pUFBatchSrcDstImageListHost = 0; -NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListDev = - 0; // from nppi_filtering_functions.h -NppiBufferDescriptor *pUFBatchSrcDstScratchBufferListHost = 0; -Npp32u *pUFBatchPerImageCompressedCountListDev = 0; -Npp32u *pUFBatchPerImageCompressedCountListHost = 0; - -void tearDown() // Clean up and tear down -{ - if (pUFBatchPerImageCompressedCountListDev != 0) - cudaFree(pUFBatchPerImageCompressedCountListDev); - if (pUFBatchSrcDstScratchBufferListDev != 0) - 
cudaFree(pUFBatchSrcDstScratchBufferListDev); - if (pUFBatchSrcDstImageListDev != 0) cudaFree(pUFBatchSrcDstImageListDev); - if (pUFBatchSrcImageListDev != 0) cudaFree(pUFBatchSrcImageListDev); - if (pUFBatchPerImageCompressedCountListHost != 0) - cudaFreeHost(pUFBatchPerImageCompressedCountListHost); - if (pUFBatchSrcDstScratchBufferListHost != 0) - cudaFreeHost(pUFBatchSrcDstScratchBufferListHost); - if (pUFBatchSrcDstImageListHost != 0) - cudaFreeHost(pUFBatchSrcDstImageListHost); - if (pUFBatchSrcImageListHost != 0) cudaFreeHost(pUFBatchSrcImageListHost); - - for (int j = 0; j < NUMBER_OF_IMAGES; j++) { - if (pUFCompressedLabelsScratchBufferDev[j] != 0) - cudaFree(pUFCompressedLabelsScratchBufferDev[j]); - if (pUFGenerateLabelsScratchBufferDev[j] != 0) - cudaFree(pUFGenerateLabelsScratchBufferDev[j]); - if (pUFLabelDev[j] != 0) cudaFree(pUFLabelDev[j]); - if (pInputImageDev[j] != 0) cudaFree(pInputImageDev[j]); - if (pUFLabelHost[j] != 0) cudaFreeHost(pUFLabelHost[j]); - if (pInputImageHost[j] != 0) cudaFreeHost(pInputImageHost[j]); - } -} - -const std::string &LabelMarkersOutputFile0 = - "teapot_LabelMarkersUF_8Way_512x512_32u.raw"; -const std::string &LabelMarkersOutputFile1 = - "CT_skull_LabelMarkersUF_8Way_512x512_32u.raw"; -const std::string &LabelMarkersOutputFile2 = - "PCB_METAL_LabelMarkersUF_8Way_509x335_32u.raw"; -const std::string &LabelMarkersOutputFile3 = - "PCB2_LabelMarkersUF_8Way_1024x683_32u.raw"; -const std::string &LabelMarkersOutputFile4 = - "PCB_LabelMarkersUF_8Way_1280x720_32u.raw"; - -const std::string &CompressedMarkerLabelsOutputFile0 = - "teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw"; -const std::string &CompressedMarkerLabelsOutputFile1 = - "CT_skull_CompressedMarkerLabelsUF_8Way_512x512_32u.raw"; -const std::string &CompressedMarkerLabelsOutputFile2 = - "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u.raw"; -const std::string &CompressedMarkerLabelsOutputFile3 = - "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u.raw"; 
-const std::string &CompressedMarkerLabelsOutputFile4 = - "PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u.raw"; - -const std::string &LabelMarkersBatchOutputFile0 = - "teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw"; -const std::string &LabelMarkersBatchOutputFile1 = - "CT_skull_LabelMarkersUFBatch_8Way_512x512_32u.raw"; -const std::string &LabelMarkersBatchOutputFile2 = - "PCB_METAL_LabelMarkersUFBatch_8Way_509x335_32u.raw"; -const std::string &LabelMarkersBatchOutputFile3 = - "PCB2_LabelMarkersUFBatch_8Way_1024x683_32u.raw"; -const std::string &LabelMarkersBatchOutputFile4 = - "PCB_LabelMarkersUFBatch_8Way_1280x720_32u.raw"; - -const std::string &CompressedMarkerLabelsBatchOutputFile0 = - "teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw"; -const std::string &CompressedMarkerLabelsBatchOutputFile1 = - "CT_skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw"; -const std::string &CompressedMarkerLabelsBatchOutputFile2 = - "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u.raw"; -const std::string &CompressedMarkerLabelsBatchOutputFile3 = - "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u.raw"; -const std::string &CompressedMarkerLabelsBatchOutputFile4 = - "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u.raw"; - -int loadRaw8BitImage(Npp8u *pImage, int nWidth, int nHeight, int nImage) { - FILE *bmpFile; - size_t nSize; - - if (nImage == 0) { - if (nWidth != 512 || nHeight != 512) return -1; - const char *fileName = "teapot_512x512_8u.raw"; - const char *InputFile = sdkFindFilePath(fileName, "."); - if (InputFile == NULL) { - printf("%s file not found.. exiting\n", fileName); - exit(EXIT_WAIVED); - } - - FOPEN(bmpFile, InputFile, "rb"); - } else if (nImage == 1) { - if (nWidth != 512 || nHeight != 512) return -1; - const char *fileName = "CT_skull_512x512_8u.raw"; - const char *InputFile = sdkFindFilePath(fileName, "."); - if (InputFile == NULL) { - printf("%s file not found.. 
exiting\n", fileName); - exit(EXIT_WAIVED); - } - - FOPEN(bmpFile, InputFile, "rb"); - } else if (nImage == 2) { - if (nWidth != 509 || nHeight != 335) return -1; - const char *fileName = "PCB_METAL_509x335_8u.raw"; - const char *InputFile = sdkFindFilePath(fileName, "."); - if (InputFile == NULL) { - printf("%s file not found.. exiting\n", fileName); - exit(EXIT_WAIVED); - } - - FOPEN(bmpFile, InputFile, "rb"); - } else if (nImage == 3) { - if (nWidth != 1024 || nHeight != 683) return -1; - const char *fileName = "PCB2_1024x683_8u.raw"; - const char *InputFile = sdkFindFilePath(fileName, "."); - if (InputFile == NULL) { - printf("%s file not found.. exiting\n", fileName); - exit(EXIT_WAIVED); - } - - FOPEN(bmpFile, InputFile, "rb"); - } else if (nImage == 4) { - if (nWidth != 1280 || nHeight != 720) return -1; - const char *fileName = "PCB_1280x720_8u.raw"; - const char *InputFile = sdkFindFilePath(fileName, "."); - if (InputFile == NULL) { - printf("%s file not found.. exiting\n", fileName); - exit(EXIT_WAIVED); - } - - FOPEN(bmpFile, InputFile, "rb"); - } else { - printf("Input file load failed.\n"); - return -1; - } - - if (bmpFile == NULL) return -1; - nSize = fread(pImage, 1, nWidth * nHeight, bmpFile); - if (nSize < nWidth * nHeight) { - fclose(bmpFile); - return -1; - } - fclose(bmpFile); - - printf("Input file load succeeded.\n"); - - return 0; -} - -int main(int argc, char **argv) { - int aGenerateLabelsScratchBufferSize[NUMBER_OF_IMAGES]; - int aCompressLabelsScratchBufferSize[NUMBER_OF_IMAGES]; - - int nCompressedLabelCount = 0; - cudaError_t cudaError; - NppStatus nppStatus; - NppStreamContext nppStreamCtx; - FILE *bmpFile; - - for (int j = 0; j < NUMBER_OF_IMAGES; j++) { - pInputImageDev[j] = 0; - pInputImageHost[j] = 0; - pUFGenerateLabelsScratchBufferDev[j] = 0; - pUFCompressedLabelsScratchBufferDev[j] = 0; - pUFLabelDev[j] = 0; - pUFLabelHost[j] = 0; - } - - nppStreamCtx.hStream = 0; // The NULL stream by default, set this to whatever - // your 
stream ID is if not the NULL stream. - - cudaError = cudaGetDevice(&nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) { - printf("CUDA error: no devices supporting CUDA.\n"); - return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - } - - const NppLibraryVersion *libVer = nppGetLibVersion(); - - printf("NPP Library Version %d.%d.%d\n", libVer->major, libVer->minor, - libVer->build); - - int driverVersion, runtimeVersion; - cudaDriverGetVersion(&driverVersion); - cudaRuntimeGetVersion(&runtimeVersion); - - printf("CUDA Driver Version: %d.%d\n", driverVersion / 1000, - (driverVersion % 100) / 10); - printf("CUDA Runtime Version: %d.%d\n\n", runtimeVersion / 1000, - (runtimeVersion % 100) / 10); - - cudaError = cudaDeviceGetAttribute( - &nppStreamCtx.nCudaDevAttrComputeCapabilityMajor, - cudaDevAttrComputeCapabilityMajor, nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = cudaDeviceGetAttribute( - &nppStreamCtx.nCudaDevAttrComputeCapabilityMinor, - cudaDevAttrComputeCapabilityMinor, nppStreamCtx.nCudaDeviceId); - if (cudaError != cudaSuccess) return NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY; - - cudaError = - cudaStreamGetFlags(nppStreamCtx.hStream, &nppStreamCtx.nStreamFlags); - - cudaDeviceProp oDeviceProperties; - - cudaError = - cudaGetDeviceProperties(&oDeviceProperties, nppStreamCtx.nCudaDeviceId); - - nppStreamCtx.nMultiProcessorCount = oDeviceProperties.multiProcessorCount; - nppStreamCtx.nMaxThreadsPerMultiProcessor = - oDeviceProperties.maxThreadsPerMultiProcessor; - nppStreamCtx.nMaxThreadsPerBlock = oDeviceProperties.maxThreadsPerBlock; - nppStreamCtx.nSharedMemPerBlock = oDeviceProperties.sharedMemPerBlock; - - NppiSize oSizeROI[NUMBER_OF_IMAGES]; - - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - if (nImage == 0) { - oSizeROI[nImage].width = 512; - oSizeROI[nImage].height = 512; - } else if (nImage == 1) { - oSizeROI[nImage].width = 512; - oSizeROI[nImage].height = 512; 
- } else if (nImage == 2) { - oSizeROI[nImage].width = 509; - oSizeROI[nImage].height = 335; - } else if (nImage == 3) { - oSizeROI[nImage].width = 1024; - oSizeROI[nImage].height = 683; - } else if (nImage == 4) { - oSizeROI[nImage].width = 1280; - oSizeROI[nImage].height = 720; - } - - // NOTE: While using cudaMallocPitch() to allocate device memory for NPP can - // significantly improve the performance of many NPP functions, for UF - // function label markers generation or compression DO NOT USE - // cudaMallocPitch(). Doing so could result in incorrect output. - - cudaError = cudaMalloc( - (void **)&pInputImageDev[nImage], - oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - // For images processed with UF label markers functions ROI width and height - // for label markers generation output AND marker compression functions MUST - // be the same AND line pitch MUST be equal to ROI.width * sizeof(Npp32u). - // Also the image pointer used for label markers generation output must - // start at the same position in the image as it does in the marker - // compression function. Also note that actual input image size and ROI do - // not necessarily need to be related other than ROI being less than or - // equal to image size and image starting position does not necessarily have - // to be at pixel 0 in the input image. - - cudaError = cudaMalloc( - (void **)&pUFLabelDev[nImage], - oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - checkCudaErrors(cudaMallocHost( - &(pInputImageHost[nImage]), - oSizeROI[nImage].width * sizeof(Npp8u) * oSizeROI[nImage].height)); - checkCudaErrors(cudaMallocHost( - &(pUFLabelHost[nImage]), - oSizeROI[nImage].width * sizeof(Npp32u) * oSizeROI[nImage].height)); - - // Use UF functions throughout this sample. 
- - nppStatus = nppiLabelMarkersUFGetBufferSize_32u_C1R( - oSizeROI[nImage], &aGenerateLabelsScratchBufferSize[nImage]); - - // One at a time image processing - - cudaError = cudaMalloc((void **)&pUFGenerateLabelsScratchBufferDev[nImage], - aGenerateLabelsScratchBufferSize[nImage]); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - if (loadRaw8BitImage(pInputImageHost[nImage], - oSizeROI[nImage].width * sizeof(Npp8u), - oSizeROI[nImage].height, nImage) == 0) { - cudaError = cudaMemcpy2DAsync( - pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u), - pInputImageHost[nImage], oSizeROI[nImage].width * sizeof(Npp8u), - oSizeROI[nImage].width * sizeof(Npp8u), oSizeROI[nImage].height, - cudaMemcpyHostToDevice, nppStreamCtx.hStream); - - nppStatus = nppiLabelMarkersUF_8u32u_C1R_Ctx( - pInputImageDev[nImage], oSizeROI[nImage].width * sizeof(Npp8u), - pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - oSizeROI[nImage], nppiNormInf, - pUFGenerateLabelsScratchBufferDev[nImage], nppStreamCtx); - - if (nppStatus != NPP_SUCCESS) { - if (nImage == 0) - printf("teapot_LabelMarkersUF_8Way_512x512_32u failed.\n"); - else if (nImage == 1) - printf("CT_skull_LabelMarkersUF_8Way_512x512_32u failed.\n"); - else if (nImage == 2) - printf("PCB_METAL_LabelMarkersUF_8Way_509x335_32u failed.\n"); - else if (nImage == 3) - printf("PCB2_LabelMarkersUF_8Way_1024x683_32u failed.\n"); - else if (nImage == 4) - printf("PCB_LabelMarkersUF_8Way_1280x720_32u failed.\n"); - tearDown(); - return -1; - } - - cudaError = cudaMemcpy2DAsync( - pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height, - cudaMemcpyDeviceToHost, nppStreamCtx.hStream); - - // Wait host image read backs to complete, not necessary if no need to - // synchronize - if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) != - cudaSuccess) { - 
printf("Post label generation cudaStreamSynchronize failed\n"); - tearDown(); - return -1; - } - - if (nImage == 0) - FOPEN(bmpFile, LabelMarkersOutputFile0.c_str(), "wb"); - else if (nImage == 1) - FOPEN(bmpFile, LabelMarkersOutputFile1.c_str(), "wb"); - else if (nImage == 2) - FOPEN(bmpFile, LabelMarkersOutputFile2.c_str(), "wb"); - else if (nImage == 3) - FOPEN(bmpFile, LabelMarkersOutputFile3.c_str(), "wb"); - else if (nImage == 4) - FOPEN(bmpFile, LabelMarkersOutputFile4.c_str(), "wb"); - - if (bmpFile == NULL) return -1; - size_t nSize = 0; - for (int j = 0; j < oSizeROI[nImage].height; j++) { - nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width], - sizeof(Npp32u), oSizeROI[nImage].width, bmpFile); - } - fclose(bmpFile); - - nppStatus = nppiCompressMarkerLabelsGetBufferSize_32u_C1R( - oSizeROI[nImage].width * oSizeROI[nImage].height, - &aCompressLabelsScratchBufferSize[nImage]); - if (nppStatus != NPP_NO_ERROR) return nppStatus; - - cudaError = - cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[nImage], - aCompressLabelsScratchBufferSize[nImage]); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - nCompressedLabelCount = 0; - - nppStatus = nppiCompressMarkerLabelsUF_32u_C1IR( - pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - oSizeROI[nImage], oSizeROI[nImage].width * oSizeROI[nImage].height, - &nCompressedLabelCount, pUFCompressedLabelsScratchBufferDev[nImage]); - - if (nppStatus != NPP_SUCCESS) { - if (nImage == 0) - printf("teapot_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n"); - else if (nImage == 1) - printf( - "CT_Skull_CompressedLabelMarkersUF_8Way_512x512_32u failed.\n"); - else if (nImage == 2) - printf( - "PCB_METAL_CompressedLabelMarkersUF_8Way_509x335_32u failed.\n"); - else if (nImage == 3) - printf("PCB2_CompressedLabelMarkersUF_8Way_1024x683_32u failed.\n"); - else if (nImage == 4) - printf("PCB_CompressedLabelMarkersUF_8Way_1280x720_32u failed.\n"); - tearDown(); - return -1; - } - 
- cudaError = cudaMemcpy2DAsync( - pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height, - cudaMemcpyDeviceToHost, nppStreamCtx.hStream); - - // Wait for host image read backs to finish, not necessary if no need to - // synchronize - if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) != - cudaSuccess || - nCompressedLabelCount == 0) { - printf("Post label compression cudaStreamSynchronize failed\n"); - tearDown(); - return -1; - } - - if (nImage == 0) - FOPEN(bmpFile, CompressedMarkerLabelsOutputFile0.c_str(), "wb"); - else if (nImage == 1) - FOPEN(bmpFile, CompressedMarkerLabelsOutputFile1.c_str(), "wb"); - else if (nImage == 2) - FOPEN(bmpFile, CompressedMarkerLabelsOutputFile2.c_str(), "wb"); - else if (nImage == 3) - FOPEN(bmpFile, CompressedMarkerLabelsOutputFile3.c_str(), "wb"); - else if (nImage == 4) - FOPEN(bmpFile, CompressedMarkerLabelsOutputFile4.c_str(), "wb"); - - if (bmpFile == NULL) return -1; - nSize = 0; - for (int j = 0; j < oSizeROI[nImage].height; j++) { - nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width], - sizeof(Npp32u), oSizeROI[nImage].width, bmpFile); - } - fclose(bmpFile); - - if (nImage == 0) - printf( - "teapot_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, " - "compressed label count is %d.\n", - nCompressedLabelCount); - else if (nImage == 1) - printf( - "CT_Skull_CompressedMarkerLabelsUF_8Way_512x512_32u succeeded, " - "compressed label count is %d.\n", - nCompressedLabelCount); - else if (nImage == 2) - printf( - "PCB_METAL_CompressedMarkerLabelsUF_8Way_509x335_32u succeeded, " - "compressed label count is %d.\n", - nCompressedLabelCount); - else if (nImage == 3) - printf( - "PCB2_CompressedMarkerLabelsUF_8Way_1024x683_32u succeeded, " - "compressed label count is %d.\n", - nCompressedLabelCount); - else if (nImage == 4) - printf( - 
"PCB_CompressedMarkerLabelsUF_8Way_1280x720_32u succeeded, " - "compressed label count is %d.\n", - nCompressedLabelCount); - } - } - - // Batch image processing - - // We want to allocate scratch buffers more efficiently for batch processing - // so first we free up the scratch buffers for image 0 and reallocate them. - // This is not required but helps cudaMalloc to work more efficiently. - - cudaFree(pUFCompressedLabelsScratchBufferDev[0]); - - int nTotalBatchedUFCompressLabelsScratchBufferDevSize = 0; - - for (int k = 0; k < NUMBER_OF_IMAGES; k++) - nTotalBatchedUFCompressLabelsScratchBufferDevSize += - aCompressLabelsScratchBufferSize[k]; - - cudaError = cudaMalloc((void **)&pUFCompressedLabelsScratchBufferDev[0], - nTotalBatchedUFCompressLabelsScratchBufferDevSize); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - // Now allocate batch lists - - int nBatchImageListBytes = NUMBER_OF_IMAGES * sizeof(NppiImageDescriptor); - - cudaError = - cudaMalloc((void **)&pUFBatchSrcImageListDev, nBatchImageListBytes); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - cudaError = - cudaMalloc((void **)&pUFBatchSrcDstImageListDev, nBatchImageListBytes); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - checkCudaErrors( - cudaMallocHost((void **)&pUFBatchSrcImageListHost, nBatchImageListBytes)); - - checkCudaErrors(cudaMallocHost((void **)&pUFBatchSrcDstImageListHost, - nBatchImageListBytes)); - - NppiSize oMaxROISize = {0, 0}; - - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - pUFBatchSrcImageListHost[nImage].pData = pInputImageDev[nImage]; - pUFBatchSrcImageListHost[nImage].nStep = - oSizeROI[nImage].width * sizeof(Npp8u); - // src image oSize parameter is ignored in these NPP functions - pUFBatchSrcDstImageListHost[nImage].pData = pUFLabelDev[nImage]; - pUFBatchSrcDstImageListHost[nImage].nStep = - oSizeROI[nImage].width * sizeof(Npp32u); - pUFBatchSrcDstImageListHost[nImage].oSize = 
oSizeROI[nImage]; - if (oSizeROI[nImage].width > oMaxROISize.width) - oMaxROISize.width = oSizeROI[nImage].width; - if (oSizeROI[nImage].height > oMaxROISize.height) - oMaxROISize.height = oSizeROI[nImage].height; - } - - // Copy label generation batch lists from CPU to GPU - cudaError = cudaMemcpyAsync(pUFBatchSrcImageListDev, pUFBatchSrcImageListHost, - nBatchImageListBytes, cudaMemcpyHostToDevice, - nppStreamCtx.hStream); - if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR; - - cudaError = cudaMemcpyAsync(pUFBatchSrcDstImageListDev, - pUFBatchSrcDstImageListHost, nBatchImageListBytes, - cudaMemcpyHostToDevice, nppStreamCtx.hStream); - if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR; - - // We use 8-way neighbor search throughout this example - nppStatus = nppiLabelMarkersUFBatch_8u32u_C1R_Advanced_Ctx( - pUFBatchSrcImageListDev, pUFBatchSrcDstImageListDev, NUMBER_OF_IMAGES, - oMaxROISize, nppiNormInf, nppStreamCtx); - - if (nppStatus != NPP_SUCCESS) { - printf("LabelMarkersUFBatch_8Way_8u32u failed.\n"); - tearDown(); - return -1; - } - - // Now read back generated device images to the host - - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - cudaError = cudaMemcpy2DAsync( - pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height, - cudaMemcpyDeviceToHost, nppStreamCtx.hStream); - } - - // Wait for host image read backs to complete, not necessary if no need to - // synchronize - if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) != - cudaSuccess) { - printf("Post label generation cudaStreamSynchronize failed\n"); - tearDown(); - return -1; - } - - // Save output to files - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - if (nImage == 0) - FOPEN(bmpFile, LabelMarkersBatchOutputFile0.c_str(), "wb"); - else if (nImage == 1) - FOPEN(bmpFile, LabelMarkersBatchOutputFile1.c_str(), 
"wb"); - else if (nImage == 2) - FOPEN(bmpFile, LabelMarkersBatchOutputFile2.c_str(), "wb"); - else if (nImage == 3) - FOPEN(bmpFile, LabelMarkersBatchOutputFile3.c_str(), "wb"); - else if (nImage == 4) - FOPEN(bmpFile, LabelMarkersBatchOutputFile4.c_str(), "wb"); - - if (bmpFile == NULL) return -1; - size_t nSize = 0; - for (int j = 0; j < oSizeROI[nImage].height; j++) { - nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width], - sizeof(Npp32u), oSizeROI[nImage].width, bmpFile); - } - fclose(bmpFile); - } - -#ifdef USE_BATCHED_LABEL_COMPRESSION - - // Now allocate scratch buffer memory for batched label compression - cudaError = cudaMalloc((void **)&pUFBatchSrcDstScratchBufferListDev, - NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor)); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - cudaError = cudaMalloc((void **)&pUFBatchPerImageCompressedCountListDev, - NUMBER_OF_IMAGES * sizeof(Npp32u)); - if (cudaError != cudaSuccess) return NPP_MEMORY_ALLOCATION_ERR; - - // Allocate host side scratch buffer point and size list and initialize with - // device scratch buffer pointers - checkCudaErrors( - cudaMallocHost((void **)&pUFBatchSrcDstScratchBufferListHost, - NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor))); - - checkCudaErrors( - cudaMallocHost((void **)&pUFBatchPerImageCompressedCountListHost, - +NUMBER_OF_IMAGES * sizeof(Npp32u))); - - // Start buffer pointer at beginning of full per image buffer list sized - // pUFCompressedLabelsScratchBufferDev[0] - Npp32u *pCurUFCompressedLabelsScratchBufferDev = - reinterpret_cast(pUFCompressedLabelsScratchBufferDev[0]); - - int nMaxUFCompressedLabelsScratchBufferSize = 0; - - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - // This particular function works on in-place data and SrcDst image batch - // list has already been initialized in batched label generation function - // setup - - // Initialize each per image buffer descriptor - 
pUFBatchSrcDstScratchBufferListHost[nImage].pData = - reinterpret_cast(pCurUFCompressedLabelsScratchBufferDev); - pUFBatchSrcDstScratchBufferListHost[nImage].nBufferSize = - aCompressLabelsScratchBufferSize[nImage]; - - if (aCompressLabelsScratchBufferSize[nImage] > - nMaxUFCompressedLabelsScratchBufferSize) - nMaxUFCompressedLabelsScratchBufferSize = - aCompressLabelsScratchBufferSize[nImage]; - - // Offset buffer pointer to next per image buffer - Npp8u *pTempBuffer = - reinterpret_cast(pCurUFCompressedLabelsScratchBufferDev); - pTempBuffer += aCompressLabelsScratchBufferSize[nImage]; - pCurUFCompressedLabelsScratchBufferDev = - reinterpret_cast((void *)(pTempBuffer)); - } - - // Copy compression batch scratch buffer list from CPU to GPU - cudaError = cudaMemcpyAsync(pUFBatchSrcDstScratchBufferListDev, - pUFBatchSrcDstScratchBufferListHost, - NUMBER_OF_IMAGES * sizeof(NppiBufferDescriptor), - cudaMemcpyHostToDevice, nppStreamCtx.hStream); - if (cudaError != cudaSuccess) return NPP_MEMCPY_ERROR; - - nppStatus = nppiCompressMarkerLabelsUFBatch_32u_C1IR_Advanced_Ctx( - pUFBatchSrcDstImageListDev, pUFBatchSrcDstScratchBufferListDev, - pUFBatchPerImageCompressedCountListDev, NUMBER_OF_IMAGES, oMaxROISize, - nMaxUFCompressedLabelsScratchBufferSize, nppStreamCtx); - if (nppStatus != NPP_SUCCESS) { - printf("BatchCompressedLabelMarkersUF_8Way_32u failed.\n"); - tearDown(); - return -1; - } - - // Copy output compressed label images back to host - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - cudaError = cudaMemcpy2DAsync( - pUFLabelHost[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - pUFLabelDev[nImage], oSizeROI[nImage].width * sizeof(Npp32u), - oSizeROI[nImage].width * sizeof(Npp32u), oSizeROI[nImage].height, - cudaMemcpyDeviceToHost, nppStreamCtx.hStream); - } - - // Wait for host image read backs to complete, not necessary if no need to - // synchronize - if ((cudaError = cudaStreamSynchronize(nppStreamCtx.hStream)) != - cudaSuccess) { - 
printf("Post label compression cudaStreamSynchronize failed\n"); - tearDown(); - return -1; - } - - // Save compressed label images into files - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - if (nImage == 0) - FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile0.c_str(), "wb"); - else if (nImage == 1) - FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile1.c_str(), "wb"); - else if (nImage == 2) - FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile2.c_str(), "wb"); - else if (nImage == 3) - FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile3.c_str(), "wb"); - else if (nImage == 4) - FOPEN(bmpFile, CompressedMarkerLabelsBatchOutputFile4.c_str(), "wb"); - - if (bmpFile == NULL) return -1; - size_t nSize = 0; - for (int j = 0; j < oSizeROI[nImage].height; j++) { - nSize += fwrite(&pUFLabelHost[nImage][j * oSizeROI[nImage].width], - sizeof(Npp32u), oSizeROI[nImage].width, bmpFile); - } - fclose(bmpFile); - } - - // Read back per image compressed label count. - cudaError = cudaMemcpyAsync(pUFBatchPerImageCompressedCountListHost, - pUFBatchPerImageCompressedCountListDev, - NUMBER_OF_IMAGES * sizeof(Npp32u), - cudaMemcpyDeviceToHost, nppStreamCtx.hStream); - if (cudaError != cudaSuccess) { - tearDown(); - return NPP_MEMCPY_ERROR; - } - - // Wait for host read back to complete - cudaError = cudaStreamSynchronize(nppStreamCtx.hStream); - - printf("\n\n"); - - for (int nImage = 0; nImage < NUMBER_OF_IMAGES; nImage++) { - if (nImage == 0) - printf( - "teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, " - "compressed label count is %d.\n", - pUFBatchPerImageCompressedCountListHost[nImage]); - else if (nImage == 1) - printf( - "CT_Skull_CompressedMarkerLabelsUFBatch_8Way_512x512_32u succeeded, " - "compressed label count is %d.\n", - pUFBatchPerImageCompressedCountListHost[nImage]); - else if (nImage == 2) - printf( - "PCB_METAL_CompressedMarkerLabelsUFBatch_8Way_509x335_32u succeeded, " - "compressed label count is %d.\n", - 
pUFBatchPerImageCompressedCountListHost[nImage]); - else if (nImage == 3) - printf( - "PCB2_CompressedMarkerLabelsUFBatch_8Way_1024x683_32u succeeded, " - "compressed label count is %d.\n", - pUFBatchPerImageCompressedCountListHost[nImage]); - else if (nImage == 4) - printf( - "PCB_CompressedMarkerLabelsUFBatch_8Way_1280x720_32u succeeded, " - "compressed label count is %d.\n", - pUFBatchPerImageCompressedCountListHost[nImage]); - } - -#endif // USE_BATCHED_LABEL_COMPRESSION - - tearDown(); - - return 0; -} diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw deleted file mode 100644 index 5c387eea..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUFBatch_8Way_512x512_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw deleted file mode 100644 index 5c387eea..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_CompressedMarkerLabelsUF_8Way_512x512_32u.raw and /dev/null differ diff --git a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw deleted file mode 100644 index d8964918..00000000 Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUFBatch_8Way_512x512_32u.raw and /dev/null differ diff --git 
a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw b/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw
deleted file mode 100644
index d8964918..00000000
Binary files a/Samples/4_CUDA_Libraries/batchedLabelMarkersAndLabelCompressionNPP/teapot_LabelMarkersUF_8Way_512x512_32u.raw and /dev/null differ
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt
new file mode 100644
index 00000000..cba9c714
--- /dev/null
+++ b/Samples/4_CUDA_Libraries/boxFilterNPP/CMakeLists.txt
@@ -0,0 +1,50 @@
+# boxFilterNPP sample (NPP box filter).
+# FreeImage is an optional dependency; when it is not found the sample is
+# skipped with a status message instead of failing the configure step.
+
+# Source file
+set(SRC_FILES
+    boxFilterNPP.cpp
+)
+
+find_package(FreeImage)
+
+if(FreeImage_FOUND)
+    # Add target for boxFilterNPP
+    add_executable(boxFilterNPP ${SRC_FILES})
+    set_target_properties(boxFilterNPP PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+    # Include directories are attached to the target (as absolute paths)
+    # rather than to the whole directory scope.
+    target_include_directories(boxFilterNPP PRIVATE
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../../Common
+        ${CMAKE_CURRENT_SOURCE_DIR}/../../../Common/UtilNPP
+        ${CUDAToolkit_INCLUDE_DIRS}
+        ${FreeImage_INCLUDE_DIRS}
+    )
+
+    target_link_libraries(boxFilterNPP PRIVATE
+        CUDA::nppc
+        CUDA::nppisu
+        CUDA::nppif
+        CUDA::cudart
+        ${FreeImage_LIBRARIES}
+    )
+
+    # Copy data files to output directory.
+    # "cmake -E copy_if_different" does not expand wildcards itself, so the
+    # *.pgm inputs are resolved to a concrete file list at configure time.
+    file(GLOB BOXFILTERNPP_DATA_FILES CONFIGURE_DEPENDS
+        ${CMAKE_CURRENT_SOURCE_DIR}/*.pgm
+    )
+    if(BOXFILTERNPP_DATA_FILES)
+        add_custom_command(TARGET boxFilterNPP POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            ${BOXFILTERNPP_DATA_FILES}
+            ${CMAKE_CURRENT_BINARY_DIR}
+            VERBATIM
+        )
+    endif()
+else()
+    message(STATUS "FreeImage not found - will not build sample 'boxFilterNPP'")
+endif()
diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile b/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile
deleted file mode 100644
index 8b531a63..00000000
--- a/Samples/4_CUDA_Libraries/boxFilterNPP/Makefile
+++ /dev/null
@@ -1,378 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -################################################################################ -# -# Makefile project only supported on Mac OS X and Linux Platforms) -# -################################################################################ - -# Location of the CUDA Toolkit -CUDA_PATH ?= /usr/local/cuda - -############################## -# start deprecated interface # -############################## -ifeq ($(x86_64),1) - $(info WARNING - x86_64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=x86_64 instead) - TARGET_ARCH ?= x86_64 -endif -ifeq ($(ARMv7),1) - $(info WARNING - ARMv7 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=armv7l instead) - TARGET_ARCH ?= armv7l -endif -ifeq ($(aarch64),1) - $(info WARNING - aarch64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=aarch64 instead) - TARGET_ARCH ?= aarch64 -endif -ifeq ($(ppc64le),1) - $(info WARNING - ppc64le variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=ppc64le instead) - TARGET_ARCH ?= ppc64le -endif -ifneq ($(GCC),) - $(info WARNING - GCC variable has been deprecated) - $(info WARNING - please use HOST_COMPILER=$(GCC) instead) - HOST_COMPILER ?= $(GCC) -endif -ifneq ($(abi),) - $(error ERROR - abi variable has been removed) -endif -############################ -# end deprecated interface # -############################ - -# architecture -HOST_ARCH := $(shell uname -m) -TARGET_ARCH ?= $(HOST_ARCH) -ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l)) - ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le)) - TARGET_SIZE := 64 - else ifneq (,$(filter $(TARGET_ARCH),armv7l)) - TARGET_SIZE := 32 - endif - else - TARGET_SIZE := $(shell getconf LONG_BIT) - endif -else - $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!) -endif - -# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now. 
-ifeq ($(HOST_ARCH),aarch64) - ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null)) - HOST_ARCH := sbsa - TARGET_ARCH := sbsa - endif -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le)) - $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!) - endif -endif - -# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l -ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32) - TARGET_ARCH = armv7l -endif - -# operating system -HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") -TARGET_OS ?= $(HOST_OS) -ifeq (,$(filter $(TARGET_OS),linux darwin qnx android)) - $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!) -endif - -# host compiler -ifdef HOST_COMPILER - CUSTOM_HOST_COMPILER = 1 -endif - -ifeq ($(TARGET_OS),darwin) - ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' 
-f1` \>= 5),1) - HOST_COMPILER ?= clang++ - endif -else ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - ifeq ($(TARGET_OS),linux) - HOST_COMPILER ?= arm-linux-gnueabihf-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++ - else ifeq ($(TARGET_OS),android) - HOST_COMPILER ?= arm-linux-androideabi-g++ - endif - else ifeq ($(TARGET_ARCH),aarch64) - ifeq ($(TARGET_OS), linux) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++ - else ifeq ($(TARGET_OS), android) - HOST_COMPILER ?= aarch64-linux-android-clang++ - endif - else ifeq ($(TARGET_ARCH),sbsa) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_ARCH),ppc64le) - HOST_COMPILER ?= powerpc64le-linux-gnu-g++ - endif -endif -HOST_COMPILER ?= g++ -NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) - -# internal flags -NVCCFLAGS := -m${TARGET_SIZE} -CCFLAGS := -LDFLAGS := - -# build flags - -# Link flag for customized HOST_COMPILER with gcc realpath -GCC_PATH := $(shell which gcc) -ifeq ($(CUSTOM_HOST_COMPILER),1) - ifneq ($(filter /%,$(HOST_COMPILER)),) - ifneq ($(findstring gcc,$(HOST_COMPILER)),) - ifneq ($(GCC_PATH),$(HOST_COMPILER)) - LDFLAGS += -lstdc++ - endif - endif - endif -endif - -ifeq ($(TARGET_OS),darwin) - LDFLAGS += -rpath $(CUDA_PATH)/lib - CCFLAGS += -arch $(HOST_ARCH) -else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - LDFLAGS += 
--dynamic-linker=/lib/ld-linux-armhf.so.3 - CCFLAGS += -mfloat-abi=hard -else ifeq ($(TARGET_OS),android) - LDFLAGS += -pie - CCFLAGS += -fpie -fpic -fexceptions -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu - LDFLAGS += --unresolved-symbols=ignore-in-shared-libs - CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm - CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - NVCCFLAGS += -D_QNX_SOURCE - NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le - CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu - LDFLAGS += -lsocket - LDFLAGS += -L/usr/lib/aarch64-qnx-gnu - CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu" - ifdef TARGET_OVERRIDE - LDFLAGS += -lslog2 - endif - - ifneq ($(TARGET_FS),) - LDFLAGS += -L$(TARGET_FS)/usr/lib - CCFLAGS += 
"-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib" - LDFLAGS += -L$(TARGET_FS)/usr/libnvidia - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia" - CCFLAGS += -I$(TARGET_FS)/../include - endif - endif -endif - -ifdef TARGET_OVERRIDE # cuda toolkit targets override - NVCCFLAGS += -target-dir $(TARGET_OVERRIDE) -endif - -# Install directory of different arch -CUDA_INSTALL_TARGET_DIR := -ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux) - CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx) - CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/ -else ifeq ($(TARGET_ARCH),ppc64le) - CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/ -endif - -# Debug build flags -ifeq ($(dbg),1) - NVCCFLAGS += -g -G - BUILD_TYPE := debug -else - BUILD_TYPE := release -endif - -ALL_CCFLAGS := -ALL_CCFLAGS += $(NVCCFLAGS) -ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) - -SAMPLE_ENABLED := 1 - -# This sample is not supported on QNX -ifeq ($(TARGET_OS),qnx) - $(info >>> WARNING - boxFilterNPP is not supported on QNX - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -ALL_LDFLAGS := -ALL_LDFLAGS += $(ALL_CCFLAGS) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) - -# Common includes and paths for CUDA -INCLUDES := -I../../../Common -LIBRARIES := - 
-################################################################################ - -# Gencode arguments -SMS ?= - -ifeq ($(GENCODE_FLAGS),) -# Generate SASS code for each SM architecture listed in $(SMS) -$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) - -ifeq ($(SMS),) -ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -# Generate PTX code from SM 53 -GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53 -else -# Generate PTX code from SM 50 -GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50 -endif -endif - -# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility -HIGHEST_SM := $(lastword $(sort $(SMS))) -ifneq ($(HIGHEST_SM),) -GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) -endif -endif - -ALL_CCFLAGS += --threads 0 --std=c++11 - -INCLUDES += -I../../../Common/UtilNPP - -LIBRARIES += -lnppisu_static -lnppif_static -lnppc_static -lculibos -lfreeimage - -# Attempt to compile a minimal application linked against FreeImage. If a.out exists, FreeImage is properly set up. -$(shell echo "#include \"FreeImage.h\"" > test.c; echo "int main() { return 0; }" >> test.c ; $(NVCC) $(ALL_CCFLAGS) $(INCLUDES) $(ALL_LDFLAGS) $(LIBRARIES) -l freeimage test.c) -FREEIMAGE := $(shell find a.out 2>/dev/null) -$(shell rm a.out test.c 2>/dev/null) - -ifeq ("$(FREEIMAGE)","") -$(info >>> WARNING - FreeImage is not set up correctly. Please ensure FreeImage is set up correctly. 
<<<) -SAMPLE_ENABLED := 0 -endif - -ifeq ($(SAMPLE_ENABLED),0) -EXEC ?= @echo "[@]" -endif - -################################################################################ - -# Target rules -all: build - -build: boxFilterNPP - -check.deps: -ifeq ($(SAMPLE_ENABLED),0) - @echo "Sample will be waived due to the above missing dependencies" -else - @echo "Sample is ready - all dependencies have been met" -endif - -boxFilterNPP.o:boxFilterNPP.cpp - $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -boxFilterNPP: boxFilterNPP.o - $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES) - $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - -run: build - $(EXEC) ./boxFilterNPP - -testrun: build - -clean: - rm -f boxFilterNPP boxFilterNPP.o - rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/boxFilterNPP - -clobber: clean diff --git a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml b/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml deleted file mode 100644 index 9d8e2b3f..00000000 --- a/Samples/4_CUDA_Libraries/boxFilterNPP/NsightEclipse.xml +++ /dev/null @@ -1,95 +0,0 @@ - - - - boxFilterNPP - - cudaRuntimeGetVersion - cudaDriverGetVersion - - - whole - true - - ./teapot512.pgm - - - ../../../Common/UtilNPP - ../../../Common/FreeImage/Dist/x64 - ./ - ../ - ../../../Common - - - Performance Strategies - Image Processing - NPP Library - - - CUDA - NPP - Image Processing - box filter - - - nppisu_static - nppif_static - nppc_static - culibos - freeimage - - - - true - boxFilterNPP.cpp - - FreeImage - NPP - - - 1:CUDA Basic Topics - 1:Performance Strategies - 2:Image Processing - 2:Computer Vision - - sm50 - sm52 - sm53 - sm60 - sm61 - sm70 - sm72 - sm75 - sm80 - sm86 - sm87 - sm89 - sm90 - - - x86_64 - linux - - - windows7 - - - x86_64 - macosx - - - arm - - - sbsa - - - ppc64le - linux - - - - all - - Box Filter with 
NPP - exe -
diff --git a/Samples/CMakeLists.txt b/Samples/CMakeLists.txt
index 34d905d3..f06523ae 100644
--- a/Samples/CMakeLists.txt
+++ b/Samples/CMakeLists.txt
@@ -2,5 +2,6 @@ add_subdirectory(0_Introduction)
 add_subdirectory(1_Utilities)
 add_subdirectory(2_Concepts_and_Techniques)
 add_subdirectory(3_CUDA_Features)
+add_subdirectory(4_CUDA_Libraries)
 add_subdirectory(6_Performance)
 add_subdirectory(7_libNVVM)
diff --git a/cmake/Modules/FindFreeImage.cmake b/cmake/Modules/FindFreeImage.cmake
new file mode 100644
index 00000000..b03e99eb
--- /dev/null
+++ b/cmake/Modules/FindFreeImage.cmake
@@ -0,0 +1,45 @@
+# FindFreeImage
+# -------------
+# Locate the FreeImage library and its header.
+#
+# Result variables:
+#   FreeImage_FOUND         - TRUE when both the header and the library were found
+#   FreeImage_INCLUDE_DIRS  - include directories (set only when found)
+#   FreeImage_LIBRARIES     - libraries to link (set only when found)
+#
+# Imported target (created only when found):
+#   FreeImage::FreeImage
+#
+# Cache variables:
+#   FreeImage_INCLUDE_DIR, FreeImage_LIBRARY
+
+find_path(FreeImage_INCLUDE_DIR
+    NAMES freeimage.h FreeImage.h
+    PATHS /usr/include /usr/local/include
+)
+
+find_library(FreeImage_LIBRARY
+    NAMES freeimage
+    PATHS /usr/lib /usr/local/lib
+)
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(FreeImage
+    REQUIRED_VARS FreeImage_LIBRARY FreeImage_INCLUDE_DIR
+)
+
+# Keep the raw search results out of the default cmake-gui/ccmake view.
+mark_as_advanced(FreeImage_INCLUDE_DIR FreeImage_LIBRARY)
+
+if(FreeImage_FOUND)
+    set(FreeImage_LIBRARIES ${FreeImage_LIBRARY})
+    set(FreeImage_INCLUDE_DIRS ${FreeImage_INCLUDE_DIR})
+
+    if(NOT TARGET FreeImage::FreeImage)
+        add_library(FreeImage::FreeImage UNKNOWN IMPORTED)
+        set_target_properties(FreeImage::FreeImage PROPERTIES
+            IMPORTED_LOCATION "${FreeImage_LIBRARY}"
+            INTERFACE_INCLUDE_DIRECTORIES "${FreeImage_INCLUDE_DIR}"
+        )
+    endif()
+endif()