mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-10 18:22:11 +01:00
Update ptxjit
This commit is contained in:
parent
9bebdf7ef4
commit
2f826e305a
@ -18,7 +18,7 @@ add_subdirectory(immaTensorCoreGemm)
|
||||
add_subdirectory(jacobiCudaGraphs)
|
||||
add_subdirectory(memMapIPCDrv)
|
||||
add_subdirectory(newdelete)
|
||||
#add_subdirectory(ptxjit)
|
||||
add_subdirectory(ptxjit)
|
||||
add_subdirectory(simpleCudaGraphs)
|
||||
add_subdirectory(tf32TensorCoreGemm)
|
||||
add_subdirectory(warpAggregatedAtomicsCG)
|
||||
|
37
Samples/3_CUDA_Features/ptxjit/CMakeLists.txt
Normal file
37
Samples/3_CUDA_Features/ptxjit/CMakeLists.txt
Normal file
@ -0,0 +1,37 @@
|
||||
# Include directories and libraries
|
||||
include_directories(../../../Common)
|
||||
|
||||
# Source file
|
||||
set(SRC_FILES
|
||||
ptxjit.cpp
|
||||
)
|
||||
|
||||
# Add target for ptxjit
|
||||
add_executable(ptxjit ${SRC_FILES})
|
||||
set_target_properties(ptxjit PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
||||
|
||||
target_include_directories(ptxjit PRIVATE
|
||||
${CUDAToolkit_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
target_link_libraries(ptxjit PUBLIC
|
||||
CUDA::cuda_driver
|
||||
CUDA::cudart
|
||||
)
|
||||
|
||||
set(CUDA_PTX_FILE "${CMAKE_CURRENT_BINARY_DIR}/ptxjit_kernel64.ptx")
|
||||
set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ptxjit_kernel.cu")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CUDA_PTX_FILE}
|
||||
COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} ${GENCODE_FLAGS} -o ${CUDA_PTX_FILE} -ptx ${CUDA_KERNEL_SOURCE}
|
||||
DEPENDS ${CUDA_KERNEL_SOURCE}
|
||||
COMMENT "Building CUDA PTX: ${CUDA_PTX_FILE}"
|
||||
)
|
||||
|
||||
|
||||
# Create a dummy target for fatbin generation
|
||||
add_custom_target(generate_ptxjit_ptx ALL DEPENDS ${CUDA_PTX_FILE})
|
||||
|
||||
# Ensure matrixMulDrv depends on the fatbin
|
||||
add_dependencies(matrixMulDrv generate_ptxjit_ptx)
|
@ -1,430 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
NVCCFLAGS += -D_QNX_SOURCE
|
||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
||||
ifdef TARGET_OVERRIDE
|
||||
LDFLAGS += -lslog2
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
||||
CCFLAGS += -I$(TARGET_FS)/../include
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
UBUNTU = $(shell lsb_release -i -s 2>/dev/null | grep -i ubuntu)
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
PTX_FILE := ptxjit_kernel${TARGET_SIZE}.ptx
|
||||
|
||||
# Gencode arguments
|
||||
SMS ?=
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
ifeq ($(SMS),)
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
# Generate PTX code from SM 53
|
||||
GENCODE_FLAGS += -gencode arch=compute_53,code=compute_53
|
||||
else
|
||||
# Generate PTX code from SM 50
|
||||
GENCODE_FLAGS += -gencode arch=compute_50,code=compute_50
|
||||
endif
|
||||
endif
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ALL_LDFLAGS += -Xcompiler -F/Library/Frameworks -Xlinker -framework -Xlinker CUDA
|
||||
else
|
||||
ifeq ($(TARGET_ARCH),x86_64)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/lib64/stubs
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib/stubs
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/targets/x86_64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-gnueabihf/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/sbsa-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/armv7-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-linux-androideabi/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ARMv7-linux-QNX/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/aarch64-qnx/lib/stubs
|
||||
ifdef TARGET_OVERRIDE
|
||||
CUDA_SEARCH_PATH := $(CUDA_PATH)/targets/$(TARGET_OVERRIDE)/lib/stubs
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH ?= $(CUDA_PATH)/targets/ppc64le-linux/lib/stubs
|
||||
endif
|
||||
|
||||
ifeq ($(HOST_ARCH),ppc64le)
|
||||
CUDA_SEARCH_PATH += $(CUDA_PATH)/lib64/stubs
|
||||
endif
|
||||
|
||||
CUDALIB ?= $(shell find -L $(CUDA_SEARCH_PATH) -maxdepth 1 -name libcuda.so 2> /dev/null)
|
||||
ifeq ("$(CUDALIB)","")
|
||||
$(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed. Please re-install the driver. <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
else
|
||||
CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
|
||||
LIBRARIES += -L$(CUDALIB) -lcuda
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += --threads 0 --std=c++11
|
||||
|
||||
LIBRARIES += -lcudart_static
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build: ptxjit $(PTX_FILE)
|
||||
|
||||
check.deps:
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
@echo "Sample will be waived due to the above missing dependencies"
|
||||
else
|
||||
@echo "Sample is ready - all dependencies have been met"
|
||||
endif
|
||||
|
||||
$(PTX_FILE): ptxjit_kernel.cu
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -ptx $<
|
||||
$(EXEC) mkdir -p data
|
||||
$(EXEC) cp -f $@ ./data
|
||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
$(EXEC) cp -f $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
ptxjit.o:ptxjit.cpp
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
ptxjit: ptxjit.o
|
||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
run: build
|
||||
$(EXEC) ./ptxjit
|
||||
|
||||
testrun: build
|
||||
|
||||
clean:
|
||||
rm -f ptxjit ptxjit.o data/$(PTX_FILE) $(PTX_FILE)
|
||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/ptxjit
|
||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/$(PTX_FILE)
|
||||
|
||||
clobber: clean
|
@ -1,88 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>ptxjit</name>
|
||||
<cuda_api_list>
|
||||
<driver>cuLaunchKernel</driver>
|
||||
<driver>cuModuleLoadData</driver>
|
||||
<driver>cuLinkCreate</driver>
|
||||
<driver>cuModuleGetFunction</driver>
|
||||
<driver>cuLinkAddData</driver>
|
||||
<driver>cuModuleUnload</driver>
|
||||
<driver>cuLinkDestroy</driver>
|
||||
<driver>cuLinkComplete</driver>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaDriverGetVersion</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample uses the Driver API to just-in-time compile (JIT) a Kernel from PTX code. Additionally, this sample demonstrates the seamless interoperability capability of the CUDA Runtime and CUDA Driver API calls. For CUDA 5.5, this sample shows how to use cuLink* functions to link PTX assembly using the CUDA driver at runtime.]]></description>
|
||||
<devicecompilation>separate</devicecompilation>
|
||||
<fallback_min_ptx>true</fallback_min_ptx>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../../Common</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="advanced">CUDA Driver API</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUDA</keyword>
|
||||
<keyword>Advanced Topics</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
<library os="linux">cuda</library>
|
||||
<library framework="true" os="macosx">CUDA</library>
|
||||
<library>cudart_static</library>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>ptxjit.cpp</primary_file>
|
||||
<ptx>ptxjit_kernel</ptx>
|
||||
<scopes>
|
||||
<scope>1:CUDA Advanced Topics</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm53</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
<env>
|
||||
<platform>windows7</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>x86_64</arch>
|
||||
<platform>macosx</platform>
|
||||
</env>
|
||||
<env>
|
||||
<arch>arm</arch>
|
||||
</env>
|
||||
<env>
|
||||
<arch>sbsa</arch>
|
||||
</env>
|
||||
<env>
|
||||
<arch>ppc64le</arch>
|
||||
<platform>linux</platform>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>PTX Just-in-Time compilation</title>
|
||||
<type>exe</type>
|
||||
</entry>
|
Loading…
x
Reference in New Issue
Block a user