Merge pull request #6 from jnbntz/aarch64_guarding

Changes for building on aarch64, specifically the Jetson Orin Nano
This commit is contained in:
Rob Armstrong 2025-01-09 10:06:09 -08:00 committed by GitHub
commit 450038ea73
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 189 additions and 120 deletions

View File

@ -22,25 +22,29 @@ find_package(GLUT)
# Source file
if(${OpenGL_FOUND})
if (${GLUT_FOUND})
# Add target for simpleCUDA2GL
add_executable(simpleCUDA2GL simpleCUDA2GL.cu main.cpp)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample simpleCUDA2GL - not supported on aarch64")
else()
# Add target for simpleCUDA2GL
add_executable(simpleCUDA2GL simpleCUDA2GL.cu main.cpp)
target_compile_options(simpleCUDA2GL PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(simpleCUDA2GL PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(simpleCUDA2GL PRIVATE cxx_std_17 cuda_std_17)
target_compile_features(simpleCUDA2GL PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(simpleCUDA2GL PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(simpleCUDA2GL PUBLIC
${OPENGL_INCLUDE_DIR}
${CUDAToolkit_INCLUDE_DIRS}
${GLUT_INCLUDE_DIRS}
)
set_target_properties(simpleCUDA2GL PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(simpleCUDA2GL PUBLIC
${OPENGL_INCLUDE_DIR}
${CUDAToolkit_INCLUDE_DIRS}
${GLUT_INCLUDE_DIRS}
)
target_link_libraries(simpleCUDA2GL
${OPENGL_LIBRARIES}
${GLUT_LIBRARIES}
)
endif()
else()
message(STATUS "GLUT not found - will not build sample 'simpleCUDA2GL'")
endif()

View File

@ -16,12 +16,16 @@ endif()
# Include directories and libraries
include_directories(../../../Common)
# Source file
# Add target for simpleIPC
add_executable(simpleIPC simpleIPC.cu ../../../Common/helper_multiprocess.cpp)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample simpleIPC - not supported on aarch64")
else()
# Source file
# Add target for simpleIPC
add_executable(simpleIPC simpleIPC.cu ../../../Common/helper_multiprocess.cpp)
target_compile_options(simpleIPC PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(simpleIPC PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(simpleIPC PRIVATE cxx_std_17 cuda_std_17)
target_compile_features(simpleIPC PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(simpleIPC PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(simpleIPC PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()

View File

@ -18,10 +18,14 @@ include_directories(../../../Common)
# Source file
# Add target for simpleP2P
add_executable(simpleP2P simpleP2P.cu)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample simpleP2P - not supported on aarch64")
else()
add_executable(simpleP2P simpleP2P.cu)
target_compile_options(simpleP2P PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(simpleP2P PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(simpleP2P PRIVATE cxx_std_17 cuda_std_17)
target_compile_features(simpleP2P PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(simpleP2P PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(simpleP2P PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()

View File

@ -16,17 +16,21 @@ endif()
# Include directories and libraries
include_directories(../../../Common)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
# Source file
# Add target for systemWideAtomics
add_executable(systemWideAtomics systemWideAtomics.cu)
target_compile_options(systemWideAtomics PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(systemWideAtomics PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(systemWideAtomics PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample systemWideAtomics - not supported on aarch64")
else()
message(STATUS "Will not build sample systemWideAtomics - requires Linux OS")
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
# Source file
# Add target for systemWideAtomics
add_executable(systemWideAtomics systemWideAtomics.cu)
target_compile_options(systemWideAtomics PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(systemWideAtomics PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(systemWideAtomics PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
else()
message(STATUS "Will not build sample systemWideAtomics - requires Linux OS")
endif()
endif()

View File

@ -18,33 +18,37 @@ include_directories(../../../Common)
# Source file
# Add target for vectorAddMMAP
add_executable(vectorAddMMAP vectorAddMMAP.cpp multidevicealloc_memmap.cpp)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample vectorAddMMAP - not supported on aarch64")
else()
add_executable(vectorAddMMAP vectorAddMMAP.cpp multidevicealloc_memmap.cpp)
target_compile_options(vectorAddMMAP PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(vectorAddMMAP PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(vectorAddMMAP PRIVATE cxx_std_17 cuda_std_17)
target_compile_features(vectorAddMMAP PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(vectorAddMMAP PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(vectorAddMMAP PRIVATE
${CUDAToolkit_INCLUDE_DIRS}
)
set_target_properties(vectorAddMMAP PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_include_directories(vectorAddMMAP PRIVATE
${CUDAToolkit_INCLUDE_DIRS}
)
target_link_libraries(vectorAddMMAP PUBLIC
CUDA::cuda_driver
)
target_link_libraries(vectorAddMMAP PUBLIC
CUDA::cuda_driver
)
set(CUDA_FATBIN_FILE "${CMAKE_CURRENT_BINARY_DIR}/vectorAdd_kernel64.fatbin")
set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/vectorAdd_kernel.cu")
set(CUDA_FATBIN_FILE "${CMAKE_CURRENT_BINARY_DIR}/vectorAdd_kernel64.fatbin")
set(CUDA_KERNEL_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/vectorAdd_kernel.cu")
add_custom_command(
OUTPUT ${CUDA_FATBIN_FILE}
COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
DEPENDS ${CUDA_KERNEL_SOURCE}
COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
)
add_custom_command(
OUTPUT ${CUDA_FATBIN_FILE}
COMMAND ${CMAKE_CUDA_COMPILER} ${INCLUDES} ${ALL_CCFLAGS} ${GENCODE_FLAGS} -o ${CUDA_FATBIN_FILE} -fatbin ${CUDA_KERNEL_SOURCE}
DEPENDS ${CUDA_KERNEL_SOURCE}
COMMENT "Building CUDA fatbin: ${CUDA_FATBIN_FILE}"
)
# Create a dummy target for fatbin generation
add_custom_target(generate_fatbin_vectorAddMMAP ALL DEPENDS ${CUDA_FATBIN_FILE})
# Create a dummy target for fatbin generation
add_custom_target(generate_fatbin_vectorAddMMAP ALL DEPENDS ${CUDA_FATBIN_FILE})
# Ensure vectorAddMMAP depends on the fatbin
add_dependencies(vectorAddMMAP generate_fatbin_vectorAddMMAP)
# Ensure vectorAddMMAP depends on the fatbin
add_dependencies(vectorAddMMAP generate_fatbin_vectorAddMMAP)
endif()

View File

@ -16,12 +16,16 @@ endif()
# Include directories and libraries
include_directories(../../../Common)
# Source file
# Add target for topologyQuery
add_executable(topologyQuery topologyQuery.cu)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample topologyQuery - not supported on aarch64")
else()
# Source file
# Add target for topologyQuery
add_executable(topologyQuery topologyQuery.cu)
target_compile_options(topologyQuery PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(topologyQuery PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(topologyQuery PRIVATE cxx_std_17 cuda_std_17)
target_compile_features(topologyQuery PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(topologyQuery PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(topologyQuery PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()

View File

@ -19,17 +19,21 @@ include_directories(../../../Common)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
# Source file
# Add target for streamOrderedAllocationIPC
add_executable(streamOrderedAllocationIPC streamOrderedAllocationIPC.cu ../../../Common/helper_multiprocess.cpp)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample streamOrderedAllocationIPC - not supported on aarch64")
else()
add_executable(streamOrderedAllocationIPC streamOrderedAllocationIPC.cu ../../../Common/helper_multiprocess.cpp)
target_compile_options(streamOrderedAllocationIPC PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(streamOrderedAllocationIPC PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(streamOrderedAllocationIPC PRIVATE cxx_std_17 cuda_std_17)
target_compile_features(streamOrderedAllocationIPC PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(streamOrderedAllocationIPC PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(streamOrderedAllocationIPC PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(streamOrderedAllocationIPC PUBLIC
CUDA::cuda_driver
)
target_link_libraries(streamOrderedAllocationIPC PUBLIC
CUDA::cuda_driver
)
endif()
else()
message(STATUS "Will not build sample streamOrderedAllocationIPC - requires Linux OS")
endif()

View File

@ -19,13 +19,17 @@ include_directories(../../../Common)
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
# Source file
# Add target for StreamPriorities
add_executable(StreamPriorities StreamPriorities.cu)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample streamPriorities - not supported on aarch64")
else()
add_executable(StreamPriorities StreamPriorities.cu)
target_compile_options(StreamPriorities PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_options(StreamPriorities PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(StreamPriorities PRIVATE cxx_std_17 cuda_std_17)
target_compile_features(StreamPriorities PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(StreamPriorities PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
set_target_properties(StreamPriorities PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
endif()
else()
message(STATUS "Will not build sample StreamPriorities - requires Linux OS")
endif()

View File

@ -8,7 +8,12 @@ find_package(CUDAToolkit REQUIRED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 87 90)
else()
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
endif()

View File

@ -8,7 +8,12 @@ find_package(CUDAToolkit REQUIRED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 87 90)
else()
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
endif()

View File

@ -8,7 +8,12 @@ find_package(CUDAToolkit REQUIRED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 87 90)
else()
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
endif()

View File

@ -8,7 +8,12 @@ find_package(CUDAToolkit REQUIRED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 87 90)
else()
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
endif()

View File

@ -8,7 +8,12 @@ find_package(CUDAToolkit REQUIRED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
set(CMAKE_CUDA_ARCHITECTURES 61 70 75 80 86 87 90)
else()
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
endif()

View File

@ -16,28 +16,34 @@ endif()
# Include directories and libraries
include_directories(../../../Common)
# Source file
# Add target for nvJPEG
add_executable(nvJPEG nvJPEG.cpp)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample nvJPEG - not supported on aarch64")
else()
target_compile_options(nvJPEG PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
# Source file
# Add target for nvJPEG
add_executable(nvJPEG nvJPEG.cpp)
target_compile_features(nvJPEG PRIVATE cxx_std_17 cuda_std_17)
target_compile_options(nvJPEG PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
set_target_properties(nvJPEG PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_compile_features(nvJPEG PRIVATE cxx_std_17 cuda_std_17)
target_include_directories(nvJPEG PRIVATE
${CUDAToolkit_INCLUDE_DIRS}
)
set_target_properties(nvJPEG PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(nvJPEG PRIVATE
CUDA::cudart
CUDA::nvjpeg
)
target_include_directories(nvJPEG PRIVATE
${CUDAToolkit_INCLUDE_DIRS}
)
# Copy data to the output directory
add_custom_command(TARGET nvJPEG POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/images
${CMAKE_CURRENT_BINARY_DIR}/images
)
target_link_libraries(nvJPEG PRIVATE
CUDA::cudart
CUDA::nvjpeg
)
# Copy data to the output directory
add_custom_command(TARGET nvJPEG POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/images
${CMAKE_CURRENT_BINARY_DIR}/images
)
endif()

View File

@ -16,34 +16,40 @@ endif()
# Include directories and libraries
include_directories(../../../Common)
# Source file
# Add target for nvJPEG_encoder
add_executable(nvJPEG_encoder nvJPEG_encoder.cpp)
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
message(STATUS "Will not build sample nvJPEG_encoder - not supported on aarch64")
else()
target_compile_options(nvJPEG_encoder PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
# Source file
# Add target for nvJPEG_encoder
add_executable(nvJPEG_encoder nvJPEG_encoder.cpp)
target_compile_features(nvJPEG_encoder PRIVATE cxx_std_17 cuda_std_17)
target_compile_options(nvJPEG_encoder PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
set_target_properties(nvJPEG_encoder PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_compile_features(nvJPEG_encoder PRIVATE cxx_std_17 cuda_std_17)
target_include_directories(nvJPEG_encoder PRIVATE
${CUDAToolkit_INCLUDE_DIRS}
)
set_target_properties(nvJPEG_encoder PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
target_link_libraries(nvJPEG_encoder PRIVATE
CUDA::cudart
CUDA::nvjpeg
)
target_include_directories(nvJPEG_encoder PRIVATE
${CUDAToolkit_INCLUDE_DIRS}
)
# Copy data to the output directory
add_custom_command(TARGET nvJPEG_encoder POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/images
${CMAKE_CURRENT_BINARY_DIR}/images
)
target_link_libraries(nvJPEG_encoder PRIVATE
CUDA::cudart
CUDA::nvjpeg
)
add_custom_command(TARGET nvJPEG_encoder POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/encode_output
${CMAKE_CURRENT_BINARY_DIR}/encode_output
)
# Copy data to the output directory
add_custom_command(TARGET nvJPEG_encoder POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/images
${CMAKE_CURRENT_BINARY_DIR}/images
)
add_custom_command(TARGET nvJPEG_encoder POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory
${CMAKE_CURRENT_SOURCE_DIR}/encode_output
${CMAKE_CURRENT_BINARY_DIR}/encode_output
)
endif()