mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-10 18:22:11 +01:00
Remove Tegra QNX samples nbody_screen, simpleGLES_screen
This commit is contained in:
parent
feffc60cbf
commit
2d0314212b
@ -1,18 +0,0 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/../../../Common"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||
"cStandard": "gnu17",
|
||||
"cppStandard": "gnu++14",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"configurationProvider": "ms-vscode.makefile-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"nvidia.nsight-vscode-edition",
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.makefile-tools"
|
||||
]
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CUDA C++: Launch",
|
||||
"type": "cuda-gdb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/nbody_screen"
|
||||
}
|
||||
]
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "sample",
|
||||
"type": "shell",
|
||||
"command": "make dbg=1",
|
||||
"problemMatcher": ["$nvcc"],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules")
|
||||
|
||||
project(nbody_screen LANGUAGES C CXX CUDA)
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
|
||||
endif()
|
||||
|
||||
# Include directories and libraries
|
||||
include_directories(../../../Common)
|
||||
|
||||
# Source file
|
||||
# Add target for nbody_screen
|
||||
add_executable(nbody_screen nbody_screen.cu)
|
||||
|
||||
target_compile_options(nbody_screen PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
|
||||
|
||||
target_compile_features(nbody_screen PRIVATE cxx_std_17 cuda_std_17)
|
||||
|
||||
set_target_properties(nbody_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
@ -1,405 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
NVCCFLAGS += -D_QNX_SOURCE
|
||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
||||
ifdef TARGET_OVERRIDE
|
||||
LDFLAGS += -lslog2
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
||||
CCFLAGS += -I$(TARGET_FS)/../include
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
# This sample is not supported on Linux x86_64
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
ifeq ($(TARGET_ARCH),x86_64)
|
||||
$(info >>> WARNING - nbody_screen is not supported on Linux x86_64 - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
endif
|
||||
|
||||
# This sample is not supported on Mac OSX
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
$(info >>> WARNING - nbody_screen is not supported on Mac OSX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on ARMv7
|
||||
ifeq ($(TARGET_ARCH),armv7l)
|
||||
$(info >>> WARNING - nbody_screen is not supported on ARMv7 - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on aarch64
|
||||
ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifneq ($(TARGET_OS),qnx)
|
||||
$(info >>> WARNING - nbody_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
endif
|
||||
# This sample is not supported on sbsa
|
||||
ifeq ($(TARGET_ARCH),sbsa)
|
||||
$(info >>> WARNING - nbody_screen is not supported on sbsa - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# Makefile include to help find GLES Libraries
|
||||
include ./findgleslib.mk
|
||||
|
||||
# OpenGLES specific libraries
|
||||
ifneq ($(TARGET_OS),darwin)
|
||||
LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen
|
||||
endif
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += -ftz=true --threads 0 --std=c++11
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build: nbody_screen
|
||||
|
||||
check.deps:
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
@echo "Sample will be waived due to the above missing dependencies"
|
||||
else
|
||||
@echo "Sample is ready - all dependencies have been met"
|
||||
endif
|
||||
|
||||
bodysystemcuda.o:bodysystemcuda.cu
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
nbody_screen.o:nbody_screen.cpp
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
render_particles.o:render_particles.cpp
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
nbody_screen: bodysystemcuda.o nbody_screen.o render_particles.o
|
||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
run: build
|
||||
$(EXEC) ./nbody_screen
|
||||
|
||||
testrun: build
|
||||
$(EXEC) ./nbody_screen -benchmark -compare -cpu
|
||||
|
||||
clean:
|
||||
rm -f nbody_screen bodysystemcuda.o nbody_screen.o render_particles.o
|
||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/nbody_screen
|
||||
|
||||
clobber: clean
|
@ -1,92 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>nbody_screen</name>
|
||||
<cflags>
|
||||
<flag>-ftz=true</flag>
|
||||
</cflags>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaGraphicsUnmapResources</toolkit>
|
||||
<toolkit>cudaSetDeviceFlags</toolkit>
|
||||
<toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
|
||||
<toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
|
||||
<toolkit>cudaGraphicsMapResources</toolkit>
|
||||
<toolkit>cudaSetDevice</toolkit>
|
||||
<toolkit>cudaEventSynchronize</toolkit>
|
||||
<toolkit>cudaGetDeviceProperties</toolkit>
|
||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
||||
<toolkit>cudaEventRecord</toolkit>
|
||||
<toolkit>cudaGetDevice</toolkit>
|
||||
<toolkit>cudaMemcpyToSymbol</toolkit>
|
||||
<toolkit>cudaStreamQuery</toolkit>
|
||||
<toolkit>cudaEventDestroy</toolkit>
|
||||
<toolkit>cudaEventElapsedTime</toolkit>
|
||||
<toolkit>cudaGetDeviceCount</toolkit>
|
||||
<toolkit>cudaEventCreate</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<files>
|
||||
<file>./galaxy_20K.bin</file>
|
||||
</files>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../../Common</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="advanced">Graphics Interop</concept>
|
||||
<concept level="advanced">Data Parallel Algorithms</concept>
|
||||
<concept level="advanced">Physically-Based Simulation</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>CUDA</keyword>
|
||||
<keyword>GPGPU</keyword>
|
||||
<keyword>n-body</keyword>
|
||||
<keyword>simulation</keyword>
|
||||
<keyword>astrophysics</keyword>
|
||||
<keyword>OpenGL ES</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>nbody.cpp</primary_file>
|
||||
<qatests>
|
||||
<qatest>-benchmark -compare -cpu</qatest>
|
||||
</qatests>
|
||||
<required_dependencies>
|
||||
<dependency>screen</dependency>
|
||||
<dependency>GLES</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>2:Graphics Interop</scope>
|
||||
<scope>1:CUDA Advanced Topics</scope>
|
||||
<scope>1:Data-Parallel Algorithms</scope>
|
||||
<scope>3:Physically-Based Simulation</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm53</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<platform>qnx</platform>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>CUDA N-Body Simulation on Screen</title>
|
||||
<type>exe</type>
|
||||
</entry>
|
@ -1,37 +0,0 @@
|
||||
# nbody_screen - CUDA N-Body Simulation on Screen
|
||||
|
||||
## Description
|
||||
|
||||
This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
QNX
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
aarch64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[screen](../../../README.md#screen), [GLES](../../../README.md#gles)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## References (for more details)
|
||||
|
@ -1,286 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __BODYSYSTEM_H__
|
||||
#define __BODYSYSTEM_H__
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
enum NBodyConfig {
|
||||
NBODY_CONFIG_RANDOM,
|
||||
NBODY_CONFIG_SHELL,
|
||||
NBODY_CONFIG_EXPAND,
|
||||
NBODY_NUM_CONFIGS
|
||||
};
|
||||
|
||||
enum BodyArray {
|
||||
BODYSYSTEM_POSITION,
|
||||
BODYSYSTEM_VELOCITY,
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct vec3 {
|
||||
typedef float Type;
|
||||
}; // dummy
|
||||
template <>
|
||||
struct vec3<float> {
|
||||
typedef float3 Type;
|
||||
};
|
||||
template <>
|
||||
struct vec3<double> {
|
||||
typedef double3 Type;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct vec4 {
|
||||
typedef float Type;
|
||||
}; // dummy
|
||||
template <>
|
||||
struct vec4<float> {
|
||||
typedef float4 Type;
|
||||
};
|
||||
template <>
|
||||
struct vec4<double> {
|
||||
typedef double4 Type;
|
||||
};
|
||||
|
||||
class string;
|
||||
|
||||
// BodySystem abstract base class
|
||||
template <typename T>
|
||||
class BodySystem {
|
||||
public: // methods
|
||||
BodySystem(int numBodies) {}
|
||||
virtual ~BodySystem() {}
|
||||
|
||||
virtual void loadTipsyFile(const std::string &filename) = 0;
|
||||
|
||||
virtual void update(T deltaTime) = 0;
|
||||
|
||||
virtual void setSoftening(T softening) = 0;
|
||||
virtual void setDamping(T damping) = 0;
|
||||
|
||||
virtual T *getArray(BodyArray array) = 0;
|
||||
virtual void setArray(BodyArray array, const T *data) = 0;
|
||||
|
||||
virtual unsigned int getCurrentReadBuffer() const = 0;
|
||||
|
||||
virtual unsigned int getNumBodies() const = 0;
|
||||
|
||||
virtual void synchronizeThreads() const {};
|
||||
|
||||
protected: // methods
|
||||
BodySystem() {} // default constructor
|
||||
|
||||
virtual void _initialize(int numBodies) = 0;
|
||||
virtual void _finalize() = 0;
|
||||
};
|
||||
|
||||
inline float3 scalevec(float3 &vector, float scalar) {
|
||||
float3 rt = vector;
|
||||
rt.x *= scalar;
|
||||
rt.y *= scalar;
|
||||
rt.z *= scalar;
|
||||
return rt;
|
||||
}
|
||||
|
||||
inline float normalize(float3 &vector) {
|
||||
float dist =
|
||||
sqrtf(vector.x * vector.x + vector.y * vector.y + vector.z * vector.z);
|
||||
|
||||
if (dist > 1e-6) {
|
||||
vector.x /= dist;
|
||||
vector.y /= dist;
|
||||
vector.z /= dist;
|
||||
}
|
||||
|
||||
return dist;
|
||||
}
|
||||
|
||||
inline float dot(float3 v0, float3 v1) {
|
||||
return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
|
||||
}
|
||||
|
||||
inline float3 cross(float3 v0, float3 v1) {
|
||||
float3 rt;
|
||||
rt.x = v0.y * v1.z - v0.z * v1.y;
|
||||
rt.y = v0.z * v1.x - v0.x * v1.z;
|
||||
rt.z = v0.x * v1.y - v0.y * v1.x;
|
||||
return rt;
|
||||
}
|
||||
|
||||
// utility function
|
||||
template <typename T>
|
||||
void randomizeBodies(NBodyConfig config, T *pos, T *vel, float *color,
|
||||
float clusterScale, float velocityScale, int numBodies,
|
||||
bool vec4vel) {
|
||||
switch (config) {
|
||||
default:
|
||||
case NBODY_CONFIG_RANDOM: {
|
||||
float scale = clusterScale * std::max<float>(1.0f, numBodies / (1024.0f));
|
||||
float vscale = velocityScale * scale;
|
||||
|
||||
int p = 0, v = 0;
|
||||
int i = 0;
|
||||
|
||||
while (i < numBodies) {
|
||||
float3 point;
|
||||
// const int scale = 16;
|
||||
point.x = rand() / (float)RAND_MAX * 2 - 1;
|
||||
point.y = rand() / (float)RAND_MAX * 2 - 1;
|
||||
point.z = rand() / (float)RAND_MAX * 2 - 1;
|
||||
float lenSqr = dot(point, point);
|
||||
|
||||
if (lenSqr > 1) continue;
|
||||
|
||||
float3 velocity;
|
||||
velocity.x = rand() / (float)RAND_MAX * 2 - 1;
|
||||
velocity.y = rand() / (float)RAND_MAX * 2 - 1;
|
||||
velocity.z = rand() / (float)RAND_MAX * 2 - 1;
|
||||
lenSqr = dot(velocity, velocity);
|
||||
|
||||
if (lenSqr > 1) continue;
|
||||
|
||||
pos[p++] = point.x * scale; // pos.x
|
||||
pos[p++] = point.y * scale; // pos.y
|
||||
pos[p++] = point.z * scale; // pos.z
|
||||
pos[p++] = 1.0f; // mass
|
||||
|
||||
vel[v++] = velocity.x * vscale; // pos.x
|
||||
vel[v++] = velocity.y * vscale; // pos.x
|
||||
vel[v++] = velocity.z * vscale; // pos.x
|
||||
|
||||
if (vec4vel) vel[v++] = 1.0f; // inverse mass
|
||||
|
||||
i++;
|
||||
}
|
||||
} break;
|
||||
|
||||
case NBODY_CONFIG_SHELL: {
|
||||
float scale = clusterScale;
|
||||
float vscale = scale * velocityScale;
|
||||
float inner = 2.5f * scale;
|
||||
float outer = 4.0f * scale;
|
||||
|
||||
int p = 0, v = 0;
|
||||
int i = 0;
|
||||
|
||||
while (i < numBodies) // for(int i=0; i < numBodies; i++)
|
||||
{
|
||||
float x, y, z;
|
||||
x = rand() / (float)RAND_MAX * 2 - 1;
|
||||
y = rand() / (float)RAND_MAX * 2 - 1;
|
||||
z = rand() / (float)RAND_MAX * 2 - 1;
|
||||
|
||||
float3 point = {x, y, z};
|
||||
float len = normalize(point);
|
||||
|
||||
if (len > 1) continue;
|
||||
|
||||
pos[p++] =
|
||||
point.x * (inner + (outer - inner) * rand() / (float)RAND_MAX);
|
||||
pos[p++] =
|
||||
point.y * (inner + (outer - inner) * rand() / (float)RAND_MAX);
|
||||
pos[p++] =
|
||||
point.z * (inner + (outer - inner) * rand() / (float)RAND_MAX);
|
||||
pos[p++] = 1.0f;
|
||||
|
||||
x = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
|
||||
y = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
|
||||
z = 1.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
|
||||
float3 axis = {x, y, z};
|
||||
normalize(axis);
|
||||
|
||||
if (1 - dot(point, axis) < 1e-6) {
|
||||
axis.x = point.y;
|
||||
axis.y = point.x;
|
||||
normalize(axis);
|
||||
}
|
||||
|
||||
// if (point.y < 0) axis = scalevec(axis, -1);
|
||||
float3 vv = {(float)pos[4 * i], (float)pos[4 * i + 1],
|
||||
(float)pos[4 * i + 2]};
|
||||
vv = cross(vv, axis);
|
||||
vel[v++] = vv.x * vscale;
|
||||
vel[v++] = vv.y * vscale;
|
||||
vel[v++] = vv.z * vscale;
|
||||
|
||||
if (vec4vel) vel[v++] = 1.0f;
|
||||
|
||||
i++;
|
||||
}
|
||||
} break;
|
||||
|
||||
case NBODY_CONFIG_EXPAND: {
|
||||
float scale = clusterScale * numBodies / (1024.f);
|
||||
|
||||
if (scale < 1.0f) scale = clusterScale;
|
||||
|
||||
float vscale = scale * velocityScale;
|
||||
|
||||
int p = 0, v = 0;
|
||||
|
||||
for (int i = 0; i < numBodies;) {
|
||||
float3 point;
|
||||
|
||||
point.x = rand() / (float)RAND_MAX * 2 - 1;
|
||||
point.y = rand() / (float)RAND_MAX * 2 - 1;
|
||||
point.z = rand() / (float)RAND_MAX * 2 - 1;
|
||||
|
||||
float lenSqr = dot(point, point);
|
||||
|
||||
if (lenSqr > 1) continue;
|
||||
|
||||
pos[p++] = point.x * scale; // pos.x
|
||||
pos[p++] = point.y * scale; // pos.y
|
||||
pos[p++] = point.z * scale; // pos.z
|
||||
pos[p++] = 1.0f; // mass
|
||||
vel[v++] = point.x * vscale; // pos.x
|
||||
vel[v++] = point.y * vscale; // pos.x
|
||||
vel[v++] = point.z * vscale; // pos.x
|
||||
|
||||
if (vec4vel) vel[v++] = 1.0f; // inverse mass
|
||||
|
||||
i++;
|
||||
}
|
||||
} break;
|
||||
}
|
||||
|
||||
if (color) {
|
||||
int v = 0;
|
||||
|
||||
for (int i = 0; i < numBodies; i++) {
|
||||
// const int scale = 16;
|
||||
color[v++] = rand() / (float)RAND_MAX;
|
||||
color[v++] = rand() / (float)RAND_MAX;
|
||||
color[v++] = rand() / (float)RAND_MAX;
|
||||
color[v++] = 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // __BODYSYSTEM_H__
|
@ -1,79 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __BODYSYSTEMCPU_H__
|
||||
#define __BODYSYSTEMCPU_H__
|
||||
|
||||
#include "bodysystem.h"
|
||||
|
||||
// CPU Body System
|
||||
template <typename T>
|
||||
class BodySystemCPU : public BodySystem<T> {
|
||||
public:
|
||||
BodySystemCPU(int numBodies);
|
||||
virtual ~BodySystemCPU();
|
||||
|
||||
virtual void loadTipsyFile(const std::string &filename);
|
||||
|
||||
virtual void update(T deltaTime);
|
||||
|
||||
virtual void setSoftening(T softening) {
|
||||
m_softeningSquared = softening * softening;
|
||||
}
|
||||
virtual void setDamping(T damping) { m_damping = damping; }
|
||||
|
||||
virtual T *getArray(BodyArray array);
|
||||
virtual void setArray(BodyArray array, const T *data);
|
||||
|
||||
virtual unsigned int getCurrentReadBuffer() const { return 0; }
|
||||
|
||||
virtual unsigned int getNumBodies() const { return m_numBodies; }
|
||||
|
||||
protected: // methods
|
||||
BodySystemCPU() {} // default constructor
|
||||
|
||||
virtual void _initialize(int numBodies);
|
||||
virtual void _finalize();
|
||||
|
||||
void _computeNBodyGravitation();
|
||||
void _integrateNBodySystem(T deltaTime);
|
||||
|
||||
protected: // data
|
||||
int m_numBodies;
|
||||
bool m_bInitialized;
|
||||
|
||||
T *m_pos;
|
||||
T *m_vel;
|
||||
T *m_force;
|
||||
|
||||
T m_softeningSquared;
|
||||
T m_damping;
|
||||
};
|
||||
|
||||
#include "bodysystemcpu_impl.h"
|
||||
|
||||
#endif // __BODYSYSTEMCPU_H__
|
@ -1,280 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "bodysystemcpu.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <memory.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <helper_cuda.h>
|
||||
#include <algorithm>
|
||||
#include "tipsy.h"
|
||||
|
||||
#ifdef OPENMP
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
template <typename T>
|
||||
BodySystemCPU<T>::BodySystemCPU(int numBodies)
|
||||
: m_numBodies(numBodies),
|
||||
m_bInitialized(false),
|
||||
m_force(0),
|
||||
m_softeningSquared(.00125f),
|
||||
m_damping(0.995f) {
|
||||
m_pos = 0;
|
||||
m_vel = 0;
|
||||
|
||||
_initialize(numBodies);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BodySystemCPU<T>::~BodySystemCPU() {
|
||||
_finalize();
|
||||
m_numBodies = 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCPU<T>::_initialize(int numBodies) {
|
||||
assert(!m_bInitialized);
|
||||
|
||||
m_numBodies = numBodies;
|
||||
|
||||
m_pos = new T[m_numBodies * 4];
|
||||
m_vel = new T[m_numBodies * 4];
|
||||
m_force = new T[m_numBodies * 3];
|
||||
|
||||
memset(m_pos, 0, m_numBodies * 4 * sizeof(T));
|
||||
memset(m_vel, 0, m_numBodies * 4 * sizeof(T));
|
||||
memset(m_force, 0, m_numBodies * 3 * sizeof(T));
|
||||
|
||||
m_bInitialized = true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCPU<T>::_finalize() {
|
||||
assert(m_bInitialized);
|
||||
|
||||
delete[] m_pos;
|
||||
delete[] m_vel;
|
||||
delete[] m_force;
|
||||
|
||||
m_bInitialized = false;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCPU<T>::loadTipsyFile(const std::string &filename) {
|
||||
if (m_bInitialized) _finalize();
|
||||
|
||||
vector<typename vec4<T>::Type> positions;
|
||||
vector<typename vec4<T>::Type> velocities;
|
||||
vector<int> ids;
|
||||
|
||||
int nBodies = 0;
|
||||
int nFirst = 0, nSecond = 0, nThird = 0;
|
||||
|
||||
read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst,
|
||||
nSecond, nThird);
|
||||
|
||||
_initialize(nBodies);
|
||||
|
||||
memcpy(m_pos, &positions[0], sizeof(vec4<T>) * nBodies);
|
||||
memcpy(m_vel, &velocities[0], sizeof(vec4<T>) * nBodies);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCPU<T>::update(T deltaTime) {
|
||||
assert(m_bInitialized);
|
||||
|
||||
_integrateNBodySystem(deltaTime);
|
||||
|
||||
// std::swap(m_currentRead, m_currentWrite);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T *BodySystemCPU<T>::getArray(BodyArray array) {
|
||||
assert(m_bInitialized);
|
||||
|
||||
T *data = 0;
|
||||
|
||||
switch (array) {
|
||||
default:
|
||||
case BODYSYSTEM_POSITION:
|
||||
data = m_pos;
|
||||
break;
|
||||
|
||||
case BODYSYSTEM_VELOCITY:
|
||||
data = m_vel;
|
||||
break;
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCPU<T>::setArray(BodyArray array, const T *data) {
|
||||
assert(m_bInitialized);
|
||||
|
||||
T *target = 0;
|
||||
|
||||
switch (array) {
|
||||
default:
|
||||
case BODYSYSTEM_POSITION:
|
||||
target = m_pos;
|
||||
break;
|
||||
|
||||
case BODYSYSTEM_VELOCITY:
|
||||
target = m_vel;
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(target, data, m_numBodies * 4 * sizeof(T));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T sqrt_T(T x) {
|
||||
return sqrt(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
float sqrt_T<float>(float x) {
|
||||
return sqrtf(x);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void bodyBodyInteraction(T accel[3], T posMass0[4], T posMass1[4],
|
||||
T softeningSquared) {
|
||||
T r[3];
|
||||
|
||||
// r_01 [3 FLOPS]
|
||||
r[0] = posMass1[0] - posMass0[0];
|
||||
r[1] = posMass1[1] - posMass0[1];
|
||||
r[2] = posMass1[2] - posMass0[2];
|
||||
|
||||
// d^2 + e^2 [6 FLOPS]
|
||||
T distSqr = r[0] * r[0] + r[1] * r[1] + r[2] * r[2];
|
||||
distSqr += softeningSquared;
|
||||
|
||||
// invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)]
|
||||
T invDist = (T)1.0 / (T)sqrt((double)distSqr);
|
||||
T invDistCube = invDist * invDist * invDist;
|
||||
|
||||
// s = m_j * invDistCube [1 FLOP]
|
||||
T s = posMass1[3] * invDistCube;
|
||||
|
||||
// (m_1 * r_01) / (d^2 + e^2)^(3/2) [6 FLOPS]
|
||||
accel[0] += r[0] * s;
|
||||
accel[1] += r[1] * s;
|
||||
accel[2] += r[2] * s;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCPU<T>::_computeNBodyGravitation() {
|
||||
#ifdef OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < m_numBodies; i++) {
|
||||
int indexForce = 3 * i;
|
||||
|
||||
T acc[3] = {0, 0, 0};
|
||||
|
||||
// We unroll this loop 4X for a small performance boost.
|
||||
int j = 0;
|
||||
|
||||
while (j < m_numBodies) {
|
||||
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
|
||||
m_softeningSquared);
|
||||
j++;
|
||||
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
|
||||
m_softeningSquared);
|
||||
j++;
|
||||
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
|
||||
m_softeningSquared);
|
||||
j++;
|
||||
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
|
||||
m_softeningSquared);
|
||||
j++;
|
||||
}
|
||||
|
||||
m_force[indexForce] = acc[0];
|
||||
m_force[indexForce + 1] = acc[1];
|
||||
m_force[indexForce + 2] = acc[2];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCPU<T>::_integrateNBodySystem(T deltaTime) {
|
||||
_computeNBodyGravitation();
|
||||
|
||||
#ifdef OPENMP
|
||||
#pragma omp parallel for
|
||||
#endif
|
||||
|
||||
for (int i = 0; i < m_numBodies; ++i) {
|
||||
int index = 4 * i;
|
||||
int indexForce = 3 * i;
|
||||
|
||||
T pos[3], vel[3], force[3];
|
||||
pos[0] = m_pos[index + 0];
|
||||
pos[1] = m_pos[index + 1];
|
||||
pos[2] = m_pos[index + 2];
|
||||
T invMass = m_pos[index + 3];
|
||||
|
||||
vel[0] = m_vel[index + 0];
|
||||
vel[1] = m_vel[index + 1];
|
||||
vel[2] = m_vel[index + 2];
|
||||
|
||||
force[0] = m_force[indexForce + 0];
|
||||
force[1] = m_force[indexForce + 1];
|
||||
force[2] = m_force[indexForce + 2];
|
||||
|
||||
// acceleration = force / mass;
|
||||
// new velocity = old velocity + acceleration * deltaTime
|
||||
vel[0] += (force[0] * invMass) * deltaTime;
|
||||
vel[1] += (force[1] * invMass) * deltaTime;
|
||||
vel[2] += (force[2] * invMass) * deltaTime;
|
||||
|
||||
vel[0] *= m_damping;
|
||||
vel[1] *= m_damping;
|
||||
vel[2] *= m_damping;
|
||||
|
||||
// new position = old position + velocity * deltaTime
|
||||
pos[0] += vel[0] * deltaTime;
|
||||
pos[1] += vel[1] * deltaTime;
|
||||
pos[2] += vel[2] * deltaTime;
|
||||
|
||||
m_pos[index + 0] = pos[0];
|
||||
m_pos[index + 1] = pos[1];
|
||||
m_pos[index + 2] = pos[2];
|
||||
|
||||
m_vel[index + 0] = vel[0];
|
||||
m_vel[index + 1] = vel[1];
|
||||
m_vel[index + 2] = vel[2];
|
||||
}
|
||||
}
|
@ -1,276 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <helper_cuda.h>
|
||||
#include <math.h>
|
||||
|
||||
//#include <GL/glew.h>
|
||||
//#include <GL/freeglut.h>
|
||||
|
||||
// CUDA standard includes
|
||||
#include <cuda_runtime.h>
|
||||
//#include <cuda_gl_interop.h>
|
||||
|
||||
#include "bodysystem.h"
|
||||
|
||||
__constant__ float softeningSquared;
|
||||
__constant__ double softeningSquared_fp64;
|
||||
|
||||
cudaError_t setSofteningSquared(float softeningSq) {
|
||||
return cudaMemcpyToSymbol(softeningSquared, &softeningSq, sizeof(float), 0,
|
||||
cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
cudaError_t setSofteningSquared(double softeningSq) {
|
||||
return cudaMemcpyToSymbol(softeningSquared_fp64, &softeningSq, sizeof(double),
|
||||
0, cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
template <class T>
|
||||
struct SharedMemory {
|
||||
__device__ inline operator T *() {
|
||||
extern __shared__ int __smem[];
|
||||
return (T *)__smem;
|
||||
}
|
||||
|
||||
__device__ inline operator const T *() const {
|
||||
extern __shared__ int __smem[];
|
||||
return (T *)__smem;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ T rsqrt_T(T x) {
|
||||
return rsqrt(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ float rsqrt_T<float>(float x) {
|
||||
return rsqrtf(x);
|
||||
}
|
||||
|
||||
template <>
|
||||
__device__ double rsqrt_T<double>(double x) {
|
||||
return rsqrt(x);
|
||||
}
|
||||
|
||||
// Macros to simplify shared memory addressing
|
||||
#define SX(i) sharedPos[i + blockDim.x * threadIdx.y]
|
||||
// This macro is only used when multithreadBodies is true (below)
|
||||
#define SX_SUM(i, j) sharedPos[i + blockDim.x * j]
|
||||
|
||||
template <typename T>
|
||||
__device__ T getSofteningSquared() {
|
||||
return softeningSquared;
|
||||
}
|
||||
template <>
|
||||
__device__ double getSofteningSquared<double>() {
|
||||
return softeningSquared_fp64;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct DeviceData {
|
||||
T *dPos[2]; // mapped host pointers
|
||||
T *dVel;
|
||||
cudaEvent_t event;
|
||||
unsigned int offset;
|
||||
unsigned int numBodies;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ typename vec3<T>::Type bodyBodyInteraction(
|
||||
typename vec3<T>::Type ai, typename vec4<T>::Type bi,
|
||||
typename vec4<T>::Type bj) {
|
||||
typename vec3<T>::Type r;
|
||||
|
||||
// r_ij [3 FLOPS]
|
||||
r.x = bj.x - bi.x;
|
||||
r.y = bj.y - bi.y;
|
||||
r.z = bj.z - bi.z;
|
||||
|
||||
// distSqr = dot(r_ij, r_ij) + EPS^2 [6 FLOPS]
|
||||
T distSqr = r.x * r.x + r.y * r.y + r.z * r.z;
|
||||
distSqr += getSofteningSquared<T>();
|
||||
|
||||
// invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)]
|
||||
T invDist = rsqrt_T(distSqr);
|
||||
T invDistCube = invDist * invDist * invDist;
|
||||
|
||||
// s = m_j * invDistCube [1 FLOP]
|
||||
T s = bj.w * invDistCube;
|
||||
|
||||
// a_i = a_i + s * r_ij [6 FLOPS]
|
||||
ai.x += r.x * s;
|
||||
ai.y += r.y * s;
|
||||
ai.z += r.z * s;
|
||||
|
||||
return ai;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__device__ typename vec3<T>::Type computeBodyAccel(
|
||||
typename vec4<T>::Type bodyPos, typename vec4<T>::Type *positions,
|
||||
int numTiles) {
|
||||
typename vec4<T>::Type *sharedPos = SharedMemory<typename vec4<T>::Type>();
|
||||
|
||||
typename vec3<T>::Type acc = {0.0f, 0.0f, 0.0f};
|
||||
|
||||
for (int tile = 0; tile < numTiles; tile++) {
|
||||
sharedPos[threadIdx.x] = positions[tile * blockDim.x + threadIdx.x];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
// This is the "tile_calculation" from the GPUG3 article.
|
||||
#pragma unroll 128
|
||||
|
||||
for (unsigned int counter = 0; counter < blockDim.x; counter++) {
|
||||
acc = bodyBodyInteraction<T>(acc, bodyPos, sharedPos[counter]);
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void integrateBodies(typename vec4<T>::Type *__restrict__ newPos,
|
||||
typename vec4<T>::Type *__restrict__ oldPos,
|
||||
typename vec4<T>::Type *vel,
|
||||
unsigned int deviceOffset,
|
||||
unsigned int deviceNumBodies, float deltaTime,
|
||||
float damping, int numTiles) {
|
||||
int index = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (index >= deviceNumBodies) {
|
||||
return;
|
||||
}
|
||||
|
||||
typename vec4<T>::Type position = oldPos[deviceOffset + index];
|
||||
|
||||
typename vec3<T>::Type accel =
|
||||
computeBodyAccel<T>(position, oldPos, numTiles);
|
||||
|
||||
// acceleration = force / mass;
|
||||
// new velocity = old velocity + acceleration * deltaTime
|
||||
// note we factor out the body's mass from the equation, here and in
|
||||
// bodyBodyInteraction (because they cancel out). Thus here force ==
|
||||
// acceleration
|
||||
typename vec4<T>::Type velocity = vel[deviceOffset + index];
|
||||
|
||||
velocity.x += accel.x * deltaTime;
|
||||
velocity.y += accel.y * deltaTime;
|
||||
velocity.z += accel.z * deltaTime;
|
||||
|
||||
velocity.x *= damping;
|
||||
velocity.y *= damping;
|
||||
velocity.z *= damping;
|
||||
|
||||
// new position = old position + velocity * deltaTime
|
||||
position.x += velocity.x * deltaTime;
|
||||
position.y += velocity.y * deltaTime;
|
||||
position.z += velocity.z * deltaTime;
|
||||
|
||||
// store new position and velocity
|
||||
newPos[deviceOffset + index] = position;
|
||||
vel[deviceOffset + index] = velocity;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void integrateNbodySystem(DeviceData<T> *deviceData,
|
||||
cudaGraphicsResource **pgres,
|
||||
unsigned int currentRead, float deltaTime,
|
||||
float damping, unsigned int numBodies,
|
||||
unsigned int numDevices, int blockSize,
|
||||
bool bUsePBO) {
|
||||
if (bUsePBO) {
|
||||
checkCudaErrors(cudaGraphicsResourceSetMapFlags(
|
||||
pgres[currentRead], cudaGraphicsMapFlagsReadOnly));
|
||||
checkCudaErrors(cudaGraphicsResourceSetMapFlags(
|
||||
pgres[1 - currentRead], cudaGraphicsMapFlagsWriteDiscard));
|
||||
checkCudaErrors(cudaGraphicsMapResources(2, pgres, 0));
|
||||
size_t bytes;
|
||||
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||||
(void **)&(deviceData[0].dPos[currentRead]), &bytes,
|
||||
pgres[currentRead]));
|
||||
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||||
(void **)&(deviceData[0].dPos[1 - currentRead]), &bytes,
|
||||
pgres[1 - currentRead]));
|
||||
}
|
||||
|
||||
for (unsigned int dev = 0; dev != numDevices; dev++) {
|
||||
if (numDevices > 1) {
|
||||
cudaSetDevice(dev);
|
||||
}
|
||||
|
||||
int numBlocks = (deviceData[dev].numBodies + blockSize - 1) / blockSize;
|
||||
int numTiles = (numBodies + blockSize - 1) / blockSize;
|
||||
int sharedMemSize = blockSize * 4 * sizeof(T); // 4 floats for pos
|
||||
|
||||
integrateBodies<T><<<numBlocks, blockSize, sharedMemSize>>>(
|
||||
(typename vec4<T>::Type *)deviceData[dev].dPos[1 - currentRead],
|
||||
(typename vec4<T>::Type *)deviceData[dev].dPos[currentRead],
|
||||
(typename vec4<T>::Type *)deviceData[dev].dVel, deviceData[dev].offset,
|
||||
deviceData[dev].numBodies, deltaTime, damping, numTiles);
|
||||
|
||||
if (numDevices > 1) {
|
||||
checkCudaErrors(cudaEventRecord(deviceData[dev].event));
|
||||
// MJH: Hack on older driver versions to force kernel launches to flush!
|
||||
cudaStreamQuery(0);
|
||||
}
|
||||
|
||||
// check if kernel invocation generated an error
|
||||
getLastCudaError("Kernel execution failed");
|
||||
}
|
||||
|
||||
if (numDevices > 1) {
|
||||
for (unsigned int dev = 0; dev < numDevices; dev++) {
|
||||
checkCudaErrors(cudaEventSynchronize(deviceData[dev].event));
|
||||
}
|
||||
}
|
||||
|
||||
if (bUsePBO) {
|
||||
checkCudaErrors(cudaGraphicsUnmapResources(2, pgres, 0));
|
||||
}
|
||||
}
|
||||
|
||||
// Explicit specializations needed to generate code
|
||||
template void integrateNbodySystem<float>(DeviceData<float> *deviceData,
|
||||
cudaGraphicsResource **pgres,
|
||||
unsigned int currentRead,
|
||||
float deltaTime, float damping,
|
||||
unsigned int numBodies,
|
||||
unsigned int numDevices,
|
||||
int blockSize, bool bUsePBO);
|
||||
|
||||
template void integrateNbodySystem<double>(DeviceData<double> *deviceData,
|
||||
cudaGraphicsResource **pgres,
|
||||
unsigned int currentRead,
|
||||
float deltaTime, float damping,
|
||||
unsigned int numBodies,
|
||||
unsigned int numDevices,
|
||||
int blockSize, bool bUsePBO);
|
@ -1,99 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __BODYSYSTEMCUDA_H__
|
||||
#define __BODYSYSTEMCUDA_H__
|
||||
|
||||
#include "bodysystem.h"
|
||||
|
||||
template <typename T>
|
||||
struct DeviceData {
|
||||
T *dPos[2]; // mapped host pointers
|
||||
T *dVel;
|
||||
cudaEvent_t event;
|
||||
unsigned int offset;
|
||||
unsigned int numBodies;
|
||||
};
|
||||
|
||||
// CUDA BodySystem: runs on the GPU
|
||||
template <typename T>
|
||||
class BodySystemCUDA : public BodySystem<T> {
|
||||
public:
|
||||
BodySystemCUDA(unsigned int numBodies, unsigned int numDevices,
|
||||
unsigned int blockSize, bool usePBO, bool useSysMem = false);
|
||||
virtual ~BodySystemCUDA();
|
||||
|
||||
virtual void loadTipsyFile(const std::string &filename);
|
||||
|
||||
virtual void update(T deltaTime);
|
||||
|
||||
virtual void setSoftening(T softening);
|
||||
virtual void setDamping(T damping);
|
||||
|
||||
virtual T *getArray(BodyArray array);
|
||||
virtual void setArray(BodyArray array, const T *data);
|
||||
|
||||
virtual unsigned int getCurrentReadBuffer() const {
|
||||
return m_pbo[m_currentRead];
|
||||
}
|
||||
|
||||
virtual unsigned int getNumBodies() const { return m_numBodies; }
|
||||
|
||||
protected: // methods
|
||||
BodySystemCUDA() {}
|
||||
|
||||
virtual void _initialize(int numBodies);
|
||||
virtual void _finalize();
|
||||
|
||||
protected: // data
|
||||
unsigned int m_numBodies;
|
||||
unsigned int m_numDevices;
|
||||
bool m_bInitialized;
|
||||
|
||||
// Host data
|
||||
T *m_hPos[2];
|
||||
T *m_hVel;
|
||||
|
||||
DeviceData<T> *m_deviceData;
|
||||
|
||||
bool m_bUsePBO;
|
||||
bool m_bUseSysMem;
|
||||
unsigned int m_SMVersion;
|
||||
|
||||
T m_damping;
|
||||
|
||||
unsigned int m_pbo[2];
|
||||
cudaGraphicsResource *m_pGRes[2];
|
||||
unsigned int m_currentRead;
|
||||
unsigned int m_currentWrite;
|
||||
|
||||
unsigned int m_blockSize;
|
||||
};
|
||||
|
||||
#include "bodysystemcuda_impl.h"
|
||||
|
||||
#endif // __BODYSYSTEMCUDA_H__
|
@ -1,373 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <helper_cuda.h>
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <memory.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
//#include <GL/glew.h>
|
||||
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
template <typename T>
|
||||
void integrateNbodySystem(DeviceData<T> *deviceData,
|
||||
cudaGraphicsResource **pgres,
|
||||
unsigned int currentRead, float deltaTime,
|
||||
float damping, unsigned int numBodies,
|
||||
unsigned int numDevices, int blockSize, bool bUsePBO);
|
||||
|
||||
cudaError_t setSofteningSquared(float softeningSq);
|
||||
cudaError_t setSofteningSquared(double softeningSq);
|
||||
|
||||
template <typename T>
|
||||
BodySystemCUDA<T>::BodySystemCUDA(unsigned int numBodies,
|
||||
unsigned int numDevices,
|
||||
unsigned int blockSize, bool usePBO,
|
||||
bool useSysMem)
|
||||
: m_numBodies(numBodies),
|
||||
m_numDevices(numDevices),
|
||||
m_bInitialized(false),
|
||||
m_bUsePBO(usePBO),
|
||||
m_bUseSysMem(useSysMem),
|
||||
m_currentRead(0),
|
||||
m_currentWrite(1),
|
||||
m_blockSize(blockSize) {
|
||||
m_hPos[0] = m_hPos[1] = 0;
|
||||
m_hVel = 0;
|
||||
|
||||
m_deviceData = 0;
|
||||
|
||||
_initialize(numBodies);
|
||||
setSoftening(0.00125f);
|
||||
setDamping(0.995f);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
BodySystemCUDA<T>::~BodySystemCUDA() {
|
||||
_finalize();
|
||||
m_numBodies = 0;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCUDA<T>::_initialize(int numBodies) {
|
||||
assert(!m_bInitialized);
|
||||
|
||||
m_numBodies = numBodies;
|
||||
|
||||
unsigned int memSize = sizeof(T) * 4 * numBodies;
|
||||
|
||||
m_deviceData = new DeviceData<T>[m_numDevices];
|
||||
|
||||
// divide up the workload amongst Devices
|
||||
float *weights = new float[m_numDevices];
|
||||
int *numSms = new int[m_numDevices];
|
||||
float total = 0;
|
||||
|
||||
for (unsigned int i = 0; i < m_numDevices; i++) {
|
||||
cudaDeviceProp props;
|
||||
checkCudaErrors(cudaGetDeviceProperties(&props, i));
|
||||
|
||||
// Choose the weight based on the Compute Capability
|
||||
// We estimate that a CC2.0 SM is about 4.0x faster than a CC 1.x SM for
|
||||
// this application (since a 15-SM GF100 is about 2X faster than a 30-SM
|
||||
// GT200).
|
||||
numSms[i] = props.multiProcessorCount;
|
||||
weights[i] = numSms[i] * (props.major >= 2 ? 4.f : 1.f);
|
||||
total += weights[i];
|
||||
}
|
||||
|
||||
unsigned int offset = 0;
|
||||
unsigned int remaining = m_numBodies;
|
||||
|
||||
for (unsigned int i = 0; i < m_numDevices; i++) {
|
||||
unsigned int count = (int)((weights[i] / total) * m_numBodies);
|
||||
unsigned int round = numSms[i] * 256;
|
||||
count = round * ((count + round - 1) / round);
|
||||
|
||||
if (count > remaining) {
|
||||
count = remaining;
|
||||
}
|
||||
|
||||
remaining -= count;
|
||||
m_deviceData[i].offset = offset;
|
||||
m_deviceData[i].numBodies = count;
|
||||
offset += count;
|
||||
|
||||
if ((i == m_numDevices - 1) && (offset < m_numBodies - 1)) {
|
||||
m_deviceData[i].numBodies += m_numBodies - offset;
|
||||
}
|
||||
}
|
||||
|
||||
delete[] weights;
|
||||
delete[] numSms;
|
||||
|
||||
if (m_bUseSysMem) {
|
||||
checkCudaErrors(cudaHostAlloc((void **)&m_hPos[0], memSize,
|
||||
cudaHostAllocMapped | cudaHostAllocPortable));
|
||||
checkCudaErrors(cudaHostAlloc((void **)&m_hPos[1], memSize,
|
||||
cudaHostAllocMapped | cudaHostAllocPortable));
|
||||
checkCudaErrors(cudaHostAlloc((void **)&m_hVel, memSize,
|
||||
cudaHostAllocMapped | cudaHostAllocPortable));
|
||||
|
||||
memset(m_hPos[0], 0, memSize);
|
||||
memset(m_hPos[1], 0, memSize);
|
||||
memset(m_hVel, 0, memSize);
|
||||
|
||||
for (unsigned int i = 0; i < m_numDevices; i++) {
|
||||
if (m_numDevices > 1) {
|
||||
checkCudaErrors(cudaSetDevice(i));
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaEventCreate(&m_deviceData[i].event));
|
||||
checkCudaErrors(cudaHostGetDevicePointer(
|
||||
(void **)&m_deviceData[i].dPos[0], (void *)m_hPos[0], 0));
|
||||
checkCudaErrors(cudaHostGetDevicePointer(
|
||||
(void **)&m_deviceData[i].dPos[1], (void *)m_hPos[1], 0));
|
||||
checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dVel,
|
||||
(void *)m_hVel, 0));
|
||||
}
|
||||
} else {
|
||||
m_hPos[0] = new T[m_numBodies * 4];
|
||||
m_hVel = new T[m_numBodies * 4];
|
||||
|
||||
memset(m_hPos[0], 0, memSize);
|
||||
memset(m_hVel, 0, memSize);
|
||||
|
||||
checkCudaErrors(cudaEventCreate(&m_deviceData[0].event));
|
||||
|
||||
if (m_bUsePBO) {
|
||||
// create the position pixel buffer objects for rendering
|
||||
// we will actually compute directly from this memory in CUDA too
|
||||
glGenBuffers(2, (GLuint *)m_pbo);
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_pbo[i]);
|
||||
glBufferData(GL_ARRAY_BUFFER, memSize, m_hPos[0], GL_DYNAMIC_DRAW);
|
||||
|
||||
int size = 0;
|
||||
glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);
|
||||
|
||||
if ((unsigned)size != memSize) {
|
||||
fprintf(stderr, "WARNING: Pixel Buffer Object allocation failed!n");
|
||||
}
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&m_pGRes[i], m_pbo[i],
|
||||
cudaGraphicsMapFlagsNone));
|
||||
}
|
||||
} else {
|
||||
checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[0], memSize));
|
||||
checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[1], memSize));
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dVel, memSize));
|
||||
}
|
||||
|
||||
m_bInitialized = true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCUDA<T>::_finalize() {
|
||||
assert(m_bInitialized);
|
||||
|
||||
if (m_bUseSysMem) {
|
||||
checkCudaErrors(cudaFreeHost(m_hPos[0]));
|
||||
checkCudaErrors(cudaFreeHost(m_hPos[1]));
|
||||
checkCudaErrors(cudaFreeHost(m_hVel));
|
||||
|
||||
for (unsigned int i = 0; i < m_numDevices; i++) {
|
||||
cudaEventDestroy(m_deviceData[i].event);
|
||||
}
|
||||
} else {
|
||||
delete[] m_hPos[0];
|
||||
delete[] m_hPos[1];
|
||||
delete[] m_hVel;
|
||||
|
||||
checkCudaErrors(cudaFree((void **)m_deviceData[0].dVel));
|
||||
|
||||
if (m_bUsePBO) {
|
||||
checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[0]));
|
||||
checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[1]));
|
||||
glDeleteBuffers(2, (const GLuint *)m_pbo);
|
||||
} else {
|
||||
checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[0]));
|
||||
checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[1]));
|
||||
}
|
||||
}
|
||||
|
||||
delete[] m_deviceData;
|
||||
|
||||
m_bInitialized = false;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCUDA<T>::loadTipsyFile(const std::string &filename) {
|
||||
if (m_bInitialized) _finalize();
|
||||
|
||||
std::vector<typename vec4<T>::Type> positions;
|
||||
std::vector<typename vec4<T>::Type> velocities;
|
||||
std::vector<int> ids;
|
||||
|
||||
int nBodies = 0;
|
||||
int nFirst = 0, nSecond = 0, nThird = 0;
|
||||
|
||||
read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst,
|
||||
nSecond, nThird);
|
||||
|
||||
_initialize(nBodies);
|
||||
|
||||
setArray(BODYSYSTEM_POSITION, (T *)&positions[0]);
|
||||
setArray(BODYSYSTEM_VELOCITY, (T *)&velocities[0]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCUDA<T>::setSoftening(T softening) {
|
||||
T softeningSq = softening * softening;
|
||||
|
||||
for (unsigned int i = 0; i < m_numDevices; i++) {
|
||||
if (m_numDevices > 1) {
|
||||
checkCudaErrors(cudaSetDevice(i));
|
||||
}
|
||||
|
||||
checkCudaErrors(setSofteningSquared(softeningSq));
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCUDA<T>::setDamping(T damping) {
|
||||
m_damping = damping;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCUDA<T>::update(T deltaTime) {
|
||||
assert(m_bInitialized);
|
||||
|
||||
integrateNbodySystem<T>(m_deviceData, m_pGRes, m_currentRead,
|
||||
(float)deltaTime, (float)m_damping, m_numBodies,
|
||||
m_numDevices, m_blockSize, m_bUsePBO);
|
||||
|
||||
std::swap(m_currentRead, m_currentWrite);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T *BodySystemCUDA<T>::getArray(BodyArray array) {
|
||||
assert(m_bInitialized);
|
||||
|
||||
T *hdata = 0;
|
||||
T *ddata = 0;
|
||||
|
||||
cudaGraphicsResource *pgres = NULL;
|
||||
|
||||
int currentReadHost = m_bUseSysMem ? m_currentRead : 0;
|
||||
|
||||
switch (array) {
|
||||
default:
|
||||
case BODYSYSTEM_POSITION:
|
||||
hdata = m_hPos[currentReadHost];
|
||||
ddata = m_deviceData[0].dPos[m_currentRead];
|
||||
|
||||
if (m_bUsePBO) {
|
||||
pgres = m_pGRes[m_currentRead];
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case BODYSYSTEM_VELOCITY:
|
||||
hdata = m_hVel;
|
||||
ddata = m_deviceData[0].dVel;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!m_bUseSysMem) {
|
||||
if (pgres) {
|
||||
checkCudaErrors(
|
||||
cudaGraphicsResourceSetMapFlags(pgres, cudaGraphicsMapFlagsReadOnly));
|
||||
checkCudaErrors(cudaGraphicsMapResources(1, &pgres, 0));
|
||||
size_t bytes;
|
||||
checkCudaErrors(
|
||||
cudaGraphicsResourceGetMappedPointer((void **)&ddata, &bytes, pgres));
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaMemcpy(hdata, ddata, m_numBodies * 4 * sizeof(T),
|
||||
cudaMemcpyDeviceToHost));
|
||||
|
||||
if (pgres) {
|
||||
checkCudaErrors(cudaGraphicsUnmapResources(1, &pgres, 0));
|
||||
}
|
||||
}
|
||||
|
||||
return hdata;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void BodySystemCUDA<T>::setArray(BodyArray array, const T *data) {
|
||||
assert(m_bInitialized);
|
||||
|
||||
m_currentRead = 0;
|
||||
m_currentWrite = 1;
|
||||
|
||||
switch (array) {
|
||||
default:
|
||||
case BODYSYSTEM_POSITION: {
|
||||
if (m_bUsePBO) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_pbo[m_currentRead]);
|
||||
glBufferSubData(GL_ARRAY_BUFFER, 0, 4 * sizeof(T) * m_numBodies, data);
|
||||
|
||||
int size = 0;
|
||||
glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);
|
||||
|
||||
if ((unsigned)size != 4 * (sizeof(T) * m_numBodies)) {
|
||||
fprintf(stderr, "WARNING: Pixel Buffer Object download failed!n");
|
||||
}
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
} else {
|
||||
if (m_bUseSysMem) {
|
||||
memcpy(m_hPos[m_currentRead], data, m_numBodies * 4 * sizeof(T));
|
||||
} else
|
||||
checkCudaErrors(cudaMemcpy(m_deviceData[0].dPos[m_currentRead], data,
|
||||
m_numBodies * 4 * sizeof(T),
|
||||
cudaMemcpyHostToDevice));
|
||||
}
|
||||
} break;
|
||||
|
||||
case BODYSYSTEM_VELOCITY:
|
||||
if (m_bUseSysMem) {
|
||||
memcpy(m_hVel, data, m_numBodies * 4 * sizeof(T));
|
||||
} else
|
||||
checkCudaErrors(cudaMemcpy(m_deviceData[0].dVel, data,
|
||||
m_numBodies * 4 * sizeof(T),
|
||||
cudaMemcpyHostToDevice));
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
@ -1,149 +0,0 @@
|
||||
################################################################################
|
||||
#
|
||||
# Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
||||
#
|
||||
# NOTICE TO USER:
|
||||
#
|
||||
# This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
# international Copyright laws.
|
||||
#
|
||||
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
# OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
#
|
||||
# U.S. Government End Users. This source code is a "commercial item" as
|
||||
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
# "commercial computer software" and "commercial computer software
|
||||
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
# and is provided to the U.S. Government only as a commercial end item.
|
||||
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
# source code with only those rights set forth herein.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions
|
||||
# this is supported on Linux
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Determine OS platform and unix distribution
|
||||
ifeq ("$(TARGET_OS)","linux")
|
||||
# first search lsb_release
|
||||
DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
ifeq ("$(DISTRO)","")
|
||||
# second search and parse /etc/issue
|
||||
DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]")
|
||||
# ensure data from /etc/issue is valid
|
||||
ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse))
|
||||
DISTRO :=
|
||||
endif
|
||||
ifeq ("$(DISTRO)","")
|
||||
# third, we can search in /etc/os-release or /etc/{distro}-release
|
||||
DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ("$(TARGET_OS)","linux")
|
||||
# $(info) >> findgllib.mk -> LINUX path <<<)
|
||||
# Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
|
||||
UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
|
||||
FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
|
||||
RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
|
||||
CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
|
||||
SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
|
||||
KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?)
|
||||
ifeq ("$(UBUNTU)","0")
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
GLPATH := /usr/arm-linux-gnueabihf/lib
|
||||
GLLINK := -L/usr/arm-linux-gnueabihf/lib
|
||||
ifneq ($(TARGET_FS),)
|
||||
GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
|
||||
GLPATH := /usr/aarch64-linux-gnu/lib
|
||||
GLLINK := -L/usr/aarch64-linux-gnu/lib
|
||||
ifneq ($(TARGET_FS),)
|
||||
GLPATH += $(TARGET_FS)/usr/lib
|
||||
GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
endif
|
||||
else
|
||||
UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
|
||||
ifneq ("$(UBUNTU_PKG_NAME)","")
|
||||
GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME)
|
||||
GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
|
||||
endif
|
||||
|
||||
DFLT_PATH ?= /usr/lib
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ("$(SUSE)","0")
|
||||
GLPATH ?= /usr/X11R6/lib64
|
||||
GLLINK ?= -L/usr/X11R6/lib64
|
||||
DFLT_PATH ?= /usr/lib64
|
||||
else
|
||||
GLPATH ?= /usr/lib64/nvidia
|
||||
GLLINK ?= -L/usr/lib64/nvidia
|
||||
DFLT_PATH ?= /usr/lib64
|
||||
endif
|
||||
|
||||
# find libGL, libGLU, libXi,
|
||||
EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)
|
||||
GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
|
||||
X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null)
|
||||
|
||||
ifeq ("$(EGLLIB)","")
|
||||
$(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(GLESLIB)","")
|
||||
$(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(X11LIB)","")
|
||||
$(info >>> WARNING - libX11.so not found, please install libX11.so <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
|
||||
HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
|
||||
endif
|
||||
|
||||
EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
|
||||
EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)
|
||||
GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null)
|
||||
X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null)
|
||||
|
||||
ifeq ("$(EGLHEADER)","")
|
||||
$(info >>> WARNING - egl.h not found, please install egl.h <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(EGLEXTHEADER)","")
|
||||
$(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(GL31HEADER)","")
|
||||
$(info >>> WARNING - gl31.h not found, please install gl31.h <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(X11HEADER)","")
|
||||
$(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
else
|
||||
endif
|
||||
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -1,374 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "render_particles.h"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
#include <helper_cuda.h>
|
||||
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
void mat_identity(matrix4 m) {
|
||||
m[0][1] = m[0][2] = m[0][3] = m[1][0] = m[1][2] = m[1][3] = m[2][0] =
|
||||
m[2][1] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f;
|
||||
m[0][0] = m[1][1] = m[2][2] = m[3][3] = 1.0f;
|
||||
}
|
||||
|
||||
void mat_multiply(matrix4 m0, matrix4 m1) {
|
||||
float m[4];
|
||||
|
||||
for (int r = 0; r < 4; r++) {
|
||||
m[0] = m[1] = m[2] = m[3] = 0.0f;
|
||||
|
||||
for (int c = 0; c < 4; c++) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
m[c] += m0[i][r] * m1[c][i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int c = 0; c < 4; c++) {
|
||||
m0[c][r] = m[c];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mat_translate(matrix4 m, vector3 v) {
|
||||
matrix4 m2;
|
||||
m2[0][0] = m2[1][1] = m2[2][2] = m2[3][3] = 1.0f;
|
||||
m2[0][1] = m2[0][2] = m2[0][3] = m2[1][0] = m2[1][2] = m2[1][3] = m2[2][0] =
|
||||
m2[2][1] = m2[2][3] = 0.0f;
|
||||
m2[3][0] = v[0];
|
||||
m2[3][1] = v[1];
|
||||
m2[3][2] = v[2];
|
||||
mat_multiply(m, m2);
|
||||
}
|
||||
|
||||
void mat_perspective(matrix4 m, GLfloat fovy, GLfloat aspect, GLfloat znear,
|
||||
GLfloat zfar) {
|
||||
matrix4 m2;
|
||||
m2[1][0] = m2[2][0] = m2[3][0] = m2[0][1] = m2[2][1] = m2[3][1] = m2[0][2] =
|
||||
m2[1][2] = m2[0][3] = m2[1][3] = m2[3][3] = 0.0f;
|
||||
m2[2][3] = -1.0f;
|
||||
|
||||
float f = 1 / tan((fovy * M_PI / 180) / 2);
|
||||
m2[0][0] = f / aspect;
|
||||
m2[1][1] = f;
|
||||
|
||||
m2[2][2] = ((znear + zfar) / (znear - zfar));
|
||||
m2[3][2] = ((2 * znear * zfar) / (znear - zfar));
|
||||
|
||||
mat_multiply(m, m2);
|
||||
}
|
||||
|
||||
ParticleRenderer::ParticleRenderer(unsigned int windowWidth,
|
||||
unsigned int windowHeight)
|
||||
: m_pos(0),
|
||||
m_numParticles(0),
|
||||
m_pointSize(1.0f),
|
||||
m_spriteSize(2.0f),
|
||||
m_vertexShader(0),
|
||||
m_vertexShaderPoints(0),
|
||||
m_fragmentShader(0),
|
||||
m_programPoints(0),
|
||||
m_programSprites(0),
|
||||
m_texture(0),
|
||||
m_pbo(0),
|
||||
m_vboColor(0),
|
||||
m_windowWidth(windowWidth),
|
||||
m_windowHeight(windowHeight),
|
||||
m_bFp64Positions(false) {
|
||||
m_camera[0] = 0;
|
||||
m_camera[1] = 0;
|
||||
m_camera[2] = 0;
|
||||
_initGL();
|
||||
}
|
||||
|
||||
ParticleRenderer::~ParticleRenderer() { m_pos = 0; }
|
||||
|
||||
void ParticleRenderer::resetPBO() { glDeleteBuffers(1, (GLuint *)&m_pbo); }
|
||||
|
||||
void ParticleRenderer::setPositions(float *pos, int numParticles) {
|
||||
m_pos = pos;
|
||||
m_numParticles = numParticles;
|
||||
|
||||
if (!m_pbo) {
|
||||
glGenBuffers(1, (GLuint *)&m_pbo);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), pos,
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
checkGLErrors("Setting particle float position");
|
||||
}
|
||||
|
||||
void ParticleRenderer::setPositions(double *pos, int numParticles) {
|
||||
m_bFp64Positions = true;
|
||||
m_pos_fp64 = pos;
|
||||
m_numParticles = numParticles;
|
||||
|
||||
if (!m_pbo) {
|
||||
glGenBuffers(1, (GLuint *)&m_pbo);
|
||||
}
|
||||
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(double), pos,
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
checkGLErrors("Setting particle double position");
|
||||
}
|
||||
|
||||
void ParticleRenderer::setColors(float *color, int numParticles) {
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_vboColor);
|
||||
glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), color,
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
}
|
||||
|
||||
void ParticleRenderer::setBaseColor(float color[4]) {
|
||||
for (int i = 0; i < 4; i++) m_baseColor[i] = color[i];
|
||||
}
|
||||
|
||||
void ParticleRenderer::setPBO(unsigned int pbo, int numParticles, bool fp64) {
|
||||
m_pbo = pbo;
|
||||
m_numParticles = numParticles;
|
||||
|
||||
if (fp64) m_bFp64Positions = true;
|
||||
}
|
||||
|
||||
void ParticleRenderer::display() {
|
||||
glEnable(GL_BLEND);
|
||||
glBlendFunc(GL_SRC_ALPHA, GL_ONE);
|
||||
glDepthMask(GL_FALSE);
|
||||
|
||||
glUseProgram(m_programSprites);
|
||||
|
||||
// Set modelview and projection matrices
|
||||
GLint h_ModelViewMatrix = glGetUniformLocation(m_programSprites, "modelview");
|
||||
GLint h_ProjectionMatrix =
|
||||
glGetUniformLocation(m_programSprites, "projection");
|
||||
matrix4 modelview;
|
||||
matrix4 projection;
|
||||
mat_identity(modelview);
|
||||
mat_identity(projection);
|
||||
mat_translate(modelview, m_camera);
|
||||
mat_perspective(projection, 60, (float)m_windowWidth / (float)m_windowHeight,
|
||||
0.1, 1000.0);
|
||||
glUniformMatrix4fv(h_ModelViewMatrix, 1, GL_FALSE, (GLfloat *)modelview);
|
||||
glUniformMatrix4fv(h_ProjectionMatrix, 1, GL_FALSE, (GLfloat *)projection);
|
||||
|
||||
// Set point size
|
||||
GLint h_PointSize = glGetUniformLocation(m_programSprites, "size");
|
||||
glUniform1f(h_PointSize, m_spriteSize);
|
||||
|
||||
// Set base and secondary colors
|
||||
GLint h_BaseColor = glGetUniformLocation(m_programSprites, "baseColor");
|
||||
GLint h_SecondaryColor =
|
||||
glGetUniformLocation(m_programSprites, "secondaryColor");
|
||||
glUniform4f(h_BaseColor, 1.0, 1.0, 1.0, 1.0);
|
||||
glUniform4f(h_SecondaryColor, m_baseColor[0], m_baseColor[1], m_baseColor[2],
|
||||
m_baseColor[3]);
|
||||
|
||||
// Set position coords
|
||||
GLint h_position = glGetAttribLocation(m_programSprites, "a_position");
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
|
||||
glEnableVertexAttribArray(h_position);
|
||||
glVertexAttribPointer(h_position, 4, GL_FLOAT, GL_FALSE, 0, 0);
|
||||
|
||||
GLuint texLoc = glGetUniformLocation(m_programSprites, "splatTexture");
|
||||
glUniform1i(texLoc, 0);
|
||||
glActiveTexture(GL_TEXTURE0);
|
||||
glBindTexture(GL_TEXTURE_2D, m_texture);
|
||||
|
||||
glDrawArrays(GL_POINTS, 0, m_numParticles);
|
||||
|
||||
glDisableVertexAttribArray(h_position);
|
||||
|
||||
glUseProgram(0);
|
||||
|
||||
glDisable(GL_BLEND);
|
||||
glDepthMask(GL_TRUE);
|
||||
}
|
||||
|
||||
const char vertexShader[] = {
|
||||
"attribute vec4 a_position;"
|
||||
|
||||
"uniform mat4 projection;"
|
||||
"uniform mat4 modelview;"
|
||||
"uniform float size;"
|
||||
|
||||
"void main()"
|
||||
"{"
|
||||
"float pointSize = 500.0 * size;"
|
||||
"vec4 vert = a_position;"
|
||||
"vert.w = 1.0;"
|
||||
"vec3 pos_eye = vec3(modelview * vert);"
|
||||
"gl_PointSize = max(1.0, pointSize / (1.0 - pos_eye.z));"
|
||||
"gl_Position = projection * modelview * a_position;"
|
||||
"}"};
|
||||
|
||||
const char fragmentShader[] = {
|
||||
"uniform sampler2D splatTexture;"
|
||||
"uniform lowp vec4 baseColor;"
|
||||
"uniform lowp vec4 secondaryColor;"
|
||||
|
||||
"void main()"
|
||||
"{"
|
||||
"lowp vec4 textureColor = (0.6 + 0.4 * baseColor) * "
|
||||
"texture2D(splatTexture, gl_PointCoord);"
|
||||
"gl_FragColor = textureColor * secondaryColor;"
|
||||
"}"};
|
||||
|
||||
// Checks if the shader is compiled.
|
||||
static int CheckCompiled(GLuint shader) {
|
||||
GLint isCompiled = 0;
|
||||
glGetShaderiv(shader, GL_COMPILE_STATUS, &isCompiled);
|
||||
|
||||
if (!isCompiled) {
|
||||
GLint infoLen = 0;
|
||||
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLen);
|
||||
|
||||
if (infoLen > 1) {
|
||||
char *infoLog = (char *)malloc(sizeof(char) * infoLen);
|
||||
|
||||
glGetShaderInfoLog(shader, infoLen, NULL, infoLog);
|
||||
printf("Error compiling program:\n%s\n", infoLog);
|
||||
free(infoLog);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void ParticleRenderer::_initGL() {
|
||||
m_vertexShader = glCreateShader(GL_VERTEX_SHADER);
|
||||
m_fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
|
||||
|
||||
const char *v = vertexShader;
|
||||
const char *f = fragmentShader;
|
||||
glShaderSource(m_vertexShader, 1, &v, 0);
|
||||
glShaderSource(m_fragmentShader, 1, &f, 0);
|
||||
|
||||
checkGLErrors("Shader Source");
|
||||
|
||||
glCompileShader(m_vertexShader);
|
||||
glCompileShader(m_fragmentShader);
|
||||
|
||||
if (!CheckCompiled(m_vertexShader) || !CheckCompiled(m_fragmentShader)) {
|
||||
printf("A shader failed to compile.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
m_programSprites = glCreateProgram();
|
||||
|
||||
checkGLErrors("create program");
|
||||
|
||||
glAttachShader(m_programSprites, m_vertexShader);
|
||||
glAttachShader(m_programSprites, m_fragmentShader);
|
||||
|
||||
checkGLErrors("attaching shaders");
|
||||
|
||||
glLinkProgram(m_programSprites);
|
||||
|
||||
checkGLErrors("linking program");
|
||||
|
||||
EGLint linked;
|
||||
glGetProgramiv(m_programSprites, GL_LINK_STATUS, &linked);
|
||||
if (!linked) {
|
||||
printf("A shader failed to link.\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
_createTexture(32);
|
||||
|
||||
glGenBuffers(1, (GLuint *)&m_vboColor);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, m_vboColor);
|
||||
glBufferData(GL_ARRAY_BUFFER, m_numParticles * 4 * sizeof(float), 0,
|
||||
GL_STATIC_DRAW);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, 0);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Function : EvalHermite
|
||||
// Description :
|
||||
//------------------------------------------------------------------------------
|
||||
/**
|
||||
* EvalHermite(float pA, float pB, float vA, float vB, float u)
|
||||
* @brief Evaluates Hermite basis functions for the specified coefficients.
|
||||
*/
|
||||
inline float evalHermite(float pA, float pB, float vA, float vB, float u) {
|
||||
float u2 = (u * u), u3 = u2 * u;
|
||||
float B0 = 2 * u3 - 3 * u2 + 1;
|
||||
float B1 = -2 * u3 + 3 * u2;
|
||||
float B2 = u3 - 2 * u2 + u;
|
||||
float B3 = u3 - u;
|
||||
return (B0 * pA + B1 * pB + B2 * vA + B3 * vB);
|
||||
}
|
||||
|
||||
unsigned char *createGaussianMap(int N) {
|
||||
float *M = new float[2 * N * N];
|
||||
unsigned char *B = new unsigned char[4 * N * N];
|
||||
float X, Y, Y2, Dist;
|
||||
float Incr = 2.0f / N;
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
Y = -1.0f;
|
||||
|
||||
// float mmax = 0;
|
||||
for (int y = 0; y < N; y++, Y += Incr) {
|
||||
Y2 = Y * Y;
|
||||
X = -1.0f;
|
||||
|
||||
for (int x = 0; x < N; x++, X += Incr, i += 2, j += 4) {
|
||||
Dist = (float)sqrtf(X * X + Y2);
|
||||
|
||||
if (Dist > 1) Dist = 1;
|
||||
|
||||
M[i + 1] = M[i] = evalHermite(1.0f, 0, 0, 0, Dist);
|
||||
B[j + 3] = B[j + 2] = B[j + 1] = B[j] = (unsigned char)(M[i] * 255);
|
||||
}
|
||||
}
|
||||
|
||||
delete[] M;
|
||||
return (B);
|
||||
}
|
||||
|
||||
void ParticleRenderer::_createTexture(int resolution) {
|
||||
unsigned char *data = createGaussianMap(resolution);
|
||||
glGenTextures(1, (GLuint *)&m_texture);
|
||||
glBindTexture(GL_TEXTURE_2D, m_texture);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
|
||||
GL_LINEAR); //_MIPMAP_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, resolution, resolution, 0, GL_RGBA,
|
||||
GL_UNSIGNED_BYTE, data);
|
||||
}
|
@ -1,110 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __RENDER_PARTICLES__
|
||||
#define __RENDER_PARTICLES__
|
||||
|
||||
#include <EGL/egl.h>
|
||||
#include <EGL/eglext.h>
|
||||
#include <GLES3/gl31.h>
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
typedef float matrix4[4][4];
|
||||
typedef float vector3[3];
|
||||
|
||||
// check for OpenGL errors
|
||||
inline void checkGLErrors(const char *s) {
|
||||
EGLenum error;
|
||||
|
||||
while ((error = glGetError()) != GL_NO_ERROR) {
|
||||
fprintf(stderr, "%s: error - %d\n", s, error);
|
||||
}
|
||||
}
|
||||
|
||||
class ParticleRenderer {
|
||||
public:
|
||||
ParticleRenderer(unsigned int windowWidth = 720,
|
||||
unsigned int windowHeight = 480);
|
||||
~ParticleRenderer();
|
||||
|
||||
void setPositions(float *pos, int numParticles);
|
||||
void setPositions(double *pos, int numParticles);
|
||||
void setBaseColor(float color[4]);
|
||||
void setColors(float *color, int numParticles);
|
||||
void setPBO(unsigned int pbo, int numParticles, bool fp64);
|
||||
|
||||
enum DisplayMode {
|
||||
PARTICLE_POINTS,
|
||||
PARTICLE_SPRITES,
|
||||
PARTICLE_SPRITES_COLOR,
|
||||
PARTICLE_NUM_MODES
|
||||
};
|
||||
|
||||
void display();
|
||||
|
||||
void setPointSize(float size) { m_pointSize = size; }
|
||||
void setSpriteSize(float size) { m_spriteSize = size; }
|
||||
|
||||
void setCameraPos(vector3 camera_pos) {
|
||||
m_camera[0] = camera_pos[0];
|
||||
m_camera[1] = camera_pos[1];
|
||||
m_camera[2] = camera_pos[2];
|
||||
}
|
||||
|
||||
void resetPBO();
|
||||
|
||||
protected: // methods
|
||||
void _initGL();
|
||||
void _createTexture(int resolution);
|
||||
|
||||
protected: // data
|
||||
float *m_pos;
|
||||
double *m_pos_fp64;
|
||||
int m_numParticles;
|
||||
|
||||
float m_pointSize;
|
||||
float m_spriteSize;
|
||||
vector3 m_camera;
|
||||
|
||||
unsigned int m_vertexShader;
|
||||
unsigned int m_vertexShaderPoints;
|
||||
unsigned int m_fragmentShader;
|
||||
unsigned int m_programPoints;
|
||||
unsigned int m_programSprites;
|
||||
unsigned int m_texture;
|
||||
unsigned int m_pbo;
|
||||
unsigned int m_vboColor;
|
||||
unsigned int m_windowWidth;
|
||||
unsigned int m_windowHeight;
|
||||
|
||||
float m_baseColor[4];
|
||||
|
||||
bool m_bFp64Positions;
|
||||
};
|
||||
|
||||
#endif //__ RENDER_PARTICLES__
|
@ -1,172 +0,0 @@
|
||||
#ifndef __TIPSY_H__
|
||||
#define __TIPSY_H__
|
||||
|
||||
#include <string>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define MAXDIM 3
|
||||
|
||||
typedef float Real;
|
||||
|
||||
struct gas_particle
|
||||
{
|
||||
Real mass;
|
||||
Real pos[MAXDIM];
|
||||
Real vel[MAXDIM];
|
||||
Real rho;
|
||||
Real temp;
|
||||
Real hsmooth;
|
||||
Real metals ;
|
||||
Real phi ;
|
||||
} ;
|
||||
|
||||
//struct gas_particle *gas_particles;
|
||||
|
||||
struct dark_particle
|
||||
{
|
||||
Real mass;
|
||||
Real pos[MAXDIM];
|
||||
Real vel[MAXDIM];
|
||||
Real eps;
|
||||
int phi ;
|
||||
} ;
|
||||
|
||||
//struct dark_particle *dark_particles;
|
||||
|
||||
struct star_particle
|
||||
{
|
||||
Real mass;
|
||||
Real pos[MAXDIM];
|
||||
Real vel[MAXDIM];
|
||||
Real metals ;
|
||||
Real tform ;
|
||||
Real eps;
|
||||
int phi ;
|
||||
} ;
|
||||
|
||||
//struct star_particle *star_particles;
|
||||
|
||||
struct dump
|
||||
{
|
||||
double time ;
|
||||
int nbodies ;
|
||||
int ndim ;
|
||||
int nsph ;
|
||||
int ndark ;
|
||||
int nstar ;
|
||||
} ;
|
||||
|
||||
typedef struct dump header ;
|
||||
|
||||
template <typename real4>
|
||||
void read_tipsy_file(vector<real4> &bodyPositions,
|
||||
vector<real4> &bodyVelocities,
|
||||
vector<int> &bodiesIDs,
|
||||
const std::string &fileName,
|
||||
int &NTotal,
|
||||
int &NFirst,
|
||||
int &NSecond,
|
||||
int &NThird)
|
||||
{
|
||||
/*
|
||||
Read in our custom version of the tipsy file format written by
|
||||
Jeroen Bedorf. Most important change is that we store particle id on the
|
||||
location where previously the potential was stored.
|
||||
*/
|
||||
|
||||
char fullFileName[256];
|
||||
sprintf(fullFileName, "%s", fileName.c_str());
|
||||
|
||||
cout << "Trying to read file: " << fullFileName << endl;
|
||||
|
||||
ifstream inputFile(fullFileName, ios::in | ios::binary);
|
||||
|
||||
if (!inputFile.is_open())
|
||||
{
|
||||
cout << "Can't open input file \n";
|
||||
exit(EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
dump h;
|
||||
inputFile.read((char *)&h, sizeof(h));
|
||||
|
||||
int idummy;
|
||||
real4 positions;
|
||||
real4 velocity;
|
||||
|
||||
|
||||
//Read tipsy header
|
||||
NTotal = h.nbodies;
|
||||
NFirst = h.ndark;
|
||||
NSecond = h.nstar;
|
||||
NThird = h.nsph;
|
||||
|
||||
//Start reading
|
||||
int particleCount = 0;
|
||||
|
||||
dark_particle d;
|
||||
star_particle s;
|
||||
|
||||
for (int i=0; i < NTotal; i++)
|
||||
{
|
||||
if (i < NFirst)
|
||||
{
|
||||
inputFile.read((char *)&d, sizeof(d));
|
||||
velocity.w = d.eps;
|
||||
positions.w = d.mass;
|
||||
positions.x = d.pos[0];
|
||||
positions.y = d.pos[1];
|
||||
positions.z = d.pos[2];
|
||||
velocity.x = d.vel[0];
|
||||
velocity.y = d.vel[1];
|
||||
velocity.z = d.vel[2];
|
||||
idummy = d.phi;
|
||||
}
|
||||
else
|
||||
{
|
||||
inputFile.read((char *)&s, sizeof(s));
|
||||
velocity.w = s.eps;
|
||||
positions.w = s.mass;
|
||||
positions.x = s.pos[0];
|
||||
positions.y = s.pos[1];
|
||||
positions.z = s.pos[2];
|
||||
velocity.x = s.vel[0];
|
||||
velocity.y = s.vel[1];
|
||||
velocity.z = s.vel[2];
|
||||
idummy = s.phi;
|
||||
}
|
||||
|
||||
bodyPositions.push_back(positions);
|
||||
bodyVelocities.push_back(velocity);
|
||||
bodiesIDs.push_back(idummy);
|
||||
|
||||
particleCount++;
|
||||
}//end for
|
||||
|
||||
// round up to a multiple of 256 bodies since our kernel only supports that...
|
||||
int newTotal = NTotal;
|
||||
|
||||
if (NTotal % 256)
|
||||
{
|
||||
newTotal = ((NTotal / 256) + 1) * 256;
|
||||
}
|
||||
|
||||
for (int i = NTotal; i < newTotal; i++)
|
||||
{
|
||||
positions.w = positions.x = positions.y = positions.z = 0;
|
||||
velocity.x = velocity.y = velocity.z = 0;
|
||||
bodyPositions.push_back(positions);
|
||||
bodyVelocities.push_back(velocity);
|
||||
bodiesIDs.push_back(i);
|
||||
NFirst++;
|
||||
}
|
||||
|
||||
NTotal = newTotal;
|
||||
|
||||
inputFile.close();
|
||||
|
||||
cerr << "Read " << NTotal << " bodies" << endl;
|
||||
}
|
||||
|
||||
#endif //__TIPSY_H__
|
@ -1,18 +0,0 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/../../../Common"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||
"cStandard": "gnu17",
|
||||
"cppStandard": "gnu++14",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"configurationProvider": "ms-vscode.makefile-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"nvidia.nsight-vscode-edition",
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.makefile-tools"
|
||||
]
|
||||
}
|
@ -1,10 +0,0 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CUDA C++: Launch",
|
||||
"type": "cuda-gdb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/simpleGLES_screen"
|
||||
}
|
||||
]
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "sample",
|
||||
"type": "shell",
|
||||
"command": "make dbg=1",
|
||||
"problemMatcher": ["$nvcc"],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.20)
|
||||
|
||||
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules")
|
||||
|
||||
project(simpleGLES_screen LANGUAGES C CXX CUDA)
|
||||
|
||||
find_package(CUDAToolkit REQUIRED)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
|
||||
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
||||
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
|
||||
endif()
|
||||
|
||||
# Include directories and libraries
|
||||
include_directories(../../../Common)
|
||||
|
||||
# Source file
|
||||
# Add target for simpleGLES_screen
|
||||
add_executable(simpleGLES_screen simpleGLES_screen.cu)
|
||||
|
||||
target_compile_options(simpleGLES_screen PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
|
||||
|
||||
target_compile_features(simpleGLES_screen PRIVATE cxx_std_17 cuda_std_17)
|
||||
|
||||
set_target_properties(simpleGLES_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
|
@ -1,399 +0,0 @@
|
||||
################################################################################
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# * Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# * Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# Makefile project only supported on Mac OS X and Linux Platforms)
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Location of the CUDA Toolkit
|
||||
CUDA_PATH ?= /usr/local/cuda
|
||||
|
||||
##############################
|
||||
# start deprecated interface #
|
||||
##############################
|
||||
ifeq ($(x86_64),1)
|
||||
$(info WARNING - x86_64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
|
||||
TARGET_ARCH ?= x86_64
|
||||
endif
|
||||
ifeq ($(ARMv7),1)
|
||||
$(info WARNING - ARMv7 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=armv7l instead)
|
||||
TARGET_ARCH ?= armv7l
|
||||
endif
|
||||
ifeq ($(aarch64),1)
|
||||
$(info WARNING - aarch64 variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
|
||||
TARGET_ARCH ?= aarch64
|
||||
endif
|
||||
ifeq ($(ppc64le),1)
|
||||
$(info WARNING - ppc64le variable has been deprecated)
|
||||
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
|
||||
TARGET_ARCH ?= ppc64le
|
||||
endif
|
||||
ifneq ($(GCC),)
|
||||
$(info WARNING - GCC variable has been deprecated)
|
||||
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
|
||||
HOST_COMPILER ?= $(GCC)
|
||||
endif
|
||||
ifneq ($(abi),)
|
||||
$(error ERROR - abi variable has been removed)
|
||||
endif
|
||||
############################
|
||||
# end deprecated interface #
|
||||
############################
|
||||
|
||||
# architecture
|
||||
HOST_ARCH := $(shell uname -m)
|
||||
TARGET_ARCH ?= $(HOST_ARCH)
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
|
||||
TARGET_SIZE := 64
|
||||
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
|
||||
TARGET_SIZE := 32
|
||||
endif
|
||||
else
|
||||
TARGET_SIZE := $(shell getconf LONG_BIT)
|
||||
endif
|
||||
else
|
||||
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
|
||||
endif
|
||||
|
||||
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
|
||||
ifeq ($(HOST_ARCH),aarch64)
|
||||
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
|
||||
HOST_ARCH := sbsa
|
||||
TARGET_ARCH := sbsa
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
|
||||
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
|
||||
endif
|
||||
endif
|
||||
|
||||
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
|
||||
TARGET_ARCH = armv7l
|
||||
endif
|
||||
|
||||
# operating system
|
||||
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
TARGET_OS ?= $(HOST_OS)
|
||||
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
|
||||
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
|
||||
endif
|
||||
|
||||
# host compiler
|
||||
ifdef HOST_COMPILER
|
||||
CUSTOM_HOST_COMPILER = 1
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
|
||||
HOST_COMPILER ?= clang++
|
||||
endif
|
||||
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
HOST_COMPILER ?= arm-linux-gnueabihf-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
HOST_COMPILER ?= arm-linux-androideabi-g++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifeq ($(TARGET_OS), linux)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_OS),qnx)
|
||||
ifeq ($(QNX_HOST),)
|
||||
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
|
||||
endif
|
||||
ifeq ($(QNX_TARGET),)
|
||||
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
|
||||
endif
|
||||
export QNX_HOST
|
||||
export QNX_TARGET
|
||||
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
|
||||
else ifeq ($(TARGET_OS), android)
|
||||
HOST_COMPILER ?= aarch64-linux-android-clang++
|
||||
endif
|
||||
else ifeq ($(TARGET_ARCH),sbsa)
|
||||
HOST_COMPILER ?= aarch64-linux-gnu-g++
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
|
||||
endif
|
||||
endif
|
||||
HOST_COMPILER ?= g++
|
||||
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
|
||||
|
||||
# internal flags
|
||||
NVCCFLAGS := -m${TARGET_SIZE}
|
||||
CCFLAGS :=
|
||||
LDFLAGS :=
|
||||
|
||||
# build flags
|
||||
|
||||
# Link flag for customized HOST_COMPILER with gcc realpath
|
||||
GCC_PATH := $(shell which gcc)
|
||||
ifeq ($(CUSTOM_HOST_COMPILER),1)
|
||||
ifneq ($(filter /%,$(HOST_COMPILER)),)
|
||||
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
|
||||
ifneq ($(GCC_PATH),$(HOST_COMPILER))
|
||||
LDFLAGS += -lstdc++
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
LDFLAGS += -rpath $(CUDA_PATH)/lib
|
||||
CCFLAGS += -arch $(HOST_ARCH)
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
|
||||
CCFLAGS += -mfloat-abi=hard
|
||||
else ifeq ($(TARGET_OS),android)
|
||||
LDFLAGS += -pie
|
||||
CCFLAGS += -fpie -fpic -fexceptions
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
ifneq ($(TARGET_FS),)
|
||||
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
|
||||
ifeq ($(GCCVERSIONLTEQ46),1)
|
||||
CCFLAGS += --sysroot=$(TARGET_FS)
|
||||
endif
|
||||
LDFLAGS += --sysroot=$(TARGET_FS)
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
|
||||
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
|
||||
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
|
||||
endif
|
||||
endif
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
NVCCFLAGS += -D_QNX_SOURCE
|
||||
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
|
||||
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
|
||||
LDFLAGS += -lsocket
|
||||
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
|
||||
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
|
||||
ifdef TARGET_OVERRIDE
|
||||
LDFLAGS += -lslog2
|
||||
endif
|
||||
|
||||
ifneq ($(TARGET_FS),)
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/lib
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
|
||||
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
|
||||
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
|
||||
CCFLAGS += -I$(TARGET_FS)/../include
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef TARGET_OVERRIDE # cuda toolkit targets override
|
||||
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
|
||||
endif
|
||||
|
||||
# Install directory of different arch
|
||||
CUDA_INSTALL_TARGET_DIR :=
|
||||
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
|
||||
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
|
||||
else ifeq ($(TARGET_ARCH),ppc64le)
|
||||
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
|
||||
endif
|
||||
|
||||
# Debug build flags
|
||||
ifeq ($(dbg),1)
|
||||
NVCCFLAGS += -g -G
|
||||
BUILD_TYPE := debug
|
||||
else
|
||||
BUILD_TYPE := release
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS :=
|
||||
ALL_CCFLAGS += $(NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
|
||||
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
|
||||
|
||||
SAMPLE_ENABLED := 1
|
||||
|
||||
# This sample is not supported on Linux x86_64
|
||||
ifeq ($(TARGET_OS),linux)
|
||||
ifeq ($(TARGET_ARCH),x86_64)
|
||||
$(info >>> WARNING - simpleGLES_screen is not supported on Linux x86_64 - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
endif
|
||||
|
||||
# This sample is not supported on Mac OSX
|
||||
ifeq ($(TARGET_OS),darwin)
|
||||
$(info >>> WARNING - simpleGLES_screen is not supported on Mac OSX - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on ARMv7
|
||||
ifeq ($(TARGET_ARCH),armv7l)
|
||||
$(info >>> WARNING - simpleGLES_screen is not supported on ARMv7 - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
# This sample is not supported on aarch64
|
||||
ifeq ($(TARGET_ARCH),aarch64)
|
||||
ifneq ($(TARGET_OS),qnx)
|
||||
$(info >>> WARNING - simpleGLES_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
endif
|
||||
# This sample is not supported on sbsa
|
||||
ifeq ($(TARGET_ARCH),sbsa)
|
||||
$(info >>> WARNING - simpleGLES_screen is not supported on sbsa - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ALL_LDFLAGS :=
|
||||
ALL_LDFLAGS += $(ALL_CCFLAGS)
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
|
||||
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
|
||||
|
||||
# Common includes and paths for CUDA
|
||||
INCLUDES := -I../../../Common
|
||||
LIBRARIES :=
|
||||
|
||||
################################################################################
|
||||
|
||||
# Makefile include to help find GLES Libraries
|
||||
include ./findgleslib.mk
|
||||
|
||||
# OpenGLES specific libraries
|
||||
ifneq ($(TARGET_OS),darwin)
|
||||
LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen
|
||||
endif
|
||||
|
||||
# Gencode arguments
|
||||
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
|
||||
SMS ?= 53 61 70 72 75 80 86 87 90
|
||||
else
|
||||
SMS ?= 50 52 60 61 70 75 80 86 89 90
|
||||
endif
|
||||
|
||||
ifeq ($(SMS),)
|
||||
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
ifeq ($(GENCODE_FLAGS),)
|
||||
# Generate SASS code for each SM architecture listed in $(SMS)
|
||||
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
|
||||
|
||||
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
|
||||
HIGHEST_SM := $(lastword $(sort $(SMS)))
|
||||
ifneq ($(HIGHEST_SM),)
|
||||
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
|
||||
endif
|
||||
endif
|
||||
|
||||
ALL_CCFLAGS += -DUSE_CUDAINTEROP -DGRAPHICS_SETUP_EGL -DUSE_GLES -DWIN_INTERFACE_CUSTOM --threads 0 --std=c++11
|
||||
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
EXEC ?= @echo "[@]"
|
||||
endif
|
||||
|
||||
################################################################################
|
||||
|
||||
# Target rules
|
||||
all: build
|
||||
|
||||
build: simpleGLES_screen
|
||||
|
||||
check.deps:
|
||||
ifeq ($(SAMPLE_ENABLED),0)
|
||||
@echo "Sample will be waived due to the above missing dependencies"
|
||||
else
|
||||
@echo "Sample is ready - all dependencies have been met"
|
||||
endif
|
||||
|
||||
simpleGLES_screen.o:simpleGLES_screen.cu
|
||||
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
|
||||
|
||||
simpleGLES_screen: simpleGLES_screen.o
|
||||
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
|
||||
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
|
||||
|
||||
run: build
|
||||
$(EXEC) ./simpleGLES_screen
|
||||
|
||||
testrun: build
|
||||
$(EXEC) ./simpleGLES_screen -file=ref_simpleGLES_screen.bin
|
||||
|
||||
clean:
|
||||
rm -f simpleGLES_screen simpleGLES_screen.o
|
||||
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/simpleGLES_screen
|
||||
|
||||
clobber: clean
|
@ -1,83 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
|
||||
<entry>
|
||||
<name>simpleGLES_screen</name>
|
||||
<cflags>
|
||||
<flag>-DUSE_CUDAINTEROP</flag>
|
||||
<flag>-DGRAPHICS_SETUP_EGL</flag>
|
||||
<flag>-DUSE_GLES</flag>
|
||||
<flag>-DWIN_INTERFACE_CUSTOM</flag>
|
||||
</cflags>
|
||||
<cuda_api_list>
|
||||
<toolkit>cudaGraphicsUnmapResources</toolkit>
|
||||
<toolkit>cudaMemcpy</toolkit>
|
||||
<toolkit>cudaFree</toolkit>
|
||||
<toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
|
||||
<toolkit>cudaGraphicsMapResources</toolkit>
|
||||
<toolkit>cudaDeviceSynchronize</toolkit>
|
||||
<toolkit>cudaGraphicsUnregisterResource</toolkit>
|
||||
<toolkit>cudaMalloc</toolkit>
|
||||
<toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
|
||||
</cuda_api_list>
|
||||
<description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.]]></description>
|
||||
<devicecompilation>whole</devicecompilation>
|
||||
<files>
|
||||
<file>data\ref_simpleGLES_screen.bin</file>
|
||||
</files>
|
||||
<includepaths>
|
||||
<path>./</path>
|
||||
<path>../</path>
|
||||
<path>../../../Common</path>
|
||||
</includepaths>
|
||||
<keyconcepts>
|
||||
<concept level="basic">Graphics Interop</concept>
|
||||
<concept level="basic">Vertex Buffers</concept>
|
||||
<concept level="basic">3D Graphics</concept>
|
||||
</keyconcepts>
|
||||
<keywords>
|
||||
<keyword>OpenGL ES</keyword>
|
||||
</keywords>
|
||||
<libraries>
|
||||
</libraries>
|
||||
<librarypaths>
|
||||
</librarypaths>
|
||||
<nsight_eclipse>true</nsight_eclipse>
|
||||
<primary_file>simpleGLES_screen.cu</primary_file>
|
||||
<qatests>
|
||||
<qatest>-file=ref_simpleGLES_screen.bin</qatest>
|
||||
</qatests>
|
||||
<required_dependencies>
|
||||
<dependency>screen</dependency>
|
||||
<dependency>GLES</dependency>
|
||||
</required_dependencies>
|
||||
<scopes>
|
||||
<scope>1:CUDA Basic Topics</scope>
|
||||
<scope>2:Graphics Interop</scope>
|
||||
</scopes>
|
||||
<sm-arch>sm50</sm-arch>
|
||||
<sm-arch>sm52</sm-arch>
|
||||
<sm-arch>sm53</sm-arch>
|
||||
<sm-arch>sm60</sm-arch>
|
||||
<sm-arch>sm61</sm-arch>
|
||||
<sm-arch>sm70</sm-arch>
|
||||
<sm-arch>sm72</sm-arch>
|
||||
<sm-arch>sm75</sm-arch>
|
||||
<sm-arch>sm80</sm-arch>
|
||||
<sm-arch>sm86</sm-arch>
|
||||
<sm-arch>sm87</sm-arch>
|
||||
<sm-arch>sm89</sm-arch>
|
||||
<sm-arch>sm90</sm-arch>
|
||||
<sources>
|
||||
<ignore>graphics_interface.c</ignore>
|
||||
</sources>
|
||||
<supported_envs>
|
||||
<env>
|
||||
<platform>qnx</platform>
|
||||
</env>
|
||||
</supported_envs>
|
||||
<supported_sm_architectures>
|
||||
<include>all</include>
|
||||
</supported_sm_architectures>
|
||||
<title>Simple OpenGLES on Screen</title>
|
||||
<type>exe</type>
|
||||
</entry>
|
@ -1,37 +0,0 @@
|
||||
# simpleGLES_screen - Simple OpenGLES on Screen
|
||||
|
||||
## Description
|
||||
|
||||
Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
Graphics Interop, Vertex Buffers, 3D Graphics
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
QNX
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
aarch64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[screen](../../../README.md#screen), [GLES](../../../README.md#gles)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
|
||||
|
||||
## References (for more details)
|
||||
|
Binary file not shown.
@ -1,149 +0,0 @@
|
||||
################################################################################
|
||||
#
|
||||
# Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
|
||||
#
|
||||
# NOTICE TO USER:
|
||||
#
|
||||
# This source code is subject to NVIDIA ownership rights under U.S. and
|
||||
# international Copyright laws.
|
||||
#
|
||||
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
|
||||
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
|
||||
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
|
||||
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
|
||||
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
|
||||
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
|
||||
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
|
||||
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
|
||||
# OR PERFORMANCE OF THIS SOURCE CODE.
|
||||
#
|
||||
# U.S. Government End Users. This source code is a "commercial item" as
|
||||
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
|
||||
# "commercial computer software" and "commercial computer software
|
||||
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
|
||||
# and is provided to the U.S. Government only as a commercial end item.
|
||||
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
|
||||
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
|
||||
# source code with only those rights set forth herein.
|
||||
#
|
||||
################################################################################
|
||||
#
|
||||
# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions
|
||||
# this is supported on Linux
|
||||
#
|
||||
################################################################################
|
||||
|
||||
# Determine OS platform and unix distribution
|
||||
ifeq ("$(TARGET_OS)","linux")
|
||||
# first search lsb_release
|
||||
DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
|
||||
ifeq ("$(DISTRO)","")
|
||||
# second search and parse /etc/issue
|
||||
DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]")
|
||||
# ensure data from /etc/issue is valid
|
||||
ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse))
|
||||
DISTRO :=
|
||||
endif
|
||||
ifeq ("$(DISTRO)","")
|
||||
# third, we can search in /etc/os-release or /etc/{distro}-release
|
||||
DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ("$(TARGET_OS)","linux")
|
||||
# $(info) >> findgllib.mk -> LINUX path <<<)
|
||||
# Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
|
||||
UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
|
||||
FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
|
||||
RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
|
||||
CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
|
||||
SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
|
||||
KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?)
|
||||
ifeq ("$(UBUNTU)","0")
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
|
||||
GLPATH := /usr/arm-linux-gnueabihf/lib
|
||||
GLLINK := -L/usr/arm-linux-gnueabihf/lib
|
||||
ifneq ($(TARGET_FS),)
|
||||
GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
|
||||
endif
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
|
||||
GLPATH := /usr/aarch64-linux-gnu/lib
|
||||
GLLINK := -L/usr/aarch64-linux-gnu/lib
|
||||
ifneq ($(TARGET_FS),)
|
||||
GLPATH += $(TARGET_FS)/usr/lib
|
||||
GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
|
||||
endif
|
||||
else
|
||||
UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
|
||||
ifneq ("$(UBUNTU_PKG_NAME)","")
|
||||
GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME)
|
||||
GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
|
||||
endif
|
||||
|
||||
DFLT_PATH ?= /usr/lib
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ("$(SUSE)","0")
|
||||
GLPATH ?= /usr/X11R6/lib64
|
||||
GLLINK ?= -L/usr/X11R6/lib64
|
||||
DFLT_PATH ?= /usr/lib64
|
||||
else
|
||||
GLPATH ?= /usr/lib64/nvidia
|
||||
GLLINK ?= -L/usr/lib64/nvidia
|
||||
DFLT_PATH ?= /usr/lib64
|
||||
endif
|
||||
|
||||
# find libGL, libGLU, libXi,
|
||||
EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)
|
||||
GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
|
||||
X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null)
|
||||
|
||||
ifeq ("$(EGLLIB)","")
|
||||
$(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(GLESLIB)","")
|
||||
$(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(X11LIB)","")
|
||||
$(info >>> WARNING - libX11.so not found, please install libX11.so <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
|
||||
HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
|
||||
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
|
||||
HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
|
||||
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
|
||||
HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
|
||||
endif
|
||||
|
||||
EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
|
||||
EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)
|
||||
GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null)
|
||||
X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null)
|
||||
|
||||
ifeq ("$(EGLHEADER)","")
|
||||
$(info >>> WARNING - egl.h not found, please install egl.h <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(EGLEXTHEADER)","")
|
||||
$(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(GL31HEADER)","")
|
||||
$(info >>> WARNING - gl31.h not found, please install gl31.h <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
ifeq ("$(X11HEADER)","")
|
||||
$(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<)
|
||||
SAMPLE_ENABLED := 0
|
||||
endif
|
||||
else
|
||||
endif
|
||||
|
@ -1,379 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <GLES3/gl31.h>
|
||||
#include <EGL/egl.h>
|
||||
#include <EGL/eglext.h>
|
||||
#include <sys/keycodes.h>
|
||||
|
||||
screen_window_t screen_window;
|
||||
screen_context_t screen_context;
|
||||
screen_event_t screen_ev;
|
||||
|
||||
EGLDisplay eglDisplay = EGL_NO_DISPLAY;
|
||||
EGLSurface eglSurface = EGL_NO_SURFACE;
|
||||
EGLContext eglContext = EGL_NO_CONTEXT;
|
||||
|
||||
void error_exit(const char *format, ...) {
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
vfprintf(stderr, format, args);
|
||||
va_end(args);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
enum { NvGlDemoKeyCode_Escape = 27 };
|
||||
|
||||
typedef void (*GlCloseCB)(void);
|
||||
typedef void (*GlKeyCB)(char key, int state);
|
||||
GlCloseCB closeCB = NULL;
|
||||
GlKeyCB keyCB = NULL;
|
||||
|
||||
void CHECK_GLERROR() {
|
||||
GLenum err = glGetError();
|
||||
|
||||
if (err != GL_NO_ERROR) {
|
||||
fprintf(stderr, "[%s line %d] OpenGL Error: 0x%x ", __FILE__, __LINE__,
|
||||
err);
|
||||
|
||||
switch (err) {
|
||||
case GL_INVALID_ENUM:
|
||||
fprintf(stderr, "(GL_INVALID_ENUM)\n");
|
||||
break;
|
||||
case GL_INVALID_VALUE:
|
||||
fprintf(stderr, "(GL_INVALID_VALUE)\n");
|
||||
break;
|
||||
case GL_INVALID_OPERATION:
|
||||
fprintf(stderr, "(GL_INVALID_OPERATION)\n");
|
||||
break;
|
||||
case GL_OUT_OF_MEMORY:
|
||||
fprintf(stderr, "(GL_OUT_OF_MEMORY)\n");
|
||||
break;
|
||||
case GL_INVALID_FRAMEBUFFER_OPERATION:
|
||||
fprintf(stderr, "(GL_INVALID_FRAMEBUFFER_OPERATION)\n");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
|
||||
static void UpdateEventMask(void) {
|
||||
static int rc = 1;
|
||||
|
||||
if (rc) {
|
||||
rc = screen_create_event(&screen_ev);
|
||||
}
|
||||
}
|
||||
|
||||
void SetCloseCB(GlCloseCB cb) {
|
||||
// Call the eglQnxScreenConsumer module if option is enabled
|
||||
closeCB = cb;
|
||||
UpdateEventMask();
|
||||
}
|
||||
|
||||
void SetKeyCB(GlKeyCB cb) {
|
||||
keyCB = cb;
|
||||
UpdateEventMask();
|
||||
}
|
||||
|
||||
// Add keys here, that are used in demo apps.
|
||||
static unsigned char GetKeyPress(int *screenKey) {
|
||||
unsigned char key = '\0';
|
||||
|
||||
switch (*screenKey) {
|
||||
case KEYCODE_ESCAPE:
|
||||
key = NvGlDemoKeyCode_Escape;
|
||||
break;
|
||||
default:
|
||||
/* For "normal" keys, Screen KEYCODE is just ASCII. */
|
||||
if (*screenKey <= 127) {
|
||||
key = *screenKey;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
return key;
|
||||
}
|
||||
|
||||
void CheckEvents(void) {
|
||||
static int vis = 1, val = 1;
|
||||
int rc;
|
||||
|
||||
/**
|
||||
** We start the loop by processing any events that might be in our
|
||||
** queue. The only event that is of interest to us are the resize
|
||||
** and close events. The timeout variable is set to 0 (no wait) or
|
||||
** forever depending if the window is visible or invisible.
|
||||
**/
|
||||
|
||||
while (!screen_get_event(screen_context, screen_ev, vis ? 0ull : ~0ull)) {
|
||||
// Get QNX CAR 2.1 event property
|
||||
rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_TYPE, &val);
|
||||
if (rc || val == SCREEN_EVENT_NONE) {
|
||||
break;
|
||||
}
|
||||
|
||||
switch (val) {
|
||||
case SCREEN_EVENT_CLOSE:
|
||||
/**
|
||||
** All we have to do when we receive the close event is
|
||||
** exit the application loop.
|
||||
**/
|
||||
if (closeCB) {
|
||||
closeCB();
|
||||
}
|
||||
break;
|
||||
|
||||
case SCREEN_EVENT_KEYBOARD:
|
||||
rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_FLAGS,
|
||||
&val);
|
||||
if (rc || val == SCREEN_EVENT_NONE) {
|
||||
break;
|
||||
}
|
||||
if (val & KEY_DOWN) {
|
||||
rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_SYM,
|
||||
&val);
|
||||
if (rc || val == SCREEN_EVENT_NONE) {
|
||||
break;
|
||||
}
|
||||
unsigned char key;
|
||||
key = GetKeyPress(&val);
|
||||
if (key != '\0') {
|
||||
keyCB(key, 1);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int graphics_setup_window(int xpos, int ypos, int width, int height,
|
||||
const char *windowname, int reqdispno) {
|
||||
EGLint configAttrs[] = {
|
||||
EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8,
|
||||
EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, 8,
|
||||
EGL_DEPTH_SIZE, 16, EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
|
||||
EGL_NONE};
|
||||
|
||||
EGLint contextAttrs[] = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE};
|
||||
|
||||
EGLint windowAttrs[] = {EGL_NONE};
|
||||
EGLConfig *configList = NULL;
|
||||
EGLint configCount;
|
||||
|
||||
int displayCount = 0;
|
||||
int dispno;
|
||||
|
||||
screen_context = 0;
|
||||
|
||||
screen_display_t *screenDisplayHandle = NULL;
|
||||
|
||||
if (screen_create_context(&screen_context, 0)) {
|
||||
error_exit("Error creating screen context.\n");
|
||||
}
|
||||
|
||||
eglDisplay = eglGetDisplay(0);
|
||||
|
||||
if (eglDisplay == EGL_NO_DISPLAY) {
|
||||
error_exit("EGL failed to obtain display\n");
|
||||
}
|
||||
|
||||
if (!eglInitialize(eglDisplay, 0, 0)) {
|
||||
error_exit("EGL failed to initialize\n");
|
||||
}
|
||||
|
||||
if (!eglChooseConfig(eglDisplay, configAttrs, NULL, 0, &configCount) ||
|
||||
!configCount) {
|
||||
error_exit("EGL failed to return any matching configurations\n");
|
||||
}
|
||||
|
||||
configList = (EGLConfig *)malloc(configCount * sizeof(EGLConfig));
|
||||
|
||||
if (!eglChooseConfig(eglDisplay, configAttrs, configList, configCount,
|
||||
&configCount) ||
|
||||
!configCount) {
|
||||
error_exit("EGL failed to populate configuration list\n");
|
||||
}
|
||||
|
||||
screen_window = 0;
|
||||
if (screen_create_window(&screen_window, screen_context)) {
|
||||
error_exit("Error creating screen window.\n");
|
||||
}
|
||||
|
||||
// query the total no of display avaibale from QNX CAR2 screen
|
||||
if (screen_get_context_property_iv(
|
||||
screen_context, SCREEN_PROPERTY_DISPLAY_COUNT, &displayCount)) {
|
||||
error_exit("Error getting context property\n");
|
||||
}
|
||||
|
||||
screenDisplayHandle =
|
||||
(screen_display_t *)malloc(displayCount * sizeof(screen_display_t));
|
||||
if (!screenDisplayHandle) {
|
||||
error_exit("Error allocating screen memory handle is getting failed\n");
|
||||
}
|
||||
|
||||
// query the display handle from QNX CAR2 screen
|
||||
if (screen_get_context_property_pv(screen_context, SCREEN_PROPERTY_DISPLAYS,
|
||||
(void **)screenDisplayHandle)) {
|
||||
error_exit("Error getting display handle\n");
|
||||
}
|
||||
|
||||
for (dispno = 0; dispno < displayCount; dispno++) {
|
||||
int active = 0;
|
||||
// Query the connected status from QNX CAR2 screen
|
||||
screen_get_display_property_iv(screenDisplayHandle[dispno],
|
||||
SCREEN_PROPERTY_ATTACHED, &active);
|
||||
if (active) {
|
||||
if (reqdispno == dispno) {
|
||||
// Map the window buffer to user requested display port
|
||||
screen_set_window_property_pv(screen_window, SCREEN_PROPERTY_DISPLAY,
|
||||
(void **)&screenDisplayHandle[reqdispno]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dispno == displayCount) {
|
||||
error_exit("Failed to set the requested display\n");
|
||||
}
|
||||
|
||||
free(screenDisplayHandle);
|
||||
|
||||
int format = SCREEN_FORMAT_RGBA8888;
|
||||
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_FORMAT,
|
||||
&format)) {
|
||||
error_exit("Error setting SCREEN_PROPERTY_FORMAT\n");
|
||||
}
|
||||
|
||||
int usage = SCREEN_USAGE_OPENGL_ES2;
|
||||
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_USAGE,
|
||||
&usage)) {
|
||||
error_exit("Error setting SCREEN_PROPERTY_USAGE\n");
|
||||
}
|
||||
|
||||
EGLint interval = 1;
|
||||
if (screen_set_window_property_iv(screen_window,
|
||||
SCREEN_PROPERTY_SWAP_INTERVAL, &interval)) {
|
||||
error_exit("Error setting SCREEN_PROPERTY_SWAP_INTERVAL\n");
|
||||
}
|
||||
|
||||
int windowSize[2];
|
||||
windowSize[0] = width;
|
||||
windowSize[1] = height;
|
||||
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE,
|
||||
windowSize)) {
|
||||
error_exit("Error setting SCREEN_PROPERTY_SIZE\n");
|
||||
}
|
||||
|
||||
int windowOffset[2];
|
||||
windowOffset[0] = xpos;
|
||||
windowOffset[1] = ypos;
|
||||
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_POSITION,
|
||||
windowOffset)) {
|
||||
error_exit("Error setting SCREEN_PROPERTY_POSITION\n");
|
||||
}
|
||||
|
||||
if (screen_create_window_buffers(screen_window, 2)) {
|
||||
error_exit("Error creating two window buffers.\n");
|
||||
}
|
||||
|
||||
eglSurface =
|
||||
eglCreateWindowSurface(eglDisplay, configList[0],
|
||||
(EGLNativeWindowType)screen_window, windowAttrs);
|
||||
if (!eglSurface) {
|
||||
error_exit("EGL couldn't create window\n");
|
||||
}
|
||||
|
||||
eglBindAPI(EGL_OPENGL_ES_API);
|
||||
|
||||
eglContext = eglCreateContext(eglDisplay, configList[0], NULL, contextAttrs);
|
||||
if (!eglContext) {
|
||||
error_exit("EGL couldn't create context\n");
|
||||
}
|
||||
|
||||
if (!eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) {
|
||||
error_exit("EGL couldn't make context/surface current\n");
|
||||
}
|
||||
|
||||
EGLint Context_RendererType;
|
||||
eglQueryContext(eglDisplay, eglContext, EGL_CONTEXT_CLIENT_TYPE,
|
||||
&Context_RendererType);
|
||||
|
||||
switch (Context_RendererType) {
|
||||
case EGL_OPENGL_API:
|
||||
printf("Using OpenGL API\n");
|
||||
break;
|
||||
case EGL_OPENGL_ES_API:
|
||||
printf("Using OpenGL ES API\n");
|
||||
break;
|
||||
case EGL_OPENVG_API:
|
||||
error_exit("Context Query Returned OpenVG. This is Unsupported\n");
|
||||
default:
|
||||
error_exit("Unknown Context Type. %04X\n", Context_RendererType);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
void graphics_set_windowtitle(const char *windowname) {
|
||||
// Do nothing on screen
|
||||
}
|
||||
|
||||
void graphics_swap_buffers() { eglSwapBuffers(eglDisplay, eglSurface); }
|
||||
|
||||
void graphics_close_window() {
|
||||
if (eglDisplay != EGL_NO_DISPLAY) {
|
||||
eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
|
||||
|
||||
if (eglContext != EGL_NO_CONTEXT) {
|
||||
eglDestroyContext(eglDisplay, eglContext);
|
||||
}
|
||||
|
||||
if (eglSurface != EGL_NO_SURFACE) {
|
||||
eglDestroySurface(eglDisplay, eglSurface);
|
||||
}
|
||||
|
||||
eglTerminate(eglDisplay);
|
||||
}
|
||||
|
||||
if (screen_window) {
|
||||
screen_destroy_window(screen_window);
|
||||
screen_window = NULL;
|
||||
}
|
||||
if (screen_context) {
|
||||
screen_destroy_context(screen_context);
|
||||
}
|
||||
if (screen_ev) {
|
||||
screen_destroy_event(screen_ev);
|
||||
}
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0);
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
attribute vec4 position;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_Position = position;
|
||||
}
|
@ -1,601 +0,0 @@
|
||||
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*
|
||||
This example demonstrates how to use the CUDA C bindings to OpenGL ES to
|
||||
dynamically modify a vertex buffer using a CUDA C kernel.
|
||||
|
||||
The steps are:
|
||||
1. Create an empty vertex buffer object (VBO)
|
||||
2. Register the VBO with CUDA C
|
||||
3. Map the VBO for writing from CUDA C
|
||||
4. Run CUDA C kernel to modify the vertex positions
|
||||
5. Unmap the VBO
|
||||
6. Render the results using OpenGL ES
|
||||
|
||||
Host code
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <unistd.h>
|
||||
#include <screen/screen.h>
|
||||
|
||||
#include "graphics_interface.c"
|
||||
|
||||
// includes, cuda
|
||||
#include <cuda_runtime.h>
|
||||
#include <cuda_gl_interop.h>
|
||||
|
||||
// Utilities and timing functions
|
||||
#include <helper_functions.h> // includes cuda.h and cuda_runtime_api.h
|
||||
|
||||
// CUDA helper functions
|
||||
#include <helper_cuda.h> // helper functions for CUDA error check
|
||||
|
||||
#include <vector_types.h>
|
||||
|
||||
#define MAX_EPSILON_ERROR 0.0f
|
||||
#define THRESHOLD 0.0f
|
||||
#define REFRESH_DELAY 1 // ms
|
||||
|
||||
#define GUI_IDLE 0x100
|
||||
#define GUI_ROTATE 0x101
|
||||
#define GUI_TRANSLATE 0x102
|
||||
|
||||
int gui_mode;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Default configuration
|
||||
unsigned int window_width = 512;
|
||||
unsigned int window_height = 512;
|
||||
unsigned int dispno = 0;
|
||||
|
||||
// constants
|
||||
const unsigned int mesh_width = 256;
|
||||
const unsigned int mesh_height = 256;
|
||||
|
||||
// OpenGL ES variables and interop with CUDA C
|
||||
GLuint mesh_vao, mesh_vbo;
|
||||
struct cudaGraphicsResource *cuda_vbo_resource;
|
||||
void *d_vbo_buffer = NULL;
|
||||
|
||||
float g_fAnim = 0.0;
|
||||
|
||||
// UI / mouse controls
|
||||
int mouse_old_x, mouse_old_y;
|
||||
int mouse_buttons = 0;
|
||||
float rotate_x = 0.0, rotate_y = 0.0;
|
||||
float translate_z = -3.0;
|
||||
|
||||
StopWatchInterface *timer = NULL;
|
||||
|
||||
// Frame statistics
|
||||
int frame;
|
||||
int fpsCount = 0; // FPS count for averaging
|
||||
int fpsLimit = 1; // FPS limit for sampling
|
||||
int g_Index = 0;
|
||||
float avgFPS = 0.0f;
|
||||
unsigned int frameCount = 0;
|
||||
unsigned int g_TotalErrors = 0;
|
||||
|
||||
// The default number of seconds after which the test will end.
|
||||
#define TIME_LIMIT 10.0 // 10 secs
|
||||
|
||||
// Flag indicating it is time to shut down
|
||||
static GLboolean shutdown = GL_FALSE;
|
||||
|
||||
// Callback to close window
|
||||
static void closeCB_app(void) { shutdown = GL_TRUE; }
|
||||
|
||||
// Callback to handle key presses
|
||||
static void keyCB_app(char key, int state) {
|
||||
// Ignoring releases
|
||||
if (!state) return;
|
||||
|
||||
if ((key == 'q') || (key == 'Q') || (key == NvGlDemoKeyCode_Escape))
|
||||
shutdown = GL_TRUE;
|
||||
}
|
||||
|
||||
// Auto-Verification Code
|
||||
bool g_bQAReadback = false;
|
||||
|
||||
int *pArgc = NULL;
|
||||
char **pArgv = NULL;
|
||||
|
||||
#define MAX(a, b) ((a > b) ? a : b)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// declaration, forward
|
||||
|
||||
// CUDA functionality
|
||||
void runCuda(struct cudaGraphicsResource **vbo_resource);
|
||||
void runAutoTest(int devID, char **argv, char *ref_file);
|
||||
void checkResultCuda(int argc, char **argv, const GLuint &vbo);
|
||||
|
||||
const char *sSDKsample = "simpleGLES on Screen (VBO)";
|
||||
|
||||
void computeFPS() {
|
||||
frameCount++;
|
||||
fpsCount++;
|
||||
|
||||
if (fpsCount == fpsLimit) {
|
||||
avgFPS = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
|
||||
fpsCount = 0;
|
||||
fpsLimit = (int)MAX(avgFPS, 1.f);
|
||||
|
||||
sdkResetTimer(&timer);
|
||||
}
|
||||
|
||||
char fps[256];
|
||||
sprintf(fps, "Cuda/OpenGL ES Interop (VBO): %3.1f fps (Max 1000 fps)",
|
||||
avgFPS);
|
||||
graphics_set_windowtitle(fps);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//! Simple kernel to modify vertex positions in sine wave pattern
|
||||
//! @param data data in global memory
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void simple_vbo_kernel(float4 *pos, unsigned int width,
|
||||
unsigned int height, float time) {
|
||||
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
// calculate uv coordinates
|
||||
float u = x / (float)width;
|
||||
float v = y / (float)height;
|
||||
u = u * 2.0f - 1.0f;
|
||||
v = v * 2.0f - 1.0f;
|
||||
|
||||
// calculate simple sine wave pattern
|
||||
float freq = 4.0f;
|
||||
float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
|
||||
|
||||
// write output vertex
|
||||
pos[y * width + x] = make_float4(u, w, v, 1.0f);
|
||||
}
|
||||
|
||||
void launch_kernel(float4 *pos, unsigned int mesh_width,
|
||||
unsigned int mesh_height, float time) {
|
||||
// execute the kernel
|
||||
dim3 block(8, 8, 1);
|
||||
dim3 grid(mesh_width / block.x, mesh_height / block.y, 1);
|
||||
simple_vbo_kernel<<<grid, block>>>(pos, mesh_width, mesh_height, time);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runCuda(struct cudaGraphicsResource **vbo_resource) {
|
||||
// map OpenGL buffer object for writing from CUDA
|
||||
float4 *dptr;
|
||||
cudaGraphicsMapResources(1, vbo_resource, 0);
|
||||
size_t num_bytes;
|
||||
cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes,
|
||||
*vbo_resource);
|
||||
|
||||
launch_kernel(dptr, mesh_width, mesh_height, g_fAnim);
|
||||
|
||||
// unmap buffer object
|
||||
cudaGraphicsUnmapResources(1, vbo_resource, 0);
|
||||
}
|
||||
|
||||
#ifndef FOPEN
|
||||
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
|
||||
#endif
|
||||
|
||||
void sdkDumpBin2(void *data, unsigned int bytes, const char *filename) {
|
||||
printf("sdkDumpBin: <%s>\n", filename);
|
||||
FILE *fp;
|
||||
FOPEN(fp, filename, "wb");
|
||||
fwrite(data, bytes, 1, fp);
|
||||
fflush(fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run the Cuda part of the computation
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void runAutoTest(int devID, char **argv, char *ref_file) {
|
||||
char *reference_file = NULL;
|
||||
void *imageData = malloc(mesh_width * mesh_height * sizeof(float));
|
||||
|
||||
// execute the kernel
|
||||
launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim);
|
||||
|
||||
cudaDeviceSynchronize();
|
||||
getLastCudaError("launch_kernel failed");
|
||||
|
||||
cudaMemcpy(imageData, d_vbo_buffer, mesh_width * mesh_height * sizeof(float),
|
||||
cudaMemcpyDeviceToHost);
|
||||
|
||||
sdkDumpBin2(imageData, mesh_width * mesh_height * sizeof(float),
|
||||
"simpleGLES_screen.bin");
|
||||
reference_file = sdkFindFilePath(ref_file, argv[0]);
|
||||
|
||||
if (reference_file &&
|
||||
!sdkCompareBin2BinFloat("simpleGLES_screen.bin", reference_file,
|
||||
mesh_width * mesh_height * sizeof(float),
|
||||
MAX_EPSILON_ERROR, THRESHOLD, pArgv[0])) {
|
||||
g_TotalErrors++;
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Display callback
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void display_thisframe(float time_delta) {
|
||||
sdkStartTimer(&timer);
|
||||
|
||||
// run CUDA kernel to generate vertex positions
|
||||
runCuda(&cuda_vbo_resource);
|
||||
|
||||
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
|
||||
|
||||
glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
|
||||
|
||||
glFinish();
|
||||
|
||||
g_fAnim += time_delta;
|
||||
|
||||
sdkStopTimer(&timer);
|
||||
computeFPS();
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Check if the result is correct or write data to file for external
|
||||
//! regression testing
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
void checkResultCuda(int argc, char **argv, const GLuint &vbo) {
|
||||
if (!d_vbo_buffer) {
|
||||
printf("%s: Mapping result buffer from OpenGL ES\n", __FUNCTION__);
|
||||
|
||||
cudaGraphicsUnregisterResource(cuda_vbo_resource);
|
||||
|
||||
// map buffer object
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
||||
float *data = (float *)glMapBufferRange(
|
||||
GL_ARRAY_BUFFER, 0, mesh_width * mesh_height * 4 * sizeof(float),
|
||||
GL_READ_ONLY);
|
||||
|
||||
// check result
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
|
||||
// write file for regression test
|
||||
sdkWriteFile<float>("./data/regression.dat", data,
|
||||
mesh_width * mesh_height * 3, 0.0, false);
|
||||
}
|
||||
|
||||
// unmap GL buffer object
|
||||
if (!glUnmapBuffer(GL_ARRAY_BUFFER)) {
|
||||
fprintf(stderr, "Unmap buffer failed.\n");
|
||||
fflush(stderr);
|
||||
}
|
||||
|
||||
checkCudaErrors(cudaGraphicsGLRegisterBuffer(
|
||||
&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard));
|
||||
|
||||
CHECK_GLERROR();
|
||||
}
|
||||
}
|
||||
|
||||
GLuint mesh_shader = 0;
|
||||
|
||||
void readAndCompileShaderFromGLSLFile(GLuint new_shaderprogram,
|
||||
const char *filename, GLenum shaderType) {
|
||||
FILE *file = fopen(filename, "rb"); // open shader text file
|
||||
if (!file) {
|
||||
error_exit("Filename %s does not exist\n", filename);
|
||||
}
|
||||
|
||||
// get the size of the file and read it
|
||||
fseek(file, 0, SEEK_END);
|
||||
GLint size = ftell(file);
|
||||
char *data = (char *)malloc(sizeof(char) * (size + 1));
|
||||
memset(data, 0, sizeof(char) * (size + 1));
|
||||
fseek(file, 0, SEEK_SET);
|
||||
size_t res = fread(data, 1, size, file);
|
||||
fclose(file);
|
||||
|
||||
GLuint shader = glCreateShader(shaderType);
|
||||
glShaderSource(shader, 1, (const GLchar **)&data, &size);
|
||||
glCompileShader(shader);
|
||||
|
||||
CHECK_GLERROR();
|
||||
GLint compile_success = 0;
|
||||
glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_success);
|
||||
CHECK_GLERROR();
|
||||
|
||||
if (compile_success == GL_FALSE) {
|
||||
printf("Compilation of %s failed!\n Reason:\n", filename);
|
||||
|
||||
GLint maxLength = 0;
|
||||
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength);
|
||||
|
||||
char errorLog[maxLength];
|
||||
glGetShaderInfoLog(shader, maxLength, &maxLength, &errorLog[0]);
|
||||
|
||||
printf("%s", errorLog);
|
||||
|
||||
glDeleteShader(shader);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
glAttachShader(new_shaderprogram, shader);
|
||||
glDeleteShader(shader);
|
||||
|
||||
free(data);
|
||||
}
|
||||
|
||||
GLuint ShaderCreate(const char *vshader_filename,
|
||||
const char *fshader_filename) {
|
||||
printf("Loading GLSL shaders %s %s\n", vshader_filename, fshader_filename);
|
||||
|
||||
GLuint new_shaderprogram = glCreateProgram();
|
||||
|
||||
CHECK_GLERROR();
|
||||
if (vshader_filename) {
|
||||
readAndCompileShaderFromGLSLFile(new_shaderprogram, vshader_filename,
|
||||
GL_VERTEX_SHADER);
|
||||
}
|
||||
|
||||
CHECK_GLERROR();
|
||||
if (fshader_filename) {
|
||||
readAndCompileShaderFromGLSLFile(new_shaderprogram, fshader_filename,
|
||||
GL_FRAGMENT_SHADER);
|
||||
}
|
||||
|
||||
CHECK_GLERROR();
|
||||
|
||||
glLinkProgram(new_shaderprogram);
|
||||
|
||||
CHECK_GLERROR();
|
||||
GLint link_success;
|
||||
glGetProgramiv(new_shaderprogram, GL_LINK_STATUS, &link_success);
|
||||
|
||||
if (link_success == GL_FALSE) {
|
||||
printf("Linking of %s with %s failed!\n Reason:\n", vshader_filename,
|
||||
fshader_filename);
|
||||
|
||||
GLint maxLength = 0;
|
||||
glGetShaderiv(new_shaderprogram, GL_INFO_LOG_LENGTH, &maxLength);
|
||||
|
||||
char errorLog[maxLength];
|
||||
glGetShaderInfoLog(new_shaderprogram, maxLength, &maxLength, &errorLog[0]);
|
||||
|
||||
printf("%s", errorLog);
|
||||
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
return new_shaderprogram;
|
||||
}
|
||||
|
||||
//===========================================================================
|
||||
// InitGraphicsState() - initialize OpenGL
|
||||
//===========================================================================
|
||||
static void InitGraphicsState(void) {
|
||||
char *GL_version = (char *)glGetString(GL_VERSION);
|
||||
char *GL_vendor = (char *)glGetString(GL_VENDOR);
|
||||
char *GL_renderer = (char *)glGetString(GL_RENDERER);
|
||||
|
||||
printf("Version: %s\n", GL_version);
|
||||
printf("Vendor: %s\n", GL_vendor);
|
||||
printf("Renderer: %s\n", GL_renderer);
|
||||
|
||||
// RENDERING SETUP (OpenGL ES or OpenGL Core Profile!)
|
||||
glGenVertexArrays(1, &mesh_vao); // Features' Vertex Array Object allocation
|
||||
glBindVertexArray(mesh_vao); // bind VAO
|
||||
|
||||
// initialize buffer object
|
||||
glGenBuffers(1, &mesh_vbo);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, mesh_vbo);
|
||||
|
||||
unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
|
||||
glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_DYNAMIC_DRAW);
|
||||
glVertexAttribPointer((GLuint)0, 4, GL_FLOAT, GL_FALSE, 0, 0);
|
||||
glEnableVertexAttribArray(0);
|
||||
|
||||
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, mesh_vbo,
|
||||
cudaGraphicsMapFlagsNone));
|
||||
|
||||
// GLSL stuff
|
||||
char *vertex_shader_path = sdkFindFilePath("mesh.vert.glsl", pArgv[0]);
|
||||
char *fragment_shader_path = sdkFindFilePath("mesh.frag.glsl", pArgv[0]);
|
||||
|
||||
if (vertex_shader_path == NULL || fragment_shader_path == NULL) {
|
||||
printf("Error finding shader file\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
mesh_shader = ShaderCreate(vertex_shader_path, fragment_shader_path);
|
||||
CHECK_GLERROR();
|
||||
|
||||
free(vertex_shader_path);
|
||||
free(fragment_shader_path);
|
||||
|
||||
glUseProgram(mesh_shader);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//! Run a simple test for CUDA
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
bool runTest(int argc, char **argv, char *ref_file) {
|
||||
// command line mode only
|
||||
if (ref_file != NULL) {
|
||||
// This will pick the best possible CUDA capable device
|
||||
// int devID = findCudaDevice(argc, (const char **)argv);
|
||||
#if defined(__aarch64__) || defined(__arm__)
|
||||
// find iGPU on the system which is compute capable which will perform
|
||||
// GLES-CUDA interop
|
||||
int devID = findIntegratedGPU();
|
||||
#else
|
||||
// use command-line specified CUDA device, otherwise use device with highest
|
||||
// Gflops/s
|
||||
int devID = findCudaDevice(argc, (const char **)argv);
|
||||
#endif
|
||||
|
||||
// create VBO
|
||||
checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer,
|
||||
mesh_width * mesh_height * 4 * sizeof(float)));
|
||||
|
||||
// run the cuda part
|
||||
runAutoTest(devID, argv, ref_file);
|
||||
|
||||
// check result of Cuda step
|
||||
checkResultCuda(argc, argv, mesh_vbo);
|
||||
|
||||
cudaFree(d_vbo_buffer);
|
||||
d_vbo_buffer = NULL;
|
||||
} else {
|
||||
double endTime = TIME_LIMIT;
|
||||
|
||||
// this would use command-line specified CUDA device, note that CUDA
|
||||
// defaults to highest Gflops/s device
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "device")) {
|
||||
error_exit("Device setting not yet implemented!\n");
|
||||
}
|
||||
|
||||
// display selection
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "dispno")) {
|
||||
dispno = getCmdLineArgumentInt(argc, (const char **)argv, "dispno");
|
||||
}
|
||||
|
||||
// Window width
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "width")) {
|
||||
window_width = getCmdLineArgumentInt(argc, (const char **)argv, "width");
|
||||
}
|
||||
|
||||
// Window Height
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "height")) {
|
||||
window_height =
|
||||
getCmdLineArgumentInt(argc, (const char **)argv, "height");
|
||||
}
|
||||
|
||||
// Determine how long to run for in secs: default is 10s
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "runtime")) {
|
||||
endTime = getCmdLineArgumentInt(argc, (const char **)argv, "runtime");
|
||||
}
|
||||
|
||||
SetCloseCB(closeCB_app);
|
||||
SetKeyCB(keyCB_app);
|
||||
|
||||
// create QNX screen window and set up associated OpenGL ES context
|
||||
graphics_setup_window(0, 0, window_width, window_height, sSDKsample,
|
||||
dispno);
|
||||
|
||||
#if defined(__aarch64__) || defined(__arm__)
|
||||
// find iGPU on the system which is compute capable which will perform
|
||||
// GLES-CUDA interop
|
||||
int devID = findIntegratedGPU();
|
||||
#else
|
||||
// use command-line specified CUDA device, otherwise use device with highest
|
||||
// Gflops/s
|
||||
int devID = findCudaDevice(argc, (const char **)argv);
|
||||
#endif
|
||||
InitGraphicsState(); // set up GLES stuff
|
||||
|
||||
glClearColor(0, 0.5, 1, 1); // blue-ish background
|
||||
glClear(GL_COLOR_BUFFER_BIT);
|
||||
|
||||
graphics_swap_buffers();
|
||||
|
||||
int frame = 0;
|
||||
|
||||
struct timeval begin, end;
|
||||
gettimeofday(&begin, NULL);
|
||||
|
||||
// Print runtime
|
||||
if (endTime < 0.0) {
|
||||
endTime = TIME_LIMIT;
|
||||
printf(" running forever...\n");
|
||||
} else {
|
||||
printf(" running for %f seconds...\n", endTime);
|
||||
}
|
||||
|
||||
while (!shutdown) {
|
||||
frame++;
|
||||
display_thisframe(0.010);
|
||||
usleep(1000);
|
||||
graphics_swap_buffers();
|
||||
CheckEvents();
|
||||
|
||||
gettimeofday(&end, 0);
|
||||
double elapsed = (end.tv_sec - begin.tv_sec) +
|
||||
((end.tv_usec - begin.tv_usec) / 1000000.0);
|
||||
|
||||
// Check whether time limit has been exceeded
|
||||
if (!shutdown) shutdown = (endTime <= elapsed);
|
||||
}
|
||||
|
||||
// NOTE: Before destroying OpenGL ES context, must unregister all shared
|
||||
// resources from CUDA !
|
||||
checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource));
|
||||
|
||||
graphics_close_window(); // close window and destroy OpenGL ES context
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Program main
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
int main(int argc, char **argv) {
|
||||
char *ref_file = NULL;
|
||||
|
||||
pArgc = &argc;
|
||||
pArgv = argv;
|
||||
|
||||
#if defined(__linux__)
|
||||
setenv("DISPLAY", ":0", 0);
|
||||
#endif
|
||||
|
||||
printf("%s starting...\n", sSDKsample);
|
||||
|
||||
if (argc > 1) {
|
||||
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
|
||||
// In this mode, we run without OpenGL and see if VBO is generated
|
||||
// correctly
|
||||
getCmdLineArgumentString(argc, (const char **)argv, "file",
|
||||
(char **)&ref_file);
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
runTest(argc, argv, ref_file);
|
||||
|
||||
printf("%s completed, returned %s\n", sSDKsample,
|
||||
(g_TotalErrors == 0) ? "OK" : "ERROR!");
|
||||
|
||||
exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user