Remove Tegra QNX samples nbody_screen, simpleGLES_screen

This commit is contained in:
Rob Armstrong 2025-01-10 08:02:14 -08:00
parent feffc60cbf
commit 2d0314212b
34 changed files with 0 additions and 5821 deletions

View File

@ -1,18 +0,0 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/../../../Common"
],
"defines": [],
"compilerPath": "/usr/local/cuda/bin/nvcc",
"cStandard": "gnu17",
"cppStandard": "gnu++14",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.makefile-tools"
}
],
"version": 4
}

View File

@ -1,7 +0,0 @@
{
"recommendations": [
"nvidia.nsight-vscode-edition",
"ms-vscode.cpptools",
"ms-vscode.makefile-tools"
]
}

View File

@ -1,10 +0,0 @@
{
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/nbody_screen"
}
]
}

View File

@ -1,15 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "sample",
"type": "shell",
"command": "make dbg=1",
"problemMatcher": ["$nvcc"],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}

View File

@ -1,27 +0,0 @@
cmake_minimum_required(VERSION 3.20)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules")
project(nbody_screen LANGUAGES C CXX CUDA)
find_package(CUDAToolkit REQUIRED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
endif()
# Include directories and libraries
include_directories(../../../Common)
# Source file
# Add target for nbody_screen
add_executable(nbody_screen nbody_screen.cu)
target_compile_options(nbody_screen PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(nbody_screen PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(nbody_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

View File

@ -1,405 +0,0 @@
################################################################################
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux Platforms)
#
################################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
ifeq ($(HOST_ARCH),aarch64)
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
HOST_ARCH := sbsa
TARGET_ARCH := sbsa
endif
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),sbsa)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
NVCCFLAGS += -D_QNX_SOURCE
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
LDFLAGS += -lsocket
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
ifdef TARGET_OVERRIDE
LDFLAGS += -lslog2
endif
ifneq ($(TARGET_FS),)
LDFLAGS += -L$(TARGET_FS)/usr/lib
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
CCFLAGS += -I$(TARGET_FS)/../include
endif
endif
endif
ifdef TARGET_OVERRIDE # cuda toolkit targets override
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
# Install directory of different arch
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
SAMPLE_ENABLED := 1
# This sample is not supported on Linux x86_64
ifeq ($(TARGET_OS),linux)
ifeq ($(TARGET_ARCH),x86_64)
$(info >>> WARNING - nbody_screen is not supported on Linux x86_64 - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
endif
# This sample is not supported on Mac OSX
ifeq ($(TARGET_OS),darwin)
$(info >>> WARNING - nbody_screen is not supported on Mac OSX - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on ARMv7
ifeq ($(TARGET_ARCH),armv7l)
$(info >>> WARNING - nbody_screen is not supported on ARMv7 - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on aarch64
ifeq ($(TARGET_ARCH),aarch64)
ifneq ($(TARGET_OS),qnx)
$(info >>> WARNING - nbody_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
endif
# This sample is not supported on sbsa
ifeq ($(TARGET_ARCH),sbsa)
$(info >>> WARNING - nbody_screen is not supported on sbsa - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../../Common
LIBRARIES :=
################################################################################
# Makefile include to help find GLES Libraries
include ./findgleslib.mk
# OpenGLES specific libraries
ifneq ($(TARGET_OS),darwin)
LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen
endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90
else
SMS ?= 50 52 60 61 70 75 80 86 89 90
endif
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
ALL_CCFLAGS += -ftz=true --threads 0 --std=c++11
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules
all: build
build: nbody_screen
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
@echo "Sample will be waived due to the above missing dependencies"
else
@echo "Sample is ready - all dependencies have been met"
endif
bodysystemcuda.o:bodysystemcuda.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
nbody_screen.o:nbody_screen.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
render_particles.o:render_particles.cpp
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
nbody_screen: bodysystemcuda.o nbody_screen.o render_particles.o
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
run: build
$(EXEC) ./nbody_screen
testrun: build
$(EXEC) ./nbody_screen -benchmark -compare -cpu
clean:
rm -f nbody_screen bodysystemcuda.o nbody_screen.o render_particles.o
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/nbody_screen
clobber: clean

View File

@ -1,92 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>nbody_screen</name>
<cflags>
<flag>-ftz=true</flag>
</cflags>
<cuda_api_list>
<toolkit>cudaGraphicsUnmapResources</toolkit>
<toolkit>cudaSetDeviceFlags</toolkit>
<toolkit>cudaGraphicsResourceSetMapFlags</toolkit>
<toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
<toolkit>cudaGraphicsMapResources</toolkit>
<toolkit>cudaSetDevice</toolkit>
<toolkit>cudaEventSynchronize</toolkit>
<toolkit>cudaGetDeviceProperties</toolkit>
<toolkit>cudaDeviceSynchronize</toolkit>
<toolkit>cudaEventRecord</toolkit>
<toolkit>cudaGetDevice</toolkit>
<toolkit>cudaMemcpyToSymbol</toolkit>
<toolkit>cudaStreamQuery</toolkit>
<toolkit>cudaEventDestroy</toolkit>
<toolkit>cudaEventElapsedTime</toolkit>
<toolkit>cudaGetDeviceCount</toolkit>
<toolkit>cudaEventCreate</toolkit>
</cuda_api_list>
<description><![CDATA[This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.]]></description>
<devicecompilation>whole</devicecompilation>
<files>
<file>./galaxy_20K.bin</file>
</files>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../../Common</path>
</includepaths>
<keyconcepts>
<concept level="advanced">Graphics Interop</concept>
<concept level="advanced">Data Parallel Algorithms</concept>
<concept level="advanced">Physically-Based Simulation</concept>
</keyconcepts>
<keywords>
<keyword>CUDA</keyword>
<keyword>GPGPU</keyword>
<keyword>n-body</keyword>
<keyword>simulation</keyword>
<keyword>astrophysics</keyword>
<keyword>OpenGL ES</keyword>
</keywords>
<libraries>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>nbody.cpp</primary_file>
<qatests>
<qatest>-benchmark -compare -cpu</qatest>
</qatests>
<required_dependencies>
<dependency>screen</dependency>
<dependency>GLES</dependency>
</required_dependencies>
<scopes>
<scope>2:Graphics Interop</scope>
<scope>1:CUDA Advanced Topics</scope>
<scope>1:Data-Parallel Algorithms</scope>
<scope>3:Physically-Based Simulation</scope>
</scopes>
<sm-arch>sm50</sm-arch>
<sm-arch>sm52</sm-arch>
<sm-arch>sm53</sm-arch>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch>
<supported_envs>
<env>
<platform>qnx</platform>
</env>
</supported_envs>
<supported_sm_architectures>
<include>all</include>
</supported_sm_architectures>
<title>CUDA N-Body Simulation on Screen</title>
<type>exe</type>
</entry>

View File

@ -1,37 +0,0 @@
# nbody_screen - CUDA N-Body Simulation on Screen
## Description
This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction.
## Key Concepts
Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation
## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
QNX
## Supported CPU Architecture
aarch64
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate
## Dependencies needed to build/run
[screen](../../../README.md#screen), [GLES](../../../README.md#gles)
## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## References (for more details)

View File

@ -1,286 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __BODYSYSTEM_H__
#define __BODYSYSTEM_H__
#include <algorithm>
enum NBodyConfig {
NBODY_CONFIG_RANDOM,
NBODY_CONFIG_SHELL,
NBODY_CONFIG_EXPAND,
NBODY_NUM_CONFIGS
};
enum BodyArray {
BODYSYSTEM_POSITION,
BODYSYSTEM_VELOCITY,
};
template <typename T>
struct vec3 {
typedef float Type;
}; // dummy
template <>
struct vec3<float> {
typedef float3 Type;
};
template <>
struct vec3<double> {
typedef double3 Type;
};
template <typename T>
struct vec4 {
typedef float Type;
}; // dummy
template <>
struct vec4<float> {
typedef float4 Type;
};
template <>
struct vec4<double> {
typedef double4 Type;
};
class string;
// BodySystem abstract base class
template <typename T>
class BodySystem {
public: // methods
BodySystem(int numBodies) {}
virtual ~BodySystem() {}
virtual void loadTipsyFile(const std::string &filename) = 0;
virtual void update(T deltaTime) = 0;
virtual void setSoftening(T softening) = 0;
virtual void setDamping(T damping) = 0;
virtual T *getArray(BodyArray array) = 0;
virtual void setArray(BodyArray array, const T *data) = 0;
virtual unsigned int getCurrentReadBuffer() const = 0;
virtual unsigned int getNumBodies() const = 0;
virtual void synchronizeThreads() const {};
protected: // methods
BodySystem() {} // default constructor
virtual void _initialize(int numBodies) = 0;
virtual void _finalize() = 0;
};
inline float3 scalevec(float3 &vector, float scalar) {
float3 rt = vector;
rt.x *= scalar;
rt.y *= scalar;
rt.z *= scalar;
return rt;
}
inline float normalize(float3 &vector) {
float dist =
sqrtf(vector.x * vector.x + vector.y * vector.y + vector.z * vector.z);
if (dist > 1e-6) {
vector.x /= dist;
vector.y /= dist;
vector.z /= dist;
}
return dist;
}
inline float dot(float3 v0, float3 v1) {
return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z;
}
inline float3 cross(float3 v0, float3 v1) {
float3 rt;
rt.x = v0.y * v1.z - v0.z * v1.y;
rt.y = v0.z * v1.x - v0.x * v1.z;
rt.z = v0.x * v1.y - v0.y * v1.x;
return rt;
}
// utility function
template <typename T>
void randomizeBodies(NBodyConfig config, T *pos, T *vel, float *color,
float clusterScale, float velocityScale, int numBodies,
bool vec4vel) {
switch (config) {
default:
case NBODY_CONFIG_RANDOM: {
float scale = clusterScale * std::max<float>(1.0f, numBodies / (1024.0f));
float vscale = velocityScale * scale;
int p = 0, v = 0;
int i = 0;
while (i < numBodies) {
float3 point;
// const int scale = 16;
point.x = rand() / (float)RAND_MAX * 2 - 1;
point.y = rand() / (float)RAND_MAX * 2 - 1;
point.z = rand() / (float)RAND_MAX * 2 - 1;
float lenSqr = dot(point, point);
if (lenSqr > 1) continue;
float3 velocity;
velocity.x = rand() / (float)RAND_MAX * 2 - 1;
velocity.y = rand() / (float)RAND_MAX * 2 - 1;
velocity.z = rand() / (float)RAND_MAX * 2 - 1;
lenSqr = dot(velocity, velocity);
if (lenSqr > 1) continue;
pos[p++] = point.x * scale; // pos.x
pos[p++] = point.y * scale; // pos.y
pos[p++] = point.z * scale; // pos.z
pos[p++] = 1.0f; // mass
vel[v++] = velocity.x * vscale; // pos.x
vel[v++] = velocity.y * vscale; // pos.x
vel[v++] = velocity.z * vscale; // pos.x
if (vec4vel) vel[v++] = 1.0f; // inverse mass
i++;
}
} break;
case NBODY_CONFIG_SHELL: {
float scale = clusterScale;
float vscale = scale * velocityScale;
float inner = 2.5f * scale;
float outer = 4.0f * scale;
int p = 0, v = 0;
int i = 0;
while (i < numBodies) // for(int i=0; i < numBodies; i++)
{
float x, y, z;
x = rand() / (float)RAND_MAX * 2 - 1;
y = rand() / (float)RAND_MAX * 2 - 1;
z = rand() / (float)RAND_MAX * 2 - 1;
float3 point = {x, y, z};
float len = normalize(point);
if (len > 1) continue;
pos[p++] =
point.x * (inner + (outer - inner) * rand() / (float)RAND_MAX);
pos[p++] =
point.y * (inner + (outer - inner) * rand() / (float)RAND_MAX);
pos[p++] =
point.z * (inner + (outer - inner) * rand() / (float)RAND_MAX);
pos[p++] = 1.0f;
x = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
y = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
z = 1.0f; // * (rand() / (float) RAND_MAX * 2 - 1);
float3 axis = {x, y, z};
normalize(axis);
if (1 - dot(point, axis) < 1e-6) {
axis.x = point.y;
axis.y = point.x;
normalize(axis);
}
// if (point.y < 0) axis = scalevec(axis, -1);
float3 vv = {(float)pos[4 * i], (float)pos[4 * i + 1],
(float)pos[4 * i + 2]};
vv = cross(vv, axis);
vel[v++] = vv.x * vscale;
vel[v++] = vv.y * vscale;
vel[v++] = vv.z * vscale;
if (vec4vel) vel[v++] = 1.0f;
i++;
}
} break;
case NBODY_CONFIG_EXPAND: {
float scale = clusterScale * numBodies / (1024.f);
if (scale < 1.0f) scale = clusterScale;
float vscale = scale * velocityScale;
int p = 0, v = 0;
for (int i = 0; i < numBodies;) {
float3 point;
point.x = rand() / (float)RAND_MAX * 2 - 1;
point.y = rand() / (float)RAND_MAX * 2 - 1;
point.z = rand() / (float)RAND_MAX * 2 - 1;
float lenSqr = dot(point, point);
if (lenSqr > 1) continue;
pos[p++] = point.x * scale; // pos.x
pos[p++] = point.y * scale; // pos.y
pos[p++] = point.z * scale; // pos.z
pos[p++] = 1.0f; // mass
vel[v++] = point.x * vscale; // pos.x
vel[v++] = point.y * vscale; // pos.x
vel[v++] = point.z * vscale; // pos.x
if (vec4vel) vel[v++] = 1.0f; // inverse mass
i++;
}
} break;
}
if (color) {
int v = 0;
for (int i = 0; i < numBodies; i++) {
// const int scale = 16;
color[v++] = rand() / (float)RAND_MAX;
color[v++] = rand() / (float)RAND_MAX;
color[v++] = rand() / (float)RAND_MAX;
color[v++] = 1.0f;
}
}
}
#endif // __BODYSYSTEM_H__

View File

@ -1,79 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __BODYSYSTEMCPU_H__
#define __BODYSYSTEMCPU_H__
#include "bodysystem.h"
// CPU Body System
template <typename T>
class BodySystemCPU : public BodySystem<T> {
public:
BodySystemCPU(int numBodies);
virtual ~BodySystemCPU();
virtual void loadTipsyFile(const std::string &filename);
virtual void update(T deltaTime);
virtual void setSoftening(T softening) {
m_softeningSquared = softening * softening;
}
virtual void setDamping(T damping) { m_damping = damping; }
virtual T *getArray(BodyArray array);
virtual void setArray(BodyArray array, const T *data);
virtual unsigned int getCurrentReadBuffer() const { return 0; }
virtual unsigned int getNumBodies() const { return m_numBodies; }
protected: // methods
BodySystemCPU() {} // default constructor
virtual void _initialize(int numBodies);
virtual void _finalize();
void _computeNBodyGravitation();
void _integrateNBodySystem(T deltaTime);
protected: // data
int m_numBodies;
bool m_bInitialized;
T *m_pos;
T *m_vel;
T *m_force;
T m_softeningSquared;
T m_damping;
};
#include "bodysystemcpu_impl.h"
#endif // __BODYSYSTEMCPU_H__

View File

@ -1,280 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "bodysystemcpu.h"
#include <assert.h>
#include <memory.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <helper_cuda.h>
#include <algorithm>
#include "tipsy.h"
#ifdef OPENMP
#include <omp.h>
#endif
template <typename T>
BodySystemCPU<T>::BodySystemCPU(int numBodies)
: m_numBodies(numBodies),
m_bInitialized(false),
m_force(0),
m_softeningSquared(.00125f),
m_damping(0.995f) {
m_pos = 0;
m_vel = 0;
_initialize(numBodies);
}
template <typename T>
BodySystemCPU<T>::~BodySystemCPU() {
_finalize();
m_numBodies = 0;
}
template <typename T>
void BodySystemCPU<T>::_initialize(int numBodies) {
assert(!m_bInitialized);
m_numBodies = numBodies;
m_pos = new T[m_numBodies * 4];
m_vel = new T[m_numBodies * 4];
m_force = new T[m_numBodies * 3];
memset(m_pos, 0, m_numBodies * 4 * sizeof(T));
memset(m_vel, 0, m_numBodies * 4 * sizeof(T));
memset(m_force, 0, m_numBodies * 3 * sizeof(T));
m_bInitialized = true;
}
template <typename T>
void BodySystemCPU<T>::_finalize() {
assert(m_bInitialized);
delete[] m_pos;
delete[] m_vel;
delete[] m_force;
m_bInitialized = false;
}
template <typename T>
void BodySystemCPU<T>::loadTipsyFile(const std::string &filename) {
if (m_bInitialized) _finalize();
vector<typename vec4<T>::Type> positions;
vector<typename vec4<T>::Type> velocities;
vector<int> ids;
int nBodies = 0;
int nFirst = 0, nSecond = 0, nThird = 0;
read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst,
nSecond, nThird);
_initialize(nBodies);
memcpy(m_pos, &positions[0], sizeof(vec4<T>) * nBodies);
memcpy(m_vel, &velocities[0], sizeof(vec4<T>) * nBodies);
}
template <typename T>
void BodySystemCPU<T>::update(T deltaTime) {
assert(m_bInitialized);
_integrateNBodySystem(deltaTime);
// std::swap(m_currentRead, m_currentWrite);
}
template <typename T>
T *BodySystemCPU<T>::getArray(BodyArray array) {
assert(m_bInitialized);
T *data = 0;
switch (array) {
default:
case BODYSYSTEM_POSITION:
data = m_pos;
break;
case BODYSYSTEM_VELOCITY:
data = m_vel;
break;
}
return data;
}
template <typename T>
void BodySystemCPU<T>::setArray(BodyArray array, const T *data) {
assert(m_bInitialized);
T *target = 0;
switch (array) {
default:
case BODYSYSTEM_POSITION:
target = m_pos;
break;
case BODYSYSTEM_VELOCITY:
target = m_vel;
break;
}
memcpy(target, data, m_numBodies * 4 * sizeof(T));
}
template <typename T>
T sqrt_T(T x) {
return sqrt(x);
}
template <>
float sqrt_T<float>(float x) {
return sqrtf(x);
}
template <typename T>
void bodyBodyInteraction(T accel[3], T posMass0[4], T posMass1[4],
T softeningSquared) {
T r[3];
// r_01 [3 FLOPS]
r[0] = posMass1[0] - posMass0[0];
r[1] = posMass1[1] - posMass0[1];
r[2] = posMass1[2] - posMass0[2];
// d^2 + e^2 [6 FLOPS]
T distSqr = r[0] * r[0] + r[1] * r[1] + r[2] * r[2];
distSqr += softeningSquared;
// invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)]
T invDist = (T)1.0 / (T)sqrt((double)distSqr);
T invDistCube = invDist * invDist * invDist;
// s = m_j * invDistCube [1 FLOP]
T s = posMass1[3] * invDistCube;
// (m_1 * r_01) / (d^2 + e^2)^(3/2) [6 FLOPS]
accel[0] += r[0] * s;
accel[1] += r[1] * s;
accel[2] += r[2] * s;
}
template <typename T>
void BodySystemCPU<T>::_computeNBodyGravitation() {
#ifdef OPENMP
#pragma omp parallel for
#endif
for (int i = 0; i < m_numBodies; i++) {
int indexForce = 3 * i;
T acc[3] = {0, 0, 0};
// We unroll this loop 4X for a small performance boost.
int j = 0;
while (j < m_numBodies) {
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
m_softeningSquared);
j++;
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
m_softeningSquared);
j++;
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
m_softeningSquared);
j++;
bodyBodyInteraction<T>(acc, &m_pos[4 * i], &m_pos[4 * j],
m_softeningSquared);
j++;
}
m_force[indexForce] = acc[0];
m_force[indexForce + 1] = acc[1];
m_force[indexForce + 2] = acc[2];
}
}
template <typename T>
void BodySystemCPU<T>::_integrateNBodySystem(T deltaTime) {
_computeNBodyGravitation();
#ifdef OPENMP
#pragma omp parallel for
#endif
for (int i = 0; i < m_numBodies; ++i) {
int index = 4 * i;
int indexForce = 3 * i;
T pos[3], vel[3], force[3];
pos[0] = m_pos[index + 0];
pos[1] = m_pos[index + 1];
pos[2] = m_pos[index + 2];
T invMass = m_pos[index + 3];
vel[0] = m_vel[index + 0];
vel[1] = m_vel[index + 1];
vel[2] = m_vel[index + 2];
force[0] = m_force[indexForce + 0];
force[1] = m_force[indexForce + 1];
force[2] = m_force[indexForce + 2];
// acceleration = force / mass;
// new velocity = old velocity + acceleration * deltaTime
vel[0] += (force[0] * invMass) * deltaTime;
vel[1] += (force[1] * invMass) * deltaTime;
vel[2] += (force[2] * invMass) * deltaTime;
vel[0] *= m_damping;
vel[1] *= m_damping;
vel[2] *= m_damping;
// new position = old position + velocity * deltaTime
pos[0] += vel[0] * deltaTime;
pos[1] += vel[1] * deltaTime;
pos[2] += vel[2] * deltaTime;
m_pos[index + 0] = pos[0];
m_pos[index + 1] = pos[1];
m_pos[index + 2] = pos[2];
m_vel[index + 0] = vel[0];
m_vel[index + 1] = vel[1];
m_vel[index + 2] = vel[2];
}
}

View File

@ -1,276 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <helper_cuda.h>
#include <math.h>
//#include <GL/glew.h>
//#include <GL/freeglut.h>
// CUDA standard includes
#include <cuda_runtime.h>
//#include <cuda_gl_interop.h>
#include "bodysystem.h"
__constant__ float softeningSquared;
__constant__ double softeningSquared_fp64;
cudaError_t setSofteningSquared(float softeningSq) {
return cudaMemcpyToSymbol(softeningSquared, &softeningSq, sizeof(float), 0,
cudaMemcpyHostToDevice);
}
cudaError_t setSofteningSquared(double softeningSq) {
return cudaMemcpyToSymbol(softeningSquared_fp64, &softeningSq, sizeof(double),
0, cudaMemcpyHostToDevice);
}
template <class T>
struct SharedMemory {
__device__ inline operator T *() {
extern __shared__ int __smem[];
return (T *)__smem;
}
__device__ inline operator const T *() const {
extern __shared__ int __smem[];
return (T *)__smem;
}
};
template <typename T>
__device__ T rsqrt_T(T x) {
return rsqrt(x);
}
template <>
__device__ float rsqrt_T<float>(float x) {
return rsqrtf(x);
}
template <>
__device__ double rsqrt_T<double>(double x) {
return rsqrt(x);
}
// Macros to simplify shared memory addressing
#define SX(i) sharedPos[i + blockDim.x * threadIdx.y]
// This macro is only used when multithreadBodies is true (below)
#define SX_SUM(i, j) sharedPos[i + blockDim.x * j]
template <typename T>
__device__ T getSofteningSquared() {
return softeningSquared;
}
template <>
__device__ double getSofteningSquared<double>() {
return softeningSquared_fp64;
}
template <typename T>
struct DeviceData {
T *dPos[2]; // mapped host pointers
T *dVel;
cudaEvent_t event;
unsigned int offset;
unsigned int numBodies;
};
template <typename T>
__device__ typename vec3<T>::Type bodyBodyInteraction(
typename vec3<T>::Type ai, typename vec4<T>::Type bi,
typename vec4<T>::Type bj) {
typename vec3<T>::Type r;
// r_ij [3 FLOPS]
r.x = bj.x - bi.x;
r.y = bj.y - bi.y;
r.z = bj.z - bi.z;
// distSqr = dot(r_ij, r_ij) + EPS^2 [6 FLOPS]
T distSqr = r.x * r.x + r.y * r.y + r.z * r.z;
distSqr += getSofteningSquared<T>();
// invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)]
T invDist = rsqrt_T(distSqr);
T invDistCube = invDist * invDist * invDist;
// s = m_j * invDistCube [1 FLOP]
T s = bj.w * invDistCube;
// a_i = a_i + s * r_ij [6 FLOPS]
ai.x += r.x * s;
ai.y += r.y * s;
ai.z += r.z * s;
return ai;
}
template <typename T>
__device__ typename vec3<T>::Type computeBodyAccel(
typename vec4<T>::Type bodyPos, typename vec4<T>::Type *positions,
int numTiles) {
typename vec4<T>::Type *sharedPos = SharedMemory<typename vec4<T>::Type>();
typename vec3<T>::Type acc = {0.0f, 0.0f, 0.0f};
for (int tile = 0; tile < numTiles; tile++) {
sharedPos[threadIdx.x] = positions[tile * blockDim.x + threadIdx.x];
__syncthreads();
// This is the "tile_calculation" from the GPUG3 article.
#pragma unroll 128
for (unsigned int counter = 0; counter < blockDim.x; counter++) {
acc = bodyBodyInteraction<T>(acc, bodyPos, sharedPos[counter]);
}
__syncthreads();
}
return acc;
}
template <typename T>
__global__ void integrateBodies(typename vec4<T>::Type *__restrict__ newPos,
typename vec4<T>::Type *__restrict__ oldPos,
typename vec4<T>::Type *vel,
unsigned int deviceOffset,
unsigned int deviceNumBodies, float deltaTime,
float damping, int numTiles) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= deviceNumBodies) {
return;
}
typename vec4<T>::Type position = oldPos[deviceOffset + index];
typename vec3<T>::Type accel =
computeBodyAccel<T>(position, oldPos, numTiles);
// acceleration = force / mass;
// new velocity = old velocity + acceleration * deltaTime
// note we factor out the body's mass from the equation, here and in
// bodyBodyInteraction (because they cancel out). Thus here force ==
// acceleration
typename vec4<T>::Type velocity = vel[deviceOffset + index];
velocity.x += accel.x * deltaTime;
velocity.y += accel.y * deltaTime;
velocity.z += accel.z * deltaTime;
velocity.x *= damping;
velocity.y *= damping;
velocity.z *= damping;
// new position = old position + velocity * deltaTime
position.x += velocity.x * deltaTime;
position.y += velocity.y * deltaTime;
position.z += velocity.z * deltaTime;
// store new position and velocity
newPos[deviceOffset + index] = position;
vel[deviceOffset + index] = velocity;
}
template <typename T>
void integrateNbodySystem(DeviceData<T> *deviceData,
cudaGraphicsResource **pgres,
unsigned int currentRead, float deltaTime,
float damping, unsigned int numBodies,
unsigned int numDevices, int blockSize,
bool bUsePBO) {
if (bUsePBO) {
checkCudaErrors(cudaGraphicsResourceSetMapFlags(
pgres[currentRead], cudaGraphicsMapFlagsReadOnly));
checkCudaErrors(cudaGraphicsResourceSetMapFlags(
pgres[1 - currentRead], cudaGraphicsMapFlagsWriteDiscard));
checkCudaErrors(cudaGraphicsMapResources(2, pgres, 0));
size_t bytes;
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
(void **)&(deviceData[0].dPos[currentRead]), &bytes,
pgres[currentRead]));
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
(void **)&(deviceData[0].dPos[1 - currentRead]), &bytes,
pgres[1 - currentRead]));
}
for (unsigned int dev = 0; dev != numDevices; dev++) {
if (numDevices > 1) {
cudaSetDevice(dev);
}
int numBlocks = (deviceData[dev].numBodies + blockSize - 1) / blockSize;
int numTiles = (numBodies + blockSize - 1) / blockSize;
int sharedMemSize = blockSize * 4 * sizeof(T); // 4 floats for pos
integrateBodies<T><<<numBlocks, blockSize, sharedMemSize>>>(
(typename vec4<T>::Type *)deviceData[dev].dPos[1 - currentRead],
(typename vec4<T>::Type *)deviceData[dev].dPos[currentRead],
(typename vec4<T>::Type *)deviceData[dev].dVel, deviceData[dev].offset,
deviceData[dev].numBodies, deltaTime, damping, numTiles);
if (numDevices > 1) {
checkCudaErrors(cudaEventRecord(deviceData[dev].event));
// MJH: Hack on older driver versions to force kernel launches to flush!
cudaStreamQuery(0);
}
// check if kernel invocation generated an error
getLastCudaError("Kernel execution failed");
}
if (numDevices > 1) {
for (unsigned int dev = 0; dev < numDevices; dev++) {
checkCudaErrors(cudaEventSynchronize(deviceData[dev].event));
}
}
if (bUsePBO) {
checkCudaErrors(cudaGraphicsUnmapResources(2, pgres, 0));
}
}
// Explicit specializations needed to generate code
template void integrateNbodySystem<float>(DeviceData<float> *deviceData,
cudaGraphicsResource **pgres,
unsigned int currentRead,
float deltaTime, float damping,
unsigned int numBodies,
unsigned int numDevices,
int blockSize, bool bUsePBO);
template void integrateNbodySystem<double>(DeviceData<double> *deviceData,
cudaGraphicsResource **pgres,
unsigned int currentRead,
float deltaTime, float damping,
unsigned int numBodies,
unsigned int numDevices,
int blockSize, bool bUsePBO);

View File

@ -1,99 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __BODYSYSTEMCUDA_H__
#define __BODYSYSTEMCUDA_H__
#include "bodysystem.h"
template <typename T>
struct DeviceData {
T *dPos[2]; // mapped host pointers
T *dVel;
cudaEvent_t event;
unsigned int offset;
unsigned int numBodies;
};
// CUDA BodySystem: runs on the GPU
template <typename T>
class BodySystemCUDA : public BodySystem<T> {
public:
BodySystemCUDA(unsigned int numBodies, unsigned int numDevices,
unsigned int blockSize, bool usePBO, bool useSysMem = false);
virtual ~BodySystemCUDA();
virtual void loadTipsyFile(const std::string &filename);
virtual void update(T deltaTime);
virtual void setSoftening(T softening);
virtual void setDamping(T damping);
virtual T *getArray(BodyArray array);
virtual void setArray(BodyArray array, const T *data);
virtual unsigned int getCurrentReadBuffer() const {
return m_pbo[m_currentRead];
}
virtual unsigned int getNumBodies() const { return m_numBodies; }
protected: // methods
BodySystemCUDA() {}
virtual void _initialize(int numBodies);
virtual void _finalize();
protected: // data
unsigned int m_numBodies;
unsigned int m_numDevices;
bool m_bInitialized;
// Host data
T *m_hPos[2];
T *m_hVel;
DeviceData<T> *m_deviceData;
bool m_bUsePBO;
bool m_bUseSysMem;
unsigned int m_SMVersion;
T m_damping;
unsigned int m_pbo[2];
cudaGraphicsResource *m_pGRes[2];
unsigned int m_currentRead;
unsigned int m_currentWrite;
unsigned int m_blockSize;
};
#include "bodysystemcuda_impl.h"
#endif // __BODYSYSTEMCUDA_H__

View File

@ -1,373 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <helper_cuda.h>
#include <assert.h>
#include <math.h>
#include <memory.h>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <algorithm>
//#include <GL/glew.h>
#include <cuda_gl_interop.h>
template <typename T>
void integrateNbodySystem(DeviceData<T> *deviceData,
cudaGraphicsResource **pgres,
unsigned int currentRead, float deltaTime,
float damping, unsigned int numBodies,
unsigned int numDevices, int blockSize, bool bUsePBO);
cudaError_t setSofteningSquared(float softeningSq);
cudaError_t setSofteningSquared(double softeningSq);
template <typename T>
BodySystemCUDA<T>::BodySystemCUDA(unsigned int numBodies,
unsigned int numDevices,
unsigned int blockSize, bool usePBO,
bool useSysMem)
: m_numBodies(numBodies),
m_numDevices(numDevices),
m_bInitialized(false),
m_bUsePBO(usePBO),
m_bUseSysMem(useSysMem),
m_currentRead(0),
m_currentWrite(1),
m_blockSize(blockSize) {
m_hPos[0] = m_hPos[1] = 0;
m_hVel = 0;
m_deviceData = 0;
_initialize(numBodies);
setSoftening(0.00125f);
setDamping(0.995f);
}
template <typename T>
BodySystemCUDA<T>::~BodySystemCUDA() {
_finalize();
m_numBodies = 0;
}
template <typename T>
void BodySystemCUDA<T>::_initialize(int numBodies) {
assert(!m_bInitialized);
m_numBodies = numBodies;
unsigned int memSize = sizeof(T) * 4 * numBodies;
m_deviceData = new DeviceData<T>[m_numDevices];
// divide up the workload amongst Devices
float *weights = new float[m_numDevices];
int *numSms = new int[m_numDevices];
float total = 0;
for (unsigned int i = 0; i < m_numDevices; i++) {
cudaDeviceProp props;
checkCudaErrors(cudaGetDeviceProperties(&props, i));
// Choose the weight based on the Compute Capability
// We estimate that a CC2.0 SM is about 4.0x faster than a CC 1.x SM for
// this application (since a 15-SM GF100 is about 2X faster than a 30-SM
// GT200).
numSms[i] = props.multiProcessorCount;
weights[i] = numSms[i] * (props.major >= 2 ? 4.f : 1.f);
total += weights[i];
}
unsigned int offset = 0;
unsigned int remaining = m_numBodies;
for (unsigned int i = 0; i < m_numDevices; i++) {
unsigned int count = (int)((weights[i] / total) * m_numBodies);
unsigned int round = numSms[i] * 256;
count = round * ((count + round - 1) / round);
if (count > remaining) {
count = remaining;
}
remaining -= count;
m_deviceData[i].offset = offset;
m_deviceData[i].numBodies = count;
offset += count;
if ((i == m_numDevices - 1) && (offset < m_numBodies - 1)) {
m_deviceData[i].numBodies += m_numBodies - offset;
}
}
delete[] weights;
delete[] numSms;
if (m_bUseSysMem) {
checkCudaErrors(cudaHostAlloc((void **)&m_hPos[0], memSize,
cudaHostAllocMapped | cudaHostAllocPortable));
checkCudaErrors(cudaHostAlloc((void **)&m_hPos[1], memSize,
cudaHostAllocMapped | cudaHostAllocPortable));
checkCudaErrors(cudaHostAlloc((void **)&m_hVel, memSize,
cudaHostAllocMapped | cudaHostAllocPortable));
memset(m_hPos[0], 0, memSize);
memset(m_hPos[1], 0, memSize);
memset(m_hVel, 0, memSize);
for (unsigned int i = 0; i < m_numDevices; i++) {
if (m_numDevices > 1) {
checkCudaErrors(cudaSetDevice(i));
}
checkCudaErrors(cudaEventCreate(&m_deviceData[i].event));
checkCudaErrors(cudaHostGetDevicePointer(
(void **)&m_deviceData[i].dPos[0], (void *)m_hPos[0], 0));
checkCudaErrors(cudaHostGetDevicePointer(
(void **)&m_deviceData[i].dPos[1], (void *)m_hPos[1], 0));
checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dVel,
(void *)m_hVel, 0));
}
} else {
m_hPos[0] = new T[m_numBodies * 4];
m_hVel = new T[m_numBodies * 4];
memset(m_hPos[0], 0, memSize);
memset(m_hVel, 0, memSize);
checkCudaErrors(cudaEventCreate(&m_deviceData[0].event));
if (m_bUsePBO) {
// create the position pixel buffer objects for rendering
// we will actually compute directly from this memory in CUDA too
glGenBuffers(2, (GLuint *)m_pbo);
for (int i = 0; i < 2; ++i) {
glBindBuffer(GL_ARRAY_BUFFER, m_pbo[i]);
glBufferData(GL_ARRAY_BUFFER, memSize, m_hPos[0], GL_DYNAMIC_DRAW);
int size = 0;
glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);
if ((unsigned)size != memSize) {
fprintf(stderr, "WARNING: Pixel Buffer Object allocation failed!n");
}
glBindBuffer(GL_ARRAY_BUFFER, 0);
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&m_pGRes[i], m_pbo[i],
cudaGraphicsMapFlagsNone));
}
} else {
checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[0], memSize));
checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[1], memSize));
}
checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dVel, memSize));
}
m_bInitialized = true;
}
template <typename T>
void BodySystemCUDA<T>::_finalize() {
assert(m_bInitialized);
if (m_bUseSysMem) {
checkCudaErrors(cudaFreeHost(m_hPos[0]));
checkCudaErrors(cudaFreeHost(m_hPos[1]));
checkCudaErrors(cudaFreeHost(m_hVel));
for (unsigned int i = 0; i < m_numDevices; i++) {
cudaEventDestroy(m_deviceData[i].event);
}
} else {
delete[] m_hPos[0];
delete[] m_hPos[1];
delete[] m_hVel;
checkCudaErrors(cudaFree((void **)m_deviceData[0].dVel));
if (m_bUsePBO) {
checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[0]));
checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[1]));
glDeleteBuffers(2, (const GLuint *)m_pbo);
} else {
checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[0]));
checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[1]));
}
}
delete[] m_deviceData;
m_bInitialized = false;
}
template <typename T>
void BodySystemCUDA<T>::loadTipsyFile(const std::string &filename) {
if (m_bInitialized) _finalize();
std::vector<typename vec4<T>::Type> positions;
std::vector<typename vec4<T>::Type> velocities;
std::vector<int> ids;
int nBodies = 0;
int nFirst = 0, nSecond = 0, nThird = 0;
read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst,
nSecond, nThird);
_initialize(nBodies);
setArray(BODYSYSTEM_POSITION, (T *)&positions[0]);
setArray(BODYSYSTEM_VELOCITY, (T *)&velocities[0]);
}
template <typename T>
void BodySystemCUDA<T>::setSoftening(T softening) {
T softeningSq = softening * softening;
for (unsigned int i = 0; i < m_numDevices; i++) {
if (m_numDevices > 1) {
checkCudaErrors(cudaSetDevice(i));
}
checkCudaErrors(setSofteningSquared(softeningSq));
}
}
template <typename T>
void BodySystemCUDA<T>::setDamping(T damping) {
m_damping = damping;
}
template <typename T>
void BodySystemCUDA<T>::update(T deltaTime) {
assert(m_bInitialized);
integrateNbodySystem<T>(m_deviceData, m_pGRes, m_currentRead,
(float)deltaTime, (float)m_damping, m_numBodies,
m_numDevices, m_blockSize, m_bUsePBO);
std::swap(m_currentRead, m_currentWrite);
}
template <typename T>
T *BodySystemCUDA<T>::getArray(BodyArray array) {
assert(m_bInitialized);
T *hdata = 0;
T *ddata = 0;
cudaGraphicsResource *pgres = NULL;
int currentReadHost = m_bUseSysMem ? m_currentRead : 0;
switch (array) {
default:
case BODYSYSTEM_POSITION:
hdata = m_hPos[currentReadHost];
ddata = m_deviceData[0].dPos[m_currentRead];
if (m_bUsePBO) {
pgres = m_pGRes[m_currentRead];
}
break;
case BODYSYSTEM_VELOCITY:
hdata = m_hVel;
ddata = m_deviceData[0].dVel;
break;
}
if (!m_bUseSysMem) {
if (pgres) {
checkCudaErrors(
cudaGraphicsResourceSetMapFlags(pgres, cudaGraphicsMapFlagsReadOnly));
checkCudaErrors(cudaGraphicsMapResources(1, &pgres, 0));
size_t bytes;
checkCudaErrors(
cudaGraphicsResourceGetMappedPointer((void **)&ddata, &bytes, pgres));
}
checkCudaErrors(cudaMemcpy(hdata, ddata, m_numBodies * 4 * sizeof(T),
cudaMemcpyDeviceToHost));
if (pgres) {
checkCudaErrors(cudaGraphicsUnmapResources(1, &pgres, 0));
}
}
return hdata;
}
template <typename T>
void BodySystemCUDA<T>::setArray(BodyArray array, const T *data) {
assert(m_bInitialized);
m_currentRead = 0;
m_currentWrite = 1;
switch (array) {
default:
case BODYSYSTEM_POSITION: {
if (m_bUsePBO) {
glBindBuffer(GL_ARRAY_BUFFER, m_pbo[m_currentRead]);
glBufferSubData(GL_ARRAY_BUFFER, 0, 4 * sizeof(T) * m_numBodies, data);
int size = 0;
glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size);
if ((unsigned)size != 4 * (sizeof(T) * m_numBodies)) {
fprintf(stderr, "WARNING: Pixel Buffer Object download failed!n");
}
glBindBuffer(GL_ARRAY_BUFFER, 0);
} else {
if (m_bUseSysMem) {
memcpy(m_hPos[m_currentRead], data, m_numBodies * 4 * sizeof(T));
} else
checkCudaErrors(cudaMemcpy(m_deviceData[0].dPos[m_currentRead], data,
m_numBodies * 4 * sizeof(T),
cudaMemcpyHostToDevice));
}
} break;
case BODYSYSTEM_VELOCITY:
if (m_bUseSysMem) {
memcpy(m_hVel, data, m_numBodies * 4 * sizeof(T));
} else
checkCudaErrors(cudaMemcpy(m_deviceData[0].dVel, data,
m_numBodies * 4 * sizeof(T),
cudaMemcpyHostToDevice));
break;
}
}

View File

@ -1,149 +0,0 @@
################################################################################
#
# Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO USER:
#
# This source code is subject to NVIDIA ownership rights under U.S. and
# international Copyright laws.
#
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
# OR PERFORMANCE OF THIS SOURCE CODE.
#
# U.S. Government End Users. This source code is a "commercial item" as
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
# "commercial computer software" and "commercial computer software
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
# and is provided to the U.S. Government only as a commercial end item.
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
# source code with only those rights set forth herein.
#
################################################################################
#
# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions
# this is supported on Linux
#
################################################################################
# Determine OS platform and unix distribution
ifeq ("$(TARGET_OS)","linux")
# first search lsb_release
DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
ifeq ("$(DISTRO)","")
# second search and parse /etc/issue
DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]")
# ensure data from /etc/issue is valid
ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse))
DISTRO :=
endif
ifeq ("$(DISTRO)","")
# third, we can search in /etc/os-release or /etc/{distro}-release
DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
endif
endif
endif
ifeq ("$(TARGET_OS)","linux")
# $(info) >> findgllib.mk -> LINUX path <<<)
# Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?)
ifeq ("$(UBUNTU)","0")
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
GLPATH := /usr/arm-linux-gnueabihf/lib
GLLINK := -L/usr/arm-linux-gnueabihf/lib
ifneq ($(TARGET_FS),)
GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
GLPATH := /usr/aarch64-linux-gnu/lib
GLLINK := -L/usr/aarch64-linux-gnu/lib
ifneq ($(TARGET_FS),)
GLPATH += $(TARGET_FS)/usr/lib
GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
endif
else
UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
ifneq ("$(UBUNTU_PKG_NAME)","")
GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME)
GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
endif
DFLT_PATH ?= /usr/lib
endif
endif
ifeq ("$(SUSE)","0")
GLPATH ?= /usr/X11R6/lib64
GLLINK ?= -L/usr/X11R6/lib64
DFLT_PATH ?= /usr/lib64
else
GLPATH ?= /usr/lib64/nvidia
GLLINK ?= -L/usr/lib64/nvidia
DFLT_PATH ?= /usr/lib64
endif
# find libGL, libGLU, libXi,
EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)
GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null)
ifeq ("$(EGLLIB)","")
$(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(GLESLIB)","")
$(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(X11LIB)","")
$(info >>> WARNING - libX11.so not found, please install libX11.so <<<)
SAMPLE_ENABLED := 0
endif
HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
endif
EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)
GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null)
X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null)
ifeq ("$(EGLHEADER)","")
$(info >>> WARNING - egl.h not found, please install egl.h <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(EGLEXTHEADER)","")
$(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(GL31HEADER)","")
$(info >>> WARNING - gl31.h not found, please install gl31.h <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(X11HEADER)","")
$(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<)
SAMPLE_ENABLED := 0
endif
else
endif

File diff suppressed because it is too large Load Diff

View File

@ -1,374 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "render_particles.h"
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include <helper_cuda.h>
#include <math.h>
#include <assert.h>
void mat_identity(matrix4 m) {
m[0][1] = m[0][2] = m[0][3] = m[1][0] = m[1][2] = m[1][3] = m[2][0] =
m[2][1] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f;
m[0][0] = m[1][1] = m[2][2] = m[3][3] = 1.0f;
}
void mat_multiply(matrix4 m0, matrix4 m1) {
float m[4];
for (int r = 0; r < 4; r++) {
m[0] = m[1] = m[2] = m[3] = 0.0f;
for (int c = 0; c < 4; c++) {
for (int i = 0; i < 4; i++) {
m[c] += m0[i][r] * m1[c][i];
}
}
for (int c = 0; c < 4; c++) {
m0[c][r] = m[c];
}
}
}
void mat_translate(matrix4 m, vector3 v) {
matrix4 m2;
m2[0][0] = m2[1][1] = m2[2][2] = m2[3][3] = 1.0f;
m2[0][1] = m2[0][2] = m2[0][3] = m2[1][0] = m2[1][2] = m2[1][3] = m2[2][0] =
m2[2][1] = m2[2][3] = 0.0f;
m2[3][0] = v[0];
m2[3][1] = v[1];
m2[3][2] = v[2];
mat_multiply(m, m2);
}
void mat_perspective(matrix4 m, GLfloat fovy, GLfloat aspect, GLfloat znear,
GLfloat zfar) {
matrix4 m2;
m2[1][0] = m2[2][0] = m2[3][0] = m2[0][1] = m2[2][1] = m2[3][1] = m2[0][2] =
m2[1][2] = m2[0][3] = m2[1][3] = m2[3][3] = 0.0f;
m2[2][3] = -1.0f;
float f = 1 / tan((fovy * M_PI / 180) / 2);
m2[0][0] = f / aspect;
m2[1][1] = f;
m2[2][2] = ((znear + zfar) / (znear - zfar));
m2[3][2] = ((2 * znear * zfar) / (znear - zfar));
mat_multiply(m, m2);
}
ParticleRenderer::ParticleRenderer(unsigned int windowWidth,
unsigned int windowHeight)
: m_pos(0),
m_numParticles(0),
m_pointSize(1.0f),
m_spriteSize(2.0f),
m_vertexShader(0),
m_vertexShaderPoints(0),
m_fragmentShader(0),
m_programPoints(0),
m_programSprites(0),
m_texture(0),
m_pbo(0),
m_vboColor(0),
m_windowWidth(windowWidth),
m_windowHeight(windowHeight),
m_bFp64Positions(false) {
m_camera[0] = 0;
m_camera[1] = 0;
m_camera[2] = 0;
_initGL();
}
ParticleRenderer::~ParticleRenderer() { m_pos = 0; }
void ParticleRenderer::resetPBO() { glDeleteBuffers(1, (GLuint *)&m_pbo); }
void ParticleRenderer::setPositions(float *pos, int numParticles) {
m_pos = pos;
m_numParticles = numParticles;
if (!m_pbo) {
glGenBuffers(1, (GLuint *)&m_pbo);
}
glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), pos,
GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
checkGLErrors("Setting particle float position");
}
void ParticleRenderer::setPositions(double *pos, int numParticles) {
m_bFp64Positions = true;
m_pos_fp64 = pos;
m_numParticles = numParticles;
if (!m_pbo) {
glGenBuffers(1, (GLuint *)&m_pbo);
}
glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(double), pos,
GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
checkGLErrors("Setting particle double position");
}
void ParticleRenderer::setColors(float *color, int numParticles) {
glBindBuffer(GL_ARRAY_BUFFER, m_vboColor);
glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), color,
GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
void ParticleRenderer::setBaseColor(float color[4]) {
for (int i = 0; i < 4; i++) m_baseColor[i] = color[i];
}
void ParticleRenderer::setPBO(unsigned int pbo, int numParticles, bool fp64) {
m_pbo = pbo;
m_numParticles = numParticles;
if (fp64) m_bFp64Positions = true;
}
void ParticleRenderer::display() {
glEnable(GL_BLEND);
glBlendFunc(GL_SRC_ALPHA, GL_ONE);
glDepthMask(GL_FALSE);
glUseProgram(m_programSprites);
// Set modelview and projection matrices
GLint h_ModelViewMatrix = glGetUniformLocation(m_programSprites, "modelview");
GLint h_ProjectionMatrix =
glGetUniformLocation(m_programSprites, "projection");
matrix4 modelview;
matrix4 projection;
mat_identity(modelview);
mat_identity(projection);
mat_translate(modelview, m_camera);
mat_perspective(projection, 60, (float)m_windowWidth / (float)m_windowHeight,
0.1, 1000.0);
glUniformMatrix4fv(h_ModelViewMatrix, 1, GL_FALSE, (GLfloat *)modelview);
glUniformMatrix4fv(h_ProjectionMatrix, 1, GL_FALSE, (GLfloat *)projection);
// Set point size
GLint h_PointSize = glGetUniformLocation(m_programSprites, "size");
glUniform1f(h_PointSize, m_spriteSize);
// Set base and secondary colors
GLint h_BaseColor = glGetUniformLocation(m_programSprites, "baseColor");
GLint h_SecondaryColor =
glGetUniformLocation(m_programSprites, "secondaryColor");
glUniform4f(h_BaseColor, 1.0, 1.0, 1.0, 1.0);
glUniform4f(h_SecondaryColor, m_baseColor[0], m_baseColor[1], m_baseColor[2],
m_baseColor[3]);
// Set position coords
GLint h_position = glGetAttribLocation(m_programSprites, "a_position");
glBindBuffer(GL_ARRAY_BUFFER, m_pbo);
glEnableVertexAttribArray(h_position);
glVertexAttribPointer(h_position, 4, GL_FLOAT, GL_FALSE, 0, 0);
GLuint texLoc = glGetUniformLocation(m_programSprites, "splatTexture");
glUniform1i(texLoc, 0);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, m_texture);
glDrawArrays(GL_POINTS, 0, m_numParticles);
glDisableVertexAttribArray(h_position);
glUseProgram(0);
glDisable(GL_BLEND);
glDepthMask(GL_TRUE);
}
const char vertexShader[] = {
"attribute vec4 a_position;"
"uniform mat4 projection;"
"uniform mat4 modelview;"
"uniform float size;"
"void main()"
"{"
"float pointSize = 500.0 * size;"
"vec4 vert = a_position;"
"vert.w = 1.0;"
"vec3 pos_eye = vec3(modelview * vert);"
"gl_PointSize = max(1.0, pointSize / (1.0 - pos_eye.z));"
"gl_Position = projection * modelview * a_position;"
"}"};
const char fragmentShader[] = {
"uniform sampler2D splatTexture;"
"uniform lowp vec4 baseColor;"
"uniform lowp vec4 secondaryColor;"
"void main()"
"{"
"lowp vec4 textureColor = (0.6 + 0.4 * baseColor) * "
"texture2D(splatTexture, gl_PointCoord);"
"gl_FragColor = textureColor * secondaryColor;"
"}"};
// Checks if the shader is compiled.
static int CheckCompiled(GLuint shader) {
GLint isCompiled = 0;
glGetShaderiv(shader, GL_COMPILE_STATUS, &isCompiled);
if (!isCompiled) {
GLint infoLen = 0;
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLen);
if (infoLen > 1) {
char *infoLog = (char *)malloc(sizeof(char) * infoLen);
glGetShaderInfoLog(shader, infoLen, NULL, infoLog);
printf("Error compiling program:\n%s\n", infoLog);
free(infoLog);
}
return 0;
}
return 1;
}
void ParticleRenderer::_initGL() {
m_vertexShader = glCreateShader(GL_VERTEX_SHADER);
m_fragmentShader = glCreateShader(GL_FRAGMENT_SHADER);
const char *v = vertexShader;
const char *f = fragmentShader;
glShaderSource(m_vertexShader, 1, &v, 0);
glShaderSource(m_fragmentShader, 1, &f, 0);
checkGLErrors("Shader Source");
glCompileShader(m_vertexShader);
glCompileShader(m_fragmentShader);
if (!CheckCompiled(m_vertexShader) || !CheckCompiled(m_fragmentShader)) {
printf("A shader failed to compile.\n");
exit(1);
}
m_programSprites = glCreateProgram();
checkGLErrors("create program");
glAttachShader(m_programSprites, m_vertexShader);
glAttachShader(m_programSprites, m_fragmentShader);
checkGLErrors("attaching shaders");
glLinkProgram(m_programSprites);
checkGLErrors("linking program");
EGLint linked;
glGetProgramiv(m_programSprites, GL_LINK_STATUS, &linked);
if (!linked) {
printf("A shader failed to link.\n");
exit(1);
}
_createTexture(32);
glGenBuffers(1, (GLuint *)&m_vboColor);
glBindBuffer(GL_ARRAY_BUFFER, m_vboColor);
glBufferData(GL_ARRAY_BUFFER, m_numParticles * 4 * sizeof(float), 0,
GL_STATIC_DRAW);
glBindBuffer(GL_ARRAY_BUFFER, 0);
}
//------------------------------------------------------------------------------
// Function : EvalHermite
// Description :
//------------------------------------------------------------------------------
/**
* EvalHermite(float pA, float pB, float vA, float vB, float u)
* @brief Evaluates Hermite basis functions for the specified coefficients.
*/
inline float evalHermite(float pA, float pB, float vA, float vB, float u) {
float u2 = (u * u), u3 = u2 * u;
float B0 = 2 * u3 - 3 * u2 + 1;
float B1 = -2 * u3 + 3 * u2;
float B2 = u3 - 2 * u2 + u;
float B3 = u3 - u;
return (B0 * pA + B1 * pB + B2 * vA + B3 * vB);
}
unsigned char *createGaussianMap(int N) {
float *M = new float[2 * N * N];
unsigned char *B = new unsigned char[4 * N * N];
float X, Y, Y2, Dist;
float Incr = 2.0f / N;
int i = 0;
int j = 0;
Y = -1.0f;
// float mmax = 0;
for (int y = 0; y < N; y++, Y += Incr) {
Y2 = Y * Y;
X = -1.0f;
for (int x = 0; x < N; x++, X += Incr, i += 2, j += 4) {
Dist = (float)sqrtf(X * X + Y2);
if (Dist > 1) Dist = 1;
M[i + 1] = M[i] = evalHermite(1.0f, 0, 0, 0, Dist);
B[j + 3] = B[j + 2] = B[j + 1] = B[j] = (unsigned char)(M[i] * 255);
}
}
delete[] M;
return (B);
}
void ParticleRenderer::_createTexture(int resolution) {
unsigned char *data = createGaussianMap(resolution);
glGenTextures(1, (GLuint *)&m_texture);
glBindTexture(GL_TEXTURE_2D, m_texture);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER,
GL_LINEAR); //_MIPMAP_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, resolution, resolution, 0, GL_RGBA,
GL_UNSIGNED_BYTE, data);
}

View File

@ -1,110 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __RENDER_PARTICLES__
#define __RENDER_PARTICLES__
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES3/gl31.h>
#include <cstdio>
typedef float matrix4[4][4];
typedef float vector3[3];
// check for OpenGL errors
inline void checkGLErrors(const char *s) {
EGLenum error;
while ((error = glGetError()) != GL_NO_ERROR) {
fprintf(stderr, "%s: error - %d\n", s, error);
}
}
class ParticleRenderer {
public:
ParticleRenderer(unsigned int windowWidth = 720,
unsigned int windowHeight = 480);
~ParticleRenderer();
void setPositions(float *pos, int numParticles);
void setPositions(double *pos, int numParticles);
void setBaseColor(float color[4]);
void setColors(float *color, int numParticles);
void setPBO(unsigned int pbo, int numParticles, bool fp64);
enum DisplayMode {
PARTICLE_POINTS,
PARTICLE_SPRITES,
PARTICLE_SPRITES_COLOR,
PARTICLE_NUM_MODES
};
void display();
void setPointSize(float size) { m_pointSize = size; }
void setSpriteSize(float size) { m_spriteSize = size; }
void setCameraPos(vector3 camera_pos) {
m_camera[0] = camera_pos[0];
m_camera[1] = camera_pos[1];
m_camera[2] = camera_pos[2];
}
void resetPBO();
protected: // methods
void _initGL();
void _createTexture(int resolution);
protected: // data
float *m_pos;
double *m_pos_fp64;
int m_numParticles;
float m_pointSize;
float m_spriteSize;
vector3 m_camera;
unsigned int m_vertexShader;
unsigned int m_vertexShaderPoints;
unsigned int m_fragmentShader;
unsigned int m_programPoints;
unsigned int m_programSprites;
unsigned int m_texture;
unsigned int m_pbo;
unsigned int m_vboColor;
unsigned int m_windowWidth;
unsigned int m_windowHeight;
float m_baseColor[4];
bool m_bFp64Positions;
};
#endif //__ RENDER_PARTICLES__

View File

@ -1,172 +0,0 @@
#ifndef __TIPSY_H__
#define __TIPSY_H__
#include <string>
using namespace std;
#define MAXDIM 3
typedef float Real;
struct gas_particle
{
Real mass;
Real pos[MAXDIM];
Real vel[MAXDIM];
Real rho;
Real temp;
Real hsmooth;
Real metals ;
Real phi ;
} ;
//struct gas_particle *gas_particles;
struct dark_particle
{
Real mass;
Real pos[MAXDIM];
Real vel[MAXDIM];
Real eps;
int phi ;
} ;
//struct dark_particle *dark_particles;
struct star_particle
{
Real mass;
Real pos[MAXDIM];
Real vel[MAXDIM];
Real metals ;
Real tform ;
Real eps;
int phi ;
} ;
//struct star_particle *star_particles;
struct dump
{
double time ;
int nbodies ;
int ndim ;
int nsph ;
int ndark ;
int nstar ;
} ;
typedef struct dump header ;
template <typename real4>
void read_tipsy_file(vector<real4> &bodyPositions,
vector<real4> &bodyVelocities,
vector<int> &bodiesIDs,
const std::string &fileName,
int &NTotal,
int &NFirst,
int &NSecond,
int &NThird)
{
/*
Read in our custom version of the tipsy file format written by
Jeroen Bedorf. Most important change is that we store particle id on the
location where previously the potential was stored.
*/
char fullFileName[256];
sprintf(fullFileName, "%s", fileName.c_str());
cout << "Trying to read file: " << fullFileName << endl;
ifstream inputFile(fullFileName, ios::in | ios::binary);
if (!inputFile.is_open())
{
cout << "Can't open input file \n";
exit(EXIT_SUCCESS);
}
dump h;
inputFile.read((char *)&h, sizeof(h));
int idummy;
real4 positions;
real4 velocity;
//Read tipsy header
NTotal = h.nbodies;
NFirst = h.ndark;
NSecond = h.nstar;
NThird = h.nsph;
//Start reading
int particleCount = 0;
dark_particle d;
star_particle s;
for (int i=0; i < NTotal; i++)
{
if (i < NFirst)
{
inputFile.read((char *)&d, sizeof(d));
velocity.w = d.eps;
positions.w = d.mass;
positions.x = d.pos[0];
positions.y = d.pos[1];
positions.z = d.pos[2];
velocity.x = d.vel[0];
velocity.y = d.vel[1];
velocity.z = d.vel[2];
idummy = d.phi;
}
else
{
inputFile.read((char *)&s, sizeof(s));
velocity.w = s.eps;
positions.w = s.mass;
positions.x = s.pos[0];
positions.y = s.pos[1];
positions.z = s.pos[2];
velocity.x = s.vel[0];
velocity.y = s.vel[1];
velocity.z = s.vel[2];
idummy = s.phi;
}
bodyPositions.push_back(positions);
bodyVelocities.push_back(velocity);
bodiesIDs.push_back(idummy);
particleCount++;
}//end for
// round up to a multiple of 256 bodies since our kernel only supports that...
int newTotal = NTotal;
if (NTotal % 256)
{
newTotal = ((NTotal / 256) + 1) * 256;
}
for (int i = NTotal; i < newTotal; i++)
{
positions.w = positions.x = positions.y = positions.z = 0;
velocity.x = velocity.y = velocity.z = 0;
bodyPositions.push_back(positions);
bodyVelocities.push_back(velocity);
bodiesIDs.push_back(i);
NFirst++;
}
NTotal = newTotal;
inputFile.close();
cerr << "Read " << NTotal << " bodies" << endl;
}
#endif //__TIPSY_H__

View File

@ -1,18 +0,0 @@
{
"configurations": [
{
"name": "Linux",
"includePath": [
"${workspaceFolder}/**",
"${workspaceFolder}/../../../Common"
],
"defines": [],
"compilerPath": "/usr/local/cuda/bin/nvcc",
"cStandard": "gnu17",
"cppStandard": "gnu++14",
"intelliSenseMode": "linux-gcc-x64",
"configurationProvider": "ms-vscode.makefile-tools"
}
],
"version": 4
}

View File

@ -1,7 +0,0 @@
{
"recommendations": [
"nvidia.nsight-vscode-edition",
"ms-vscode.cpptools",
"ms-vscode.makefile-tools"
]
}

View File

@ -1,10 +0,0 @@
{
"configurations": [
{
"name": "CUDA C++: Launch",
"type": "cuda-gdb",
"request": "launch",
"program": "${workspaceFolder}/simpleGLES_screen"
}
]
}

View File

@ -1,15 +0,0 @@
{
"version": "2.0.0",
"tasks": [
{
"label": "sample",
"type": "shell",
"command": "make dbg=1",
"problemMatcher": ["$nvcc"],
"group": {
"kind": "build",
"isDefault": true
}
}
]
}

View File

@ -1,27 +0,0 @@
cmake_minimum_required(VERSION 3.20)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules")
project(simpleGLES_screen LANGUAGES C CXX CUDA)
find_package(CUDAToolkit REQUIRED)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive)
endif()
# Include directories and libraries
include_directories(../../../Common)
# Source file
# Add target for simpleGLES_screen
add_executable(simpleGLES_screen simpleGLES_screen.cu)
target_compile_options(simpleGLES_screen PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
target_compile_features(simpleGLES_screen PRIVATE cxx_std_17 cuda_std_17)
set_target_properties(simpleGLES_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

View File

@ -1,399 +0,0 @@
################################################################################
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
################################################################################
#
# Makefile project only supported on Mac OS X and Linux Platforms)
#
################################################################################
# Location of the CUDA Toolkit
CUDA_PATH ?= /usr/local/cuda
##############################
# start deprecated interface #
##############################
ifeq ($(x86_64),1)
$(info WARNING - x86_64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=x86_64 instead)
TARGET_ARCH ?= x86_64
endif
ifeq ($(ARMv7),1)
$(info WARNING - ARMv7 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=armv7l instead)
TARGET_ARCH ?= armv7l
endif
ifeq ($(aarch64),1)
$(info WARNING - aarch64 variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=aarch64 instead)
TARGET_ARCH ?= aarch64
endif
ifeq ($(ppc64le),1)
$(info WARNING - ppc64le variable has been deprecated)
$(info WARNING - please use TARGET_ARCH=ppc64le instead)
TARGET_ARCH ?= ppc64le
endif
ifneq ($(GCC),)
$(info WARNING - GCC variable has been deprecated)
$(info WARNING - please use HOST_COMPILER=$(GCC) instead)
HOST_COMPILER ?= $(GCC)
endif
ifneq ($(abi),)
$(error ERROR - abi variable has been removed)
endif
############################
# end deprecated interface #
############################
# architecture
HOST_ARCH := $(shell uname -m)
TARGET_ARCH ?= $(HOST_ARCH)
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l))
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le))
TARGET_SIZE := 64
else ifneq (,$(filter $(TARGET_ARCH),armv7l))
TARGET_SIZE := 32
endif
else
TARGET_SIZE := $(shell getconf LONG_BIT)
endif
else
$(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!)
endif
# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now.
ifeq ($(HOST_ARCH),aarch64)
ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null))
HOST_ARCH := sbsa
TARGET_ARCH := sbsa
endif
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le))
$(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!)
endif
endif
# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32)
TARGET_ARCH = armv7l
endif
# operating system
HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
TARGET_OS ?= $(HOST_OS)
ifeq (,$(filter $(TARGET_OS),linux darwin qnx android))
$(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!)
endif
# host compiler
ifdef HOST_COMPILER
CUSTOM_HOST_COMPILER = 1
endif
ifeq ($(TARGET_OS),darwin)
ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1)
HOST_COMPILER ?= clang++
endif
else ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
ifeq ($(TARGET_OS),linux)
HOST_COMPILER ?= arm-linux-gnueabihf-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++
else ifeq ($(TARGET_OS),android)
HOST_COMPILER ?= arm-linux-androideabi-g++
endif
else ifeq ($(TARGET_ARCH),aarch64)
ifeq ($(TARGET_OS), linux)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_OS),qnx)
ifeq ($(QNX_HOST),)
$(error ERROR - QNX_HOST must be passed to the QNX host toolchain)
endif
ifeq ($(QNX_TARGET),)
$(error ERROR - QNX_TARGET must be passed to the QNX target toolchain)
endif
export QNX_HOST
export QNX_TARGET
HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++
else ifeq ($(TARGET_OS), android)
HOST_COMPILER ?= aarch64-linux-android-clang++
endif
else ifeq ($(TARGET_ARCH),sbsa)
HOST_COMPILER ?= aarch64-linux-gnu-g++
else ifeq ($(TARGET_ARCH),ppc64le)
HOST_COMPILER ?= powerpc64le-linux-gnu-g++
endif
endif
HOST_COMPILER ?= g++
NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER)
# internal flags
NVCCFLAGS := -m${TARGET_SIZE}
CCFLAGS :=
LDFLAGS :=
# build flags
# Link flag for customized HOST_COMPILER with gcc realpath
GCC_PATH := $(shell which gcc)
ifeq ($(CUSTOM_HOST_COMPILER),1)
ifneq ($(filter /%,$(HOST_COMPILER)),)
ifneq ($(findstring gcc,$(HOST_COMPILER)),)
ifneq ($(GCC_PATH),$(HOST_COMPILER))
LDFLAGS += -lstdc++
endif
endif
endif
endif
ifeq ($(TARGET_OS),darwin)
LDFLAGS += -rpath $(CUDA_PATH)/lib
CCFLAGS += -arch $(HOST_ARCH)
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3
CCFLAGS += -mfloat-abi=hard
else ifeq ($(TARGET_OS),android)
LDFLAGS += -pie
CCFLAGS += -fpie -fpic -fexceptions
endif
ifneq ($(TARGET_ARCH),$(HOST_ARCH))
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
ifneq ($(TARGET_FS),)
GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6)
ifeq ($(GCCVERSIONLTEQ46),1)
CCFLAGS += --sysroot=$(TARGET_FS)
endif
LDFLAGS += --sysroot=$(TARGET_FS)
LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib
LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
LDFLAGS += --unresolved-symbols=ignore-in-shared-libs
CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm
CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu
endif
endif
ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
NVCCFLAGS += -D_QNX_SOURCE
NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le
CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu
LDFLAGS += -lsocket
LDFLAGS += -L/usr/lib/aarch64-qnx-gnu
CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu"
ifdef TARGET_OVERRIDE
LDFLAGS += -lslog2
endif
ifneq ($(TARGET_FS),)
LDFLAGS += -L$(TARGET_FS)/usr/lib
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib"
LDFLAGS += -L$(TARGET_FS)/usr/libnvidia
CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia"
CCFLAGS += -I$(TARGET_FS)/../include
endif
endif
endif
ifdef TARGET_OVERRIDE # cuda toolkit targets override
NVCCFLAGS += -target-dir $(TARGET_OVERRIDE)
endif
# Install directory of different arch
CUDA_INSTALL_TARGET_DIR :=
ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux)
CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android)
CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx)
CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/
else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx)
CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/
else ifeq ($(TARGET_ARCH),ppc64le)
CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/
endif
# Debug build flags
ifeq ($(dbg),1)
NVCCFLAGS += -g -G
BUILD_TYPE := debug
else
BUILD_TYPE := release
endif
ALL_CCFLAGS :=
ALL_CCFLAGS += $(NVCCFLAGS)
ALL_CCFLAGS += $(EXTRA_NVCCFLAGS)
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS))
ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS))
SAMPLE_ENABLED := 1
# This sample is not supported on Linux x86_64
ifeq ($(TARGET_OS),linux)
ifeq ($(TARGET_ARCH),x86_64)
$(info >>> WARNING - simpleGLES_screen is not supported on Linux x86_64 - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
endif
# This sample is not supported on Mac OSX
ifeq ($(TARGET_OS),darwin)
$(info >>> WARNING - simpleGLES_screen is not supported on Mac OSX - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on ARMv7
ifeq ($(TARGET_ARCH),armv7l)
$(info >>> WARNING - simpleGLES_screen is not supported on ARMv7 - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
# This sample is not supported on aarch64
ifeq ($(TARGET_ARCH),aarch64)
ifneq ($(TARGET_OS),qnx)
$(info >>> WARNING - simpleGLES_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
endif
# This sample is not supported on sbsa
ifeq ($(TARGET_ARCH),sbsa)
$(info >>> WARNING - simpleGLES_screen is not supported on sbsa - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ALL_LDFLAGS :=
ALL_LDFLAGS += $(ALL_CCFLAGS)
ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS))
ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS))
# Common includes and paths for CUDA
INCLUDES := -I../../../Common
LIBRARIES :=
################################################################################
# Makefile include to help find GLES Libraries
include ./findgleslib.mk
# OpenGLES specific libraries
ifneq ($(TARGET_OS),darwin)
LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen
endif
# Gencode arguments
ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa))
SMS ?= 53 61 70 72 75 80 86 87 90
else
SMS ?= 50 52 60 61 70 75 80 86 89 90
endif
ifeq ($(SMS),)
$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<)
SAMPLE_ENABLED := 0
endif
ifeq ($(GENCODE_FLAGS),)
# Generate SASS code for each SM architecture listed in $(SMS)
$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm)))
# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility
HIGHEST_SM := $(lastword $(sort $(SMS)))
ifneq ($(HIGHEST_SM),)
GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM)
endif
endif
ALL_CCFLAGS += -DUSE_CUDAINTEROP -DGRAPHICS_SETUP_EGL -DUSE_GLES -DWIN_INTERFACE_CUSTOM --threads 0 --std=c++11
ifeq ($(SAMPLE_ENABLED),0)
EXEC ?= @echo "[@]"
endif
################################################################################
# Target rules
all: build
build: simpleGLES_screen
check.deps:
ifeq ($(SAMPLE_ENABLED),0)
@echo "Sample will be waived due to the above missing dependencies"
else
@echo "Sample is ready - all dependencies have been met"
endif
simpleGLES_screen.o:simpleGLES_screen.cu
$(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $<
simpleGLES_screen: simpleGLES_screen.o
$(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES)
$(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
$(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)
run: build
$(EXEC) ./simpleGLES_screen
testrun: build
$(EXEC) ./simpleGLES_screen -file=ref_simpleGLES_screen.bin
clean:
rm -f simpleGLES_screen simpleGLES_screen.o
rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/simpleGLES_screen
clobber: clean

View File

@ -1,83 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE entry SYSTEM "SamplesInfo.dtd">
<entry>
<name>simpleGLES_screen</name>
<cflags>
<flag>-DUSE_CUDAINTEROP</flag>
<flag>-DGRAPHICS_SETUP_EGL</flag>
<flag>-DUSE_GLES</flag>
<flag>-DWIN_INTERFACE_CUSTOM</flag>
</cflags>
<cuda_api_list>
<toolkit>cudaGraphicsUnmapResources</toolkit>
<toolkit>cudaMemcpy</toolkit>
<toolkit>cudaFree</toolkit>
<toolkit>cudaGraphicsResourceGetMappedPointer</toolkit>
<toolkit>cudaGraphicsMapResources</toolkit>
<toolkit>cudaDeviceSynchronize</toolkit>
<toolkit>cudaGraphicsUnregisterResource</toolkit>
<toolkit>cudaMalloc</toolkit>
<toolkit>cudaGraphicsGLRegisterBuffer</toolkit>
</cuda_api_list>
<description><![CDATA[Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.]]></description>
<devicecompilation>whole</devicecompilation>
<files>
<file>data\ref_simpleGLES_screen.bin</file>
</files>
<includepaths>
<path>./</path>
<path>../</path>
<path>../../../Common</path>
</includepaths>
<keyconcepts>
<concept level="basic">Graphics Interop</concept>
<concept level="basic">Vertex Buffers</concept>
<concept level="basic">3D Graphics</concept>
</keyconcepts>
<keywords>
<keyword>OpenGL ES</keyword>
</keywords>
<libraries>
</libraries>
<librarypaths>
</librarypaths>
<nsight_eclipse>true</nsight_eclipse>
<primary_file>simpleGLES_screen.cu</primary_file>
<qatests>
<qatest>-file=ref_simpleGLES_screen.bin</qatest>
</qatests>
<required_dependencies>
<dependency>screen</dependency>
<dependency>GLES</dependency>
</required_dependencies>
<scopes>
<scope>1:CUDA Basic Topics</scope>
<scope>2:Graphics Interop</scope>
</scopes>
<sm-arch>sm50</sm-arch>
<sm-arch>sm52</sm-arch>
<sm-arch>sm53</sm-arch>
<sm-arch>sm60</sm-arch>
<sm-arch>sm61</sm-arch>
<sm-arch>sm70</sm-arch>
<sm-arch>sm72</sm-arch>
<sm-arch>sm75</sm-arch>
<sm-arch>sm80</sm-arch>
<sm-arch>sm86</sm-arch>
<sm-arch>sm87</sm-arch>
<sm-arch>sm89</sm-arch>
<sm-arch>sm90</sm-arch>
<sources>
<ignore>graphics_interface.c</ignore>
</sources>
<supported_envs>
<env>
<platform>qnx</platform>
</env>
</supported_envs>
<supported_sm_architectures>
<include>all</include>
</supported_sm_architectures>
<title>Simple OpenGLES on Screen</title>
<type>exe</type>
</entry>

View File

@ -1,37 +0,0 @@
# simpleGLES_screen - Simple OpenGLES on Screen
## Description
Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry.
## Key Concepts
Graphics Interop, Vertex Buffers, 3D Graphics
## Supported SM Architectures
[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
## Supported OSes
QNX
## Supported CPU Architecture
aarch64
## CUDA APIs involved
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer
## Dependencies needed to build/run
[screen](../../../README.md#screen), [GLES](../../../README.md#gles)
## Prerequisites
Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
Make sure the dependencies mentioned in [Dependencies]() section above are installed.
## References (for more details)

View File

@ -1,149 +0,0 @@
################################################################################
#
# Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
#
# NOTICE TO USER:
#
# This source code is subject to NVIDIA ownership rights under U.S. and
# international Copyright laws.
#
# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
# OR PERFORMANCE OF THIS SOURCE CODE.
#
# U.S. Government End Users. This source code is a "commercial item" as
# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
# "commercial computer software" and "commercial computer software
# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
# and is provided to the U.S. Government only as a commercial end item.
# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
# source code with only those rights set forth herein.
#
################################################################################
#
# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions
# this is supported on Linux
#
################################################################################
# Determine OS platform and unix distribution
ifeq ("$(TARGET_OS)","linux")
# first search lsb_release
DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]")
ifeq ("$(DISTRO)","")
# second search and parse /etc/issue
DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]")
# ensure data from /etc/issue is valid
ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse))
DISTRO :=
endif
ifeq ("$(DISTRO)","")
# third, we can search in /etc/os-release or /etc/{distro}-release
DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB")
endif
endif
endif
ifeq ("$(TARGET_OS)","linux")
# $(info) >> findgllib.mk -> LINUX path <<<)
# Each set of Linux Distros have different paths for where to find their OpenGL libraries reside
UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?)
FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?)
RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?)
CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?)
SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?)
KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?)
ifeq ("$(UBUNTU)","0")
ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l)
GLPATH := /usr/arm-linux-gnueabihf/lib
GLLINK := -L/usr/arm-linux-gnueabihf/lib
ifneq ($(TARGET_FS),)
GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf
GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf
endif
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64)
GLPATH := /usr/aarch64-linux-gnu/lib
GLLINK := -L/usr/aarch64-linux-gnu/lib
ifneq ($(TARGET_FS),)
GLPATH += $(TARGET_FS)/usr/lib
GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu
GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu
endif
else
UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1)
ifneq ("$(UBUNTU_PKG_NAME)","")
GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME)
GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME)
endif
DFLT_PATH ?= /usr/lib
endif
endif
ifeq ("$(SUSE)","0")
GLPATH ?= /usr/X11R6/lib64
GLLINK ?= -L/usr/X11R6/lib64
DFLT_PATH ?= /usr/lib64
else
GLPATH ?= /usr/lib64/nvidia
GLLINK ?= -L/usr/lib64/nvidia
DFLT_PATH ?= /usr/lib64
endif
# find libGL, libGLU, libXi,
EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null)
GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null)
X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null)
ifeq ("$(EGLLIB)","")
$(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(GLESLIB)","")
$(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(X11LIB)","")
$(info >>> WARNING - libX11.so not found, please install libX11.so <<<)
SAMPLE_ENABLED := 0
endif
HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include
ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux)
HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include
else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux)
HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include
endif
EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null)
EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null)
GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null)
X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null)
ifeq ("$(EGLHEADER)","")
$(info >>> WARNING - egl.h not found, please install egl.h <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(EGLEXTHEADER)","")
$(info >>> WARNING - eglext.h not found, please install eglext.h <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(GL31HEADER)","")
$(info >>> WARNING - gl31.h not found, please install gl31.h <<<)
SAMPLE_ENABLED := 0
endif
ifeq ("$(X11HEADER)","")
$(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<)
SAMPLE_ENABLED := 0
endif
else
endif

View File

@ -1,379 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <GLES3/gl31.h>
#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <sys/keycodes.h>
screen_window_t screen_window;
screen_context_t screen_context;
screen_event_t screen_ev;
EGLDisplay eglDisplay = EGL_NO_DISPLAY;
EGLSurface eglSurface = EGL_NO_SURFACE;
EGLContext eglContext = EGL_NO_CONTEXT;
void error_exit(const char *format, ...) {
va_list args;
va_start(args, format);
vfprintf(stderr, format, args);
va_end(args);
exit(1);
}
enum { NvGlDemoKeyCode_Escape = 27 };
typedef void (*GlCloseCB)(void);
typedef void (*GlKeyCB)(char key, int state);
GlCloseCB closeCB = NULL;
GlKeyCB keyCB = NULL;
void CHECK_GLERROR() {
GLenum err = glGetError();
if (err != GL_NO_ERROR) {
fprintf(stderr, "[%s line %d] OpenGL Error: 0x%x ", __FILE__, __LINE__,
err);
switch (err) {
case GL_INVALID_ENUM:
fprintf(stderr, "(GL_INVALID_ENUM)\n");
break;
case GL_INVALID_VALUE:
fprintf(stderr, "(GL_INVALID_VALUE)\n");
break;
case GL_INVALID_OPERATION:
fprintf(stderr, "(GL_INVALID_OPERATION)\n");
break;
case GL_OUT_OF_MEMORY:
fprintf(stderr, "(GL_OUT_OF_MEMORY)\n");
break;
case GL_INVALID_FRAMEBUFFER_OPERATION:
fprintf(stderr, "(GL_INVALID_FRAMEBUFFER_OPERATION)\n");
break;
default:
break;
}
fflush(stderr);
}
}
static void UpdateEventMask(void) {
static int rc = 1;
if (rc) {
rc = screen_create_event(&screen_ev);
}
}
void SetCloseCB(GlCloseCB cb) {
// Call the eglQnxScreenConsumer module if option is enabled
closeCB = cb;
UpdateEventMask();
}
void SetKeyCB(GlKeyCB cb) {
keyCB = cb;
UpdateEventMask();
}
// Add keys here, that are used in demo apps.
static unsigned char GetKeyPress(int *screenKey) {
unsigned char key = '\0';
switch (*screenKey) {
case KEYCODE_ESCAPE:
key = NvGlDemoKeyCode_Escape;
break;
default:
/* For "normal" keys, Screen KEYCODE is just ASCII. */
if (*screenKey <= 127) {
key = *screenKey;
}
break;
}
return key;
}
void CheckEvents(void) {
static int vis = 1, val = 1;
int rc;
/**
** We start the loop by processing any events that might be in our
** queue. The only event that is of interest to us are the resize
** and close events. The timeout variable is set to 0 (no wait) or
** forever depending if the window is visible or invisible.
**/
while (!screen_get_event(screen_context, screen_ev, vis ? 0ull : ~0ull)) {
// Get QNX CAR 2.1 event property
rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_TYPE, &val);
if (rc || val == SCREEN_EVENT_NONE) {
break;
}
switch (val) {
case SCREEN_EVENT_CLOSE:
/**
** All we have to do when we receive the close event is
** exit the application loop.
**/
if (closeCB) {
closeCB();
}
break;
case SCREEN_EVENT_KEYBOARD:
rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_FLAGS,
&val);
if (rc || val == SCREEN_EVENT_NONE) {
break;
}
if (val & KEY_DOWN) {
rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_SYM,
&val);
if (rc || val == SCREEN_EVENT_NONE) {
break;
}
unsigned char key;
key = GetKeyPress(&val);
if (key != '\0') {
keyCB(key, 1);
}
}
break;
default:
break;
}
}
}
int graphics_setup_window(int xpos, int ypos, int width, int height,
const char *windowname, int reqdispno) {
EGLint configAttrs[] = {
EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8,
EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, 8,
EGL_DEPTH_SIZE, 16, EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
EGL_NONE};
EGLint contextAttrs[] = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE};
EGLint windowAttrs[] = {EGL_NONE};
EGLConfig *configList = NULL;
EGLint configCount;
int displayCount = 0;
int dispno;
screen_context = 0;
screen_display_t *screenDisplayHandle = NULL;
if (screen_create_context(&screen_context, 0)) {
error_exit("Error creating screen context.\n");
}
eglDisplay = eglGetDisplay(0);
if (eglDisplay == EGL_NO_DISPLAY) {
error_exit("EGL failed to obtain display\n");
}
if (!eglInitialize(eglDisplay, 0, 0)) {
error_exit("EGL failed to initialize\n");
}
if (!eglChooseConfig(eglDisplay, configAttrs, NULL, 0, &configCount) ||
!configCount) {
error_exit("EGL failed to return any matching configurations\n");
}
configList = (EGLConfig *)malloc(configCount * sizeof(EGLConfig));
if (!eglChooseConfig(eglDisplay, configAttrs, configList, configCount,
&configCount) ||
!configCount) {
error_exit("EGL failed to populate configuration list\n");
}
screen_window = 0;
if (screen_create_window(&screen_window, screen_context)) {
error_exit("Error creating screen window.\n");
}
// query the total no of display avaibale from QNX CAR2 screen
if (screen_get_context_property_iv(
screen_context, SCREEN_PROPERTY_DISPLAY_COUNT, &displayCount)) {
error_exit("Error getting context property\n");
}
screenDisplayHandle =
(screen_display_t *)malloc(displayCount * sizeof(screen_display_t));
if (!screenDisplayHandle) {
error_exit("Error allocating screen memory handle is getting failed\n");
}
// query the display handle from QNX CAR2 screen
if (screen_get_context_property_pv(screen_context, SCREEN_PROPERTY_DISPLAYS,
(void **)screenDisplayHandle)) {
error_exit("Error getting display handle\n");
}
for (dispno = 0; dispno < displayCount; dispno++) {
int active = 0;
// Query the connected status from QNX CAR2 screen
screen_get_display_property_iv(screenDisplayHandle[dispno],
SCREEN_PROPERTY_ATTACHED, &active);
if (active) {
if (reqdispno == dispno) {
// Map the window buffer to user requested display port
screen_set_window_property_pv(screen_window, SCREEN_PROPERTY_DISPLAY,
(void **)&screenDisplayHandle[reqdispno]);
break;
}
}
}
if (dispno == displayCount) {
error_exit("Failed to set the requested display\n");
}
free(screenDisplayHandle);
int format = SCREEN_FORMAT_RGBA8888;
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_FORMAT,
&format)) {
error_exit("Error setting SCREEN_PROPERTY_FORMAT\n");
}
int usage = SCREEN_USAGE_OPENGL_ES2;
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_USAGE,
&usage)) {
error_exit("Error setting SCREEN_PROPERTY_USAGE\n");
}
EGLint interval = 1;
if (screen_set_window_property_iv(screen_window,
SCREEN_PROPERTY_SWAP_INTERVAL, &interval)) {
error_exit("Error setting SCREEN_PROPERTY_SWAP_INTERVAL\n");
}
int windowSize[2];
windowSize[0] = width;
windowSize[1] = height;
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE,
windowSize)) {
error_exit("Error setting SCREEN_PROPERTY_SIZE\n");
}
int windowOffset[2];
windowOffset[0] = xpos;
windowOffset[1] = ypos;
if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_POSITION,
windowOffset)) {
error_exit("Error setting SCREEN_PROPERTY_POSITION\n");
}
if (screen_create_window_buffers(screen_window, 2)) {
error_exit("Error creating two window buffers.\n");
}
eglSurface =
eglCreateWindowSurface(eglDisplay, configList[0],
(EGLNativeWindowType)screen_window, windowAttrs);
if (!eglSurface) {
error_exit("EGL couldn't create window\n");
}
eglBindAPI(EGL_OPENGL_ES_API);
eglContext = eglCreateContext(eglDisplay, configList[0], NULL, contextAttrs);
if (!eglContext) {
error_exit("EGL couldn't create context\n");
}
if (!eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) {
error_exit("EGL couldn't make context/surface current\n");
}
EGLint Context_RendererType;
eglQueryContext(eglDisplay, eglContext, EGL_CONTEXT_CLIENT_TYPE,
&Context_RendererType);
switch (Context_RendererType) {
case EGL_OPENGL_API:
printf("Using OpenGL API\n");
break;
case EGL_OPENGL_ES_API:
printf("Using OpenGL ES API\n");
break;
case EGL_OPENVG_API:
error_exit("Context Query Returned OpenVG. This is Unsupported\n");
default:
error_exit("Unknown Context Type. %04X\n", Context_RendererType);
}
return 1;
}
void graphics_set_windowtitle(const char *windowname) {
// Do nothing on screen
}
void graphics_swap_buffers() { eglSwapBuffers(eglDisplay, eglSurface); }
void graphics_close_window() {
if (eglDisplay != EGL_NO_DISPLAY) {
eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
if (eglContext != EGL_NO_CONTEXT) {
eglDestroyContext(eglDisplay, eglContext);
}
if (eglSurface != EGL_NO_SURFACE) {
eglDestroySurface(eglDisplay, eglSurface);
}
eglTerminate(eglDisplay);
}
if (screen_window) {
screen_destroy_window(screen_window);
screen_window = NULL;
}
if (screen_context) {
screen_destroy_context(screen_context);
}
if (screen_ev) {
screen_destroy_event(screen_ev);
}
}

View File

@ -1,31 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
void main()
{
gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0);
}

View File

@ -1,33 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
attribute vec4 position;
void main()
{
gl_Position = position;
}

View File

@ -1,601 +0,0 @@
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
This example demonstrates how to use the CUDA C bindings to OpenGL ES to
dynamically modify a vertex buffer using a CUDA C kernel.
The steps are:
1. Create an empty vertex buffer object (VBO)
2. Register the VBO with CUDA C
3. Map the VBO for writing from CUDA C
4. Run CUDA C kernel to modify the vertex positions
5. Unmap the VBO
6. Render the results using OpenGL ES
Host code
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <stdarg.h>
#include <unistd.h>
#include <screen/screen.h>
#include "graphics_interface.c"
// includes, cuda
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
// Utilities and timing functions
#include <helper_functions.h> // includes cuda.h and cuda_runtime_api.h
// CUDA helper functions
#include <helper_cuda.h> // helper functions for CUDA error check
#include <vector_types.h>
#define MAX_EPSILON_ERROR 0.0f
#define THRESHOLD 0.0f
#define REFRESH_DELAY 1 // ms
#define GUI_IDLE 0x100
#define GUI_ROTATE 0x101
#define GUI_TRANSLATE 0x102
int gui_mode;
////////////////////////////////////////////////////////////////////////////////
// Default configuration
unsigned int window_width = 512;
unsigned int window_height = 512;
unsigned int dispno = 0;
// constants
const unsigned int mesh_width = 256;
const unsigned int mesh_height = 256;
// OpenGL ES variables and interop with CUDA C
GLuint mesh_vao, mesh_vbo;
struct cudaGraphicsResource *cuda_vbo_resource;
void *d_vbo_buffer = NULL;
float g_fAnim = 0.0;
// UI / mouse controls
int mouse_old_x, mouse_old_y;
int mouse_buttons = 0;
float rotate_x = 0.0, rotate_y = 0.0;
float translate_z = -3.0;
StopWatchInterface *timer = NULL;
// Frame statistics
int frame;
int fpsCount = 0; // FPS count for averaging
int fpsLimit = 1; // FPS limit for sampling
int g_Index = 0;
float avgFPS = 0.0f;
unsigned int frameCount = 0;
unsigned int g_TotalErrors = 0;
// The default number of seconds after which the test will end.
#define TIME_LIMIT 10.0 // 10 secs
// Flag indicating it is time to shut down
static GLboolean shutdown = GL_FALSE;
// Callback to close window
static void closeCB_app(void) { shutdown = GL_TRUE; }
// Callback to handle key presses
static void keyCB_app(char key, int state) {
// Ignoring releases
if (!state) return;
if ((key == 'q') || (key == 'Q') || (key == NvGlDemoKeyCode_Escape))
shutdown = GL_TRUE;
}
// Auto-Verification Code
bool g_bQAReadback = false;
int *pArgc = NULL;
char **pArgv = NULL;
#define MAX(a, b) ((a > b) ? a : b)
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
// CUDA functionality
void runCuda(struct cudaGraphicsResource **vbo_resource);
void runAutoTest(int devID, char **argv, char *ref_file);
void checkResultCuda(int argc, char **argv, const GLuint &vbo);
const char *sSDKsample = "simpleGLES on Screen (VBO)";
void computeFPS() {
frameCount++;
fpsCount++;
if (fpsCount == fpsLimit) {
avgFPS = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
fpsCount = 0;
fpsLimit = (int)MAX(avgFPS, 1.f);
sdkResetTimer(&timer);
}
char fps[256];
sprintf(fps, "Cuda/OpenGL ES Interop (VBO): %3.1f fps (Max 1000 fps)",
avgFPS);
graphics_set_windowtitle(fps);
}
///////////////////////////////////////////////////////////////////////////////
//! Simple kernel to modify vertex positions in sine wave pattern
//! @param data data in global memory
///////////////////////////////////////////////////////////////////////////////
__global__ void simple_vbo_kernel(float4 *pos, unsigned int width,
unsigned int height, float time) {
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
// calculate uv coordinates
float u = x / (float)width;
float v = y / (float)height;
u = u * 2.0f - 1.0f;
v = v * 2.0f - 1.0f;
// calculate simple sine wave pattern
float freq = 4.0f;
float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f;
// write output vertex
pos[y * width + x] = make_float4(u, w, v, 1.0f);
}
void launch_kernel(float4 *pos, unsigned int mesh_width,
unsigned int mesh_height, float time) {
// execute the kernel
dim3 block(8, 8, 1);
dim3 grid(mesh_width / block.x, mesh_height / block.y, 1);
simple_vbo_kernel<<<grid, block>>>(pos, mesh_width, mesh_height, time);
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void runCuda(struct cudaGraphicsResource **vbo_resource) {
// map OpenGL buffer object for writing from CUDA
float4 *dptr;
cudaGraphicsMapResources(1, vbo_resource, 0);
size_t num_bytes;
cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes,
*vbo_resource);
launch_kernel(dptr, mesh_width, mesh_height, g_fAnim);
// unmap buffer object
cudaGraphicsUnmapResources(1, vbo_resource, 0);
}
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
#endif
void sdkDumpBin2(void *data, unsigned int bytes, const char *filename) {
printf("sdkDumpBin: <%s>\n", filename);
FILE *fp;
FOPEN(fp, filename, "wb");
fwrite(data, bytes, 1, fp);
fflush(fp);
fclose(fp);
}
////////////////////////////////////////////////////////////////////////////////
//! Run the Cuda part of the computation
////////////////////////////////////////////////////////////////////////////////
void runAutoTest(int devID, char **argv, char *ref_file) {
char *reference_file = NULL;
void *imageData = malloc(mesh_width * mesh_height * sizeof(float));
// execute the kernel
launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim);
cudaDeviceSynchronize();
getLastCudaError("launch_kernel failed");
cudaMemcpy(imageData, d_vbo_buffer, mesh_width * mesh_height * sizeof(float),
cudaMemcpyDeviceToHost);
sdkDumpBin2(imageData, mesh_width * mesh_height * sizeof(float),
"simpleGLES_screen.bin");
reference_file = sdkFindFilePath(ref_file, argv[0]);
if (reference_file &&
!sdkCompareBin2BinFloat("simpleGLES_screen.bin", reference_file,
mesh_width * mesh_height * sizeof(float),
MAX_EPSILON_ERROR, THRESHOLD, pArgv[0])) {
g_TotalErrors++;
}
}
////////////////////////////////////////////////////////////////////////////////
//! Display callback
////////////////////////////////////////////////////////////////////////////////
void display_thisframe(float time_delta) {
sdkStartTimer(&timer);
// run CUDA kernel to generate vertex positions
runCuda(&cuda_vbo_resource);
glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height);
glFinish();
g_fAnim += time_delta;
sdkStopTimer(&timer);
computeFPS();
}
////////////////////////////////////////////////////////////////////////////////
//! Check if the result is correct or write data to file for external
//! regression testing
////////////////////////////////////////////////////////////////////////////////
void checkResultCuda(int argc, char **argv, const GLuint &vbo) {
if (!d_vbo_buffer) {
printf("%s: Mapping result buffer from OpenGL ES\n", __FUNCTION__);
cudaGraphicsUnregisterResource(cuda_vbo_resource);
// map buffer object
glBindBuffer(GL_ARRAY_BUFFER, vbo);
float *data = (float *)glMapBufferRange(
GL_ARRAY_BUFFER, 0, mesh_width * mesh_height * 4 * sizeof(float),
GL_READ_ONLY);
// check result
if (checkCmdLineFlag(argc, (const char **)argv, "regression")) {
// write file for regression test
sdkWriteFile<float>("./data/regression.dat", data,
mesh_width * mesh_height * 3, 0.0, false);
}
// unmap GL buffer object
if (!glUnmapBuffer(GL_ARRAY_BUFFER)) {
fprintf(stderr, "Unmap buffer failed.\n");
fflush(stderr);
}
checkCudaErrors(cudaGraphicsGLRegisterBuffer(
&cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard));
CHECK_GLERROR();
}
}
GLuint mesh_shader = 0;
void readAndCompileShaderFromGLSLFile(GLuint new_shaderprogram,
const char *filename, GLenum shaderType) {
FILE *file = fopen(filename, "rb"); // open shader text file
if (!file) {
error_exit("Filename %s does not exist\n", filename);
}
// get the size of the file and read it
fseek(file, 0, SEEK_END);
GLint size = ftell(file);
char *data = (char *)malloc(sizeof(char) * (size + 1));
memset(data, 0, sizeof(char) * (size + 1));
fseek(file, 0, SEEK_SET);
size_t res = fread(data, 1, size, file);
fclose(file);
GLuint shader = glCreateShader(shaderType);
glShaderSource(shader, 1, (const GLchar **)&data, &size);
glCompileShader(shader);
CHECK_GLERROR();
GLint compile_success = 0;
glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_success);
CHECK_GLERROR();
if (compile_success == GL_FALSE) {
printf("Compilation of %s failed!\n Reason:\n", filename);
GLint maxLength = 0;
glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength);
char errorLog[maxLength];
glGetShaderInfoLog(shader, maxLength, &maxLength, &errorLog[0]);
printf("%s", errorLog);
glDeleteShader(shader);
exit(1);
}
glAttachShader(new_shaderprogram, shader);
glDeleteShader(shader);
free(data);
}
GLuint ShaderCreate(const char *vshader_filename,
const char *fshader_filename) {
printf("Loading GLSL shaders %s %s\n", vshader_filename, fshader_filename);
GLuint new_shaderprogram = glCreateProgram();
CHECK_GLERROR();
if (vshader_filename) {
readAndCompileShaderFromGLSLFile(new_shaderprogram, vshader_filename,
GL_VERTEX_SHADER);
}
CHECK_GLERROR();
if (fshader_filename) {
readAndCompileShaderFromGLSLFile(new_shaderprogram, fshader_filename,
GL_FRAGMENT_SHADER);
}
CHECK_GLERROR();
glLinkProgram(new_shaderprogram);
CHECK_GLERROR();
GLint link_success;
glGetProgramiv(new_shaderprogram, GL_LINK_STATUS, &link_success);
if (link_success == GL_FALSE) {
printf("Linking of %s with %s failed!\n Reason:\n", vshader_filename,
fshader_filename);
GLint maxLength = 0;
glGetShaderiv(new_shaderprogram, GL_INFO_LOG_LENGTH, &maxLength);
char errorLog[maxLength];
glGetShaderInfoLog(new_shaderprogram, maxLength, &maxLength, &errorLog[0]);
printf("%s", errorLog);
exit(EXIT_FAILURE);
}
return new_shaderprogram;
}
//===========================================================================
// InitGraphicsState() - initialize OpenGL
//===========================================================================
static void InitGraphicsState(void) {
char *GL_version = (char *)glGetString(GL_VERSION);
char *GL_vendor = (char *)glGetString(GL_VENDOR);
char *GL_renderer = (char *)glGetString(GL_RENDERER);
printf("Version: %s\n", GL_version);
printf("Vendor: %s\n", GL_vendor);
printf("Renderer: %s\n", GL_renderer);
// RENDERING SETUP (OpenGL ES or OpenGL Core Profile!)
glGenVertexArrays(1, &mesh_vao); // Features' Vertex Array Object allocation
glBindVertexArray(mesh_vao); // bind VAO
// initialize buffer object
glGenBuffers(1, &mesh_vbo);
glBindBuffer(GL_ARRAY_BUFFER, mesh_vbo);
unsigned int size = mesh_width * mesh_height * 4 * sizeof(float);
glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_DYNAMIC_DRAW);
glVertexAttribPointer((GLuint)0, 4, GL_FLOAT, GL_FALSE, 0, 0);
glEnableVertexAttribArray(0);
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, mesh_vbo,
cudaGraphicsMapFlagsNone));
// GLSL stuff
char *vertex_shader_path = sdkFindFilePath("mesh.vert.glsl", pArgv[0]);
char *fragment_shader_path = sdkFindFilePath("mesh.frag.glsl", pArgv[0]);
if (vertex_shader_path == NULL || fragment_shader_path == NULL) {
printf("Error finding shader file\n");
exit(EXIT_FAILURE);
}
mesh_shader = ShaderCreate(vertex_shader_path, fragment_shader_path);
CHECK_GLERROR();
free(vertex_shader_path);
free(fragment_shader_path);
glUseProgram(mesh_shader);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
bool runTest(int argc, char **argv, char *ref_file) {
// command line mode only
if (ref_file != NULL) {
// This will pick the best possible CUDA capable device
// int devID = findCudaDevice(argc, (const char **)argv);
#if defined(__aarch64__) || defined(__arm__)
// find iGPU on the system which is compute capable which will perform
// GLES-CUDA interop
int devID = findIntegratedGPU();
#else
// use command-line specified CUDA device, otherwise use device with highest
// Gflops/s
int devID = findCudaDevice(argc, (const char **)argv);
#endif
// create VBO
checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer,
mesh_width * mesh_height * 4 * sizeof(float)));
// run the cuda part
runAutoTest(devID, argv, ref_file);
// check result of Cuda step
checkResultCuda(argc, argv, mesh_vbo);
cudaFree(d_vbo_buffer);
d_vbo_buffer = NULL;
} else {
double endTime = TIME_LIMIT;
// this would use command-line specified CUDA device, note that CUDA
// defaults to highest Gflops/s device
if (checkCmdLineFlag(argc, (const char **)argv, "device")) {
error_exit("Device setting not yet implemented!\n");
}
// display selection
if (checkCmdLineFlag(argc, (const char **)argv, "dispno")) {
dispno = getCmdLineArgumentInt(argc, (const char **)argv, "dispno");
}
// Window width
if (checkCmdLineFlag(argc, (const char **)argv, "width")) {
window_width = getCmdLineArgumentInt(argc, (const char **)argv, "width");
}
// Window Height
if (checkCmdLineFlag(argc, (const char **)argv, "height")) {
window_height =
getCmdLineArgumentInt(argc, (const char **)argv, "height");
}
// Determine how long to run for in secs: default is 10s
if (checkCmdLineFlag(argc, (const char **)argv, "runtime")) {
endTime = getCmdLineArgumentInt(argc, (const char **)argv, "runtime");
}
SetCloseCB(closeCB_app);
SetKeyCB(keyCB_app);
// create QNX screen window and set up associated OpenGL ES context
graphics_setup_window(0, 0, window_width, window_height, sSDKsample,
dispno);
#if defined(__aarch64__) || defined(__arm__)
// find iGPU on the system which is compute capable which will perform
// GLES-CUDA interop
int devID = findIntegratedGPU();
#else
// use command-line specified CUDA device, otherwise use device with highest
// Gflops/s
int devID = findCudaDevice(argc, (const char **)argv);
#endif
InitGraphicsState(); // set up GLES stuff
glClearColor(0, 0.5, 1, 1); // blue-ish background
glClear(GL_COLOR_BUFFER_BIT);
graphics_swap_buffers();
int frame = 0;
struct timeval begin, end;
gettimeofday(&begin, NULL);
// Print runtime
if (endTime < 0.0) {
endTime = TIME_LIMIT;
printf(" running forever...\n");
} else {
printf(" running for %f seconds...\n", endTime);
}
while (!shutdown) {
frame++;
display_thisframe(0.010);
usleep(1000);
graphics_swap_buffers();
CheckEvents();
gettimeofday(&end, 0);
double elapsed = (end.tv_sec - begin.tv_sec) +
((end.tv_usec - begin.tv_usec) / 1000000.0);
// Check whether time limit has been exceeded
if (!shutdown) shutdown = (endTime <= elapsed);
}
// NOTE: Before destroying OpenGL ES context, must unregister all shared
// resources from CUDA !
checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource));
graphics_close_window(); // close window and destroy OpenGL ES context
}
return true;
}
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char **argv) {
char *ref_file = NULL;
pArgc = &argc;
pArgv = argv;
#if defined(__linux__)
setenv("DISPLAY", ":0", 0);
#endif
printf("%s starting...\n", sSDKsample);
if (argc > 1) {
if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
// In this mode, we run without OpenGL and see if VBO is generated
// correctly
getCmdLineArgumentString(argc, (const char **)argv, "file",
(char **)&ref_file);
}
}
printf("\n");
runTest(argc, argv, ref_file);
printf("%s completed, returned %s\n", sSDKsample,
(g_TotalErrors == 0) ? "OK" : "ERROR!");
exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}