diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/c_cpp_properties.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/c_cpp_properties.json deleted file mode 100644 index f0066b0f..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/c_cpp_properties.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "configurations": [ - { - "name": "Linux", - "includePath": [ - "${workspaceFolder}/**", - "${workspaceFolder}/../../../Common" - ], - "defines": [], - "compilerPath": "/usr/local/cuda/bin/nvcc", - "cStandard": "gnu17", - "cppStandard": "gnu++14", - "intelliSenseMode": "linux-gcc-x64", - "configurationProvider": "ms-vscode.makefile-tools" - } - ], - "version": 4 -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/extensions.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/extensions.json deleted file mode 100644 index c7eb54dc..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/extensions.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "recommendations": [ - "nvidia.nsight-vscode-edition", - "ms-vscode.cpptools", - "ms-vscode.makefile-tools" - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/launch.json deleted file mode 100644 index 65b45c08..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/nbody_screen" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/tasks.json b/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/tasks.json deleted file mode 100644 index 4509aeb1..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/.vscode/tasks.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "version": "2.0.0", - "tasks": [ - { - "label": "sample", - "type": "shell", - "command": "make dbg=1", - "problemMatcher": ["$nvcc"], - "group": { - "kind": "build", - "isDefault": true - } - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/nbody_screen/CMakeLists.txt deleted file mode 100644 index 73e9b2dd..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 3.20) - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules") - -project(nbody_screen LANGUAGES C CXX CUDA) - -find_package(CUDAToolkit REQUIRED) - -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90) -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive) -endif() - -# Include directories and libraries -include_directories(../../../Common) - -# Source file -# Add target for nbody_screen -add_executable(nbody_screen nbody_screen.cu) - -target_compile_options(nbody_screen PRIVATE $<$:--extended-lambda>) - -target_compile_features(nbody_screen PRIVATE cxx_std_17 cuda_std_17) - -set_target_properties(nbody_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON) diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/Makefile b/Samples/8_Platform_Specific/Tegra/nbody_screen/Makefile deleted file mode 100644 index 74278b08..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/Makefile +++ /dev/null @@ -1,405 +0,0 @@ -################################################################################ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -################################################################################ -# -# Makefile project only supported on Mac OS X and Linux Platforms) -# -################################################################################ - -# Location of the CUDA Toolkit -CUDA_PATH ?= /usr/local/cuda - -############################## -# start deprecated interface # -############################## -ifeq ($(x86_64),1) - $(info WARNING - x86_64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=x86_64 instead) - TARGET_ARCH ?= x86_64 -endif -ifeq ($(ARMv7),1) - $(info WARNING - ARMv7 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=armv7l instead) - TARGET_ARCH ?= armv7l -endif -ifeq ($(aarch64),1) - $(info WARNING - aarch64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=aarch64 instead) - TARGET_ARCH ?= aarch64 -endif -ifeq ($(ppc64le),1) - $(info WARNING - ppc64le variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=ppc64le instead) - TARGET_ARCH ?= ppc64le -endif -ifneq ($(GCC),) - $(info WARNING - GCC variable has been deprecated) - $(info WARNING - please use HOST_COMPILER=$(GCC) instead) - HOST_COMPILER ?= $(GCC) -endif -ifneq ($(abi),) - $(error ERROR - abi variable has been removed) -endif -############################ -# end deprecated interface # -############################ - -# architecture -HOST_ARCH := $(shell uname -m) -TARGET_ARCH ?= $(HOST_ARCH) -ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l)) - ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le)) - TARGET_SIZE := 64 - else ifneq (,$(filter $(TARGET_ARCH),armv7l)) - TARGET_SIZE := 32 - endif - else - TARGET_SIZE := $(shell getconf LONG_BIT) - endif -else - $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!) -endif - -# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now. -ifeq ($(HOST_ARCH),aarch64) - ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null)) - HOST_ARCH := sbsa - TARGET_ARCH := sbsa - endif -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le)) - $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!) - endif -endif - -# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l -ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32) - TARGET_ARCH = armv7l -endif - -# operating system -HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") -TARGET_OS ?= $(HOST_OS) -ifeq (,$(filter $(TARGET_OS),linux darwin qnx android)) - $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!) -endif - -# host compiler -ifdef HOST_COMPILER - CUSTOM_HOST_COMPILER = 1 -endif - -ifeq ($(TARGET_OS),darwin) - ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) - HOST_COMPILER ?= clang++ - endif -else ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - ifeq ($(TARGET_OS),linux) - HOST_COMPILER ?= arm-linux-gnueabihf-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++ - else ifeq ($(TARGET_OS),android) - HOST_COMPILER ?= arm-linux-androideabi-g++ - endif - else ifeq ($(TARGET_ARCH),aarch64) - ifeq ($(TARGET_OS), linux) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++ - else ifeq ($(TARGET_OS), android) - HOST_COMPILER ?= aarch64-linux-android-clang++ - endif - else ifeq ($(TARGET_ARCH),sbsa) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_ARCH),ppc64le) - HOST_COMPILER ?= powerpc64le-linux-gnu-g++ - endif -endif -HOST_COMPILER ?= g++ -NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) - -# internal flags -NVCCFLAGS := -m${TARGET_SIZE} -CCFLAGS := -LDFLAGS := - -# build flags - -# Link flag for customized HOST_COMPILER with gcc realpath -GCC_PATH := $(shell which gcc) -ifeq ($(CUSTOM_HOST_COMPILER),1) - ifneq ($(filter /%,$(HOST_COMPILER)),) - ifneq ($(findstring gcc,$(HOST_COMPILER)),) - ifneq ($(GCC_PATH),$(HOST_COMPILER)) - LDFLAGS += -lstdc++ - endif - endif - endif -endif - -ifeq ($(TARGET_OS),darwin) - LDFLAGS += -rpath $(CUDA_PATH)/lib - CCFLAGS += -arch $(HOST_ARCH) -else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 - CCFLAGS += -mfloat-abi=hard -else ifeq ($(TARGET_OS),android) - LDFLAGS += -pie - CCFLAGS += -fpie -fpic -fexceptions -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu - LDFLAGS += --unresolved-symbols=ignore-in-shared-libs - CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm - CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - NVCCFLAGS += -D_QNX_SOURCE - NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le - CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu - LDFLAGS += -lsocket - LDFLAGS += -L/usr/lib/aarch64-qnx-gnu - CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu" - ifdef TARGET_OVERRIDE - LDFLAGS += -lslog2 - endif - - ifneq ($(TARGET_FS),) - LDFLAGS += -L$(TARGET_FS)/usr/lib - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib" - LDFLAGS += -L$(TARGET_FS)/usr/libnvidia - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia" - CCFLAGS += -I$(TARGET_FS)/../include - endif - endif -endif - -ifdef TARGET_OVERRIDE # cuda toolkit targets override - NVCCFLAGS += -target-dir $(TARGET_OVERRIDE) -endif - -# Install directory of different arch -CUDA_INSTALL_TARGET_DIR := -ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux) - CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx) - CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/ -else ifeq ($(TARGET_ARCH),ppc64le) - CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/ -endif - -# Debug build flags -ifeq ($(dbg),1) - NVCCFLAGS += -g -G - BUILD_TYPE := debug -else - BUILD_TYPE := release -endif - -ALL_CCFLAGS := -ALL_CCFLAGS += $(NVCCFLAGS) -ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) - -SAMPLE_ENABLED := 1 - -# This sample is not supported on Linux x86_64 -ifeq ($(TARGET_OS),linux) - ifeq ($(TARGET_ARCH),x86_64) - $(info >>> WARNING - nbody_screen is not supported on Linux x86_64 - waiving sample <<<) - SAMPLE_ENABLED := 0 - endif -endif - -# This sample is not supported on Mac OSX -ifeq ($(TARGET_OS),darwin) - $(info >>> WARNING - nbody_screen is not supported on Mac OSX - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on ARMv7 -ifeq ($(TARGET_ARCH),armv7l) - $(info >>> WARNING - nbody_screen is not supported on ARMv7 - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on aarch64 -ifeq ($(TARGET_ARCH),aarch64) - ifneq ($(TARGET_OS),qnx) - $(info >>> WARNING - nbody_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<) - SAMPLE_ENABLED := 0 - endif -endif -# This sample is not supported on sbsa -ifeq ($(TARGET_ARCH),sbsa) - $(info >>> WARNING - nbody_screen is not supported on sbsa - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -ALL_LDFLAGS := -ALL_LDFLAGS += $(ALL_CCFLAGS) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) - -# Common includes and paths for CUDA -INCLUDES := -I../../../Common -LIBRARIES := - -################################################################################ - -# Makefile include to help find GLES Libraries -include ./findgleslib.mk - -# OpenGLES specific libraries -ifneq ($(TARGET_OS),darwin) - LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen -endif - -# Gencode arguments -ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 90 -else -SMS ?= 50 52 60 61 70 75 80 86 89 90 -endif - -ifeq ($(SMS),) -$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<) -SAMPLE_ENABLED := 0 -endif - -ifeq ($(GENCODE_FLAGS),) -# Generate SASS code for each SM architecture listed in $(SMS) -$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) - -# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility -HIGHEST_SM := $(lastword $(sort $(SMS))) -ifneq ($(HIGHEST_SM),) -GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) -endif -endif - -ALL_CCFLAGS += -ftz=true --threads 0 --std=c++11 - -ifeq ($(SAMPLE_ENABLED),0) -EXEC ?= @echo "[@]" -endif - -################################################################################ - -# Target rules -all: build - -build: nbody_screen - -check.deps: -ifeq ($(SAMPLE_ENABLED),0) - @echo "Sample will be waived due to the above missing dependencies" -else - @echo "Sample is ready - all dependencies have been met" -endif - -bodysystemcuda.o:bodysystemcuda.cu - $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -nbody_screen.o:nbody_screen.cpp - $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -render_particles.o:render_particles.cpp - $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -nbody_screen: bodysystemcuda.o nbody_screen.o render_particles.o - $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES) - $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - -run: build - $(EXEC) ./nbody_screen - -testrun: build - $(EXEC) ./nbody_screen -benchmark -compare -cpu - -clean: - rm -f nbody_screen bodysystemcuda.o nbody_screen.o render_particles.o - rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/nbody_screen - -clobber: clean diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/NsightEclipse.xml b/Samples/8_Platform_Specific/Tegra/nbody_screen/NsightEclipse.xml deleted file mode 100644 index 28d52f31..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/NsightEclipse.xml +++ /dev/null @@ -1,92 +0,0 @@ - - - - nbody_screen - - -ftz=true - - - cudaGraphicsUnmapResources - cudaSetDeviceFlags - cudaGraphicsResourceSetMapFlags - cudaGraphicsResourceGetMappedPointer - cudaGraphicsMapResources - cudaSetDevice - cudaEventSynchronize - cudaGetDeviceProperties - cudaDeviceSynchronize - cudaEventRecord - cudaGetDevice - cudaMemcpyToSymbol - cudaStreamQuery - cudaEventDestroy - cudaEventElapsedTime - cudaGetDeviceCount - cudaEventCreate - - - whole - - ./galaxy_20K.bin - - - ./ - ../ - ../../../Common - - - Graphics Interop - Data Parallel Algorithms - Physically-Based Simulation - - - CUDA - GPGPU - n-body - simulation - astrophysics - OpenGL ES - - - - - - true - nbody.cpp - - -benchmark -compare -cpu - - - screen - GLES - - - 2:Graphics Interop - 1:CUDA Advanced Topics - 1:Data-Parallel Algorithms - 3:Physically-Based Simulation - - sm50 - sm52 - sm53 - sm60 - sm61 - sm70 - sm72 - sm75 - sm80 - sm86 - sm87 - sm89 - sm90 - - - qnx - - - - all - - CUDA N-Body Simulation on Screen - exe - diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/README.md b/Samples/8_Platform_Specific/Tegra/nbody_screen/README.md deleted file mode 100644 index 3d8a057a..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# nbody_screen - CUDA N-Body Simulation on Screen - -## Description - -This sample demonstrates efficient all-pairs simulation of a gravitational n-body simulation in CUDA. Unlike the OpenGL nbody sample, there is no user interaction. - -## Key Concepts - -Graphics Interop, Data Parallel Algorithms, Physically-Based Simulation - -## Supported SM Architectures - -[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) - -## Supported OSes - -QNX - -## Supported CPU Architecture - -aarch64 - -## CUDA APIs involved - -### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsUnmapResources, cudaSetDeviceFlags, cudaGraphicsResourceSetMapFlags, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaSetDevice, cudaEventSynchronize, cudaGetDeviceProperties, cudaDeviceSynchronize, cudaEventRecord, cudaGetDevice, cudaMemcpyToSymbol, cudaStreamQuery, cudaEventDestroy, cudaEventElapsedTime, cudaGetDeviceCount, cudaEventCreate - -## Dependencies needed to build/run -[screen](../../../README.md#screen), [GLES](../../../README.md#gles) - -## Prerequisites - -Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. -Make sure the dependencies mentioned in [Dependencies]() section above are installed. - -## References (for more details) - diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystem.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystem.h deleted file mode 100644 index 2d11c1ed..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystem.h +++ /dev/null @@ -1,286 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __BODYSYSTEM_H__ -#define __BODYSYSTEM_H__ - -#include - -enum NBodyConfig { - NBODY_CONFIG_RANDOM, - NBODY_CONFIG_SHELL, - NBODY_CONFIG_EXPAND, - NBODY_NUM_CONFIGS -}; - -enum BodyArray { - BODYSYSTEM_POSITION, - BODYSYSTEM_VELOCITY, -}; - -template -struct vec3 { - typedef float Type; -}; // dummy -template <> -struct vec3 { - typedef float3 Type; -}; -template <> -struct vec3 { - typedef double3 Type; -}; - -template -struct vec4 { - typedef float Type; -}; // dummy -template <> -struct vec4 { - typedef float4 Type; -}; -template <> -struct vec4 { - typedef double4 Type; -}; - -class string; - -// BodySystem abstract base class -template -class BodySystem { - public: // methods - BodySystem(int numBodies) {} - virtual ~BodySystem() {} - - virtual void loadTipsyFile(const std::string &filename) = 0; - - virtual void update(T deltaTime) = 0; - - virtual void setSoftening(T softening) = 0; - virtual void setDamping(T damping) = 0; - - virtual T *getArray(BodyArray array) = 0; - virtual void setArray(BodyArray array, const T *data) = 0; - - virtual unsigned int getCurrentReadBuffer() const = 0; - - virtual unsigned int getNumBodies() const = 0; - - virtual void synchronizeThreads() const {}; - - protected: // methods - BodySystem() {} // default constructor - - virtual void _initialize(int numBodies) = 0; - virtual void _finalize() = 0; -}; - -inline float3 scalevec(float3 &vector, float scalar) { - float3 rt = vector; - rt.x *= scalar; - rt.y *= scalar; - rt.z *= scalar; - return rt; -} - -inline float normalize(float3 &vector) { - float dist = - sqrtf(vector.x * vector.x + vector.y * vector.y + vector.z * vector.z); - - if (dist > 1e-6) { - vector.x /= dist; - vector.y /= dist; - vector.z /= dist; - } - - return dist; -} - -inline float dot(float3 v0, float3 v1) { - return v0.x * v1.x + v0.y * v1.y + v0.z * v1.z; -} - -inline float3 cross(float3 v0, float3 v1) { - float3 rt; - rt.x = v0.y * v1.z - v0.z * v1.y; - rt.y = v0.z * v1.x - v0.x * v1.z; - rt.z = v0.x * v1.y - v0.y * v1.x; - return rt; -} - -// utility function -template -void randomizeBodies(NBodyConfig config, T *pos, T *vel, float *color, - float clusterScale, float velocityScale, int numBodies, - bool vec4vel) { - switch (config) { - default: - case NBODY_CONFIG_RANDOM: { - float scale = clusterScale * std::max(1.0f, numBodies / (1024.0f)); - float vscale = velocityScale * scale; - - int p = 0, v = 0; - int i = 0; - - while (i < numBodies) { - float3 point; - // const int scale = 16; - point.x = rand() / (float)RAND_MAX * 2 - 1; - point.y = rand() / (float)RAND_MAX * 2 - 1; - point.z = rand() / (float)RAND_MAX * 2 - 1; - float lenSqr = dot(point, point); - - if (lenSqr > 1) continue; - - float3 velocity; - velocity.x = rand() / (float)RAND_MAX * 2 - 1; - velocity.y = rand() / (float)RAND_MAX * 2 - 1; - velocity.z = rand() / (float)RAND_MAX * 2 - 1; - lenSqr = dot(velocity, velocity); - - if (lenSqr > 1) continue; - - pos[p++] = point.x * scale; // pos.x - pos[p++] = point.y * scale; // pos.y - pos[p++] = point.z * scale; // pos.z - pos[p++] = 1.0f; // mass - - vel[v++] = velocity.x * vscale; // pos.x - vel[v++] = velocity.y * vscale; // pos.x - vel[v++] = velocity.z * vscale; // pos.x - - if (vec4vel) vel[v++] = 1.0f; // inverse mass - - i++; - } - } break; - - case NBODY_CONFIG_SHELL: { - float scale = clusterScale; - float vscale = scale * velocityScale; - float inner = 2.5f * scale; - float outer = 4.0f * scale; - - int p = 0, v = 0; - int i = 0; - - while (i < numBodies) // for(int i=0; i < numBodies; i++) - { - float x, y, z; - x = rand() / (float)RAND_MAX * 2 - 1; - y = rand() / (float)RAND_MAX * 2 - 1; - z = rand() / (float)RAND_MAX * 2 - 1; - - float3 point = {x, y, z}; - float len = normalize(point); - - if (len > 1) continue; - - pos[p++] = - point.x * (inner + (outer - inner) * rand() / (float)RAND_MAX); - pos[p++] = - point.y * (inner + (outer - inner) * rand() / (float)RAND_MAX); - pos[p++] = - point.z * (inner + (outer - inner) * rand() / (float)RAND_MAX); - pos[p++] = 1.0f; - - x = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1); - y = 0.0f; // * (rand() / (float) RAND_MAX * 2 - 1); - z = 1.0f; // * (rand() / (float) RAND_MAX * 2 - 1); - float3 axis = {x, y, z}; - normalize(axis); - - if (1 - dot(point, axis) < 1e-6) { - axis.x = point.y; - axis.y = point.x; - normalize(axis); - } - - // if (point.y < 0) axis = scalevec(axis, -1); - float3 vv = {(float)pos[4 * i], (float)pos[4 * i + 1], - (float)pos[4 * i + 2]}; - vv = cross(vv, axis); - vel[v++] = vv.x * vscale; - vel[v++] = vv.y * vscale; - vel[v++] = vv.z * vscale; - - if (vec4vel) vel[v++] = 1.0f; - - i++; - } - } break; - - case NBODY_CONFIG_EXPAND: { - float scale = clusterScale * numBodies / (1024.f); - - if (scale < 1.0f) scale = clusterScale; - - float vscale = scale * velocityScale; - - int p = 0, v = 0; - - for (int i = 0; i < numBodies;) { - float3 point; - - point.x = rand() / (float)RAND_MAX * 2 - 1; - point.y = rand() / (float)RAND_MAX * 2 - 1; - point.z = rand() / (float)RAND_MAX * 2 - 1; - - float lenSqr = dot(point, point); - - if (lenSqr > 1) continue; - - pos[p++] = point.x * scale; // pos.x - pos[p++] = point.y * scale; // pos.y - pos[p++] = point.z * scale; // pos.z - pos[p++] = 1.0f; // mass - vel[v++] = point.x * vscale; // pos.x - vel[v++] = point.y * vscale; // pos.x - vel[v++] = point.z * vscale; // pos.x - - if (vec4vel) vel[v++] = 1.0f; // inverse mass - - i++; - } - } break; - } - - if (color) { - int v = 0; - - for (int i = 0; i < numBodies; i++) { - // const int scale = 16; - color[v++] = rand() / (float)RAND_MAX; - color[v++] = rand() / (float)RAND_MAX; - color[v++] = rand() / (float)RAND_MAX; - color[v++] = 1.0f; - } - } -} - -#endif // __BODYSYSTEM_H__ diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu.h deleted file mode 100644 index 700e385a..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu.h +++ /dev/null @@ -1,79 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __BODYSYSTEMCPU_H__ -#define __BODYSYSTEMCPU_H__ - -#include "bodysystem.h" - -// CPU Body System -template -class BodySystemCPU : public BodySystem { - public: - BodySystemCPU(int numBodies); - virtual ~BodySystemCPU(); - - virtual void loadTipsyFile(const std::string &filename); - - virtual void update(T deltaTime); - - virtual void setSoftening(T softening) { - m_softeningSquared = softening * softening; - } - virtual void setDamping(T damping) { m_damping = damping; } - - virtual T *getArray(BodyArray array); - virtual void setArray(BodyArray array, const T *data); - - virtual unsigned int getCurrentReadBuffer() const { return 0; } - - virtual unsigned int getNumBodies() const { return m_numBodies; } - - protected: // methods - BodySystemCPU() {} // default constructor - - virtual void _initialize(int numBodies); - virtual void _finalize(); - - void _computeNBodyGravitation(); - void _integrateNBodySystem(T deltaTime); - - protected: // data - int m_numBodies; - bool m_bInitialized; - - T *m_pos; - T *m_vel; - T *m_force; - - T m_softeningSquared; - T m_damping; -}; - -#include "bodysystemcpu_impl.h" - -#endif // __BODYSYSTEMCPU_H__ diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu_impl.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu_impl.h deleted file mode 100644 index 14130064..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcpu_impl.h +++ /dev/null @@ -1,280 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "bodysystemcpu.h" - -#include -#include -#include -#include -#include -#include -#include -#include "tipsy.h" - -#ifdef OPENMP -#include -#endif - -template -BodySystemCPU::BodySystemCPU(int numBodies) - : m_numBodies(numBodies), - m_bInitialized(false), - m_force(0), - m_softeningSquared(.00125f), - m_damping(0.995f) { - m_pos = 0; - m_vel = 0; - - _initialize(numBodies); -} - -template -BodySystemCPU::~BodySystemCPU() { - _finalize(); - m_numBodies = 0; -} - -template -void BodySystemCPU::_initialize(int numBodies) { - assert(!m_bInitialized); - - m_numBodies = numBodies; - - m_pos = new T[m_numBodies * 4]; - m_vel = new T[m_numBodies * 4]; - m_force = new T[m_numBodies * 3]; - - memset(m_pos, 0, m_numBodies * 4 * sizeof(T)); - memset(m_vel, 0, m_numBodies * 4 * sizeof(T)); - memset(m_force, 0, m_numBodies * 3 * sizeof(T)); - - m_bInitialized = true; -} - -template -void BodySystemCPU::_finalize() { - assert(m_bInitialized); - - delete[] m_pos; - delete[] m_vel; - delete[] m_force; - - m_bInitialized = false; -} - -template -void BodySystemCPU::loadTipsyFile(const std::string &filename) { - if (m_bInitialized) _finalize(); - - vector::Type> positions; - vector::Type> velocities; - vector ids; - - int nBodies = 0; - int nFirst = 0, nSecond = 0, nThird = 0; - - read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst, - nSecond, nThird); - - _initialize(nBodies); - - memcpy(m_pos, &positions[0], sizeof(vec4) * nBodies); - memcpy(m_vel, &velocities[0], sizeof(vec4) * nBodies); -} - -template -void BodySystemCPU::update(T deltaTime) { - assert(m_bInitialized); - - _integrateNBodySystem(deltaTime); - - // std::swap(m_currentRead, m_currentWrite); -} - -template -T *BodySystemCPU::getArray(BodyArray array) { - assert(m_bInitialized); - - T *data = 0; - - switch (array) { - default: - case BODYSYSTEM_POSITION: - data = m_pos; - break; - - case BODYSYSTEM_VELOCITY: - data = m_vel; - break; - } - - return data; -} - -template -void BodySystemCPU::setArray(BodyArray array, const T *data) { - assert(m_bInitialized); - - T *target = 0; - - switch (array) { - default: - case BODYSYSTEM_POSITION: - target = m_pos; - break; - - case BODYSYSTEM_VELOCITY: - target = m_vel; - break; - } - - memcpy(target, data, m_numBodies * 4 * sizeof(T)); -} - -template -T sqrt_T(T x) { - return sqrt(x); -} - -template <> -float sqrt_T(float x) { - return sqrtf(x); -} - -template -void bodyBodyInteraction(T accel[3], T posMass0[4], T posMass1[4], - T softeningSquared) { - T r[3]; - - // r_01 [3 FLOPS] - r[0] = posMass1[0] - posMass0[0]; - r[1] = posMass1[1] - posMass0[1]; - r[2] = posMass1[2] - posMass0[2]; - - // d^2 + e^2 [6 FLOPS] - T distSqr = r[0] * r[0] + r[1] * r[1] + r[2] * r[2]; - distSqr += softeningSquared; - - // invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)] - T invDist = (T)1.0 / (T)sqrt((double)distSqr); - T invDistCube = invDist * invDist * invDist; - - // s = m_j * invDistCube [1 FLOP] - T s = posMass1[3] * invDistCube; - - // (m_1 * r_01) / (d^2 + e^2)^(3/2) [6 FLOPS] - accel[0] += r[0] * s; - accel[1] += r[1] * s; - accel[2] += r[2] * s; -} - -template -void BodySystemCPU::_computeNBodyGravitation() { -#ifdef OPENMP -#pragma omp parallel for -#endif - - for (int i = 0; i < m_numBodies; i++) { - int indexForce = 3 * i; - - T acc[3] = {0, 0, 0}; - - // We unroll this loop 4X for a small performance boost. - int j = 0; - - while (j < m_numBodies) { - bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j], - m_softeningSquared); - j++; - bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j], - m_softeningSquared); - j++; - bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j], - m_softeningSquared); - j++; - bodyBodyInteraction(acc, &m_pos[4 * i], &m_pos[4 * j], - m_softeningSquared); - j++; - } - - m_force[indexForce] = acc[0]; - m_force[indexForce + 1] = acc[1]; - m_force[indexForce + 2] = acc[2]; - } -} - -template -void BodySystemCPU::_integrateNBodySystem(T deltaTime) { - _computeNBodyGravitation(); - -#ifdef OPENMP -#pragma omp parallel for -#endif - - for (int i = 0; i < m_numBodies; ++i) { - int index = 4 * i; - int indexForce = 3 * i; - - T pos[3], vel[3], force[3]; - pos[0] = m_pos[index + 0]; - pos[1] = m_pos[index + 1]; - pos[2] = m_pos[index + 2]; - T invMass = m_pos[index + 3]; - - vel[0] = m_vel[index + 0]; - vel[1] = m_vel[index + 1]; - vel[2] = m_vel[index + 2]; - - force[0] = m_force[indexForce + 0]; - force[1] = m_force[indexForce + 1]; - force[2] = m_force[indexForce + 2]; - - // acceleration = force / mass; - // new velocity = old velocity + acceleration * deltaTime - vel[0] += (force[0] * invMass) * deltaTime; - vel[1] += (force[1] * invMass) * deltaTime; - vel[2] += (force[2] * invMass) * deltaTime; - - vel[0] *= m_damping; - vel[1] *= m_damping; - vel[2] *= m_damping; - - // new position = old position + velocity * deltaTime - pos[0] += vel[0] * deltaTime; - pos[1] += vel[1] * deltaTime; - pos[2] += vel[2] * deltaTime; - - m_pos[index + 0] = pos[0]; - m_pos[index + 1] = pos[1]; - m_pos[index + 2] = pos[2]; - - m_vel[index + 0] = vel[0]; - m_vel[index + 1] = vel[1]; - m_vel[index + 2] = vel[2]; - } -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.cu b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.cu deleted file mode 100644 index 1c95980e..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.cu +++ /dev/null @@ -1,276 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include - -//#include -//#include - -// CUDA standard includes -#include -//#include - -#include "bodysystem.h" - -__constant__ float softeningSquared; -__constant__ double softeningSquared_fp64; - -cudaError_t setSofteningSquared(float softeningSq) { - return cudaMemcpyToSymbol(softeningSquared, &softeningSq, sizeof(float), 0, - cudaMemcpyHostToDevice); -} - -cudaError_t setSofteningSquared(double softeningSq) { - return cudaMemcpyToSymbol(softeningSquared_fp64, &softeningSq, sizeof(double), - 0, cudaMemcpyHostToDevice); -} - -template -struct SharedMemory { - __device__ inline operator T *() { - extern __shared__ int __smem[]; - return (T *)__smem; - } - - __device__ inline operator const T *() const { - extern __shared__ int __smem[]; - return (T *)__smem; - } -}; - -template -__device__ T rsqrt_T(T x) { - return rsqrt(x); -} - -template <> -__device__ float rsqrt_T(float x) { - return rsqrtf(x); -} - -template <> -__device__ double rsqrt_T(double x) { - return rsqrt(x); -} - -// Macros to simplify shared memory addressing -#define SX(i) sharedPos[i + blockDim.x * threadIdx.y] -// This macro is only used when multithreadBodies is true (below) -#define SX_SUM(i, j) sharedPos[i + blockDim.x * j] - -template -__device__ T getSofteningSquared() { - return softeningSquared; -} -template <> -__device__ double getSofteningSquared() { - return softeningSquared_fp64; -} - -template -struct DeviceData { - T *dPos[2]; // mapped host pointers - T *dVel; - cudaEvent_t event; - unsigned int offset; - unsigned int numBodies; -}; - -template -__device__ typename vec3::Type bodyBodyInteraction( - typename vec3::Type ai, typename vec4::Type bi, - typename vec4::Type bj) { - typename vec3::Type r; - - // r_ij [3 FLOPS] - r.x = bj.x - bi.x; - r.y = bj.y - bi.y; - r.z = bj.z - bi.z; - - // distSqr = dot(r_ij, r_ij) + EPS^2 [6 FLOPS] - T distSqr = r.x * r.x + r.y * r.y + r.z * r.z; - distSqr += getSofteningSquared(); - - // invDistCube =1/distSqr^(3/2) [4 FLOPS (2 mul, 1 sqrt, 1 inv)] - T invDist = rsqrt_T(distSqr); - T invDistCube = invDist * invDist * invDist; - - // s = m_j * invDistCube [1 FLOP] - T s = bj.w * invDistCube; - - // a_i = a_i + s * r_ij [6 FLOPS] - ai.x += r.x * s; - ai.y += r.y * s; - ai.z += r.z * s; - - return ai; -} - -template -__device__ typename vec3::Type computeBodyAccel( - typename vec4::Type bodyPos, typename vec4::Type *positions, - int numTiles) { - typename vec4::Type *sharedPos = SharedMemory::Type>(); - - typename vec3::Type acc = {0.0f, 0.0f, 0.0f}; - - for (int tile = 0; tile < numTiles; tile++) { - sharedPos[threadIdx.x] = positions[tile * blockDim.x + threadIdx.x]; - - __syncthreads(); - - // This is the "tile_calculation" from the GPUG3 article. -#pragma unroll 128 - - for (unsigned int counter = 0; counter < blockDim.x; counter++) { - acc = bodyBodyInteraction(acc, bodyPos, sharedPos[counter]); - } - - __syncthreads(); - } - - return acc; -} - -template -__global__ void integrateBodies(typename vec4::Type *__restrict__ newPos, - typename vec4::Type *__restrict__ oldPos, - typename vec4::Type *vel, - unsigned int deviceOffset, - unsigned int deviceNumBodies, float deltaTime, - float damping, int numTiles) { - int index = blockIdx.x * blockDim.x + threadIdx.x; - - if (index >= deviceNumBodies) { - return; - } - - typename vec4::Type position = oldPos[deviceOffset + index]; - - typename vec3::Type accel = - computeBodyAccel(position, oldPos, numTiles); - - // acceleration = force / mass; - // new velocity = old velocity + acceleration * deltaTime - // note we factor out the body's mass from the equation, here and in - // bodyBodyInteraction (because they cancel out). Thus here force == - // acceleration - typename vec4::Type velocity = vel[deviceOffset + index]; - - velocity.x += accel.x * deltaTime; - velocity.y += accel.y * deltaTime; - velocity.z += accel.z * deltaTime; - - velocity.x *= damping; - velocity.y *= damping; - velocity.z *= damping; - - // new position = old position + velocity * deltaTime - position.x += velocity.x * deltaTime; - position.y += velocity.y * deltaTime; - position.z += velocity.z * deltaTime; - - // store new position and velocity - newPos[deviceOffset + index] = position; - vel[deviceOffset + index] = velocity; -} - -template -void integrateNbodySystem(DeviceData *deviceData, - cudaGraphicsResource **pgres, - unsigned int currentRead, float deltaTime, - float damping, unsigned int numBodies, - unsigned int numDevices, int blockSize, - bool bUsePBO) { - if (bUsePBO) { - checkCudaErrors(cudaGraphicsResourceSetMapFlags( - pgres[currentRead], cudaGraphicsMapFlagsReadOnly)); - checkCudaErrors(cudaGraphicsResourceSetMapFlags( - pgres[1 - currentRead], cudaGraphicsMapFlagsWriteDiscard)); - checkCudaErrors(cudaGraphicsMapResources(2, pgres, 0)); - size_t bytes; - checkCudaErrors(cudaGraphicsResourceGetMappedPointer( - (void **)&(deviceData[0].dPos[currentRead]), &bytes, - pgres[currentRead])); - checkCudaErrors(cudaGraphicsResourceGetMappedPointer( - (void **)&(deviceData[0].dPos[1 - currentRead]), &bytes, - pgres[1 - currentRead])); - } - - for (unsigned int dev = 0; dev != numDevices; dev++) { - if (numDevices > 1) { - cudaSetDevice(dev); - } - - int numBlocks = (deviceData[dev].numBodies + blockSize - 1) / blockSize; - int numTiles = (numBodies + blockSize - 1) / blockSize; - int sharedMemSize = blockSize * 4 * sizeof(T); // 4 floats for pos - - integrateBodies<<>>( - (typename vec4::Type *)deviceData[dev].dPos[1 - currentRead], - (typename vec4::Type *)deviceData[dev].dPos[currentRead], - (typename vec4::Type *)deviceData[dev].dVel, deviceData[dev].offset, - deviceData[dev].numBodies, deltaTime, damping, numTiles); - - if (numDevices > 1) { - checkCudaErrors(cudaEventRecord(deviceData[dev].event)); - // MJH: Hack on older driver versions to force kernel launches to flush! - cudaStreamQuery(0); - } - - // check if kernel invocation generated an error - getLastCudaError("Kernel execution failed"); - } - - if (numDevices > 1) { - for (unsigned int dev = 0; dev < numDevices; dev++) { - checkCudaErrors(cudaEventSynchronize(deviceData[dev].event)); - } - } - - if (bUsePBO) { - checkCudaErrors(cudaGraphicsUnmapResources(2, pgres, 0)); - } -} - -// Explicit specializations needed to generate code -template void integrateNbodySystem(DeviceData *deviceData, - cudaGraphicsResource **pgres, - unsigned int currentRead, - float deltaTime, float damping, - unsigned int numBodies, - unsigned int numDevices, - int blockSize, bool bUsePBO); - -template void integrateNbodySystem(DeviceData *deviceData, - cudaGraphicsResource **pgres, - unsigned int currentRead, - float deltaTime, float damping, - unsigned int numBodies, - unsigned int numDevices, - int blockSize, bool bUsePBO); diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.h deleted file mode 100644 index 977d4856..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda.h +++ /dev/null @@ -1,99 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __BODYSYSTEMCUDA_H__ -#define __BODYSYSTEMCUDA_H__ - -#include "bodysystem.h" - -template -struct DeviceData { - T *dPos[2]; // mapped host pointers - T *dVel; - cudaEvent_t event; - unsigned int offset; - unsigned int numBodies; -}; - -// CUDA BodySystem: runs on the GPU -template -class BodySystemCUDA : public BodySystem { - public: - BodySystemCUDA(unsigned int numBodies, unsigned int numDevices, - unsigned int blockSize, bool usePBO, bool useSysMem = false); - virtual ~BodySystemCUDA(); - - virtual void loadTipsyFile(const std::string &filename); - - virtual void update(T deltaTime); - - virtual void setSoftening(T softening); - virtual void setDamping(T damping); - - virtual T *getArray(BodyArray array); - virtual void setArray(BodyArray array, const T *data); - - virtual unsigned int getCurrentReadBuffer() const { - return m_pbo[m_currentRead]; - } - - virtual unsigned int getNumBodies() const { return m_numBodies; } - - protected: // methods - BodySystemCUDA() {} - - virtual void _initialize(int numBodies); - virtual void _finalize(); - - protected: // data - unsigned int m_numBodies; - unsigned int m_numDevices; - bool m_bInitialized; - - // Host data - T *m_hPos[2]; - T *m_hVel; - - DeviceData *m_deviceData; - - bool m_bUsePBO; - bool m_bUseSysMem; - unsigned int m_SMVersion; - - T m_damping; - - unsigned int m_pbo[2]; - cudaGraphicsResource *m_pGRes[2]; - unsigned int m_currentRead; - unsigned int m_currentWrite; - - unsigned int m_blockSize; -}; - -#include "bodysystemcuda_impl.h" - -#endif // __BODYSYSTEMCUDA_H__ diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda_impl.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda_impl.h deleted file mode 100644 index 3e4c85d6..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/bodysystemcuda_impl.h +++ /dev/null @@ -1,373 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include - -#include -#include -#include -#include -#include -#include -#include -//#include - -#include - -template -void integrateNbodySystem(DeviceData *deviceData, - cudaGraphicsResource **pgres, - unsigned int currentRead, float deltaTime, - float damping, unsigned int numBodies, - unsigned int numDevices, int blockSize, bool bUsePBO); - -cudaError_t setSofteningSquared(float softeningSq); -cudaError_t setSofteningSquared(double softeningSq); - -template -BodySystemCUDA::BodySystemCUDA(unsigned int numBodies, - unsigned int numDevices, - unsigned int blockSize, bool usePBO, - bool useSysMem) - : m_numBodies(numBodies), - m_numDevices(numDevices), - m_bInitialized(false), - m_bUsePBO(usePBO), - m_bUseSysMem(useSysMem), - m_currentRead(0), - m_currentWrite(1), - m_blockSize(blockSize) { - m_hPos[0] = m_hPos[1] = 0; - m_hVel = 0; - - m_deviceData = 0; - - _initialize(numBodies); - setSoftening(0.00125f); - setDamping(0.995f); -} - -template -BodySystemCUDA::~BodySystemCUDA() { - _finalize(); - m_numBodies = 0; -} - -template -void BodySystemCUDA::_initialize(int numBodies) { - assert(!m_bInitialized); - - m_numBodies = numBodies; - - unsigned int memSize = sizeof(T) * 4 * numBodies; - - m_deviceData = new DeviceData[m_numDevices]; - - // divide up the workload amongst Devices - float *weights = new float[m_numDevices]; - int *numSms = new int[m_numDevices]; - float total = 0; - - for (unsigned int i = 0; i < m_numDevices; i++) { - cudaDeviceProp props; - checkCudaErrors(cudaGetDeviceProperties(&props, i)); - - // Choose the weight based on the Compute Capability - // We estimate that a CC2.0 SM is about 4.0x faster than a CC 1.x SM for - // this application (since a 15-SM GF100 is about 2X faster than a 30-SM - // GT200). - numSms[i] = props.multiProcessorCount; - weights[i] = numSms[i] * (props.major >= 2 ? 4.f : 1.f); - total += weights[i]; - } - - unsigned int offset = 0; - unsigned int remaining = m_numBodies; - - for (unsigned int i = 0; i < m_numDevices; i++) { - unsigned int count = (int)((weights[i] / total) * m_numBodies); - unsigned int round = numSms[i] * 256; - count = round * ((count + round - 1) / round); - - if (count > remaining) { - count = remaining; - } - - remaining -= count; - m_deviceData[i].offset = offset; - m_deviceData[i].numBodies = count; - offset += count; - - if ((i == m_numDevices - 1) && (offset < m_numBodies - 1)) { - m_deviceData[i].numBodies += m_numBodies - offset; - } - } - - delete[] weights; - delete[] numSms; - - if (m_bUseSysMem) { - checkCudaErrors(cudaHostAlloc((void **)&m_hPos[0], memSize, - cudaHostAllocMapped | cudaHostAllocPortable)); - checkCudaErrors(cudaHostAlloc((void **)&m_hPos[1], memSize, - cudaHostAllocMapped | cudaHostAllocPortable)); - checkCudaErrors(cudaHostAlloc((void **)&m_hVel, memSize, - cudaHostAllocMapped | cudaHostAllocPortable)); - - memset(m_hPos[0], 0, memSize); - memset(m_hPos[1], 0, memSize); - memset(m_hVel, 0, memSize); - - for (unsigned int i = 0; i < m_numDevices; i++) { - if (m_numDevices > 1) { - checkCudaErrors(cudaSetDevice(i)); - } - - checkCudaErrors(cudaEventCreate(&m_deviceData[i].event)); - checkCudaErrors(cudaHostGetDevicePointer( - (void **)&m_deviceData[i].dPos[0], (void *)m_hPos[0], 0)); - checkCudaErrors(cudaHostGetDevicePointer( - (void **)&m_deviceData[i].dPos[1], (void *)m_hPos[1], 0)); - checkCudaErrors(cudaHostGetDevicePointer((void **)&m_deviceData[i].dVel, - (void *)m_hVel, 0)); - } - } else { - m_hPos[0] = new T[m_numBodies * 4]; - m_hVel = new T[m_numBodies * 4]; - - memset(m_hPos[0], 0, memSize); - memset(m_hVel, 0, memSize); - - checkCudaErrors(cudaEventCreate(&m_deviceData[0].event)); - - if (m_bUsePBO) { - // create the position pixel buffer objects for rendering - // we will actually compute directly from this memory in CUDA too - glGenBuffers(2, (GLuint *)m_pbo); - - for (int i = 0; i < 2; ++i) { - glBindBuffer(GL_ARRAY_BUFFER, m_pbo[i]); - glBufferData(GL_ARRAY_BUFFER, memSize, m_hPos[0], GL_DYNAMIC_DRAW); - - int size = 0; - glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size); - - if ((unsigned)size != memSize) { - fprintf(stderr, "WARNING: Pixel Buffer Object allocation failed!n"); - } - - glBindBuffer(GL_ARRAY_BUFFER, 0); - checkCudaErrors(cudaGraphicsGLRegisterBuffer(&m_pGRes[i], m_pbo[i], - cudaGraphicsMapFlagsNone)); - } - } else { - checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[0], memSize)); - checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dPos[1], memSize)); - } - - checkCudaErrors(cudaMalloc((void **)&m_deviceData[0].dVel, memSize)); - } - - m_bInitialized = true; -} - -template -void BodySystemCUDA::_finalize() { - assert(m_bInitialized); - - if (m_bUseSysMem) { - checkCudaErrors(cudaFreeHost(m_hPos[0])); - checkCudaErrors(cudaFreeHost(m_hPos[1])); - checkCudaErrors(cudaFreeHost(m_hVel)); - - for (unsigned int i = 0; i < m_numDevices; i++) { - cudaEventDestroy(m_deviceData[i].event); - } - } else { - delete[] m_hPos[0]; - delete[] m_hPos[1]; - delete[] m_hVel; - - checkCudaErrors(cudaFree((void **)m_deviceData[0].dVel)); - - if (m_bUsePBO) { - checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[0])); - checkCudaErrors(cudaGraphicsUnregisterResource(m_pGRes[1])); - glDeleteBuffers(2, (const GLuint *)m_pbo); - } else { - checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[0])); - checkCudaErrors(cudaFree((void **)m_deviceData[0].dPos[1])); - } - } - - delete[] m_deviceData; - - m_bInitialized = false; -} - -template -void BodySystemCUDA::loadTipsyFile(const std::string &filename) { - if (m_bInitialized) _finalize(); - - std::vector::Type> positions; - std::vector::Type> velocities; - std::vector ids; - - int nBodies = 0; - int nFirst = 0, nSecond = 0, nThird = 0; - - read_tipsy_file(positions, velocities, ids, filename, nBodies, nFirst, - nSecond, nThird); - - _initialize(nBodies); - - setArray(BODYSYSTEM_POSITION, (T *)&positions[0]); - setArray(BODYSYSTEM_VELOCITY, (T *)&velocities[0]); -} - -template -void BodySystemCUDA::setSoftening(T softening) { - T softeningSq = softening * softening; - - for (unsigned int i = 0; i < m_numDevices; i++) { - if (m_numDevices > 1) { - checkCudaErrors(cudaSetDevice(i)); - } - - checkCudaErrors(setSofteningSquared(softeningSq)); - } -} - -template -void BodySystemCUDA::setDamping(T damping) { - m_damping = damping; -} - -template -void BodySystemCUDA::update(T deltaTime) { - assert(m_bInitialized); - - integrateNbodySystem(m_deviceData, m_pGRes, m_currentRead, - (float)deltaTime, (float)m_damping, m_numBodies, - m_numDevices, m_blockSize, m_bUsePBO); - - std::swap(m_currentRead, m_currentWrite); -} - -template -T *BodySystemCUDA::getArray(BodyArray array) { - assert(m_bInitialized); - - T *hdata = 0; - T *ddata = 0; - - cudaGraphicsResource *pgres = NULL; - - int currentReadHost = m_bUseSysMem ? m_currentRead : 0; - - switch (array) { - default: - case BODYSYSTEM_POSITION: - hdata = m_hPos[currentReadHost]; - ddata = m_deviceData[0].dPos[m_currentRead]; - - if (m_bUsePBO) { - pgres = m_pGRes[m_currentRead]; - } - - break; - - case BODYSYSTEM_VELOCITY: - hdata = m_hVel; - ddata = m_deviceData[0].dVel; - break; - } - - if (!m_bUseSysMem) { - if (pgres) { - checkCudaErrors( - cudaGraphicsResourceSetMapFlags(pgres, cudaGraphicsMapFlagsReadOnly)); - checkCudaErrors(cudaGraphicsMapResources(1, &pgres, 0)); - size_t bytes; - checkCudaErrors( - cudaGraphicsResourceGetMappedPointer((void **)&ddata, &bytes, pgres)); - } - - checkCudaErrors(cudaMemcpy(hdata, ddata, m_numBodies * 4 * sizeof(T), - cudaMemcpyDeviceToHost)); - - if (pgres) { - checkCudaErrors(cudaGraphicsUnmapResources(1, &pgres, 0)); - } - } - - return hdata; -} - -template -void BodySystemCUDA::setArray(BodyArray array, const T *data) { - assert(m_bInitialized); - - m_currentRead = 0; - m_currentWrite = 1; - - switch (array) { - default: - case BODYSYSTEM_POSITION: { - if (m_bUsePBO) { - glBindBuffer(GL_ARRAY_BUFFER, m_pbo[m_currentRead]); - glBufferSubData(GL_ARRAY_BUFFER, 0, 4 * sizeof(T) * m_numBodies, data); - - int size = 0; - glGetBufferParameteriv(GL_ARRAY_BUFFER, GL_BUFFER_SIZE, (GLint *)&size); - - if ((unsigned)size != 4 * (sizeof(T) * m_numBodies)) { - fprintf(stderr, "WARNING: Pixel Buffer Object download failed!n"); - } - - glBindBuffer(GL_ARRAY_BUFFER, 0); - } else { - if (m_bUseSysMem) { - memcpy(m_hPos[m_currentRead], data, m_numBodies * 4 * sizeof(T)); - } else - checkCudaErrors(cudaMemcpy(m_deviceData[0].dPos[m_currentRead], data, - m_numBodies * 4 * sizeof(T), - cudaMemcpyHostToDevice)); - } - } break; - - case BODYSYSTEM_VELOCITY: - if (m_bUseSysMem) { - memcpy(m_hVel, data, m_numBodies * 4 * sizeof(T)); - } else - checkCudaErrors(cudaMemcpy(m_deviceData[0].dVel, data, - m_numBodies * 4 * sizeof(T), - cudaMemcpyHostToDevice)); - - break; - } -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/findgleslib.mk b/Samples/8_Platform_Specific/Tegra/nbody_screen/findgleslib.mk deleted file mode 100644 index 6da2f078..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/findgleslib.mk +++ /dev/null @@ -1,149 +0,0 @@ -################################################################################ -# -# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. -# -# NOTICE TO USER: -# -# This source code is subject to NVIDIA ownership rights under U.S. and -# international Copyright laws. -# -# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE -# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR -# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH -# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. -# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, -# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -# OR PERFORMANCE OF THIS SOURCE CODE. -# -# U.S. Government End Users. This source code is a "commercial item" as -# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of -# "commercial computer software" and "commercial computer software -# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) -# and is provided to the U.S. Government only as a commercial end item. -# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through -# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the -# source code with only those rights set forth herein. -# -################################################################################ -# -# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions -# this is supported on Linux -# -################################################################################ - -# Determine OS platform and unix distribution -ifeq ("$(TARGET_OS)","linux") - # first search lsb_release - DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") - ifeq ("$(DISTRO)","") - # second search and parse /etc/issue - DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]") - # ensure data from /etc/issue is valid - ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse)) - DISTRO := - endif - ifeq ("$(DISTRO)","") - # third, we can search in /etc/os-release or /etc/{distro}-release - DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") - endif - endif -endif - -ifeq ("$(TARGET_OS)","linux") - # $(info) >> findgllib.mk -> LINUX path <<<) - # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) - SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) - KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) - ifeq ("$(UBUNTU)","0") - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - GLPATH := /usr/arm-linux-gnueabihf/lib - GLLINK := -L/usr/arm-linux-gnueabihf/lib - ifneq ($(TARGET_FS),) - GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf - GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64) - GLPATH := /usr/aarch64-linux-gnu/lib - GLLINK := -L/usr/aarch64-linux-gnu/lib - ifneq ($(TARGET_FS),) - GLPATH += $(TARGET_FS)/usr/lib - GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu - GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu - endif - else - UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1) - ifneq ("$(UBUNTU_PKG_NAME)","") - GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME) - GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME) - endif - - DFLT_PATH ?= /usr/lib - endif - endif - - ifeq ("$(SUSE)","0") - GLPATH ?= /usr/X11R6/lib64 - GLLINK ?= -L/usr/X11R6/lib64 - DFLT_PATH ?= /usr/lib64 - else - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - - # find libGL, libGLU, libXi, - EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) - GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) - X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null) - - ifeq ("$(EGLLIB)","") - $(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(GLESLIB)","") - $(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(X11LIB)","") - $(info >>> WARNING - libX11.so not found, please install libX11.so <<<) - SAMPLE_ENABLED := 0 - endif - - HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include - ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include - else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux) - HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include - endif - - EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null) - EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null) - GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null) - X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null) - - ifeq ("$(EGLHEADER)","") - $(info >>> WARNING - egl.h not found, please install egl.h <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(EGLEXTHEADER)","") - $(info >>> WARNING - eglext.h not found, please install eglext.h <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(GL31HEADER)","") - $(info >>> WARNING - gl31.h not found, please install gl31.h <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(X11HEADER)","") - $(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<) - SAMPLE_ENABLED := 0 - endif -else -endif - diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/galaxy_20K.bin b/Samples/8_Platform_Specific/Tegra/nbody_screen/galaxy_20K.bin deleted file mode 100644 index 193029b5..00000000 Binary files a/Samples/8_Platform_Specific/Tegra/nbody_screen/galaxy_20K.bin and /dev/null differ diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/nbody_screen.cpp b/Samples/8_Platform_Specific/Tegra/nbody_screen/nbody_screen.cpp deleted file mode 100644 index 0b2ab770..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/nbody_screen.cpp +++ /dev/null @@ -1,1223 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "render_particles.h" - -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#include "bodysystemcuda.h" -#include "bodysystemcpu.h" -#include "cuda_runtime.h" - -int screen; -screen_window_t screen_window; -screen_context_t screen_context; - -EGLDisplay eglDisplay = EGL_NO_DISPLAY; -EGLSurface eglSurface = EGL_NO_SURFACE; -EGLContext eglContext = EGL_NO_CONTEXT; - -// view params -int ox = 0, oy = 0; -int buttonState = 0; -float camera_trans[] = {0, -2, -150}; -float camera_rot[] = {0, 0, 0}; -float camera_trans_lag[] = {0, -2, -150}; -float camera_rot_lag[] = {0, 0, 0}; -const float inertia = 0.1f; - -bool benchmark = false; -bool compareToCPU = false; -bool QATest = false; -int blockSize = 256; -bool useHostMem = false; -bool fp64 = false; -bool useCpu = false; -int numDevsRequested = 1; -bool displayEnabled = true; -unsigned int dispno = 0; -unsigned int window_width = 720; -unsigned int window_height = 480; -bool bPause = false; -bool bFullscreen = false; -bool bDispInteractions = false; -bool bSupportDouble = false; -int flopsPerInteraction = 20; - -char deviceName[100]; - -enum { M_VIEW = 0, M_MOVE }; - -int numBodies = 16384; - -std::string tipsyFile = ""; - -int numIterations = 0; // run until exit - -void computePerfStats(double &interactionsPerSecond, double &gflops, - float milliseconds, int iterations) { - // double precision uses intrinsic operation followed by refinement, - // resulting in higher operation count per interaction. - // (Note Astrophysicists use 38 flops per interaction no matter what, - // based on "historical precedent", but they are using FLOP/s as a - // measure of "science throughput". We are using it as a measure of - // hardware throughput. They should really use interactions/s... - // const int flopsPerInteraction = fp64 ? 30 : 20; - interactionsPerSecond = (float)numBodies * (float)numBodies; - interactionsPerSecond *= 1e-9 * iterations * 1000 / milliseconds; - gflops = interactionsPerSecond * (float)flopsPerInteraction; -} - -//////////////////////////////////////// -// Demo Parameters -//////////////////////////////////////// -struct NBodyParams { - float m_timestep; - float m_clusterScale; - float m_velocityScale; - float m_softening; - float m_damping; - float m_pointSize; - float m_x, m_y, m_z; - - void print() { - printf("{ %f, %f, %f, %f, %f, %f, %f, %f, %f },\n", m_timestep, - m_clusterScale, m_velocityScale, m_softening, m_damping, m_pointSize, - m_x, m_y, m_z); - } -}; - -NBodyParams demoParams[] = { - {0.016f, 1.54f, 8.0f, 0.1f, 1.0f, 1.0f, 0, -2, -100}, - {0.016f, 0.68f, 20.0f, 0.1f, 1.0f, 0.8f, 0, -2, -30}, - {0.0006f, 0.16f, 1000.0f, 1.0f, 1.0f, 0.07f, 0, 0, -1.5f}, - {0.0006f, 0.16f, 1000.0f, 1.0f, 1.0f, 0.07f, 0, 0, -1.5f}, - {0.0019f, 0.32f, 276.0f, 1.0f, 1.0f, 0.07f, 0, 0, -5}, - {0.0016f, 0.32f, 272.0f, 0.145f, 1.0f, 0.08f, 0, 0, -5}, - {0.016000f, 6.040000f, 0.000000f, 1.000000f, 1.000000f, 0.760000f, 0, 0, - -50}, -}; - -int numDemos = sizeof(demoParams) / sizeof(NBodyParams); -bool cycleDemo = true; -int activeDemo = 0; -float demoTime = 10000.0f; // ms -StopWatchInterface *demoTimer = NULL, *timer = NULL; - -// run multiple iterations to compute an average sort time - -NBodyParams activeParams = demoParams[activeDemo]; - -// The UI. -bool bShowSliders = true; - -// fps -static int fpsCount = 0; -static int fpsLimit = 5; -cudaEvent_t startEvent, stopEvent; -cudaEvent_t hostMemSyncEvent; - -template -class NBodyDemo { - public: - static void Create() { m_singleton = new NBodyDemo; } - static void Destroy() { delete m_singleton; } - - static void init(int numBodies, int numDevices, int blockSize, bool usePBO, - bool useHostMem, bool useCpu) { - m_singleton->_init(numBodies, numDevices, blockSize, usePBO, useHostMem, - useCpu); - } - - static void reset(int numBodies, NBodyConfig config) { - m_singleton->_reset(numBodies, config); - } - - static void selectDemo(int index) { m_singleton->_selectDemo(index); } - - static bool compareResults(int numBodies) { - return m_singleton->_compareResults(numBodies); - } - - static void runBenchmark(int iterations) { - m_singleton->_runBenchmark(iterations); - } - - static void updateParams() { - m_singleton->m_nbody->setSoftening(activeParams.m_softening); - m_singleton->m_nbody->setDamping(activeParams.m_damping); - } - - static void updateSimulation() { - m_singleton->m_nbody->update(activeParams.m_timestep); - } - - static void display() { - m_singleton->m_renderer->setSpriteSize(activeParams.m_pointSize); - - if (useHostMem) { - // This event sync is required because we are rendering from the host - // memory that CUDA is writing. If we don't wait until CUDA is done - // updating it, we will render partially updated data, resulting in a - // jerky frame rate. - if (!useCpu) { - cudaEventSynchronize(hostMemSyncEvent); - } - - m_singleton->m_renderer->setPositions( - m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION), - m_singleton->m_nbody->getNumBodies()); - } else { - m_singleton->m_renderer->setPBO( - m_singleton->m_nbody->getCurrentReadBuffer(), - m_singleton->m_nbody->getNumBodies(), (sizeof(T) > 4)); - } - - // display particles - m_singleton->m_renderer->display(); - } - - static void getArrays(T *pos, T *vel) { - T *_pos = m_singleton->m_nbody->getArray(BODYSYSTEM_POSITION); - T *_vel = m_singleton->m_nbody->getArray(BODYSYSTEM_VELOCITY); - memcpy(pos, _pos, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T)); - memcpy(vel, _vel, m_singleton->m_nbody->getNumBodies() * 4 * sizeof(T)); - } - - static void setArrays(const T *pos, const T *vel) { - if (pos != m_singleton->m_hPos) { - memcpy(m_singleton->m_hPos, pos, numBodies * 4 * sizeof(T)); - } - - if (vel != m_singleton->m_hVel) { - memcpy(m_singleton->m_hVel, vel, numBodies * 4 * sizeof(T)); - } - - m_singleton->m_nbody->setArray(BODYSYSTEM_POSITION, m_singleton->m_hPos); - m_singleton->m_nbody->setArray(BODYSYSTEM_VELOCITY, m_singleton->m_hVel); - - if (!benchmark && !useCpu && !compareToCPU) { - m_singleton->_resetRenderer(); - } - } - - private: - static NBodyDemo *m_singleton; - - BodySystem *m_nbody; - BodySystemCUDA *m_nbodyCuda; - BodySystemCPU *m_nbodyCpu; - - ParticleRenderer *m_renderer; - - T *m_hPos; - T *m_hVel; - float *m_hColor; - - private: - NBodyDemo() - : m_nbody(0), - m_nbodyCuda(0), - m_nbodyCpu(0), - m_renderer(0), - m_hPos(0), - m_hVel(0), - m_hColor(0) {} - - ~NBodyDemo() { - if (m_nbodyCpu) { - delete m_nbodyCpu; - } - - if (m_nbodyCuda) { - delete m_nbodyCuda; - } - - if (m_hPos) { - delete[] m_hPos; - } - - if (m_hVel) { - delete[] m_hVel; - } - - if (m_hColor) { - delete[] m_hColor; - } - - sdkDeleteTimer(&demoTimer); - - if (!benchmark && !compareToCPU) delete m_renderer; - } - - void _init(int numBodies, int numDevices, int blockSize, bool bUsePBO, - bool useHostMem, bool useCpu) { - if (useCpu) { - m_nbodyCpu = new BodySystemCPU(numBodies); - m_nbody = m_nbodyCpu; - m_nbodyCuda = 0; - } else { - m_nbodyCuda = new BodySystemCUDA(numBodies, numDevices, blockSize, - bUsePBO, useHostMem); - m_nbody = m_nbodyCuda; - m_nbodyCpu = 0; - } - - // allocate host memory - m_hPos = new T[numBodies * 4]; - m_hVel = new T[numBodies * 4]; - m_hColor = new float[numBodies * 4]; - - m_nbody->setSoftening(activeParams.m_softening); - m_nbody->setDamping(activeParams.m_damping); - - if (useCpu) { - sdkCreateTimer(&timer); - sdkStartTimer(&timer); - } else { - checkCudaErrors(cudaEventCreate(&startEvent)); - checkCudaErrors(cudaEventCreate(&stopEvent)); - checkCudaErrors(cudaEventCreate(&hostMemSyncEvent)); - } - - if (!benchmark && !compareToCPU) { - m_renderer = new ParticleRenderer(window_width, window_height); - _resetRenderer(); - } - - sdkCreateTimer(&demoTimer); - sdkStartTimer(&demoTimer); - } - - void _reset(int numBodies, NBodyConfig config) { - if (tipsyFile == "") { - randomizeBodies(config, m_hPos, m_hVel, m_hColor, - activeParams.m_clusterScale, activeParams.m_velocityScale, - numBodies, true); - setArrays(m_hPos, m_hVel); - } else { - m_nbody->loadTipsyFile(tipsyFile); - ::numBodies = m_nbody->getNumBodies(); - } - } - - void _resetRenderer() { - if (fp64) { - float color[4] = {0.4f, 0.8f, 0.1f, 1.0f}; - m_renderer->setBaseColor(color); - } else { - float color[4] = {1.0f, 0.6f, 0.3f, 1.0f}; - m_renderer->setBaseColor(color); - } - - m_renderer->setColors(m_hColor, m_nbody->getNumBodies()); - m_renderer->setSpriteSize(activeParams.m_pointSize); - m_renderer->setCameraPos(camera_trans); - } - - void _selectDemo(int index) { - assert(index < numDemos); - - activeParams = demoParams[index]; - camera_trans[0] = camera_trans_lag[0] = activeParams.m_x; - camera_trans[1] = camera_trans_lag[1] = activeParams.m_y; - camera_trans[2] = camera_trans_lag[2] = activeParams.m_z; - reset(numBodies, NBODY_CONFIG_SHELL); - sdkResetTimer(&demoTimer); - - m_singleton->m_renderer->setCameraPos(camera_trans); - } - - bool _compareResults(int numBodies) { - assert(m_nbodyCuda); - - bool passed = true; - - m_nbody->update(0.001f); - - { - m_nbodyCpu = new BodySystemCPU(numBodies); - - m_nbodyCpu->setArray(BODYSYSTEM_POSITION, m_hPos); - m_nbodyCpu->setArray(BODYSYSTEM_VELOCITY, m_hVel); - - m_nbodyCpu->update(0.001f); - - T *cudaPos = m_nbodyCuda->getArray(BODYSYSTEM_POSITION); - T *cpuPos = m_nbodyCpu->getArray(BODYSYSTEM_POSITION); - - T tolerance = 0.0005f; - - for (int i = 0; i < numBodies; i++) { - if (fabs(cpuPos[i] - cudaPos[i]) > tolerance) { - passed = false; - printf("Error: (host)%f != (device)%f\n", cpuPos[i], cudaPos[i]); - } - } - } - return passed; - } - - void _runBenchmark(int iterations) { - // once without timing to prime the device - if (!useCpu) { - m_nbody->update(activeParams.m_timestep); - } - - if (useCpu) { - sdkCreateTimer(&timer); - sdkStartTimer(&timer); - } else { - checkCudaErrors(cudaEventRecord(startEvent, 0)); - } - - for (int i = 0; i < iterations; ++i) { - m_nbody->update(activeParams.m_timestep); - } - - float milliseconds = 0; - - if (useCpu) { - sdkStopTimer(&timer); - milliseconds = sdkGetTimerValue(&timer); - sdkStartTimer(&timer); - } else { - checkCudaErrors(cudaEventRecord(stopEvent, 0)); - checkCudaErrors(cudaEventSynchronize(stopEvent)); - checkCudaErrors( - cudaEventElapsedTime(&milliseconds, startEvent, stopEvent)); - } - - double interactionsPerSecond = 0; - double gflops = 0; - computePerfStats(interactionsPerSecond, gflops, milliseconds, iterations); - - printf("%d bodies, total time for %d iterations: %.3f ms\n", numBodies, - iterations, milliseconds); - printf("= %.3f billion interactions per second\n", interactionsPerSecond); - printf("= %.3f %s-precision GFLOP/s at %d flops per interaction\n", gflops, - (sizeof(T) > 4) ? "double" : "single", flopsPerInteraction); - } -}; - -void finalize() { - if (!useCpu) { - checkCudaErrors(cudaEventDestroy(startEvent)); - checkCudaErrors(cudaEventDestroy(stopEvent)); - checkCudaErrors(cudaEventDestroy(hostMemSyncEvent)); - } - - NBodyDemo::Destroy(); - - if (bSupportDouble) NBodyDemo::Destroy(); -} - -template <> -NBodyDemo *NBodyDemo::m_singleton = 0; -template <> -NBodyDemo *NBodyDemo::m_singleton = 0; - -template -void switchDemoPrecision() { - cudaDeviceSynchronize(); - - fp64 = !fp64; - flopsPerInteraction = fp64 ? 30 : 20; - - T_old *oldPos = new T_old[numBodies * 4]; - T_old *oldVel = new T_old[numBodies * 4]; - - NBodyDemo::getArrays(oldPos, oldVel); - - // convert float to double - T_new *newPos = new T_new[numBodies * 4]; - T_new *newVel = new T_new[numBodies * 4]; - - for (int i = 0; i < numBodies * 4; i++) { - newPos[i] = (T_new)oldPos[i]; - newVel[i] = (T_new)oldVel[i]; - } - - NBodyDemo::setArrays(newPos, newVel); - - cudaDeviceSynchronize(); - - delete[] oldPos; - delete[] oldVel; - delete[] newPos; - delete[] newVel; -} - -void initGL(int *argc, char **argv) { - EGLint configAttrs[] = {EGL_RED_SIZE, - 1, - EGL_GREEN_SIZE, - 1, - EGL_BLUE_SIZE, - 1, - EGL_DEPTH_SIZE, - 16, - EGL_SAMPLE_BUFFERS, - 0, - EGL_SAMPLES, - 0, - EGL_RENDERABLE_TYPE, - EGL_OPENGL_ES2_BIT, - EGL_ALPHA_SIZE, - 1, - EGL_NONE}; - - EGLint contextAttrs[] = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE}; - - EGLint windowAttrs[] = {EGL_NONE}; - EGLConfig *configList = NULL; - EGLint configCount; - - screen_context = 0; - - screen_display_t *screenDisplayHandle = NULL; - - if (screen_create_context(&screen_context, 0)) { - printf("Error creating screen context.\n"); - exit(EXIT_FAILURE); - } - - screen = 0; - - eglDisplay = eglGetDisplay(0); - - if (eglDisplay == EGL_NO_DISPLAY) { - printf("EGL failed to obtain display\n"); - exit(EXIT_FAILURE); - } - - if (!eglInitialize(eglDisplay, 0, 0)) { - printf("EGL failed to initialize\n"); - exit(EXIT_FAILURE); - } - - if (!eglChooseConfig(eglDisplay, configAttrs, NULL, 0, &configCount) || - !configCount) { - printf("EGL failed to return matching configs\n"); - exit(EXIT_FAILURE); - } - - configList = (EGLConfig *)malloc(configCount * sizeof(EGLConfig)); - - if (!eglChooseConfig(eglDisplay, configAttrs, configList, configCount, - &configCount) || - !configCount) { - printf("EGL failed to populate config list\n"); - exit(EXIT_FAILURE); - } - - screen_window = 0; - if (screen_create_window(&screen_window, screen_context)) { - printf("Error creating screen window\n"); - exit(EXIT_FAILURE); - } - - // query the total no of display available from QNX CAR2 screen - int displayCount = 0; - if (screen_get_context_property_iv( - screen_context, SCREEN_PROPERTY_DISPLAY_COUNT, &displayCount)) { - printf("Error getting context property\n"); - exit(EXIT_FAILURE); - } - - screenDisplayHandle = - (screen_display_t *)malloc(displayCount * sizeof(screen_display_t)); - if (!screenDisplayHandle) { - printf("Error allocating screen display handle\n"); - exit(EXIT_FAILURE); - } - - // query the display handle from QNX CAR2 screen - if (screen_get_context_property_pv(screen_context, SCREEN_PROPERTY_DISPLAYS, - (void **)screenDisplayHandle)) { - printf("Error getting display handle\n"); - exit(EXIT_FAILURE); - } - - int dispno_it; - for (dispno_it = 0; dispno_it < displayCount; dispno_it++) { - int active = 0; - // Query the connected status from QNX CAR2 screen - screen_get_display_property_iv(screenDisplayHandle[dispno_it], - SCREEN_PROPERTY_ATTACHED, &active); - if (active) { - if (dispno == dispno_it) { - // Map the window buffer to user requested display port - screen_set_window_property_pv(screen_window, SCREEN_PROPERTY_DISPLAY, - (void **)&screenDisplayHandle[dispno]); - break; - } - } - } - - if (dispno_it == displayCount) { - printf("Failed to set the requested display\n"); - exit(EXIT_FAILURE); - } - - free(screenDisplayHandle); - - int format = SCREEN_FORMAT_RGBA8888; - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_FORMAT, - &format)) { - printf("Error setting SCREEN_PROPERTY_FORMAT\n"); - exit(EXIT_FAILURE); - } - - int usage = (1 << 11); - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_USAGE, - &usage)) { - printf("Error setting SCREEN_PROPERTY_USAGE\n"); - exit(EXIT_FAILURE); - } - - EGLint interval = 1; - if (screen_set_window_property_iv(screen_window, - SCREEN_PROPERTY_SWAP_INTERVAL, &interval)) { - printf("Error setting SCREEN_PROPERTY_SWAP_INTERVAL\n"); - exit(EXIT_FAILURE); - } - - if (bFullscreen) { - // QNX screen will use the full screen resolution by default - int windowSize[2]; - if (screen_get_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE, - windowSize)) { - printf("Error getting default SCREEN_PROPERTY_SIZE\n"); - exit(EXIT_FAILURE); - } - } else { - int windowSize[2]; - windowSize[0] = window_width; - windowSize[1] = window_height; - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE, - windowSize)) { - printf("Error setting SCREEN_PROPERTY_SIZE\n"); - exit(EXIT_FAILURE); - } - } - - int windowOffset[2]; - windowOffset[0] = 0; - windowOffset[1] = 0; - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_POSITION, - windowOffset)) { - printf("Error setting SCREEN_PROPERTY_POSITION\n"); - exit(EXIT_FAILURE); - } - - if (screen_create_window_buffers(screen_window, 2)) { - printf("Error creating two window buffers.\n"); - exit(EXIT_FAILURE); - } - - eglSurface = - eglCreateWindowSurface(eglDisplay, configList[0], - (EGLNativeWindowType)screen_window, windowAttrs); - if (!eglSurface) { - printf("EGL couldn't create window\n"); - exit(EXIT_FAILURE); - } - - eglBindAPI(EGL_OPENGL_ES_API); - - eglContext = eglCreateContext(eglDisplay, configList[0], NULL, contextAttrs); - if (!eglContext) { - printf("EGL couldn't create context\n"); - exit(EXIT_FAILURE); - } - - if (!eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) { - printf("EGL couldn't make context/surface current\n"); - exit(EXIT_FAILURE); - } - - EGLint contextRendererType; - eglQueryContext(eglDisplay, eglContext, EGL_CONTEXT_CLIENT_TYPE, - &contextRendererType); - - switch (contextRendererType) { - case EGL_OPENGL_ES_API: - printf("Using OpenGL ES API\n"); - break; - case EGL_OPENGL_API: - printf("Using OpenGL API - this is unsupported\n"); - exit(EXIT_FAILURE); - case EGL_OPENVG_API: - printf("Using OpenVG API - this is unsupported\n"); - exit(EXIT_FAILURE); - default: - printf("Unknown context type\n"); - exit(EXIT_FAILURE); - } -} - -void selectDemo(int activeDemo) { - if (fp64) { - NBodyDemo::selectDemo(activeDemo); - } else { - NBodyDemo::selectDemo(activeDemo); - } -} - -void updateSimulation() { - if (fp64) { - NBodyDemo::updateSimulation(); - } else { - NBodyDemo::updateSimulation(); - } -} - -void displayNBodySystem() { - if (fp64) { - NBodyDemo::display(); - } else { - NBodyDemo::display(); - } -} - -void display() { - static double gflops = 0; - static double ifps = 0; - static double interactionsPerSecond = 0; - - // update the simulation - if (!bPause) { - if (cycleDemo && (sdkGetTimerValue(&demoTimer) > demoTime)) { - activeDemo = (activeDemo + 1) % numDemos; - selectDemo(activeDemo); - } - - updateSimulation(); - - if (!useCpu) { - cudaEventRecord(hostMemSyncEvent, - 0); // insert an event to wait on before rendering - } - } - - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - - if (displayEnabled) { - // view transform - for (int c = 0; c < 3; ++c) { - camera_trans_lag[c] += (camera_trans[c] - camera_trans_lag[c]) * inertia; - camera_rot_lag[c] += (camera_rot[c] - camera_rot_lag[c]) * inertia; - } - - displayNBodySystem(); - } - - fpsCount++; - - // this displays the frame rate updated every second (independent of frame - // rate) - if (fpsCount >= fpsLimit) { - char fps[256]; - - float milliseconds = 1; - - // stop timer - if (useCpu) { - milliseconds = sdkGetTimerValue(&timer); - sdkResetTimer(&timer); - } else { - checkCudaErrors(cudaEventRecord(stopEvent, 0)); - checkCudaErrors(cudaEventSynchronize(stopEvent)); - } - - milliseconds /= (float)fpsCount; - computePerfStats(interactionsPerSecond, gflops, milliseconds, 1); - - ifps = 1.f / (milliseconds / 1000.f); - sprintf(fps, - "CUDA N-Body (%d bodies): " - "%0.1f fps | %0.1f BIPS | %0.1f GFLOP/s | %s", - numBodies, ifps, interactionsPerSecond, gflops, - fp64 ? "double precision" : "single precision"); - - fpsCount = 0; - fpsLimit = (ifps > 1.f) ? (int)ifps : 1; - - if (bPause) { - fpsLimit = 0; - } - - // restart timer - if (!useCpu) { - checkCudaErrors(cudaEventRecord(startEvent, 0)); - } - } -} - -void updateParams() { - if (fp64) { - NBodyDemo::updateParams(); - } else { - NBodyDemo::updateParams(); - } -} - -// commented out to remove unused parameter warnings in Linux -void key(unsigned char key, int /*x*/, int /*y*/) { - switch (key) { - case ' ': - bPause = !bPause; - break; - - case 27: // escape - case 'q': - case 'Q': - finalize(); - - exit(EXIT_SUCCESS); - break; - - case 13: // return - if (bSupportDouble) { - if (fp64) { - switchDemoPrecision(); - } else { - switchDemoPrecision(); - } - - printf("> %s precision floating point simulation\n", - fp64 ? "Double" : "Single"); - } - - break; - - case '`': - bShowSliders = !bShowSliders; - break; - - case 'g': - case 'G': - bDispInteractions = !bDispInteractions; - break; - - case 'c': - case 'C': - cycleDemo = !cycleDemo; - printf("Cycle Demo Parameters: %s\n", cycleDemo ? "ON" : "OFF"); - break; - - case '[': - activeDemo = - (activeDemo == 0) ? numDemos - 1 : (activeDemo - 1) % numDemos; - selectDemo(activeDemo); - break; - - case ']': - activeDemo = (activeDemo + 1) % numDemos; - selectDemo(activeDemo); - break; - - case 'd': - case 'D': - displayEnabled = !displayEnabled; - break; - - case 'o': - case 'O': - activeParams.print(); - break; - - case '1': - if (fp64) { - NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL); - } else { - NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL); - } - - break; - - case '2': - if (fp64) { - NBodyDemo::reset(numBodies, NBODY_CONFIG_RANDOM); - } else { - NBodyDemo::reset(numBodies, NBODY_CONFIG_RANDOM); - } - - break; - - case '3': - if (fp64) { - NBodyDemo::reset(numBodies, NBODY_CONFIG_EXPAND); - } else { - NBodyDemo::reset(numBodies, NBODY_CONFIG_EXPAND); - } - - break; - } -} - -void showHelp() { - printf("\t-fullscreen (run n-body simulation in fullscreen mode)\n"); - printf( - "\t-fp64 (use double precision floating point values for " - "simulation)\n"); - printf("\t-hostmem (stores simulation data in host memory)\n"); - printf("\t-benchmark (run benchmark to measure performance) \n"); - printf( - "\t-numbodies= (number of bodies (>= 1) to run in simulation) \n"); - printf( - "\t-device= (where d=0,1,2.... for the CUDA device to use)\n"); - printf("\t-dispno= (where n represents the display to use)\n"); - printf( - "\t-width= (where w represents the width of the window to " - "open)\n"); - printf( - "\t-width= (where h represents the height of the window to " - "open)\n"); - printf( - "\t-numdevices= (where i=(number of CUDA devices > 0) to use for " - "simulation)\n"); - printf( - "\t-compare (compares simulation results running once on the " - "default GPU and once on the CPU)\n"); - printf("\t-cpu (run n-body simulation on the CPU)\n"); - printf("\t-tipsy= (load a tipsy model file for simulation)\n\n"); -} - -////////////////////////////////////////////////////////////////////////////// -// Program main -////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - bool bTestResults = true; - -#if defined(__linux__) - setenv("DISPLAY", ":0", 0); -#endif - - if (checkCmdLineFlag(argc, (const char **)argv, "help")) { - printf("\n> Command line options\n"); - showHelp(); - return 0; - } - - printf( - "Run \"nbody_screen -benchmark [-numbodies=]\" to measure " - "performance.\n"); - showHelp(); - - bFullscreen = - (checkCmdLineFlag(argc, (const char **)argv, "fullscreen") != 0); - - if (bFullscreen) { - bShowSliders = false; - } - - benchmark = (checkCmdLineFlag(argc, (const char **)argv, "benchmark") != 0); - - compareToCPU = - ((checkCmdLineFlag(argc, (const char **)argv, "compare") != 0) || - (checkCmdLineFlag(argc, (const char **)argv, "qatest") != 0)); - - QATest = (checkCmdLineFlag(argc, (const char **)argv, "qatest") != 0); - useHostMem = (checkCmdLineFlag(argc, (const char **)argv, "hostmem") != 0); - fp64 = (checkCmdLineFlag(argc, (const char **)argv, "fp64") != 0); - - flopsPerInteraction = fp64 ? 30 : 20; - - useCpu = (checkCmdLineFlag(argc, (const char **)argv, "cpu") != 0); - - if (checkCmdLineFlag(argc, (const char **)argv, "numdevices")) { - numDevsRequested = - getCmdLineArgumentInt(argc, (const char **)argv, "numdevices"); - - if (numDevsRequested < 1) { - printf( - "Error: \"number of CUDA devices\" specified %d is invalid. Value " - "should be >= 1\n", - numDevsRequested); - exit(bTestResults ? EXIT_SUCCESS : EXIT_FAILURE); - } else { - printf("number of CUDA devices = %d\n", numDevsRequested); - } - } - - if (checkCmdLineFlag(argc, (const char **)argv, "dispno")) { - dispno = getCmdLineArgumentInt(argc, (const char **)argv, "dispno"); - } - - if (checkCmdLineFlag(argc, (const char **)argv, "width")) { - window_width = getCmdLineArgumentInt(argc, (const char **)argv, "width"); - } - - if (checkCmdLineFlag(argc, (const char **)argv, "height")) { - window_height = getCmdLineArgumentInt(argc, (const char **)argv, "height"); - } - - // for multi-device we currently require using host memory -- the devices - // share data via the host - if (numDevsRequested > 1) { - useHostMem = true; - } - - int numDevsAvailable = 0; - bool customGPU = false; - cudaGetDeviceCount(&numDevsAvailable); - - if (numDevsAvailable < numDevsRequested) { - printf("Error: only %d Devices available, %d requested. Exiting.\n", - numDevsAvailable, numDevsRequested); - exit(EXIT_SUCCESS); - } - - printf("> %s mode\n", bFullscreen ? "Fullscreen" : "Windowed"); - printf("> Simulation data stored in %s memory\n", - useHostMem ? "system" : "video"); - printf("> %s precision floating point simulation\n", - fp64 ? "Double" : "Single"); - printf("> %d Devices used for simulation\n", numDevsRequested); - - int devID; - cudaDeviceProp props; - - if (useCpu) { - useHostMem = true; - compareToCPU = false; - bSupportDouble = true; - -#ifdef OPENMP - printf("> Simulation with CPU using OpenMP\n"); -#else - printf("> Simulation with CPU\n"); -#endif - } - - if (!benchmark && !compareToCPU) { - initGL(&argc, argv); - } - - if (!useCpu) { - if (checkCmdLineFlag(argc, (const char **)argv, "device")) { - customGPU = true; - } - - devID = findCudaDevice(argc, (const char **)argv); - - checkCudaErrors(cudaGetDevice(&devID)); - checkCudaErrors(cudaGetDeviceProperties(&props, devID)); - - bSupportDouble = true; - - // Initialize devices - if (numDevsRequested > 1 && customGPU) { - printf("You can't use --numdevices and --device at the same time.\n"); - exit(EXIT_SUCCESS); - } - - if (customGPU || numDevsRequested == 1) { - cudaDeviceProp props; - checkCudaErrors(cudaGetDeviceProperties(&props, devID)); - printf("> Compute %d.%d CUDA device: [%s]\n", props.major, props.minor, - props.name); - } else { - for (int i = 0; i < numDevsRequested; i++) { - cudaDeviceProp props; - checkCudaErrors(cudaGetDeviceProperties(&props, i)); - - printf("> Compute %d.%d CUDA device: [%s]\n", props.major, props.minor, - props.name); - - if (useHostMem) { - if (!props.canMapHostMemory) { - fprintf(stderr, "Device %d cannot map host memory!\n", devID); - exit(EXIT_SUCCESS); - } - - if (numDevsRequested > 1) { - checkCudaErrors(cudaSetDevice(i)); - } - - checkCudaErrors(cudaSetDeviceFlags(cudaDeviceMapHost)); - } - } - - // CC 1.2 and earlier do not support double precision - if (props.major * 10 + props.minor <= 12) { - bSupportDouble = false; - } - } - - // if(numDevsRequested > 1) - // checkCudaErrors(cudaSetDevice(devID)); - - if (fp64 && !bSupportDouble) { - fprintf(stderr, - "One or more of the requested devices does not support double " - "precision floating-point\n"); - exit(EXIT_SUCCESS); - } - } - - numIterations = 0; - blockSize = 0; - - if (checkCmdLineFlag(argc, (const char **)argv, "i")) { - numIterations = getCmdLineArgumentInt(argc, (const char **)argv, "i"); - } - - if (checkCmdLineFlag(argc, (const char **)argv, "blockSize")) { - blockSize = getCmdLineArgumentInt(argc, (const char **)argv, "blockSize"); - } - - if (blockSize == 0) // blockSize not set on command line - blockSize = 256; - - // default number of bodies is #SMs * 4 * CTA size - if (useCpu) -#ifdef OPENMP - numBodies = 8192; - -#else - numBodies = 4096; -#endif - else if (numDevsRequested == 1) { - numBodies = compareToCPU ? 4096 : blockSize * 4 * props.multiProcessorCount; - } else { - numBodies = 0; - - for (int i = 0; i < numDevsRequested; i++) { - cudaDeviceProp props; - checkCudaErrors(cudaGetDeviceProperties(&props, i)); - numBodies += - blockSize * (props.major >= 2 ? 4 : 1) * props.multiProcessorCount; - } - } - - if (checkCmdLineFlag(argc, (const char **)argv, "numbodies")) { - numBodies = getCmdLineArgumentInt(argc, (const char **)argv, "numbodies"); - - if (numBodies < 1) { - printf( - "Error: \"number of bodies\" specified %d is invalid. Value should " - "be >= 1\n", - numBodies); - exit(bTestResults ? EXIT_SUCCESS : EXIT_FAILURE); - } else if (numBodies % blockSize) { - int newNumBodies = ((numBodies / blockSize) + 1) * blockSize; - printf( - "Warning: \"number of bodies\" specified %d is not a multiple of " - "%d.\n", - numBodies, blockSize); - printf("Rounding up to the nearest multiple: %d.\n", newNumBodies); - numBodies = newNumBodies; - } else { - printf("number of bodies = %d\n", numBodies); - } - } - - char *fname; - - if (getCmdLineArgumentString(argc, (const char **)argv, "tipsy", &fname)) { - tipsyFile.assign(fname, strlen(fname)); - cycleDemo = false; - bShowSliders = false; - } - - if (numBodies <= 1024) { - activeParams.m_clusterScale = 1.52f; - activeParams.m_velocityScale = 2.f; - } else if (numBodies <= 2048) { - activeParams.m_clusterScale = 1.56f; - activeParams.m_velocityScale = 2.64f; - } else if (numBodies <= 4096) { - activeParams.m_clusterScale = 1.68f; - activeParams.m_velocityScale = 2.98f; - } else if (numBodies <= 8192) { - activeParams.m_clusterScale = 1.98f; - activeParams.m_velocityScale = 2.9f; - } else if (numBodies <= 16384) { - activeParams.m_clusterScale = 1.54f; - activeParams.m_velocityScale = 8.f; - } else if (numBodies <= 32768) { - activeParams.m_clusterScale = 1.44f; - activeParams.m_velocityScale = 11.f; - } - - NBodyDemo::Create(); - - NBodyDemo::init(numBodies, numDevsRequested, blockSize, - !(benchmark || compareToCPU || useHostMem), useHostMem, - useCpu); - NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL); - - if (bSupportDouble) { - NBodyDemo::Create(); - NBodyDemo::init(numBodies, numDevsRequested, blockSize, - !(benchmark || compareToCPU || useHostMem), - useHostMem, useCpu); - NBodyDemo::reset(numBodies, NBODY_CONFIG_SHELL); - } - - if (benchmark) { - if (numIterations <= 0) { - numIterations = 10; - } - - NBodyDemo::runBenchmark(numIterations); - } else if (compareToCPU) { - bTestResults = NBodyDemo::compareResults(numBodies); - } else { - glClear(GL_COLOR_BUFFER_BIT); - - eglSwapBuffers(eglDisplay, eglSurface); - - while (1) { - display(); - usleep(1000); - eglSwapBuffers(eglDisplay, eglSurface); - } - - if (!useCpu) { - checkCudaErrors(cudaEventRecord(startEvent, 0)); - } - } - - finalize(); - exit(bTestResults ? EXIT_SUCCESS : EXIT_FAILURE); -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.cpp b/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.cpp deleted file mode 100644 index 3af7c32a..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.cpp +++ /dev/null @@ -1,374 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "render_particles.h" - -#include -#include - -#include - -#include -#include - -void mat_identity(matrix4 m) { - m[0][1] = m[0][2] = m[0][3] = m[1][0] = m[1][2] = m[1][3] = m[2][0] = - m[2][1] = m[2][3] = m[3][0] = m[3][1] = m[3][2] = 0.0f; - m[0][0] = m[1][1] = m[2][2] = m[3][3] = 1.0f; -} - -void mat_multiply(matrix4 m0, matrix4 m1) { - float m[4]; - - for (int r = 0; r < 4; r++) { - m[0] = m[1] = m[2] = m[3] = 0.0f; - - for (int c = 0; c < 4; c++) { - for (int i = 0; i < 4; i++) { - m[c] += m0[i][r] * m1[c][i]; - } - } - - for (int c = 0; c < 4; c++) { - m0[c][r] = m[c]; - } - } -} - -void mat_translate(matrix4 m, vector3 v) { - matrix4 m2; - m2[0][0] = m2[1][1] = m2[2][2] = m2[3][3] = 1.0f; - m2[0][1] = m2[0][2] = m2[0][3] = m2[1][0] = m2[1][2] = m2[1][3] = m2[2][0] = - m2[2][1] = m2[2][3] = 0.0f; - m2[3][0] = v[0]; - m2[3][1] = v[1]; - m2[3][2] = v[2]; - mat_multiply(m, m2); -} - -void mat_perspective(matrix4 m, GLfloat fovy, GLfloat aspect, GLfloat znear, - GLfloat zfar) { - matrix4 m2; - m2[1][0] = m2[2][0] = m2[3][0] = m2[0][1] = m2[2][1] = m2[3][1] = m2[0][2] = - m2[1][2] = m2[0][3] = m2[1][3] = m2[3][3] = 0.0f; - m2[2][3] = -1.0f; - - float f = 1 / tan((fovy * M_PI / 180) / 2); - m2[0][0] = f / aspect; - m2[1][1] = f; - - m2[2][2] = ((znear + zfar) / (znear - zfar)); - m2[3][2] = ((2 * znear * zfar) / (znear - zfar)); - - mat_multiply(m, m2); -} - -ParticleRenderer::ParticleRenderer(unsigned int windowWidth, - unsigned int windowHeight) - : m_pos(0), - m_numParticles(0), - m_pointSize(1.0f), - m_spriteSize(2.0f), - m_vertexShader(0), - m_vertexShaderPoints(0), - m_fragmentShader(0), - m_programPoints(0), - m_programSprites(0), - m_texture(0), - m_pbo(0), - m_vboColor(0), - m_windowWidth(windowWidth), - m_windowHeight(windowHeight), - m_bFp64Positions(false) { - m_camera[0] = 0; - m_camera[1] = 0; - m_camera[2] = 0; - _initGL(); -} - -ParticleRenderer::~ParticleRenderer() { m_pos = 0; } - -void ParticleRenderer::resetPBO() { glDeleteBuffers(1, (GLuint *)&m_pbo); } - -void ParticleRenderer::setPositions(float *pos, int numParticles) { - m_pos = pos; - m_numParticles = numParticles; - - if (!m_pbo) { - glGenBuffers(1, (GLuint *)&m_pbo); - } - - glBindBuffer(GL_ARRAY_BUFFER, m_pbo); - glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), pos, - GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); - checkGLErrors("Setting particle float position"); -} - -void ParticleRenderer::setPositions(double *pos, int numParticles) { - m_bFp64Positions = true; - m_pos_fp64 = pos; - m_numParticles = numParticles; - - if (!m_pbo) { - glGenBuffers(1, (GLuint *)&m_pbo); - } - - glBindBuffer(GL_ARRAY_BUFFER, m_pbo); - glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(double), pos, - GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); - checkGLErrors("Setting particle double position"); -} - -void ParticleRenderer::setColors(float *color, int numParticles) { - glBindBuffer(GL_ARRAY_BUFFER, m_vboColor); - glBufferData(GL_ARRAY_BUFFER, numParticles * 4 * sizeof(float), color, - GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); -} - -void ParticleRenderer::setBaseColor(float color[4]) { - for (int i = 0; i < 4; i++) m_baseColor[i] = color[i]; -} - -void ParticleRenderer::setPBO(unsigned int pbo, int numParticles, bool fp64) { - m_pbo = pbo; - m_numParticles = numParticles; - - if (fp64) m_bFp64Positions = true; -} - -void ParticleRenderer::display() { - glEnable(GL_BLEND); - glBlendFunc(GL_SRC_ALPHA, GL_ONE); - glDepthMask(GL_FALSE); - - glUseProgram(m_programSprites); - - // Set modelview and projection matrices - GLint h_ModelViewMatrix = glGetUniformLocation(m_programSprites, "modelview"); - GLint h_ProjectionMatrix = - glGetUniformLocation(m_programSprites, "projection"); - matrix4 modelview; - matrix4 projection; - mat_identity(modelview); - mat_identity(projection); - mat_translate(modelview, m_camera); - mat_perspective(projection, 60, (float)m_windowWidth / (float)m_windowHeight, - 0.1, 1000.0); - glUniformMatrix4fv(h_ModelViewMatrix, 1, GL_FALSE, (GLfloat *)modelview); - glUniformMatrix4fv(h_ProjectionMatrix, 1, GL_FALSE, (GLfloat *)projection); - - // Set point size - GLint h_PointSize = glGetUniformLocation(m_programSprites, "size"); - glUniform1f(h_PointSize, m_spriteSize); - - // Set base and secondary colors - GLint h_BaseColor = glGetUniformLocation(m_programSprites, "baseColor"); - GLint h_SecondaryColor = - glGetUniformLocation(m_programSprites, "secondaryColor"); - glUniform4f(h_BaseColor, 1.0, 1.0, 1.0, 1.0); - glUniform4f(h_SecondaryColor, m_baseColor[0], m_baseColor[1], m_baseColor[2], - m_baseColor[3]); - - // Set position coords - GLint h_position = glGetAttribLocation(m_programSprites, "a_position"); - glBindBuffer(GL_ARRAY_BUFFER, m_pbo); - glEnableVertexAttribArray(h_position); - glVertexAttribPointer(h_position, 4, GL_FLOAT, GL_FALSE, 0, 0); - - GLuint texLoc = glGetUniformLocation(m_programSprites, "splatTexture"); - glUniform1i(texLoc, 0); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, m_texture); - - glDrawArrays(GL_POINTS, 0, m_numParticles); - - glDisableVertexAttribArray(h_position); - - glUseProgram(0); - - glDisable(GL_BLEND); - glDepthMask(GL_TRUE); -} - -const char vertexShader[] = { - "attribute vec4 a_position;" - - "uniform mat4 projection;" - "uniform mat4 modelview;" - "uniform float size;" - - "void main()" - "{" - "float pointSize = 500.0 * size;" - "vec4 vert = a_position;" - "vert.w = 1.0;" - "vec3 pos_eye = vec3(modelview * vert);" - "gl_PointSize = max(1.0, pointSize / (1.0 - pos_eye.z));" - "gl_Position = projection * modelview * a_position;" - "}"}; - -const char fragmentShader[] = { - "uniform sampler2D splatTexture;" - "uniform lowp vec4 baseColor;" - "uniform lowp vec4 secondaryColor;" - - "void main()" - "{" - "lowp vec4 textureColor = (0.6 + 0.4 * baseColor) * " - "texture2D(splatTexture, gl_PointCoord);" - "gl_FragColor = textureColor * secondaryColor;" - "}"}; - -// Checks if the shader is compiled. -static int CheckCompiled(GLuint shader) { - GLint isCompiled = 0; - glGetShaderiv(shader, GL_COMPILE_STATUS, &isCompiled); - - if (!isCompiled) { - GLint infoLen = 0; - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &infoLen); - - if (infoLen > 1) { - char *infoLog = (char *)malloc(sizeof(char) * infoLen); - - glGetShaderInfoLog(shader, infoLen, NULL, infoLog); - printf("Error compiling program:\n%s\n", infoLog); - free(infoLog); - } - - return 0; - } - - return 1; -} - -void ParticleRenderer::_initGL() { - m_vertexShader = glCreateShader(GL_VERTEX_SHADER); - m_fragmentShader = glCreateShader(GL_FRAGMENT_SHADER); - - const char *v = vertexShader; - const char *f = fragmentShader; - glShaderSource(m_vertexShader, 1, &v, 0); - glShaderSource(m_fragmentShader, 1, &f, 0); - - checkGLErrors("Shader Source"); - - glCompileShader(m_vertexShader); - glCompileShader(m_fragmentShader); - - if (!CheckCompiled(m_vertexShader) || !CheckCompiled(m_fragmentShader)) { - printf("A shader failed to compile.\n"); - exit(1); - } - - m_programSprites = glCreateProgram(); - - checkGLErrors("create program"); - - glAttachShader(m_programSprites, m_vertexShader); - glAttachShader(m_programSprites, m_fragmentShader); - - checkGLErrors("attaching shaders"); - - glLinkProgram(m_programSprites); - - checkGLErrors("linking program"); - - EGLint linked; - glGetProgramiv(m_programSprites, GL_LINK_STATUS, &linked); - if (!linked) { - printf("A shader failed to link.\n"); - exit(1); - } - - _createTexture(32); - - glGenBuffers(1, (GLuint *)&m_vboColor); - glBindBuffer(GL_ARRAY_BUFFER, m_vboColor); - glBufferData(GL_ARRAY_BUFFER, m_numParticles * 4 * sizeof(float), 0, - GL_STATIC_DRAW); - glBindBuffer(GL_ARRAY_BUFFER, 0); -} - -//------------------------------------------------------------------------------ -// Function : EvalHermite -// Description : -//------------------------------------------------------------------------------ -/** - * EvalHermite(float pA, float pB, float vA, float vB, float u) - * @brief Evaluates Hermite basis functions for the specified coefficients. - */ -inline float evalHermite(float pA, float pB, float vA, float vB, float u) { - float u2 = (u * u), u3 = u2 * u; - float B0 = 2 * u3 - 3 * u2 + 1; - float B1 = -2 * u3 + 3 * u2; - float B2 = u3 - 2 * u2 + u; - float B3 = u3 - u; - return (B0 * pA + B1 * pB + B2 * vA + B3 * vB); -} - -unsigned char *createGaussianMap(int N) { - float *M = new float[2 * N * N]; - unsigned char *B = new unsigned char[4 * N * N]; - float X, Y, Y2, Dist; - float Incr = 2.0f / N; - int i = 0; - int j = 0; - Y = -1.0f; - - // float mmax = 0; - for (int y = 0; y < N; y++, Y += Incr) { - Y2 = Y * Y; - X = -1.0f; - - for (int x = 0; x < N; x++, X += Incr, i += 2, j += 4) { - Dist = (float)sqrtf(X * X + Y2); - - if (Dist > 1) Dist = 1; - - M[i + 1] = M[i] = evalHermite(1.0f, 0, 0, 0, Dist); - B[j + 3] = B[j + 2] = B[j + 1] = B[j] = (unsigned char)(M[i] * 255); - } - } - - delete[] M; - return (B); -} - -void ParticleRenderer::_createTexture(int resolution) { - unsigned char *data = createGaussianMap(resolution); - glGenTextures(1, (GLuint *)&m_texture); - glBindTexture(GL_TEXTURE_2D, m_texture); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, - GL_LINEAR); //_MIPMAP_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, resolution, resolution, 0, GL_RGBA, - GL_UNSIGNED_BYTE, data); -} diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.h deleted file mode 100644 index e028c8e5..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/render_particles.h +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __RENDER_PARTICLES__ -#define __RENDER_PARTICLES__ - -#include -#include -#include - -#include - -typedef float matrix4[4][4]; -typedef float vector3[3]; - -// check for OpenGL errors -inline void checkGLErrors(const char *s) { - EGLenum error; - - while ((error = glGetError()) != GL_NO_ERROR) { - fprintf(stderr, "%s: error - %d\n", s, error); - } -} - -class ParticleRenderer { - public: - ParticleRenderer(unsigned int windowWidth = 720, - unsigned int windowHeight = 480); - ~ParticleRenderer(); - - void setPositions(float *pos, int numParticles); - void setPositions(double *pos, int numParticles); - void setBaseColor(float color[4]); - void setColors(float *color, int numParticles); - void setPBO(unsigned int pbo, int numParticles, bool fp64); - - enum DisplayMode { - PARTICLE_POINTS, - PARTICLE_SPRITES, - PARTICLE_SPRITES_COLOR, - PARTICLE_NUM_MODES - }; - - void display(); - - void setPointSize(float size) { m_pointSize = size; } - void setSpriteSize(float size) { m_spriteSize = size; } - - void setCameraPos(vector3 camera_pos) { - m_camera[0] = camera_pos[0]; - m_camera[1] = camera_pos[1]; - m_camera[2] = camera_pos[2]; - } - - void resetPBO(); - - protected: // methods - void _initGL(); - void _createTexture(int resolution); - - protected: // data - float *m_pos; - double *m_pos_fp64; - int m_numParticles; - - float m_pointSize; - float m_spriteSize; - vector3 m_camera; - - unsigned int m_vertexShader; - unsigned int m_vertexShaderPoints; - unsigned int m_fragmentShader; - unsigned int m_programPoints; - unsigned int m_programSprites; - unsigned int m_texture; - unsigned int m_pbo; - unsigned int m_vboColor; - unsigned int m_windowWidth; - unsigned int m_windowHeight; - - float m_baseColor[4]; - - bool m_bFp64Positions; -}; - -#endif //__ RENDER_PARTICLES__ diff --git a/Samples/8_Platform_Specific/Tegra/nbody_screen/tipsy.h b/Samples/8_Platform_Specific/Tegra/nbody_screen/tipsy.h deleted file mode 100644 index 99692a9c..00000000 --- a/Samples/8_Platform_Specific/Tegra/nbody_screen/tipsy.h +++ /dev/null @@ -1,172 +0,0 @@ -#ifndef __TIPSY_H__ -#define __TIPSY_H__ - -#include - -using namespace std; - -#define MAXDIM 3 - -typedef float Real; - -struct gas_particle -{ - Real mass; - Real pos[MAXDIM]; - Real vel[MAXDIM]; - Real rho; - Real temp; - Real hsmooth; - Real metals ; - Real phi ; -} ; - -//struct gas_particle *gas_particles; - -struct dark_particle -{ - Real mass; - Real pos[MAXDIM]; - Real vel[MAXDIM]; - Real eps; - int phi ; -} ; - -//struct dark_particle *dark_particles; - -struct star_particle -{ - Real mass; - Real pos[MAXDIM]; - Real vel[MAXDIM]; - Real metals ; - Real tform ; - Real eps; - int phi ; -} ; - -//struct star_particle *star_particles; - -struct dump -{ - double time ; - int nbodies ; - int ndim ; - int nsph ; - int ndark ; - int nstar ; -} ; - -typedef struct dump header ; - -template -void read_tipsy_file(vector &bodyPositions, - vector &bodyVelocities, - vector &bodiesIDs, - const std::string &fileName, - int &NTotal, - int &NFirst, - int &NSecond, - int &NThird) -{ - /* - Read in our custom version of the tipsy file format written by - Jeroen Bedorf. Most important change is that we store particle id on the - location where previously the potential was stored. - */ - - char fullFileName[256]; - sprintf(fullFileName, "%s", fileName.c_str()); - - cout << "Trying to read file: " << fullFileName << endl; - - ifstream inputFile(fullFileName, ios::in | ios::binary); - - if (!inputFile.is_open()) - { - cout << "Can't open input file \n"; - exit(EXIT_SUCCESS); - } - - dump h; - inputFile.read((char *)&h, sizeof(h)); - - int idummy; - real4 positions; - real4 velocity; - - - //Read tipsy header - NTotal = h.nbodies; - NFirst = h.ndark; - NSecond = h.nstar; - NThird = h.nsph; - - //Start reading - int particleCount = 0; - - dark_particle d; - star_particle s; - - for (int i=0; i < NTotal; i++) - { - if (i < NFirst) - { - inputFile.read((char *)&d, sizeof(d)); - velocity.w = d.eps; - positions.w = d.mass; - positions.x = d.pos[0]; - positions.y = d.pos[1]; - positions.z = d.pos[2]; - velocity.x = d.vel[0]; - velocity.y = d.vel[1]; - velocity.z = d.vel[2]; - idummy = d.phi; - } - else - { - inputFile.read((char *)&s, sizeof(s)); - velocity.w = s.eps; - positions.w = s.mass; - positions.x = s.pos[0]; - positions.y = s.pos[1]; - positions.z = s.pos[2]; - velocity.x = s.vel[0]; - velocity.y = s.vel[1]; - velocity.z = s.vel[2]; - idummy = s.phi; - } - - bodyPositions.push_back(positions); - bodyVelocities.push_back(velocity); - bodiesIDs.push_back(idummy); - - particleCount++; - }//end for - - // round up to a multiple of 256 bodies since our kernel only supports that... - int newTotal = NTotal; - - if (NTotal % 256) - { - newTotal = ((NTotal / 256) + 1) * 256; - } - - for (int i = NTotal; i < newTotal; i++) - { - positions.w = positions.x = positions.y = positions.z = 0; - velocity.x = velocity.y = velocity.z = 0; - bodyPositions.push_back(positions); - bodyVelocities.push_back(velocity); - bodiesIDs.push_back(i); - NFirst++; - } - - NTotal = newTotal; - - inputFile.close(); - - cerr << "Read " << NTotal << " bodies" << endl; -} - -#endif //__TIPSY_H__ diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/c_cpp_properties.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/c_cpp_properties.json deleted file mode 100644 index f0066b0f..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/c_cpp_properties.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "configurations": [ - { - "name": "Linux", - "includePath": [ - "${workspaceFolder}/**", - "${workspaceFolder}/../../../Common" - ], - "defines": [], - "compilerPath": "/usr/local/cuda/bin/nvcc", - "cStandard": "gnu17", - "cppStandard": "gnu++14", - "intelliSenseMode": "linux-gcc-x64", - "configurationProvider": "ms-vscode.makefile-tools" - } - ], - "version": 4 -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/extensions.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/extensions.json deleted file mode 100644 index c7eb54dc..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/extensions.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "recommendations": [ - "nvidia.nsight-vscode-edition", - "ms-vscode.cpptools", - "ms-vscode.makefile-tools" - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/launch.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/launch.json deleted file mode 100644 index 117dccd9..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/launch.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "configurations": [ - { - "name": "CUDA C++: Launch", - "type": "cuda-gdb", - "request": "launch", - "program": "${workspaceFolder}/simpleGLES_screen" - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/tasks.json b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/tasks.json deleted file mode 100644 index 4509aeb1..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/.vscode/tasks.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "version": "2.0.0", - "tasks": [ - { - "label": "sample", - "type": "shell", - "command": "make dbg=1", - "problemMatcher": ["$nvcc"], - "group": { - "kind": "build", - "isDefault": true - } - } - ] -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/CMakeLists.txt b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/CMakeLists.txt deleted file mode 100644 index 6036abff..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/CMakeLists.txt +++ /dev/null @@ -1,27 +0,0 @@ -cmake_minimum_required(VERSION 3.20) - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake/Modules") - -project(simpleGLES_screen LANGUAGES C CXX CUDA) - -find_package(CUDAToolkit REQUIRED) - -set(CMAKE_POSITION_INDEPENDENT_CODE ON) - -set(CMAKE_CUDA_ARCHITECTURES 50 52 60 61 70 75 80 86 89 90) -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G") # enable cuda-gdb (expensive) -endif() - -# Include directories and libraries -include_directories(../../../Common) - -# Source file -# Add target for simpleGLES_screen -add_executable(simpleGLES_screen simpleGLES_screen.cu) - -target_compile_options(simpleGLES_screen PRIVATE $<$:--extended-lambda>) - -target_compile_features(simpleGLES_screen PRIVATE cxx_std_17 cuda_std_17) - -set_target_properties(simpleGLES_screen PROPERTIES CUDA_SEPARABLE_COMPILATION ON) diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/Makefile b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/Makefile deleted file mode 100644 index f2672bd8..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/Makefile +++ /dev/null @@ -1,399 +0,0 @@ -################################################################################ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -################################################################################ -# -# Makefile project only supported on Mac OS X and Linux Platforms) -# -################################################################################ - -# Location of the CUDA Toolkit -CUDA_PATH ?= /usr/local/cuda - -############################## -# start deprecated interface # -############################## -ifeq ($(x86_64),1) - $(info WARNING - x86_64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=x86_64 instead) - TARGET_ARCH ?= x86_64 -endif -ifeq ($(ARMv7),1) - $(info WARNING - ARMv7 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=armv7l instead) - TARGET_ARCH ?= armv7l -endif -ifeq ($(aarch64),1) - $(info WARNING - aarch64 variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=aarch64 instead) - TARGET_ARCH ?= aarch64 -endif -ifeq ($(ppc64le),1) - $(info WARNING - ppc64le variable has been deprecated) - $(info WARNING - please use TARGET_ARCH=ppc64le instead) - TARGET_ARCH ?= ppc64le -endif -ifneq ($(GCC),) - $(info WARNING - GCC variable has been deprecated) - $(info WARNING - please use HOST_COMPILER=$(GCC) instead) - HOST_COMPILER ?= $(GCC) -endif -ifneq ($(abi),) - $(error ERROR - abi variable has been removed) -endif -############################ -# end deprecated interface # -############################ - -# architecture -HOST_ARCH := $(shell uname -m) -TARGET_ARCH ?= $(HOST_ARCH) -ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le armv7l)) - ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifneq (,$(filter $(TARGET_ARCH),x86_64 aarch64 sbsa ppc64le)) - TARGET_SIZE := 64 - else ifneq (,$(filter $(TARGET_ARCH),armv7l)) - TARGET_SIZE := 32 - endif - else - TARGET_SIZE := $(shell getconf LONG_BIT) - endif -else - $(error ERROR - unsupported value $(TARGET_ARCH) for TARGET_ARCH!) -endif - -# sbsa and aarch64 systems look similar. Need to differentiate them at host level for now. -ifeq ($(HOST_ARCH),aarch64) - ifeq ($(CUDA_PATH)/targets/sbsa-linux,$(shell ls -1d $(CUDA_PATH)/targets/sbsa-linux 2>/dev/null)) - HOST_ARCH := sbsa - TARGET_ARCH := sbsa - endif -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq (,$(filter $(HOST_ARCH)-$(TARGET_ARCH),aarch64-armv7l x86_64-armv7l x86_64-aarch64 x86_64-sbsa x86_64-ppc64le)) - $(error ERROR - cross compiling from $(HOST_ARCH) to $(TARGET_ARCH) is not supported!) - endif -endif - -# When on native aarch64 system with userspace of 32-bit, change TARGET_ARCH to armv7l -ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_SIZE),aarch64-aarch64-32) - TARGET_ARCH = armv7l -endif - -# operating system -HOST_OS := $(shell uname -s 2>/dev/null | tr "[:upper:]" "[:lower:]") -TARGET_OS ?= $(HOST_OS) -ifeq (,$(filter $(TARGET_OS),linux darwin qnx android)) - $(error ERROR - unsupported value $(TARGET_OS) for TARGET_OS!) -endif - -# host compiler -ifdef HOST_COMPILER - CUSTOM_HOST_COMPILER = 1 -endif - -ifeq ($(TARGET_OS),darwin) - ifeq ($(shell expr `xcodebuild -version | grep -i xcode | awk '{print $$2}' | cut -d'.' -f1` \>= 5),1) - HOST_COMPILER ?= clang++ - endif -else ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - ifeq ($(TARGET_OS),linux) - HOST_COMPILER ?= arm-linux-gnueabihf-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/arm-unknown-nto-qnx6.6.0eabi-g++ - else ifeq ($(TARGET_OS),android) - HOST_COMPILER ?= arm-linux-androideabi-g++ - endif - else ifeq ($(TARGET_ARCH),aarch64) - ifeq ($(TARGET_OS), linux) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_OS),qnx) - ifeq ($(QNX_HOST),) - $(error ERROR - QNX_HOST must be passed to the QNX host toolchain) - endif - ifeq ($(QNX_TARGET),) - $(error ERROR - QNX_TARGET must be passed to the QNX target toolchain) - endif - export QNX_HOST - export QNX_TARGET - HOST_COMPILER ?= $(QNX_HOST)/usr/bin/q++ - else ifeq ($(TARGET_OS), android) - HOST_COMPILER ?= aarch64-linux-android-clang++ - endif - else ifeq ($(TARGET_ARCH),sbsa) - HOST_COMPILER ?= aarch64-linux-gnu-g++ - else ifeq ($(TARGET_ARCH),ppc64le) - HOST_COMPILER ?= powerpc64le-linux-gnu-g++ - endif -endif -HOST_COMPILER ?= g++ -NVCC := $(CUDA_PATH)/bin/nvcc -ccbin $(HOST_COMPILER) - -# internal flags -NVCCFLAGS := -m${TARGET_SIZE} -CCFLAGS := -LDFLAGS := - -# build flags - -# Link flag for customized HOST_COMPILER with gcc realpath -GCC_PATH := $(shell which gcc) -ifeq ($(CUSTOM_HOST_COMPILER),1) - ifneq ($(filter /%,$(HOST_COMPILER)),) - ifneq ($(findstring gcc,$(HOST_COMPILER)),) - ifneq ($(GCC_PATH),$(HOST_COMPILER)) - LDFLAGS += -lstdc++ - endif - endif - endif -endif - -ifeq ($(TARGET_OS),darwin) - LDFLAGS += -rpath $(CUDA_PATH)/lib - CCFLAGS += -arch $(HOST_ARCH) -else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - LDFLAGS += --dynamic-linker=/lib/ld-linux-armhf.so.3 - CCFLAGS += -mfloat-abi=hard -else ifeq ($(TARGET_OS),android) - LDFLAGS += -pie - CCFLAGS += -fpie -fpic -fexceptions -endif - -ifneq ($(TARGET_ARCH),$(HOST_ARCH)) - ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - ifneq ($(TARGET_FS),) - GCCVERSIONLTEQ46 := $(shell expr `$(HOST_COMPILER) -dumpversion` \<= 4.6) - ifeq ($(GCCVERSIONLTEQ46),1) - CCFLAGS += --sysroot=$(TARGET_FS) - endif - LDFLAGS += --sysroot=$(TARGET_FS) - LDFLAGS += -rpath-link=$(TARGET_FS)/lib -L$(TARGET_FS)/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/lib/aarch64-linux-gnu -L$(TARGET_FS)/lib/aarch64-linux-gnu - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib -L$(TARGET_FS)/usr/lib - LDFLAGS += -rpath-link=$(TARGET_FS)/usr/lib/aarch64-linux-gnu -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu - LDFLAGS += --unresolved-symbols=ignore-in-shared-libs - CCFLAGS += -isystem=$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include -I$(TARGET_FS)/usr/include/libdrm - CCFLAGS += -isystem=$(TARGET_FS)/usr/include/aarch64-linux-gnu -I$(TARGET_FS)/usr/include/aarch64-linux-gnu - endif - endif - ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - NVCCFLAGS += -D_QNX_SOURCE - NVCCFLAGS += --qpp-config 8.3.0,gcc_ntoaarch64le - CCFLAGS += -DWIN_INTERFACE_CUSTOM -I/usr/include/aarch64-qnx-gnu - LDFLAGS += -lsocket - LDFLAGS += -L/usr/lib/aarch64-qnx-gnu - CCFLAGS += "-Wl\,-rpath-link\,/usr/lib/aarch64-qnx-gnu" - ifdef TARGET_OVERRIDE - LDFLAGS += -lslog2 - endif - - ifneq ($(TARGET_FS),) - LDFLAGS += -L$(TARGET_FS)/usr/lib - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/lib" - LDFLAGS += -L$(TARGET_FS)/usr/libnvidia - CCFLAGS += "-Wl\,-rpath-link\,$(TARGET_FS)/usr/libnvidia" - CCFLAGS += -I$(TARGET_FS)/../include - endif - endif -endif - -ifdef TARGET_OVERRIDE # cuda toolkit targets override - NVCCFLAGS += -target-dir $(TARGET_OVERRIDE) -endif - -# Install directory of different arch -CUDA_INSTALL_TARGET_DIR := -ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-linux) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-gnueabihf/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-linux) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),sbsa-linux) - CUDA_INSTALL_TARGET_DIR = targets/sbsa-linux/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-android) - CUDA_INSTALL_TARGET_DIR = targets/armv7-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-android) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-linux-androideabi/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),armv7l-qnx) - CUDA_INSTALL_TARGET_DIR = targets/ARMv7-linux-QNX/ -else ifeq ($(TARGET_ARCH)-$(TARGET_OS),aarch64-qnx) - CUDA_INSTALL_TARGET_DIR = targets/aarch64-qnx/ -else ifeq ($(TARGET_ARCH),ppc64le) - CUDA_INSTALL_TARGET_DIR = targets/ppc64le-linux/ -endif - -# Debug build flags -ifeq ($(dbg),1) - NVCCFLAGS += -g -G - BUILD_TYPE := debug -else - BUILD_TYPE := release -endif - -ALL_CCFLAGS := -ALL_CCFLAGS += $(NVCCFLAGS) -ALL_CCFLAGS += $(EXTRA_NVCCFLAGS) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(CCFLAGS)) -ALL_CCFLAGS += $(addprefix -Xcompiler ,$(EXTRA_CCFLAGS)) - -SAMPLE_ENABLED := 1 - -# This sample is not supported on Linux x86_64 -ifeq ($(TARGET_OS),linux) - ifeq ($(TARGET_ARCH),x86_64) - $(info >>> WARNING - simpleGLES_screen is not supported on Linux x86_64 - waiving sample <<<) - SAMPLE_ENABLED := 0 - endif -endif - -# This sample is not supported on Mac OSX -ifeq ($(TARGET_OS),darwin) - $(info >>> WARNING - simpleGLES_screen is not supported on Mac OSX - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on ARMv7 -ifeq ($(TARGET_ARCH),armv7l) - $(info >>> WARNING - simpleGLES_screen is not supported on ARMv7 - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -# This sample is not supported on aarch64 -ifeq ($(TARGET_ARCH),aarch64) - ifneq ($(TARGET_OS),qnx) - $(info >>> WARNING - simpleGLES_screen is not supported on aarch64-$(TARGET_OS) - waiving sample <<<) - SAMPLE_ENABLED := 0 - endif -endif -# This sample is not supported on sbsa -ifeq ($(TARGET_ARCH),sbsa) - $(info >>> WARNING - simpleGLES_screen is not supported on sbsa - waiving sample <<<) - SAMPLE_ENABLED := 0 -endif - -ALL_LDFLAGS := -ALL_LDFLAGS += $(ALL_CCFLAGS) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(LDFLAGS)) -ALL_LDFLAGS += $(addprefix -Xlinker ,$(EXTRA_LDFLAGS)) - -# Common includes and paths for CUDA -INCLUDES := -I../../../Common -LIBRARIES := - -################################################################################ - -# Makefile include to help find GLES Libraries -include ./findgleslib.mk - -# OpenGLES specific libraries -ifneq ($(TARGET_OS),darwin) - LIBRARIES += $(GLESLINK) -lGLESv2 -lEGL -lscreen -endif - -# Gencode arguments -ifeq ($(TARGET_ARCH),$(filter $(TARGET_ARCH),armv7l aarch64 sbsa)) -SMS ?= 53 61 70 72 75 80 86 87 90 -else -SMS ?= 50 52 60 61 70 75 80 86 89 90 -endif - -ifeq ($(SMS),) -$(info >>> WARNING - no SM architectures have been specified - waiving sample <<<) -SAMPLE_ENABLED := 0 -endif - -ifeq ($(GENCODE_FLAGS),) -# Generate SASS code for each SM architecture listed in $(SMS) -$(foreach sm,$(SMS),$(eval GENCODE_FLAGS += -gencode arch=compute_$(sm),code=sm_$(sm))) - -# Generate PTX code from the highest SM architecture in $(SMS) to guarantee forward-compatibility -HIGHEST_SM := $(lastword $(sort $(SMS))) -ifneq ($(HIGHEST_SM),) -GENCODE_FLAGS += -gencode arch=compute_$(HIGHEST_SM),code=compute_$(HIGHEST_SM) -endif -endif - -ALL_CCFLAGS += -DUSE_CUDAINTEROP -DGRAPHICS_SETUP_EGL -DUSE_GLES -DWIN_INTERFACE_CUSTOM --threads 0 --std=c++11 - -ifeq ($(SAMPLE_ENABLED),0) -EXEC ?= @echo "[@]" -endif - -################################################################################ - -# Target rules -all: build - -build: simpleGLES_screen - -check.deps: -ifeq ($(SAMPLE_ENABLED),0) - @echo "Sample will be waived due to the above missing dependencies" -else - @echo "Sample is ready - all dependencies have been met" -endif - -simpleGLES_screen.o:simpleGLES_screen.cu - $(EXEC) $(NVCC) $(INCLUDES) $(ALL_CCFLAGS) $(GENCODE_FLAGS) -o $@ -c $< - -simpleGLES_screen: simpleGLES_screen.o - $(EXEC) $(NVCC) $(ALL_LDFLAGS) $(GENCODE_FLAGS) -o $@ $+ $(LIBRARIES) - $(EXEC) mkdir -p ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - $(EXEC) cp $@ ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE) - -run: build - $(EXEC) ./simpleGLES_screen - -testrun: build - $(EXEC) ./simpleGLES_screen -file=ref_simpleGLES_screen.bin - -clean: - rm -f simpleGLES_screen simpleGLES_screen.o - rm -rf ../../../bin/$(TARGET_ARCH)/$(TARGET_OS)/$(BUILD_TYPE)/simpleGLES_screen - -clobber: clean diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/NsightEclipse.xml b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/NsightEclipse.xml deleted file mode 100644 index 325ca176..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/NsightEclipse.xml +++ /dev/null @@ -1,83 +0,0 @@ - - - - simpleGLES_screen - - -DUSE_CUDAINTEROP - -DGRAPHICS_SETUP_EGL - -DUSE_GLES - -DWIN_INTERFACE_CUSTOM - - - cudaGraphicsUnmapResources - cudaMemcpy - cudaFree - cudaGraphicsResourceGetMappedPointer - cudaGraphicsMapResources - cudaDeviceSynchronize - cudaGraphicsUnregisterResource - cudaMalloc - cudaGraphicsGLRegisterBuffer - - - whole - - data\ref_simpleGLES_screen.bin - - - ./ - ../ - ../../../Common - - - Graphics Interop - Vertex Buffers - 3D Graphics - - - OpenGL ES - - - - - - true - simpleGLES_screen.cu - - -file=ref_simpleGLES_screen.bin - - - screen - GLES - - - 1:CUDA Basic Topics - 2:Graphics Interop - - sm50 - sm52 - sm53 - sm60 - sm61 - sm70 - sm72 - sm75 - sm80 - sm86 - sm87 - sm89 - sm90 - - graphics_interface.c - - - - qnx - - - - all - - Simple OpenGLES on Screen - exe - diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/README.md b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/README.md deleted file mode 100644 index b1f72fa1..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/README.md +++ /dev/null @@ -1,37 +0,0 @@ -# simpleGLES_screen - Simple OpenGLES on Screen - -## Description - -Demonstrates data exchange between CUDA and OpenGL ES (aka Graphics interop). The program modifies vertex positions with CUDA and uses OpenGL ES to render the geometry. - -## Key Concepts - -Graphics Interop, Vertex Buffers, 3D Graphics - -## Supported SM Architectures - -[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) - -## Supported OSes - -QNX - -## Supported CPU Architecture - -aarch64 - -## CUDA APIs involved - -### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsUnmapResources, cudaMemcpy, cudaFree, cudaGraphicsResourceGetMappedPointer, cudaGraphicsMapResources, cudaDeviceSynchronize, cudaGraphicsUnregisterResource, cudaMalloc, cudaGraphicsGLRegisterBuffer - -## Dependencies needed to build/run -[screen](../../../README.md#screen), [GLES](../../../README.md#gles) - -## Prerequisites - -Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. -Make sure the dependencies mentioned in [Dependencies]() section above are installed. - -## References (for more details) - diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/data/ref_simpleGLES_screen.bin b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/data/ref_simpleGLES_screen.bin deleted file mode 100644 index 3a307293..00000000 Binary files a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/data/ref_simpleGLES_screen.bin and /dev/null differ diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/findgleslib.mk b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/findgleslib.mk deleted file mode 100644 index 6da2f078..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/findgleslib.mk +++ /dev/null @@ -1,149 +0,0 @@ -################################################################################ -# -# Copyright 1993-2013 NVIDIA Corporation. All rights reserved. -# -# NOTICE TO USER: -# -# This source code is subject to NVIDIA ownership rights under U.S. and -# international Copyright laws. -# -# NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE -# CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR -# IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH -# REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. -# IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, -# OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -# OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -# OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -# OR PERFORMANCE OF THIS SOURCE CODE. -# -# U.S. Government End Users. This source code is a "commercial item" as -# that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of -# "commercial computer software" and "commercial computer software -# documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) -# and is provided to the U.S. Government only as a commercial end item. -# Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through -# 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the -# source code with only those rights set forth herein. -# -################################################################################ -# -# findgleslib.mk is used to find the necessary GLES Libraries for specific distributions -# this is supported on Linux -# -################################################################################ - -# Determine OS platform and unix distribution -ifeq ("$(TARGET_OS)","linux") - # first search lsb_release - DISTRO := $(shell lsb_release -i -s 2>/dev/null | tr "[:upper:]" "[:lower:]") - ifeq ("$(DISTRO)","") - # second search and parse /etc/issue - DISTRO := $(shell awk '{print $$1}' /etc/issue | tr -d "[:space:]" | sed -e "/^$$/d" | tr "[:upper:]" "[:lower:]") - # ensure data from /etc/issue is valid - ifneq (,$(filter-out $(DISTRO),ubuntu fedora red rhel centos suse)) - DISTRO := - endif - ifeq ("$(DISTRO)","") - # third, we can search in /etc/os-release or /etc/{distro}-release - DISTRO := $(shell awk '/ID/' /etc/*-release | sed 's/ID=//' | grep -v "VERSION" | grep -v "ID" | grep -v "DISTRIB") - endif - endif -endif - -ifeq ("$(TARGET_OS)","linux") - # $(info) >> findgllib.mk -> LINUX path <<<) - # Each set of Linux Distros have different paths for where to find their OpenGL libraries reside - UBUNTU = $(shell echo $(DISTRO) | grep -i ubuntu >/dev/null 2>&1; echo $$?) - FEDORA = $(shell echo $(DISTRO) | grep -i fedora >/dev/null 2>&1; echo $$?) - RHEL = $(shell echo $(DISTRO) | grep -i 'red\|rhel' >/dev/null 2>&1; echo $$?) - CENTOS = $(shell echo $(DISTRO) | grep -i centos >/dev/null 2>&1; echo $$?) - SUSE = $(shell echo $(DISTRO) | grep -i 'suse\|sles' >/dev/null 2>&1; echo $$?) - KYLIN = $(shell echo $(DISTRO) | grep -i kylin >/dev/null 2>&1; echo $$?) - ifeq ("$(UBUNTU)","0") - ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-armv7l) - GLPATH := /usr/arm-linux-gnueabihf/lib - GLLINK := -L/usr/arm-linux-gnueabihf/lib - ifneq ($(TARGET_FS),) - GLPATH += $(TARGET_FS)/usr/lib/arm-linux-gnueabihf - GLLINK += -L$(TARGET_FS)/usr/lib/arm-linux-gnueabihf - endif - else ifeq ($(HOST_ARCH)-$(TARGET_ARCH),x86_64-aarch64) - GLPATH := /usr/aarch64-linux-gnu/lib - GLLINK := -L/usr/aarch64-linux-gnu/lib - ifneq ($(TARGET_FS),) - GLPATH += $(TARGET_FS)/usr/lib - GLPATH += $(TARGET_FS)/usr/lib/aarch64-linux-gnu - GLLINK += -L$(TARGET_FS)/usr/lib/aarch64-linux-gnu - endif - else - UBUNTU_PKG_NAME = $(shell which dpkg >/dev/null 2>&1 && dpkg -l 'nvidia-*' | grep '^ii' | awk '{print $$2}' | head -1) - ifneq ("$(UBUNTU_PKG_NAME)","") - GLPATH ?= /usr/lib/$(UBUNTU_PKG_NAME) - GLLINK ?= -L/usr/lib/$(UBUNTU_PKG_NAME) - endif - - DFLT_PATH ?= /usr/lib - endif - endif - - ifeq ("$(SUSE)","0") - GLPATH ?= /usr/X11R6/lib64 - GLLINK ?= -L/usr/X11R6/lib64 - DFLT_PATH ?= /usr/lib64 - else - GLPATH ?= /usr/lib64/nvidia - GLLINK ?= -L/usr/lib64/nvidia - DFLT_PATH ?= /usr/lib64 - endif - - # find libGL, libGLU, libXi, - EGLLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libEGL.so -print 2>/dev/null) - GLESLIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libGLESv2.so -print 2>/dev/null) - X11LIB := $(shell find -L $(GLPATH) $(DFLT_PATH) -name libX11.so -print 2>/dev/null) - - ifeq ("$(EGLLIB)","") - $(info >>> WARNING - libEGL.so not found, please install libEGL.so <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(GLESLIB)","") - $(info >>> WARNING - libGLES.so not found, please install libGLES.so <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(X11LIB)","") - $(info >>> WARNING - libX11.so not found, please install libX11.so <<<) - SAMPLE_ENABLED := 0 - endif - - HEADER_SEARCH_PATH ?= $(TARGET_FS)/usr/include - ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-armv7l-linux) - HEADER_SEARCH_PATH += /usr/arm-linux-gnueabihf/include - else ifeq ($(HOST_ARCH)-$(TARGET_ARCH)-$(TARGET_OS),x86_64-aarch64-linux) - HEADER_SEARCH_PATH += /usr/aarch64-linux-gnu/include - endif - - EGLHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name egl.h -print 2>/dev/null) - EGLEXTHEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name eglext.h -print 2>/dev/null) - GL31HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name gl31.h -print 2>/dev/null) - X11HEADER := $(shell find -L $(HEADER_SEARCH_PATH) -name Xlib.h -print 2>/dev/null) - - ifeq ("$(EGLHEADER)","") - $(info >>> WARNING - egl.h not found, please install egl.h <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(EGLEXTHEADER)","") - $(info >>> WARNING - eglext.h not found, please install eglext.h <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(GL31HEADER)","") - $(info >>> WARNING - gl31.h not found, please install gl31.h <<<) - SAMPLE_ENABLED := 0 - endif - ifeq ("$(X11HEADER)","") - $(info >>> WARNING - Xlib.h not found, refer to CUDA Samples release notes for how to find and install them. <<<) - SAMPLE_ENABLED := 0 - endif -else -endif - diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/graphics_interface.c b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/graphics_interface.c deleted file mode 100644 index 530375c7..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/graphics_interface.c +++ /dev/null @@ -1,379 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include - -screen_window_t screen_window; -screen_context_t screen_context; -screen_event_t screen_ev; - -EGLDisplay eglDisplay = EGL_NO_DISPLAY; -EGLSurface eglSurface = EGL_NO_SURFACE; -EGLContext eglContext = EGL_NO_CONTEXT; - -void error_exit(const char *format, ...) { - va_list args; - va_start(args, format); - vfprintf(stderr, format, args); - va_end(args); - exit(1); -} - -enum { NvGlDemoKeyCode_Escape = 27 }; - -typedef void (*GlCloseCB)(void); -typedef void (*GlKeyCB)(char key, int state); -GlCloseCB closeCB = NULL; -GlKeyCB keyCB = NULL; - -void CHECK_GLERROR() { - GLenum err = glGetError(); - - if (err != GL_NO_ERROR) { - fprintf(stderr, "[%s line %d] OpenGL Error: 0x%x ", __FILE__, __LINE__, - err); - - switch (err) { - case GL_INVALID_ENUM: - fprintf(stderr, "(GL_INVALID_ENUM)\n"); - break; - case GL_INVALID_VALUE: - fprintf(stderr, "(GL_INVALID_VALUE)\n"); - break; - case GL_INVALID_OPERATION: - fprintf(stderr, "(GL_INVALID_OPERATION)\n"); - break; - case GL_OUT_OF_MEMORY: - fprintf(stderr, "(GL_OUT_OF_MEMORY)\n"); - break; - case GL_INVALID_FRAMEBUFFER_OPERATION: - fprintf(stderr, "(GL_INVALID_FRAMEBUFFER_OPERATION)\n"); - break; - default: - break; - } - - fflush(stderr); - } -} - -static void UpdateEventMask(void) { - static int rc = 1; - - if (rc) { - rc = screen_create_event(&screen_ev); - } -} - -void SetCloseCB(GlCloseCB cb) { - // Call the eglQnxScreenConsumer module if option is enabled - closeCB = cb; - UpdateEventMask(); -} - -void SetKeyCB(GlKeyCB cb) { - keyCB = cb; - UpdateEventMask(); -} - -// Add keys here, that are used in demo apps. -static unsigned char GetKeyPress(int *screenKey) { - unsigned char key = '\0'; - - switch (*screenKey) { - case KEYCODE_ESCAPE: - key = NvGlDemoKeyCode_Escape; - break; - default: - /* For "normal" keys, Screen KEYCODE is just ASCII. */ - if (*screenKey <= 127) { - key = *screenKey; - } - break; - } - - return key; -} - -void CheckEvents(void) { - static int vis = 1, val = 1; - int rc; - - /** - ** We start the loop by processing any events that might be in our - ** queue. The only event that is of interest to us are the resize - ** and close events. The timeout variable is set to 0 (no wait) or - ** forever depending if the window is visible or invisible. - **/ - - while (!screen_get_event(screen_context, screen_ev, vis ? 0ull : ~0ull)) { - // Get QNX CAR 2.1 event property - rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_TYPE, &val); - if (rc || val == SCREEN_EVENT_NONE) { - break; - } - - switch (val) { - case SCREEN_EVENT_CLOSE: - /** - ** All we have to do when we receive the close event is - ** exit the application loop. - **/ - if (closeCB) { - closeCB(); - } - break; - - case SCREEN_EVENT_KEYBOARD: - rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_FLAGS, - &val); - if (rc || val == SCREEN_EVENT_NONE) { - break; - } - if (val & KEY_DOWN) { - rc = screen_get_event_property_iv(screen_ev, SCREEN_PROPERTY_SYM, - &val); - if (rc || val == SCREEN_EVENT_NONE) { - break; - } - unsigned char key; - key = GetKeyPress(&val); - if (key != '\0') { - keyCB(key, 1); - } - } - break; - - default: - break; - } - } -} - -int graphics_setup_window(int xpos, int ypos, int width, int height, - const char *windowname, int reqdispno) { - EGLint configAttrs[] = { - EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, - EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, 8, - EGL_DEPTH_SIZE, 16, EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, - EGL_NONE}; - - EGLint contextAttrs[] = {EGL_CONTEXT_CLIENT_VERSION, 3, EGL_NONE}; - - EGLint windowAttrs[] = {EGL_NONE}; - EGLConfig *configList = NULL; - EGLint configCount; - - int displayCount = 0; - int dispno; - - screen_context = 0; - - screen_display_t *screenDisplayHandle = NULL; - - if (screen_create_context(&screen_context, 0)) { - error_exit("Error creating screen context.\n"); - } - - eglDisplay = eglGetDisplay(0); - - if (eglDisplay == EGL_NO_DISPLAY) { - error_exit("EGL failed to obtain display\n"); - } - - if (!eglInitialize(eglDisplay, 0, 0)) { - error_exit("EGL failed to initialize\n"); - } - - if (!eglChooseConfig(eglDisplay, configAttrs, NULL, 0, &configCount) || - !configCount) { - error_exit("EGL failed to return any matching configurations\n"); - } - - configList = (EGLConfig *)malloc(configCount * sizeof(EGLConfig)); - - if (!eglChooseConfig(eglDisplay, configAttrs, configList, configCount, - &configCount) || - !configCount) { - error_exit("EGL failed to populate configuration list\n"); - } - - screen_window = 0; - if (screen_create_window(&screen_window, screen_context)) { - error_exit("Error creating screen window.\n"); - } - - // query the total no of display avaibale from QNX CAR2 screen - if (screen_get_context_property_iv( - screen_context, SCREEN_PROPERTY_DISPLAY_COUNT, &displayCount)) { - error_exit("Error getting context property\n"); - } - - screenDisplayHandle = - (screen_display_t *)malloc(displayCount * sizeof(screen_display_t)); - if (!screenDisplayHandle) { - error_exit("Error allocating screen memory handle is getting failed\n"); - } - - // query the display handle from QNX CAR2 screen - if (screen_get_context_property_pv(screen_context, SCREEN_PROPERTY_DISPLAYS, - (void **)screenDisplayHandle)) { - error_exit("Error getting display handle\n"); - } - - for (dispno = 0; dispno < displayCount; dispno++) { - int active = 0; - // Query the connected status from QNX CAR2 screen - screen_get_display_property_iv(screenDisplayHandle[dispno], - SCREEN_PROPERTY_ATTACHED, &active); - if (active) { - if (reqdispno == dispno) { - // Map the window buffer to user requested display port - screen_set_window_property_pv(screen_window, SCREEN_PROPERTY_DISPLAY, - (void **)&screenDisplayHandle[reqdispno]); - break; - } - } - } - - if (dispno == displayCount) { - error_exit("Failed to set the requested display\n"); - } - - free(screenDisplayHandle); - - int format = SCREEN_FORMAT_RGBA8888; - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_FORMAT, - &format)) { - error_exit("Error setting SCREEN_PROPERTY_FORMAT\n"); - } - - int usage = SCREEN_USAGE_OPENGL_ES2; - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_USAGE, - &usage)) { - error_exit("Error setting SCREEN_PROPERTY_USAGE\n"); - } - - EGLint interval = 1; - if (screen_set_window_property_iv(screen_window, - SCREEN_PROPERTY_SWAP_INTERVAL, &interval)) { - error_exit("Error setting SCREEN_PROPERTY_SWAP_INTERVAL\n"); - } - - int windowSize[2]; - windowSize[0] = width; - windowSize[1] = height; - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_SIZE, - windowSize)) { - error_exit("Error setting SCREEN_PROPERTY_SIZE\n"); - } - - int windowOffset[2]; - windowOffset[0] = xpos; - windowOffset[1] = ypos; - if (screen_set_window_property_iv(screen_window, SCREEN_PROPERTY_POSITION, - windowOffset)) { - error_exit("Error setting SCREEN_PROPERTY_POSITION\n"); - } - - if (screen_create_window_buffers(screen_window, 2)) { - error_exit("Error creating two window buffers.\n"); - } - - eglSurface = - eglCreateWindowSurface(eglDisplay, configList[0], - (EGLNativeWindowType)screen_window, windowAttrs); - if (!eglSurface) { - error_exit("EGL couldn't create window\n"); - } - - eglBindAPI(EGL_OPENGL_ES_API); - - eglContext = eglCreateContext(eglDisplay, configList[0], NULL, contextAttrs); - if (!eglContext) { - error_exit("EGL couldn't create context\n"); - } - - if (!eglMakeCurrent(eglDisplay, eglSurface, eglSurface, eglContext)) { - error_exit("EGL couldn't make context/surface current\n"); - } - - EGLint Context_RendererType; - eglQueryContext(eglDisplay, eglContext, EGL_CONTEXT_CLIENT_TYPE, - &Context_RendererType); - - switch (Context_RendererType) { - case EGL_OPENGL_API: - printf("Using OpenGL API\n"); - break; - case EGL_OPENGL_ES_API: - printf("Using OpenGL ES API\n"); - break; - case EGL_OPENVG_API: - error_exit("Context Query Returned OpenVG. This is Unsupported\n"); - default: - error_exit("Unknown Context Type. %04X\n", Context_RendererType); - } - - return 1; -} - -void graphics_set_windowtitle(const char *windowname) { - // Do nothing on screen -} - -void graphics_swap_buffers() { eglSwapBuffers(eglDisplay, eglSurface); } - -void graphics_close_window() { - if (eglDisplay != EGL_NO_DISPLAY) { - eglMakeCurrent(eglDisplay, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); - - if (eglContext != EGL_NO_CONTEXT) { - eglDestroyContext(eglDisplay, eglContext); - } - - if (eglSurface != EGL_NO_SURFACE) { - eglDestroySurface(eglDisplay, eglSurface); - } - - eglTerminate(eglDisplay); - } - - if (screen_window) { - screen_destroy_window(screen_window); - screen_window = NULL; - } - if (screen_context) { - screen_destroy_context(screen_context); - } - if (screen_ev) { - screen_destroy_event(screen_ev); - } -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.frag.glsl b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.frag.glsl deleted file mode 100644 index e5864bee..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.frag.glsl +++ /dev/null @@ -1,31 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -void main() -{ - gl_FragColor = vec4(1.0, 0.0, 0.0, 1.0); -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.vert.glsl b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.vert.glsl deleted file mode 100644 index 1de61b17..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/mesh.vert.glsl +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -attribute vec4 position; - -void main() -{ - gl_Position = position; -} diff --git a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/simpleGLES_screen.cu b/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/simpleGLES_screen.cu deleted file mode 100644 index c0dd9053..00000000 --- a/Samples/8_Platform_Specific/Tegra/simpleGLES_screen/simpleGLES_screen.cu +++ /dev/null @@ -1,601 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -/* - This example demonstrates how to use the CUDA C bindings to OpenGL ES to - dynamically modify a vertex buffer using a CUDA C kernel. - - The steps are: - 1. Create an empty vertex buffer object (VBO) - 2. Register the VBO with CUDA C - 3. Map the VBO for writing from CUDA C - 4. Run CUDA C kernel to modify the vertex positions - 5. Unmap the VBO - 6. Render the results using OpenGL ES - - Host code - */ - -#include -#include -#include -#include - -#include -#include -#include - -#include "graphics_interface.c" - -// includes, cuda -#include -#include - -// Utilities and timing functions -#include // includes cuda.h and cuda_runtime_api.h - -// CUDA helper functions -#include // helper functions for CUDA error check - -#include - -#define MAX_EPSILON_ERROR 0.0f -#define THRESHOLD 0.0f -#define REFRESH_DELAY 1 // ms - -#define GUI_IDLE 0x100 -#define GUI_ROTATE 0x101 -#define GUI_TRANSLATE 0x102 - -int gui_mode; - -//////////////////////////////////////////////////////////////////////////////// -// Default configuration -unsigned int window_width = 512; -unsigned int window_height = 512; -unsigned int dispno = 0; - -// constants -const unsigned int mesh_width = 256; -const unsigned int mesh_height = 256; - -// OpenGL ES variables and interop with CUDA C -GLuint mesh_vao, mesh_vbo; -struct cudaGraphicsResource *cuda_vbo_resource; -void *d_vbo_buffer = NULL; - -float g_fAnim = 0.0; - -// UI / mouse controls -int mouse_old_x, mouse_old_y; -int mouse_buttons = 0; -float rotate_x = 0.0, rotate_y = 0.0; -float translate_z = -3.0; - -StopWatchInterface *timer = NULL; - -// Frame statistics -int frame; -int fpsCount = 0; // FPS count for averaging -int fpsLimit = 1; // FPS limit for sampling -int g_Index = 0; -float avgFPS = 0.0f; -unsigned int frameCount = 0; -unsigned int g_TotalErrors = 0; - -// The default number of seconds after which the test will end. -#define TIME_LIMIT 10.0 // 10 secs - -// Flag indicating it is time to shut down -static GLboolean shutdown = GL_FALSE; - -// Callback to close window -static void closeCB_app(void) { shutdown = GL_TRUE; } - -// Callback to handle key presses -static void keyCB_app(char key, int state) { - // Ignoring releases - if (!state) return; - - if ((key == 'q') || (key == 'Q') || (key == NvGlDemoKeyCode_Escape)) - shutdown = GL_TRUE; -} - -// Auto-Verification Code -bool g_bQAReadback = false; - -int *pArgc = NULL; -char **pArgv = NULL; - -#define MAX(a, b) ((a > b) ? a : b) - -//////////////////////////////////////////////////////////////////////////////// -// declaration, forward - -// CUDA functionality -void runCuda(struct cudaGraphicsResource **vbo_resource); -void runAutoTest(int devID, char **argv, char *ref_file); -void checkResultCuda(int argc, char **argv, const GLuint &vbo); - -const char *sSDKsample = "simpleGLES on Screen (VBO)"; - -void computeFPS() { - frameCount++; - fpsCount++; - - if (fpsCount == fpsLimit) { - avgFPS = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f); - fpsCount = 0; - fpsLimit = (int)MAX(avgFPS, 1.f); - - sdkResetTimer(&timer); - } - - char fps[256]; - sprintf(fps, "Cuda/OpenGL ES Interop (VBO): %3.1f fps (Max 1000 fps)", - avgFPS); - graphics_set_windowtitle(fps); -} - -/////////////////////////////////////////////////////////////////////////////// -//! Simple kernel to modify vertex positions in sine wave pattern -//! @param data data in global memory -/////////////////////////////////////////////////////////////////////////////// -__global__ void simple_vbo_kernel(float4 *pos, unsigned int width, - unsigned int height, float time) { - unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; - - // calculate uv coordinates - float u = x / (float)width; - float v = y / (float)height; - u = u * 2.0f - 1.0f; - v = v * 2.0f - 1.0f; - - // calculate simple sine wave pattern - float freq = 4.0f; - float w = sinf(u * freq + time) * cosf(v * freq + time) * 0.5f; - - // write output vertex - pos[y * width + x] = make_float4(u, w, v, 1.0f); -} - -void launch_kernel(float4 *pos, unsigned int mesh_width, - unsigned int mesh_height, float time) { - // execute the kernel - dim3 block(8, 8, 1); - dim3 grid(mesh_width / block.x, mesh_height / block.y, 1); - simple_vbo_kernel<<>>(pos, mesh_width, mesh_height, time); -} - -//////////////////////////////////////////////////////////////////////////////// -//! Run the Cuda part of the computation -//////////////////////////////////////////////////////////////////////////////// -void runCuda(struct cudaGraphicsResource **vbo_resource) { - // map OpenGL buffer object for writing from CUDA - float4 *dptr; - cudaGraphicsMapResources(1, vbo_resource, 0); - size_t num_bytes; - cudaGraphicsResourceGetMappedPointer((void **)&dptr, &num_bytes, - *vbo_resource); - - launch_kernel(dptr, mesh_width, mesh_height, g_fAnim); - - // unmap buffer object - cudaGraphicsUnmapResources(1, vbo_resource, 0); -} - -#ifndef FOPEN -#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode)) -#endif - -void sdkDumpBin2(void *data, unsigned int bytes, const char *filename) { - printf("sdkDumpBin: <%s>\n", filename); - FILE *fp; - FOPEN(fp, filename, "wb"); - fwrite(data, bytes, 1, fp); - fflush(fp); - fclose(fp); -} - -//////////////////////////////////////////////////////////////////////////////// -//! Run the Cuda part of the computation -//////////////////////////////////////////////////////////////////////////////// -void runAutoTest(int devID, char **argv, char *ref_file) { - char *reference_file = NULL; - void *imageData = malloc(mesh_width * mesh_height * sizeof(float)); - - // execute the kernel - launch_kernel((float4 *)d_vbo_buffer, mesh_width, mesh_height, g_fAnim); - - cudaDeviceSynchronize(); - getLastCudaError("launch_kernel failed"); - - cudaMemcpy(imageData, d_vbo_buffer, mesh_width * mesh_height * sizeof(float), - cudaMemcpyDeviceToHost); - - sdkDumpBin2(imageData, mesh_width * mesh_height * sizeof(float), - "simpleGLES_screen.bin"); - reference_file = sdkFindFilePath(ref_file, argv[0]); - - if (reference_file && - !sdkCompareBin2BinFloat("simpleGLES_screen.bin", reference_file, - mesh_width * mesh_height * sizeof(float), - MAX_EPSILON_ERROR, THRESHOLD, pArgv[0])) { - g_TotalErrors++; - } -} - -//////////////////////////////////////////////////////////////////////////////// -//! Display callback -//////////////////////////////////////////////////////////////////////////////// -void display_thisframe(float time_delta) { - sdkStartTimer(&timer); - - // run CUDA kernel to generate vertex positions - runCuda(&cuda_vbo_resource); - - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); - - glDrawArrays(GL_POINTS, 0, mesh_width * mesh_height); - - glFinish(); - - g_fAnim += time_delta; - - sdkStopTimer(&timer); - computeFPS(); -} - -//////////////////////////////////////////////////////////////////////////////// -//! Check if the result is correct or write data to file for external -//! regression testing -//////////////////////////////////////////////////////////////////////////////// -void checkResultCuda(int argc, char **argv, const GLuint &vbo) { - if (!d_vbo_buffer) { - printf("%s: Mapping result buffer from OpenGL ES\n", __FUNCTION__); - - cudaGraphicsUnregisterResource(cuda_vbo_resource); - - // map buffer object - glBindBuffer(GL_ARRAY_BUFFER, vbo); - float *data = (float *)glMapBufferRange( - GL_ARRAY_BUFFER, 0, mesh_width * mesh_height * 4 * sizeof(float), - GL_READ_ONLY); - - // check result - if (checkCmdLineFlag(argc, (const char **)argv, "regression")) { - // write file for regression test - sdkWriteFile("./data/regression.dat", data, - mesh_width * mesh_height * 3, 0.0, false); - } - - // unmap GL buffer object - if (!glUnmapBuffer(GL_ARRAY_BUFFER)) { - fprintf(stderr, "Unmap buffer failed.\n"); - fflush(stderr); - } - - checkCudaErrors(cudaGraphicsGLRegisterBuffer( - &cuda_vbo_resource, vbo, cudaGraphicsMapFlagsWriteDiscard)); - - CHECK_GLERROR(); - } -} - -GLuint mesh_shader = 0; - -void readAndCompileShaderFromGLSLFile(GLuint new_shaderprogram, - const char *filename, GLenum shaderType) { - FILE *file = fopen(filename, "rb"); // open shader text file - if (!file) { - error_exit("Filename %s does not exist\n", filename); - } - - // get the size of the file and read it - fseek(file, 0, SEEK_END); - GLint size = ftell(file); - char *data = (char *)malloc(sizeof(char) * (size + 1)); - memset(data, 0, sizeof(char) * (size + 1)); - fseek(file, 0, SEEK_SET); - size_t res = fread(data, 1, size, file); - fclose(file); - - GLuint shader = glCreateShader(shaderType); - glShaderSource(shader, 1, (const GLchar **)&data, &size); - glCompileShader(shader); - - CHECK_GLERROR(); - GLint compile_success = 0; - glGetShaderiv(shader, GL_COMPILE_STATUS, &compile_success); - CHECK_GLERROR(); - - if (compile_success == GL_FALSE) { - printf("Compilation of %s failed!\n Reason:\n", filename); - - GLint maxLength = 0; - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &maxLength); - - char errorLog[maxLength]; - glGetShaderInfoLog(shader, maxLength, &maxLength, &errorLog[0]); - - printf("%s", errorLog); - - glDeleteShader(shader); - exit(1); - } - - glAttachShader(new_shaderprogram, shader); - glDeleteShader(shader); - - free(data); -} - -GLuint ShaderCreate(const char *vshader_filename, - const char *fshader_filename) { - printf("Loading GLSL shaders %s %s\n", vshader_filename, fshader_filename); - - GLuint new_shaderprogram = glCreateProgram(); - - CHECK_GLERROR(); - if (vshader_filename) { - readAndCompileShaderFromGLSLFile(new_shaderprogram, vshader_filename, - GL_VERTEX_SHADER); - } - - CHECK_GLERROR(); - if (fshader_filename) { - readAndCompileShaderFromGLSLFile(new_shaderprogram, fshader_filename, - GL_FRAGMENT_SHADER); - } - - CHECK_GLERROR(); - - glLinkProgram(new_shaderprogram); - - CHECK_GLERROR(); - GLint link_success; - glGetProgramiv(new_shaderprogram, GL_LINK_STATUS, &link_success); - - if (link_success == GL_FALSE) { - printf("Linking of %s with %s failed!\n Reason:\n", vshader_filename, - fshader_filename); - - GLint maxLength = 0; - glGetShaderiv(new_shaderprogram, GL_INFO_LOG_LENGTH, &maxLength); - - char errorLog[maxLength]; - glGetShaderInfoLog(new_shaderprogram, maxLength, &maxLength, &errorLog[0]); - - printf("%s", errorLog); - - exit(EXIT_FAILURE); - } - - return new_shaderprogram; -} - -//=========================================================================== -// InitGraphicsState() - initialize OpenGL -//=========================================================================== -static void InitGraphicsState(void) { - char *GL_version = (char *)glGetString(GL_VERSION); - char *GL_vendor = (char *)glGetString(GL_VENDOR); - char *GL_renderer = (char *)glGetString(GL_RENDERER); - - printf("Version: %s\n", GL_version); - printf("Vendor: %s\n", GL_vendor); - printf("Renderer: %s\n", GL_renderer); - - // RENDERING SETUP (OpenGL ES or OpenGL Core Profile!) - glGenVertexArrays(1, &mesh_vao); // Features' Vertex Array Object allocation - glBindVertexArray(mesh_vao); // bind VAO - - // initialize buffer object - glGenBuffers(1, &mesh_vbo); - glBindBuffer(GL_ARRAY_BUFFER, mesh_vbo); - - unsigned int size = mesh_width * mesh_height * 4 * sizeof(float); - glBufferData(GL_ARRAY_BUFFER, size, NULL, GL_DYNAMIC_DRAW); - glVertexAttribPointer((GLuint)0, 4, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(0); - - checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_vbo_resource, mesh_vbo, - cudaGraphicsMapFlagsNone)); - - // GLSL stuff - char *vertex_shader_path = sdkFindFilePath("mesh.vert.glsl", pArgv[0]); - char *fragment_shader_path = sdkFindFilePath("mesh.frag.glsl", pArgv[0]); - - if (vertex_shader_path == NULL || fragment_shader_path == NULL) { - printf("Error finding shader file\n"); - exit(EXIT_FAILURE); - } - - mesh_shader = ShaderCreate(vertex_shader_path, fragment_shader_path); - CHECK_GLERROR(); - - free(vertex_shader_path); - free(fragment_shader_path); - - glUseProgram(mesh_shader); -} - -//////////////////////////////////////////////////////////////////////////////// -//! Run a simple test for CUDA -//////////////////////////////////////////////////////////////////////////////// -bool runTest(int argc, char **argv, char *ref_file) { - // command line mode only - if (ref_file != NULL) { - // This will pick the best possible CUDA capable device - // int devID = findCudaDevice(argc, (const char **)argv); -#if defined(__aarch64__) || defined(__arm__) - // find iGPU on the system which is compute capable which will perform - // GLES-CUDA interop - int devID = findIntegratedGPU(); -#else - // use command-line specified CUDA device, otherwise use device with highest - // Gflops/s - int devID = findCudaDevice(argc, (const char **)argv); -#endif - - // create VBO - checkCudaErrors(cudaMalloc((void **)&d_vbo_buffer, - mesh_width * mesh_height * 4 * sizeof(float))); - - // run the cuda part - runAutoTest(devID, argv, ref_file); - - // check result of Cuda step - checkResultCuda(argc, argv, mesh_vbo); - - cudaFree(d_vbo_buffer); - d_vbo_buffer = NULL; - } else { - double endTime = TIME_LIMIT; - - // this would use command-line specified CUDA device, note that CUDA - // defaults to highest Gflops/s device - if (checkCmdLineFlag(argc, (const char **)argv, "device")) { - error_exit("Device setting not yet implemented!\n"); - } - - // display selection - if (checkCmdLineFlag(argc, (const char **)argv, "dispno")) { - dispno = getCmdLineArgumentInt(argc, (const char **)argv, "dispno"); - } - - // Window width - if (checkCmdLineFlag(argc, (const char **)argv, "width")) { - window_width = getCmdLineArgumentInt(argc, (const char **)argv, "width"); - } - - // Window Height - if (checkCmdLineFlag(argc, (const char **)argv, "height")) { - window_height = - getCmdLineArgumentInt(argc, (const char **)argv, "height"); - } - - // Determine how long to run for in secs: default is 10s - if (checkCmdLineFlag(argc, (const char **)argv, "runtime")) { - endTime = getCmdLineArgumentInt(argc, (const char **)argv, "runtime"); - } - - SetCloseCB(closeCB_app); - SetKeyCB(keyCB_app); - - // create QNX screen window and set up associated OpenGL ES context - graphics_setup_window(0, 0, window_width, window_height, sSDKsample, - dispno); - -#if defined(__aarch64__) || defined(__arm__) - // find iGPU on the system which is compute capable which will perform - // GLES-CUDA interop - int devID = findIntegratedGPU(); -#else - // use command-line specified CUDA device, otherwise use device with highest - // Gflops/s - int devID = findCudaDevice(argc, (const char **)argv); -#endif - InitGraphicsState(); // set up GLES stuff - - glClearColor(0, 0.5, 1, 1); // blue-ish background - glClear(GL_COLOR_BUFFER_BIT); - - graphics_swap_buffers(); - - int frame = 0; - - struct timeval begin, end; - gettimeofday(&begin, NULL); - - // Print runtime - if (endTime < 0.0) { - endTime = TIME_LIMIT; - printf(" running forever...\n"); - } else { - printf(" running for %f seconds...\n", endTime); - } - - while (!shutdown) { - frame++; - display_thisframe(0.010); - usleep(1000); - graphics_swap_buffers(); - CheckEvents(); - - gettimeofday(&end, 0); - double elapsed = (end.tv_sec - begin.tv_sec) + - ((end.tv_usec - begin.tv_usec) / 1000000.0); - - // Check whether time limit has been exceeded - if (!shutdown) shutdown = (endTime <= elapsed); - } - - // NOTE: Before destroying OpenGL ES context, must unregister all shared - // resources from CUDA ! - checkCudaErrors(cudaGraphicsUnregisterResource(cuda_vbo_resource)); - - graphics_close_window(); // close window and destroy OpenGL ES context - } - - return true; -} - -//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - char *ref_file = NULL; - - pArgc = &argc; - pArgv = argv; - -#if defined(__linux__) - setenv("DISPLAY", ":0", 0); -#endif - - printf("%s starting...\n", sSDKsample); - - if (argc > 1) { - if (checkCmdLineFlag(argc, (const char **)argv, "file")) { - // In this mode, we run without OpenGL and see if VBO is generated - // correctly - getCmdLineArgumentString(argc, (const char **)argv, "file", - (char **)&ref_file); - } - } - - printf("\n"); - - runTest(argc, argv, ref_file); - - printf("%s completed, returned %s\n", sSDKsample, - (g_TotalErrors == 0) ? "OK" : "ERROR!"); - - exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE); -}