mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-29 11:03:08 +01:00
Compare commits
11 Commits
c46370ca80
...
87adb2c27e
Author | SHA1 | Date | |
---|---|---|---|
![]() |
87adb2c27e | ||
![]() |
3e8f91d1a1 | ||
![]() |
f3b7c41ad6 | ||
![]() |
29fb758e62 | ||
![]() |
3bc08136ff | ||
![]() |
85eefa06c4 | ||
![]() |
c357dd1e6b | ||
![]() |
efb46383e0 | ||
![]() |
8d564d5e3a | ||
![]() |
37c5bcbef4 | ||
![]() |
4b1236548a |
@ -36,6 +36,7 @@
|
|||||||
* `cuDLALayerwiseStatsHybrid`
|
* `cuDLALayerwiseStatsHybrid`
|
||||||
* `cuDLALayerwiseStatsStandalone`
|
* `cuDLALayerwiseStatsStandalone`
|
||||||
* `cuDLAStandaloneMode`
|
* `cuDLAStandaloneMode`
|
||||||
|
* `cudaNvSciBufMultiplanar`
|
||||||
* `cudaNvSciNvMedia`
|
* `cudaNvSciNvMedia`
|
||||||
* `fluidsGLES`
|
* `fluidsGLES`
|
||||||
* `nbody_opengles`
|
* `nbody_opengles`
|
||||||
|
@ -263,27 +263,27 @@ inline __host__ __device__ uint4 make_uint4(int4 a)
|
|||||||
// negate
|
// negate
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
inline __host__ __device__ float2 operator-(float2 &a)
|
inline __host__ __device__ float2 operator-(float2 a)
|
||||||
{
|
{
|
||||||
return make_float2(-a.x, -a.y);
|
return make_float2(-a.x, -a.y);
|
||||||
}
|
}
|
||||||
inline __host__ __device__ int2 operator-(int2 &a)
|
inline __host__ __device__ int2 operator-(int2 a)
|
||||||
{
|
{
|
||||||
return make_int2(-a.x, -a.y);
|
return make_int2(-a.x, -a.y);
|
||||||
}
|
}
|
||||||
inline __host__ __device__ float3 operator-(float3 &a)
|
inline __host__ __device__ float3 operator-(float3 a)
|
||||||
{
|
{
|
||||||
return make_float3(-a.x, -a.y, -a.z);
|
return make_float3(-a.x, -a.y, -a.z);
|
||||||
}
|
}
|
||||||
inline __host__ __device__ int3 operator-(int3 &a)
|
inline __host__ __device__ int3 operator-(int3 a)
|
||||||
{
|
{
|
||||||
return make_int3(-a.x, -a.y, -a.z);
|
return make_int3(-a.x, -a.y, -a.z);
|
||||||
}
|
}
|
||||||
inline __host__ __device__ float4 operator-(float4 &a)
|
inline __host__ __device__ float4 operator-(float4 a)
|
||||||
{
|
{
|
||||||
return make_float4(-a.x, -a.y, -a.z, -a.w);
|
return make_float4(-a.x, -a.y, -a.z, -a.w);
|
||||||
}
|
}
|
||||||
inline __host__ __device__ int4 operator-(int4 &a)
|
inline __host__ __device__ int4 operator-(int4 a)
|
||||||
{
|
{
|
||||||
return make_int4(-a.x, -a.y, -a.z, -a.w);
|
return make_int4(-a.x, -a.y, -a.z, -a.w);
|
||||||
}
|
}
|
||||||
|
@ -203,7 +203,7 @@ Vulkan is a low-overhead, cross-platform 3D graphics and compute API. Vulkan tar
|
|||||||
#### GLFW
|
#### GLFW
|
||||||
GLFW is a lightweight, open-source library designed for managing OpenGL, OpenGL ES, and Vulkan contexts. It simplifies the process of creating and managing windows, handling user input (keyboard, mouse, and joystick), and working with multiple monitors in a cross-platform manner.
|
GLFW is a lightweight, open-source library designed for managing OpenGL, OpenGL ES, and Vulkan contexts. It simplifies the process of creating and managing windows, handling user input (keyboard, mouse, and joystick), and working with multiple monitors in a cross-platform manner.
|
||||||
|
|
||||||
To set up GLFW on a Windows system, Download the pre-built binaries from [GLFW website](https://www.glfw.org/download.html) and extract the zip file into the folder, pass the GLFW include header as `-DGLFW_INCLUDE_DIR` for cmake configuring and follow the Build_instructions.txt in the sample folder to set up the t.
|
To set up GLFW on a Windows system, download the pre-built binaries from the [GLFW website](https://www.glfw.org/download.html) and extract the zip file into a folder of your choice. When configuring with CMake, pass the GLFW include header folder as `-DGLFW_INCLUDE_DIR` and the lib folder as `-DGLFW_LIB_DIR`.
|
||||||
|
|
||||||
#### OpenMP
|
#### OpenMP
|
||||||
|
|
||||||
|
@ -55,6 +55,7 @@ add_subdirectory(simpleTexture3D)
|
|||||||
add_subdirectory(simpleTextureDrv)
|
add_subdirectory(simpleTextureDrv)
|
||||||
add_subdirectory(simpleVoteIntrinsics)
|
add_subdirectory(simpleVoteIntrinsics)
|
||||||
add_subdirectory(simpleZeroCopy)
|
add_subdirectory(simpleZeroCopy)
|
||||||
|
add_subdirectory(template)
|
||||||
add_subdirectory(systemWideAtomics)
|
add_subdirectory(systemWideAtomics)
|
||||||
add_subdirectory(vectorAdd)
|
add_subdirectory(vectorAdd)
|
||||||
add_subdirectory(vectorAddDrv)
|
add_subdirectory(vectorAddDrv)
|
||||||
|
@ -20,7 +20,7 @@ include_directories(../../../Common)
|
|||||||
|
|
||||||
# Source file
|
# Source file
|
||||||
# Add target for template
|
# Add target for template
|
||||||
add_executable(template template.cu)
|
add_executable(template template.cu template_cpu.cpp)
|
||||||
|
|
||||||
target_compile_options(template PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
|
target_compile_options(template PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
|
||||||
|
|
||||||
|
@ -77,7 +77,6 @@ int filter_radius = 14;
|
|||||||
int nthreads = 64;
|
int nthreads = 64;
|
||||||
unsigned int width, height;
|
unsigned int width, height;
|
||||||
unsigned int *h_img = NULL;
|
unsigned int *h_img = NULL;
|
||||||
unsigned int *d_img = NULL;
|
|
||||||
unsigned int *d_temp = NULL;
|
unsigned int *d_temp = NULL;
|
||||||
|
|
||||||
GLuint pbo; // OpenGL pixel buffer object
|
GLuint pbo; // OpenGL pixel buffer object
|
||||||
@ -108,11 +107,11 @@ extern "C" void computeGold(float *id, float *od, int w, int h, int n);
|
|||||||
// These are CUDA functions to handle allocation and launching the kernels
|
// These are CUDA functions to handle allocation and launching the kernels
|
||||||
extern "C" void initTexture(int width, int height, void *pImage, bool useRGBA);
|
extern "C" void initTexture(int width, int height, void *pImage, bool useRGBA);
|
||||||
extern "C" void freeTextures();
|
extern "C" void freeTextures();
|
||||||
extern "C" double boxFilter(float *d_src, float *d_temp, float *d_dest,
|
extern "C" double boxFilter(float *d_temp, float *d_dest,
|
||||||
int width, int height, int radius, int iterations,
|
int width, int height, int radius, int iterations,
|
||||||
int nthreads, StopWatchInterface *timer);
|
int nthreads, StopWatchInterface *timer);
|
||||||
|
|
||||||
extern "C" double boxFilterRGBA(unsigned int *d_src, unsigned int *d_temp,
|
extern "C" double boxFilterRGBA(unsigned int *d_temp,
|
||||||
unsigned int *d_dest, int width, int height,
|
unsigned int *d_dest, int width, int height,
|
||||||
int radius, int iterations, int nthreads,
|
int radius, int iterations, int nthreads,
|
||||||
StopWatchInterface *timer);
|
StopWatchInterface *timer);
|
||||||
@ -165,7 +164,7 @@ void display() {
|
|||||||
size_t num_bytes;
|
size_t num_bytes;
|
||||||
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||||||
(void **)&d_result, &num_bytes, cuda_pbo_resource));
|
(void **)&d_result, &num_bytes, cuda_pbo_resource));
|
||||||
boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius,
|
boxFilterRGBA(d_temp, d_result, width, height, filter_radius,
|
||||||
iterations, nthreads, kernel_timer);
|
iterations, nthreads, kernel_timer);
|
||||||
|
|
||||||
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
|
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
|
||||||
@ -282,11 +281,7 @@ void reshape(int x, int y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void initCuda(bool useRGBA) {
|
void initCuda(bool useRGBA) {
|
||||||
// allocate device memory
|
checkCudaErrors(cudaMalloc((void **)&d_temp, (width * height * sizeof(unsigned int))));
|
||||||
checkCudaErrors(
|
|
||||||
cudaMalloc((void **)&d_img, (width * height * sizeof(unsigned int))));
|
|
||||||
checkCudaErrors(
|
|
||||||
cudaMalloc((void **)&d_temp, (width * height * sizeof(unsigned int))));
|
|
||||||
|
|
||||||
// Refer to boxFilter_kernel.cu for implementation
|
// Refer to boxFilter_kernel.cu for implementation
|
||||||
initTexture(width, height, h_img, useRGBA);
|
initTexture(width, height, h_img, useRGBA);
|
||||||
@ -304,11 +299,6 @@ void cleanup() {
|
|||||||
h_img = NULL;
|
h_img = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (d_img) {
|
|
||||||
cudaFree(d_img);
|
|
||||||
d_img = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (d_temp) {
|
if (d_temp) {
|
||||||
cudaFree(d_temp);
|
cudaFree(d_temp);
|
||||||
d_temp = NULL;
|
d_temp = NULL;
|
||||||
@ -413,7 +403,7 @@ int runBenchmark() {
|
|||||||
cudaMalloc((void **)&d_result, width * height * sizeof(unsigned int)));
|
cudaMalloc((void **)&d_result, width * height * sizeof(unsigned int)));
|
||||||
|
|
||||||
// warm-up
|
// warm-up
|
||||||
boxFilterRGBA(d_img, d_temp, d_temp, width, height, filter_radius, iterations,
|
boxFilterRGBA(d_temp, d_temp, width, height, filter_radius, iterations,
|
||||||
nthreads, kernel_timer);
|
nthreads, kernel_timer);
|
||||||
checkCudaErrors(cudaDeviceSynchronize());
|
checkCudaErrors(cudaDeviceSynchronize());
|
||||||
|
|
||||||
@ -426,7 +416,7 @@ int runBenchmark() {
|
|||||||
|
|
||||||
for (int i = 0; i < iCycles; i++) {
|
for (int i = 0; i < iCycles; i++) {
|
||||||
dProcessingTime +=
|
dProcessingTime +=
|
||||||
boxFilterRGBA(d_img, d_temp, d_img, width, height, filter_radius,
|
boxFilterRGBA(d_temp, d_temp, width, height, filter_radius,
|
||||||
iterations, nthreads, kernel_timer);
|
iterations, nthreads, kernel_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -469,7 +459,7 @@ int runSingleTest(char *ref_file, char *exec_path) {
|
|||||||
{
|
{
|
||||||
printf("%s (radius=%d) (passes=%d) ", sSDKsample, filter_radius,
|
printf("%s (radius=%d) (passes=%d) ", sSDKsample, filter_radius,
|
||||||
iterations);
|
iterations);
|
||||||
boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius,
|
boxFilterRGBA(d_temp, d_result, width, height, filter_radius,
|
||||||
iterations, nthreads, kernel_timer);
|
iterations, nthreads, kernel_timer);
|
||||||
|
|
||||||
// check if kernel execution generated an error
|
// check if kernel execution generated an error
|
||||||
|
@ -399,7 +399,6 @@ extern "C" void freeTextures() {
|
|||||||
Perform 2D box filter on image using CUDA
|
Perform 2D box filter on image using CUDA
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
d_src - pointer to input image in device memory
|
|
||||||
d_temp - pointer to temporary storage in device memory
|
d_temp - pointer to temporary storage in device memory
|
||||||
d_dest - pointer to destination image in device memory
|
d_dest - pointer to destination image in device memory
|
||||||
width - image width
|
width - image width
|
||||||
@ -408,7 +407,7 @@ extern "C" void freeTextures() {
|
|||||||
iterations - number of iterations
|
iterations - number of iterations
|
||||||
|
|
||||||
*/
|
*/
|
||||||
extern "C" double boxFilter(float *d_src, float *d_temp, float *d_dest,
|
extern "C" double boxFilter(float *d_temp, float *d_dest,
|
||||||
int width, int height, int radius, int iterations,
|
int width, int height, int radius, int iterations,
|
||||||
int nthreads, StopWatchInterface *timer) {
|
int nthreads, StopWatchInterface *timer) {
|
||||||
// var for kernel timing
|
// var for kernel timing
|
||||||
@ -447,7 +446,7 @@ extern "C" double boxFilter(float *d_src, float *d_temp, float *d_dest,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RGBA version
|
// RGBA version
|
||||||
extern "C" double boxFilterRGBA(unsigned int *d_src, unsigned int *d_temp,
|
extern "C" double boxFilterRGBA(unsigned int *d_temp,
|
||||||
unsigned int *d_dest, int width, int height,
|
unsigned int *d_dest, int width, int height,
|
||||||
int radius, int iterations, int nthreads,
|
int radius, int iterations, int nthreads,
|
||||||
StopWatchInterface *timer) {
|
StopWatchInterface *timer) {
|
||||||
|
@ -34,13 +34,12 @@
|
|||||||
#define _KERNELS_H_
|
#define _KERNELS_H_
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <thrust/functional.h>
|
|
||||||
|
|
||||||
#include "common.cuh"
|
#include "common.cuh"
|
||||||
|
|
||||||
// Functors used with thrust library.
|
// Functors used with thrust library.
|
||||||
template <typename Input>
|
template <typename Input>
|
||||||
struct IsGreaterEqualThan : public thrust::unary_function<Input, bool>
|
struct IsGreaterEqualThan
|
||||||
{
|
{
|
||||||
__host__ __device__ IsGreaterEqualThan(uint upperBound) :
|
__host__ __device__ IsGreaterEqualThan(uint upperBound) :
|
||||||
upperBound_(upperBound) {}
|
upperBound_(upperBound) {}
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
||||||
This sample implements bitonic sort and odd-even merge sort (also known as Batcher's sort), algorithms belonging to the class of sorting networks. While generally subefficient, for large sequences compared to algorithms with better asymptotic algorithmic complexity (i.e. merge sort or radix sort), this may be the preferred algorithms of choice for sorting batches of short-sized to mid-sized (key, value) array pairs. Refer to an excellent tutorial by H. W. Lang http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/networks/indexen.htm
|
This sample implements bitonic sort and odd-even merge sort (also known as Batcher's sort), algorithms belonging to the class of sorting networks. While generally subefficient, for large sequences compared to algorithms with better asymptotic algorithmic complexity (i.e. merge sort or radix sort), this may be the preferred algorithms of choice for sorting batches of short-sized to mid-sized (key, value) array pairs. Refer to an excellent tutorial by H. W. Lang https://hwlang.de/algorithmen/sortieren/bitonic/bitonicen.htm
|
||||||
|
|
||||||
## Key Concepts
|
## Key Concepts
|
||||||
|
|
||||||
|
@ -493,12 +493,14 @@ static void parentProcess(char *app) {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j = 0; j < nprocesses; j++) {
|
for (int j = 0; j < selectedDevices.size(); j++) {
|
||||||
int canAccessPeerIJ, canAccessPeerJI;
|
int canAccessPeerIJ, canAccessPeerJI;
|
||||||
checkCudaErrors(
|
checkCudaErrors(cuDeviceCanAccessPeer(&canAccessPeerJI,
|
||||||
cuDeviceCanAccessPeer(&canAccessPeerJI, devices[j], devices[i]));
|
devices[selectedDevices[j]],
|
||||||
checkCudaErrors(
|
devices[i]));
|
||||||
cuDeviceCanAccessPeer(&canAccessPeerIJ, devices[i], devices[j]));
|
checkCudaErrors(cuDeviceCanAccessPeer(&canAccessPeerIJ,
|
||||||
|
devices[i],
|
||||||
|
devices[selectedDevices[j]]));
|
||||||
if (!canAccessPeerIJ || !canAccessPeerJI) {
|
if (!canAccessPeerIJ || !canAccessPeerJI) {
|
||||||
allPeers = false;
|
allPeers = false;
|
||||||
break;
|
break;
|
||||||
@ -513,10 +515,10 @@ static void parentProcess(char *app) {
|
|||||||
// setup the peers for the device. For systems that only allow 8
|
// setup the peers for the device. For systems that only allow 8
|
||||||
// peers per GPU at a time, this acts to remove devices from CanAccessPeer
|
// peers per GPU at a time, this acts to remove devices from CanAccessPeer
|
||||||
for (int j = 0; j < nprocesses; j++) {
|
for (int j = 0; j < nprocesses; j++) {
|
||||||
checkCudaErrors(cuCtxSetCurrent(ctxs[i]));
|
checkCudaErrors(cuCtxSetCurrent(ctxs.back()));
|
||||||
checkCudaErrors(cuCtxEnablePeerAccess(ctxs[j], 0));
|
checkCudaErrors(cuCtxEnablePeerAccess(ctxs[j], 0));
|
||||||
checkCudaErrors(cuCtxSetCurrent(ctxs[j]));
|
checkCudaErrors(cuCtxSetCurrent(ctxs[j]));
|
||||||
checkCudaErrors(cuCtxEnablePeerAccess(ctxs[i], 0));
|
checkCudaErrors(cuCtxEnablePeerAccess(ctxs.back(), 0));
|
||||||
}
|
}
|
||||||
selectedDevices.push_back(i);
|
selectedDevices.push_back(i);
|
||||||
nprocesses++;
|
nprocesses++;
|
||||||
|
@ -231,6 +231,10 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (buffer) {
|
||||||
|
checkCudaErrors(cudaFree(buffer));
|
||||||
|
}
|
||||||
|
|
||||||
cusparseDestroy(cusparseHandle);
|
cusparseDestroy(cusparseHandle);
|
||||||
cublasDestroy(cublasHandle);
|
cublasDestroy(cublasHandle);
|
||||||
if (matA) {
|
if (matA) {
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
## Description
|
## Description
|
||||||
|
|
||||||
This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread &amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp;amp; rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04
|
This sample demonstrates CUDA-NvSciBuf/NvSciSync Interop. Two CPU threads import the NvSciBuf and NvSciSync into CUDA to perform two image processing algorithms on a ppm image - image rotation in 1st thread & rgba to grayscale conversion of rotated image in 2nd thread. Currently only supported on Ubuntu 18.04
|
||||||
|
|
||||||
## Key Concepts
|
## Key Concepts
|
||||||
|
|
||||||
|
@ -65,14 +65,14 @@ target_compile_features(Mandelbrot PRIVATE cxx_std_17 cuda_std_17)
|
|||||||
POST_BUILD
|
POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy
|
COMMAND ${CMAKE_COMMAND} -E copy
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/freeglut.dll
|
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/freeglut.dll
|
||||||
${CMAKE_CURRENT_BINARY_DIR}
|
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>
|
||||||
)
|
)
|
||||||
|
|
||||||
add_custom_command(TARGET Mandelbrot
|
add_custom_command(TARGET Mandelbrot
|
||||||
POST_BUILD
|
POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy
|
COMMAND ${CMAKE_COMMAND} -E copy
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/glew64.dll
|
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/glew64.dll
|
||||||
${CMAKE_CURRENT_BINARY_DIR}
|
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -20,16 +20,19 @@ include_directories(../../../Common)
|
|||||||
find_package(Vulkan)
|
find_package(Vulkan)
|
||||||
find_package(OpenGL)
|
find_package(OpenGL)
|
||||||
|
|
||||||
|
|
||||||
# Include the check_include_file macro
|
# Include the check_include_file macro
|
||||||
include(CheckIncludeFile)
|
include(CheckIncludeFile)
|
||||||
|
|
||||||
# Check for the GLFW/glfw3.h header
|
# Check for the GLFW/glfw3.h header
|
||||||
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
||||||
|
|
||||||
# Find GLFW/glfw3.h header for Windows
|
# Find GLFW header and lib for Windows
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
find_file(GLFW3_H "glfw3.h" PATH "$ENV{GLFW_INCLUDES_DIR}/GLFW")
|
find_file(GLFW3_H "GLFW/glfw3.h" PATH "${GLFW_INCLUDE_DIR}")
|
||||||
if(GLFW3_H)
|
find_library(GLFW3_LIB "glfw3" PATH "${GLFW_LIB_DIR}")
|
||||||
|
if(GLFW3_H AND GLFW3_LIB)
|
||||||
|
message(STATUS "Found GLFW/glfw3.h and GLFW library.")
|
||||||
set(HAVE_GLFW3_H 1)
|
set(HAVE_GLFW3_H 1)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
@ -51,21 +54,22 @@ if(${Vulkan_FOUND})
|
|||||||
${Vulkan_INCLUDE_DIRS}
|
${Vulkan_INCLUDE_DIRS}
|
||||||
${CUDAToolkit_INCLUDE_DIRS}
|
${CUDAToolkit_INCLUDE_DIRS}
|
||||||
)
|
)
|
||||||
|
|
||||||
if(WIN32)
|
|
||||||
target_link_libraries(simpleVulkan
|
target_link_libraries(simpleVulkan
|
||||||
${Vulkan_LIBRARIES}
|
${Vulkan_LIBRARIES}
|
||||||
OpenGL::GL
|
OpenGL::GL
|
||||||
glfw3.dll
|
)
|
||||||
|
if(WIN32)
|
||||||
|
target_include_directories(simpleVulkan PUBLIC
|
||||||
|
${GLFW_INCLUDE_DIR}
|
||||||
|
)
|
||||||
|
target_link_libraries(simpleVulkan
|
||||||
|
${GLFW3_LIB}
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(simpleVulkan
|
target_link_libraries(simpleVulkan
|
||||||
${Vulkan_LIBRARIES}
|
|
||||||
OpenGL::GL
|
|
||||||
glfw
|
glfw
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_custom_command(TARGET simpleVulkan POST_BUILD
|
add_custom_command(TARGET simpleVulkan POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/sinewave.frag
|
${CMAKE_CURRENT_SOURCE_DIR}/sinewave.frag
|
||||||
|
@ -26,10 +26,12 @@ include(CheckIncludeFile)
|
|||||||
# Check for the GLFW/glfw3.h header
|
# Check for the GLFW/glfw3.h header
|
||||||
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
||||||
|
|
||||||
# Find GLFW/glfw3.h header for Windows
|
# Find GLFW header and lib for Windows
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
find_file(GLFW3_H "glfw3.h" PATH "$ENV{GLFW_INCLUDES_DIR}/GLFW")
|
find_file(GLFW3_H "GLFW/glfw3.h" PATH "${GLFW_INCLUDE_DIR}")
|
||||||
if(GLFW3_H)
|
find_library(GLFW3_LIB "glfw3" PATH "${GLFW_LIB_DIR}")
|
||||||
|
if(GLFW3_H AND GLFW3_LIB)
|
||||||
|
message(STATUS "Found GLFW/glfw3.h and GLFW library.")
|
||||||
set(HAVE_GLFW3_H 1)
|
set(HAVE_GLFW3_H 1)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
@ -51,23 +53,23 @@ if(${Vulkan_FOUND})
|
|||||||
${Vulkan_INCLUDE_DIRS}
|
${Vulkan_INCLUDE_DIRS}
|
||||||
${CUDAToolkit_INCLUDE_DIRS}
|
${CUDAToolkit_INCLUDE_DIRS}
|
||||||
)
|
)
|
||||||
|
|
||||||
if(WIN32)
|
|
||||||
target_link_libraries(simpleVulkanMMAP
|
target_link_libraries(simpleVulkanMMAP
|
||||||
${Vulkan_LIBRARIES}
|
${Vulkan_LIBRARIES}
|
||||||
OpenGL::GL
|
OpenGL::GL
|
||||||
CUDA::cuda_driver
|
CUDA::cuda_driver
|
||||||
glfw3.dll
|
)
|
||||||
|
if(WIN32)
|
||||||
|
target_include_directories(simpleVulkanMMAP PUBLIC
|
||||||
|
${GLFW_INCLUDE_DIR}
|
||||||
|
)
|
||||||
|
target_link_libraries(simpleVulkanMMAP
|
||||||
|
${GLFW3_LIB}
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(simpleVulkanMMAP
|
target_link_libraries(simpleVulkanMMAP
|
||||||
${Vulkan_LIBRARIES}
|
|
||||||
OpenGL::GL
|
|
||||||
CUDA::cuda_driver
|
|
||||||
glfw
|
glfw
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_custom_command(TARGET simpleVulkanMMAP POST_BUILD
|
add_custom_command(TARGET simpleVulkanMMAP POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/montecarlo.frag
|
${CMAKE_CURRENT_SOURCE_DIR}/montecarlo.frag
|
||||||
|
@ -71,7 +71,7 @@ if(${OpenGL_FOUND})
|
|||||||
POST_BUILD
|
POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy
|
COMMAND ${CMAKE_COMMAND} -E copy
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/glew64.dll
|
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/glew64.dll
|
||||||
${CMAKE_CURRENT_BINARY_DIR}
|
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
@ -26,10 +26,12 @@ include(CheckIncludeFile)
|
|||||||
# Check for the GLFW/glfw3.h header
|
# Check for the GLFW/glfw3.h header
|
||||||
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
||||||
|
|
||||||
# Find GLFW/glfw3.h header for Windows
|
# Find GLFW header and lib for Windows
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
find_file(GLFW3_H "glfw3.h" PATH "$ENV{GLFW_INCLUDES_DIR}/GLFW")
|
find_file(GLFW3_H "GLFW/glfw3.h" PATH "${GLFW_INCLUDE_DIR}")
|
||||||
if(GLFW3_H)
|
find_file(GLFW3_LIB "glfw3" PATH "${GLFW_LIB_DIR}")
|
||||||
|
if(GLFW3_H AND GLFW3_LIB)
|
||||||
|
message(STATUS "Found GLFW/glfw3.h and GLFW library.")
|
||||||
set(HAVE_GLFW3_H 1)
|
set(HAVE_GLFW3_H 1)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
@ -51,21 +53,22 @@ if(${Vulkan_FOUND})
|
|||||||
${Vulkan_INCLUDE_DIRS}
|
${Vulkan_INCLUDE_DIRS}
|
||||||
${CUDAToolkit_INCLUDE_DIRS}
|
${CUDAToolkit_INCLUDE_DIRS}
|
||||||
)
|
)
|
||||||
|
|
||||||
if(WIN32)
|
|
||||||
target_link_libraries(vulkanImageCUDA
|
target_link_libraries(vulkanImageCUDA
|
||||||
${Vulkan_LIBRARIES}
|
${Vulkan_LIBRARIES}
|
||||||
OpenGL::GL
|
OpenGL::GL
|
||||||
glfw3.dll
|
)
|
||||||
|
if(WIN32)
|
||||||
|
target_include_directories(vulkanImageCUDA PUBLIC
|
||||||
|
${GLFW_INCLUDE_DIR}
|
||||||
|
)
|
||||||
|
target_link_libraries(vulkanImageCUDA
|
||||||
|
${GLFW3_LIB}
|
||||||
)
|
)
|
||||||
else()
|
else()
|
||||||
target_link_libraries(vulkanImageCUDA
|
target_link_libraries(vulkanImageCUDA
|
||||||
${Vulkan_LIBRARIES}
|
|
||||||
OpenGL::GL
|
|
||||||
glfw
|
glfw
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_custom_command(TARGET vulkanImageCUDA POST_BUILD
|
add_custom_command(TARGET vulkanImageCUDA POST_BUILD
|
||||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/shader.frag
|
${CMAKE_CURRENT_SOURCE_DIR}/shader.frag
|
||||||
|
@ -53,7 +53,7 @@ const char *sSDKsample = "Transpose";
|
|||||||
// TILE_DIM/BLOCK_ROWS elements. TILE_DIM must be an integral multiple of
|
// TILE_DIM/BLOCK_ROWS elements. TILE_DIM must be an integral multiple of
|
||||||
// BLOCK_ROWS
|
// BLOCK_ROWS
|
||||||
|
|
||||||
#define TILE_DIM 16
|
#define TILE_DIM 32
|
||||||
#define BLOCK_ROWS 16
|
#define BLOCK_ROWS 16
|
||||||
|
|
||||||
// This sample assumes that MATRIX_SIZE_X = MATRIX_SIZE_Y
|
// This sample assumes that MATRIX_SIZE_X = MATRIX_SIZE_Y
|
||||||
|
Loading…
x
Reference in New Issue
Block a user