mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-10 18:22:11 +01:00
Compare commits
13 Commits
f702ec1ed9
...
601384af49
Author | SHA1 | Date | |
---|---|---|---|
![]() |
601384af49 | ||
![]() |
8d564d5e3a | ||
![]() |
37c5bcbef4 | ||
![]() |
940a4c7a91 | ||
![]() |
61bd39800d | ||
![]() |
8a96d2eee7 | ||
![]() |
e762d58260 | ||
![]() |
8fd1701744 | ||
![]() |
94765c1597 | ||
![]() |
c87881f02c | ||
![]() |
6aac4717b8 | ||
![]() |
42ff742bf5 | ||
![]() |
8ccb13c6f0 |
@ -241,7 +241,7 @@ inline int gpuGetMaxGflopsDeviceIdDRV() {
|
||||
}
|
||||
|
||||
unsigned long long compute_perf =
|
||||
(unsigned long long)(multiProcessorCount * sm_per_multiproc *
|
||||
((unsigned long long)multiProcessorCount * sm_per_multiproc *
|
||||
clockRate);
|
||||
|
||||
if (compute_perf > max_compute_perf) {
|
||||
|
@ -203,7 +203,7 @@ Vulkan is a low-overhead, cross-platform 3D graphics and compute API. Vulkan tar
|
||||
#### GLFW
|
||||
GLFW is a lightweight, open-source library designed for managing OpenGL, OpenGL ES, and Vulkan contexts. It simplifies the process of creating and managing windows, handling user input (keyboard, mouse, and joystick), and working with multiple monitors in a cross-platform manner.
|
||||
|
||||
To set up GLFW on a Windows system, Download the pre-built binaries from [GLFW website](https://www.glfw.org/download.html) and extract the zip file into the folder, pass the GLFW include header as `-DGLFW_INCLUDE_DIR` for cmake configuring and follow the Build_instructions.txt in the sample folder to set up the t.
|
||||
To set up GLFW on a Windows system, download the pre-built binaries from the [GLFW website](https://www.glfw.org/download.html) and extract the zip file into a folder, then pass the GLFW include header folder as `-DGLFW_INCLUDE_DIR` and the lib folder as `-DGLFW_LIB_DIR` when configuring with CMake.
|
||||
|
||||
#### OpenMP
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
## Description
|
||||
|
||||
This sample implements matrix multiplication and is exactly the same as Chapter 6 of the programming guide. It has been written for clarity of exposition to illustrate various CUDA programming principles, not with the goal of providing the most performant generic kernel for matrix multiplication. To illustrate GPU performance for matrix multiply, this sample also shows how to use the new CUDA 4.0 interface for CUBLAS to demonstrate high-performance performance for matrix multiplication.
|
||||
This sample implements matrix multiplication and is exactly the same as the second example of the [Shared Memory](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory) section of the programming guide. It has been written for clarity of exposition to illustrate various CUDA programming principles, not with the goal of providing the most performant generic kernel for matrix multiplication. To illustrate GPU performance for matrix multiply, this sample also shows how to use the CUDA 4.0+ interface for cuBLAS to demonstrate high performance for matrix multiplication.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
## Description
|
||||
|
||||
This sample implements matrix multiplication and is exactly the same as Chapter 6 of the programming guide. It has been written for clarity of exposition to illustrate various CUDA programming principles, not with the goal of providing the most performant generic kernel for matrix multiplication. To illustrate GPU performance for matrix multiply, this sample also shows how to use the new CUDA 4.0 interface for CUBLAS to demonstrate high-performance performance for matrix multiplication.
|
||||
This sample implements matrix multiplication and is exactly the same as the second example of the [Shared Memory](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory) section of the programming guide. It has been written for clarity of exposition to illustrate various CUDA programming principles, not with the goal of providing the most performant generic kernel for matrix multiplication. To illustrate GPU performance for matrix multiply, this sample also shows how to use the CUDA 4.0+ interface for cuBLAS to demonstrate high performance for matrix multiplication.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
|
@ -57,7 +57,7 @@ int main(int argc, char **argv) {
|
||||
// Get GPU information
|
||||
checkCudaErrors(cudaGetDevice(&devID));
|
||||
checkCudaErrors(cudaGetDeviceProperties(&props, devID));
|
||||
printf("Device %d: \"%s\" with Compute %d.%d capability\n", devID, props.name,
|
||||
printf("Device %d: \"%s\" with Compute capability %d.%d\n", devID, props.name,
|
||||
props.major, props.minor);
|
||||
|
||||
printf("printf() is called. Output:\n\n");
|
||||
|
@ -11,5 +11,4 @@ This sample enumerates the properties of the CUDA devices present in the system.
|
||||
This sample enumerates the properties of the CUDA devices present using CUDA Driver API calls
|
||||
|
||||
### [topologyQuery](./topologyQuery)
|
||||
A simple exemple on how to query the topology of a system with multiple GPU
|
||||
|
||||
A simple example of how to query the topology of a system with multiple GPUs
|
||||
|
@ -34,13 +34,12 @@
|
||||
#define _KERNELS_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <thrust/functional.h>
|
||||
|
||||
#include "common.cuh"
|
||||
|
||||
// Functors used with thrust library.
|
||||
template <typename Input>
|
||||
struct IsGreaterEqualThan : public thrust::unary_function<Input, bool>
|
||||
struct IsGreaterEqualThan
|
||||
{
|
||||
__host__ __device__ IsGreaterEqualThan(uint upperBound) :
|
||||
upperBound_(upperBound) {}
|
||||
|
@ -128,7 +128,7 @@ __global__ void MatrixMulAsyncCopyMultiStageLargeChunk(
|
||||
a <= aEnd; a += aStep, b += bStep, ++i) {
|
||||
// Load the matrices from device memory to shared memory; each thread loads
|
||||
// one element of each matrix
|
||||
for (; aStage <= a + aStep * maxPipelineStages;
|
||||
for (; aStage < a + aStep * maxPipelineStages;
|
||||
aStage += aStep, bStage += bStep, ++iStage) {
|
||||
pipe.producer_acquire();
|
||||
if (aStage <= aEnd && t4x < BLOCK_SIZE) {
|
||||
@ -137,7 +137,7 @@ __global__ void MatrixMulAsyncCopyMultiStageLargeChunk(
|
||||
cuda::memcpy_async(&As[j][threadIdx.y][t4x],
|
||||
&A[aStage + wA * threadIdx.y + t4x], shape4, pipe);
|
||||
cuda::memcpy_async(&Bs[j][threadIdx.y][t4x],
|
||||
&B[aStage + wA * threadIdx.y + t4x], shape4, pipe);
|
||||
&B[bStage + wB * threadIdx.y + t4x], shape4, pipe);
|
||||
}
|
||||
pipe.producer_commit();
|
||||
}
|
||||
@ -222,7 +222,7 @@ __global__ void MatrixMulAsyncCopyLargeChunk(float *__restrict__ C,
|
||||
|
||||
cuda::memcpy_async(&As[threadIdx.y][t4x], &A[a + wA * threadIdx.y + t4x],
|
||||
shape4, pipe);
|
||||
cuda::memcpy_async(&Bs[threadIdx.y][t4x], &B[a + wA * threadIdx.y + t4x],
|
||||
cuda::memcpy_async(&Bs[threadIdx.y][t4x], &B[b + wB * threadIdx.y + t4x],
|
||||
shape4, pipe);
|
||||
|
||||
pipe.producer_commit();
|
||||
|
@ -31,7 +31,6 @@
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <cstring>
|
||||
#include <iostream>
|
||||
#include "cuda.h"
|
||||
@ -293,6 +292,11 @@ static void memMapGetDeviceFunction(char **argv) {
|
||||
jitNumOptions, jitOptions,
|
||||
(void **)jitOptVals));
|
||||
printf("> PTX JIT log:\n%s\n", jitLogBuffer);
|
||||
|
||||
// Clean up dynamically allocated memory
|
||||
delete[] jitOptions;
|
||||
delete[] jitOptVals;
|
||||
delete[] jitLogBuffer;
|
||||
} else {
|
||||
checkCudaErrors(cuModuleLoad(&cuModule, module_path.c_str()));
|
||||
}
|
||||
@ -379,7 +383,7 @@ static void childProcess(int devId, int id, char **argv) {
|
||||
// deterministic.
|
||||
barrierWait(&shm->barrier, &shm->sense, (unsigned int)procCount);
|
||||
if (id == 0) {
|
||||
printf("Step %lld done\n", (unsigned long long)i);
|
||||
printf("Step %llu done\n", (unsigned long long)i);
|
||||
}
|
||||
}
|
||||
|
||||
@ -550,7 +554,7 @@ static void parentProcess(char *app) {
|
||||
// Launch the child processes!
|
||||
for (i = 0; i < nprocesses; i++) {
|
||||
char devIdx[10];
|
||||
char procIdx[10];
|
||||
char procIdx[12];
|
||||
char *const args[] = {app, devIdx, procIdx, NULL};
|
||||
Process process;
|
||||
|
||||
|
@ -20,16 +20,19 @@ include_directories(../../../Common)
|
||||
find_package(Vulkan)
|
||||
find_package(OpenGL)
|
||||
|
||||
|
||||
# Include the check_include_file macro
|
||||
include(CheckIncludeFile)
|
||||
|
||||
# Check for the GLFW/glfw3.h header
|
||||
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
||||
|
||||
# Find GLFW/glfw3.h header for Windows
|
||||
# Find GLFW header and lib for Windows
|
||||
if(WIN32)
|
||||
find_file(GLFW3_H "glfw3.h" PATH "$ENV{GLFW_INCLUDES_DIR}/GLFW")
|
||||
if(GLFW3_H)
|
||||
find_file(GLFW3_H "GLFW/glfw3.h" PATH "${GLFW_INCLUDE_DIR}")
|
||||
find_library(GLFW3_LIB "glfw3" PATH "${GLFW_LIB_DIR}")
|
||||
if(GLFW3_H AND GLFW3_LIB)
|
||||
message(STATUS "Found GLFW/glfw3.h and GLFW library.")
|
||||
set(HAVE_GLFW3_H 1)
|
||||
endif()
|
||||
endif()
|
||||
@ -51,21 +54,22 @@ if(${Vulkan_FOUND})
|
||||
${Vulkan_INCLUDE_DIRS}
|
||||
${CUDAToolkit_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
target_link_libraries(simpleVulkan
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
)
|
||||
if(WIN32)
|
||||
target_include_directories(simpleVulkan PUBLIC
|
||||
${GLFW_INCLUDE_DIR}
|
||||
)
|
||||
target_link_libraries(simpleVulkan
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
glfw3.dll
|
||||
${GLFW3_LIB}
|
||||
)
|
||||
else()
|
||||
target_link_libraries(simpleVulkan
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
glfw
|
||||
)
|
||||
endif()
|
||||
|
||||
add_custom_command(TARGET simpleVulkan POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/sinewave.frag
|
||||
|
@ -26,10 +26,12 @@ include(CheckIncludeFile)
|
||||
# Check for the GLFW/glfw3.h header
|
||||
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
||||
|
||||
# Find GLFW/glfw3.h header for Windows
|
||||
# Find GLFW header and lib for Windows
|
||||
if(WIN32)
|
||||
find_file(GLFW3_H "glfw3.h" PATH "$ENV{GLFW_INCLUDES_DIR}/GLFW")
|
||||
if(GLFW3_H)
|
||||
find_file(GLFW3_H "GLFW/glfw3.h" PATH "${GLFW_INCLUDE_DIR}")
|
||||
find_library(GLFW3_LIB "glfw3" PATH "${GLFW_LIB_DIR}")
|
||||
if(GLFW3_H AND GLFW3_LIB)
|
||||
message(STATUS "Found GLFW/glfw3.h and GLFW library.")
|
||||
set(HAVE_GLFW3_H 1)
|
||||
endif()
|
||||
endif()
|
||||
@ -51,23 +53,23 @@ if(${Vulkan_FOUND})
|
||||
${Vulkan_INCLUDE_DIRS}
|
||||
${CUDAToolkit_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
target_link_libraries(simpleVulkanMMAP
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
CUDA::cuda_driver
|
||||
)
|
||||
if(WIN32)
|
||||
target_include_directories(simpleVulkanMMAP PUBLIC
|
||||
${GLFW_INCLUDE_DIR}
|
||||
)
|
||||
target_link_libraries(simpleVulkanMMAP
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
CUDA::cuda_driver
|
||||
glfw3.dll
|
||||
${GLFW3_LIB}
|
||||
)
|
||||
else()
|
||||
target_link_libraries(simpleVulkanMMAP
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
CUDA::cuda_driver
|
||||
glfw
|
||||
)
|
||||
endif()
|
||||
|
||||
add_custom_command(TARGET simpleVulkanMMAP POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/montecarlo.frag
|
||||
|
@ -71,7 +71,7 @@ if(${OpenGL_FOUND})
|
||||
POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/glew64.dll
|
||||
${CMAKE_CURRENT_BINARY_DIR}
|
||||
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>
|
||||
)
|
||||
endif()
|
||||
|
||||
|
@ -26,10 +26,12 @@ include(CheckIncludeFile)
|
||||
# Check for the GLFW/glfw3.h header
|
||||
check_include_file("GLFW/glfw3.h" HAVE_GLFW3_H)
|
||||
|
||||
# Find GLFW/glfw3.h header for Windows
|
||||
# Find GLFW header and lib for Windows
|
||||
if(WIN32)
|
||||
find_file(GLFW3_H "glfw3.h" PATH "$ENV{GLFW_INCLUDES_DIR}/GLFW")
|
||||
if(GLFW3_H)
|
||||
find_file(GLFW3_H "GLFW/glfw3.h" PATH "${GLFW_INCLUDE_DIR}")
|
||||
find_file(GLFW3_LIB "glfw3" PATH "${GLFW_LIB_DIR}")
|
||||
if(GLFW3_H AND GLFW3_LIB)
|
||||
message(STATUS "Found GLFW/glfw3.h and GLFW library.")
|
||||
set(HAVE_GLFW3_H 1)
|
||||
endif()
|
||||
endif()
|
||||
@ -51,21 +53,22 @@ if(${Vulkan_FOUND})
|
||||
${Vulkan_INCLUDE_DIRS}
|
||||
${CUDAToolkit_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
target_link_libraries(vulkanImageCUDA
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
)
|
||||
if(WIN32)
|
||||
target_include_directories(vulkanImageCUDA PUBLIC
|
||||
${GLFW_INCLUDE_DIR}
|
||||
)
|
||||
target_link_libraries(vulkanImageCUDA
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
glfw3.dll
|
||||
${GLFW3_LIB}
|
||||
)
|
||||
else()
|
||||
target_link_libraries(vulkanImageCUDA
|
||||
${Vulkan_LIBRARIES}
|
||||
OpenGL::GL
|
||||
glfw
|
||||
)
|
||||
endif()
|
||||
|
||||
add_custom_command(TARGET vulkanImageCUDA POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/shader.frag
|
||||
|
Loading…
x
Reference in New Issue
Block a user