mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-20 14:51:23 +01:00
parent
c357dd1e6b
commit
85eefa06c4
@ -77,7 +77,6 @@ int filter_radius = 14;
|
|||||||
int nthreads = 64;
|
int nthreads = 64;
|
||||||
unsigned int width, height;
|
unsigned int width, height;
|
||||||
unsigned int *h_img = NULL;
|
unsigned int *h_img = NULL;
|
||||||
unsigned int *d_img = NULL;
|
|
||||||
unsigned int *d_temp = NULL;
|
unsigned int *d_temp = NULL;
|
||||||
|
|
||||||
GLuint pbo; // OpenGL pixel buffer object
|
GLuint pbo; // OpenGL pixel buffer object
|
||||||
@ -108,11 +107,11 @@ extern "C" void computeGold(float *id, float *od, int w, int h, int n);
|
|||||||
// These are CUDA functions to handle allocation and launching the kernels
|
// These are CUDA functions to handle allocation and launching the kernels
|
||||||
extern "C" void initTexture(int width, int height, void *pImage, bool useRGBA);
|
extern "C" void initTexture(int width, int height, void *pImage, bool useRGBA);
|
||||||
extern "C" void freeTextures();
|
extern "C" void freeTextures();
|
||||||
extern "C" double boxFilter(float *d_src, float *d_temp, float *d_dest,
|
extern "C" double boxFilter(float *d_temp, float *d_dest,
|
||||||
int width, int height, int radius, int iterations,
|
int width, int height, int radius, int iterations,
|
||||||
int nthreads, StopWatchInterface *timer);
|
int nthreads, StopWatchInterface *timer);
|
||||||
|
|
||||||
extern "C" double boxFilterRGBA(unsigned int *d_src, unsigned int *d_temp,
|
extern "C" double boxFilterRGBA(unsigned int *d_temp,
|
||||||
unsigned int *d_dest, int width, int height,
|
unsigned int *d_dest, int width, int height,
|
||||||
int radius, int iterations, int nthreads,
|
int radius, int iterations, int nthreads,
|
||||||
StopWatchInterface *timer);
|
StopWatchInterface *timer);
|
||||||
@ -165,7 +164,7 @@ void display() {
|
|||||||
size_t num_bytes;
|
size_t num_bytes;
|
||||||
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
|
||||||
(void **)&d_result, &num_bytes, cuda_pbo_resource));
|
(void **)&d_result, &num_bytes, cuda_pbo_resource));
|
||||||
boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius,
|
boxFilterRGBA(d_temp, d_result, width, height, filter_radius,
|
||||||
iterations, nthreads, kernel_timer);
|
iterations, nthreads, kernel_timer);
|
||||||
|
|
||||||
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
|
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));
|
||||||
@ -282,11 +281,7 @@ void reshape(int x, int y) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void initCuda(bool useRGBA) {
|
void initCuda(bool useRGBA) {
|
||||||
// allocate device memory
|
checkCudaErrors(cudaMalloc((void **)&d_temp, (width * height * sizeof(unsigned int))));
|
||||||
checkCudaErrors(
|
|
||||||
cudaMalloc((void **)&d_img, (width * height * sizeof(unsigned int))));
|
|
||||||
checkCudaErrors(
|
|
||||||
cudaMalloc((void **)&d_temp, (width * height * sizeof(unsigned int))));
|
|
||||||
|
|
||||||
// Refer to boxFilter_kernel.cu for implementation
|
// Refer to boxFilter_kernel.cu for implementation
|
||||||
initTexture(width, height, h_img, useRGBA);
|
initTexture(width, height, h_img, useRGBA);
|
||||||
@ -304,11 +299,6 @@ void cleanup() {
|
|||||||
h_img = NULL;
|
h_img = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (d_img) {
|
|
||||||
cudaFree(d_img);
|
|
||||||
d_img = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (d_temp) {
|
if (d_temp) {
|
||||||
cudaFree(d_temp);
|
cudaFree(d_temp);
|
||||||
d_temp = NULL;
|
d_temp = NULL;
|
||||||
@ -413,7 +403,7 @@ int runBenchmark() {
|
|||||||
cudaMalloc((void **)&d_result, width * height * sizeof(unsigned int)));
|
cudaMalloc((void **)&d_result, width * height * sizeof(unsigned int)));
|
||||||
|
|
||||||
// warm-up
|
// warm-up
|
||||||
boxFilterRGBA(d_img, d_temp, d_temp, width, height, filter_radius, iterations,
|
boxFilterRGBA(d_temp, d_temp, width, height, filter_radius, iterations,
|
||||||
nthreads, kernel_timer);
|
nthreads, kernel_timer);
|
||||||
checkCudaErrors(cudaDeviceSynchronize());
|
checkCudaErrors(cudaDeviceSynchronize());
|
||||||
|
|
||||||
@ -426,7 +416,7 @@ int runBenchmark() {
|
|||||||
|
|
||||||
for (int i = 0; i < iCycles; i++) {
|
for (int i = 0; i < iCycles; i++) {
|
||||||
dProcessingTime +=
|
dProcessingTime +=
|
||||||
boxFilterRGBA(d_img, d_temp, d_img, width, height, filter_radius,
|
boxFilterRGBA(d_temp, d_temp, width, height, filter_radius,
|
||||||
iterations, nthreads, kernel_timer);
|
iterations, nthreads, kernel_timer);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -469,7 +459,7 @@ int runSingleTest(char *ref_file, char *exec_path) {
|
|||||||
{
|
{
|
||||||
printf("%s (radius=%d) (passes=%d) ", sSDKsample, filter_radius,
|
printf("%s (radius=%d) (passes=%d) ", sSDKsample, filter_radius,
|
||||||
iterations);
|
iterations);
|
||||||
boxFilterRGBA(d_img, d_temp, d_result, width, height, filter_radius,
|
boxFilterRGBA(d_temp, d_result, width, height, filter_radius,
|
||||||
iterations, nthreads, kernel_timer);
|
iterations, nthreads, kernel_timer);
|
||||||
|
|
||||||
// check if kernel execution generated an error
|
// check if kernel execution generated an error
|
||||||
|
@ -399,7 +399,6 @@ extern "C" void freeTextures() {
|
|||||||
Perform 2D box filter on image using CUDA
|
Perform 2D box filter on image using CUDA
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
d_src - pointer to input image in device memory
|
|
||||||
d_temp - pointer to temporary storage in device memory
|
d_temp - pointer to temporary storage in device memory
|
||||||
d_dest - pointer to destination image in device memory
|
d_dest - pointer to destination image in device memory
|
||||||
width - image width
|
width - image width
|
||||||
@ -408,7 +407,7 @@ extern "C" void freeTextures() {
|
|||||||
iterations - number of iterations
|
iterations - number of iterations
|
||||||
|
|
||||||
*/
|
*/
|
||||||
extern "C" double boxFilter(float *d_src, float *d_temp, float *d_dest,
|
extern "C" double boxFilter(float *d_temp, float *d_dest,
|
||||||
int width, int height, int radius, int iterations,
|
int width, int height, int radius, int iterations,
|
||||||
int nthreads, StopWatchInterface *timer) {
|
int nthreads, StopWatchInterface *timer) {
|
||||||
// var for kernel timing
|
// var for kernel timing
|
||||||
@ -447,7 +446,7 @@ extern "C" double boxFilter(float *d_src, float *d_temp, float *d_dest,
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RGBA version
|
// RGBA version
|
||||||
extern "C" double boxFilterRGBA(unsigned int *d_src, unsigned int *d_temp,
|
extern "C" double boxFilterRGBA(unsigned int *d_temp,
|
||||||
unsigned int *d_dest, int width, int height,
|
unsigned int *d_dest, int width, int height,
|
||||||
int radius, int iterations, int nthreads,
|
int radius, int iterations, int nthreads,
|
||||||
StopWatchInterface *timer) {
|
StopWatchInterface *timer) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user