diff --git a/CHANGELOG.md b/CHANGELOG.md index 6b7f1d9c..fb4ff376 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ * `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library) * `5_Domain_Specific` * Legacy Direct3D 9 and 10 interoperability samples: + * `fluidsD3D9` * `simpleD3D10` * `simpleD3D10RenderTarget` * `simpleD3D10Texture` diff --git a/Samples/5_Domain_Specific/fluidsD3D9/Makefile b/Samples/5_Domain_Specific/fluidsD3D9/Makefile deleted file mode 100644 index 82808308..00000000 --- a/Samples/5_Domain_Specific/fluidsD3D9/Makefile +++ /dev/null @@ -1,46 +0,0 @@ -################################################################################ -# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -################################################################################ -# -# Makefile project only supported on Mac OS X and Linux Platforms) -# -################################################################################ - -# Target rules -all: build - -build: - $(info >>> WARNING - fluidsD3D9 is not supported on Linux - waiving sample <<<) - -run: build - -testrun: build - -clean: - -clobber: clean diff --git a/Samples/5_Domain_Specific/fluidsD3D9/README.md b/Samples/5_Domain_Specific/fluidsD3D9/README.md deleted file mode 100644 index 84e45b07..00000000 --- a/Samples/5_Domain_Specific/fluidsD3D9/README.md +++ /dev/null @@ -1,49 +0,0 @@ -# fluidsD3D9 - Fluids (Direct3D Version) - -## Description - -An example of fluid simulation using CUDA and CUFFT, with Direct3D 9 rendering. A Direct3D Capable device is required. - -## Key Concepts - -Graphics Interop, CUFFT Library, Physically-Based Simulation - -## Supported SM Architectures - -[SM 5.0 ](https://developer.nvidia.com/cuda-gpus) [SM 5.2 ](https://developer.nvidia.com/cuda-gpus) [SM 5.3 ](https://developer.nvidia.com/cuda-gpus) [SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) - -## Supported OSes - -Windows - -## Supported CPU Architecture - -x86_64 - -## CUDA APIs involved - -### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html) -cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGetDevice, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGetDeviceProperties - -## Dependencies needed to build/run -[DirectX](../../../README.md#directx) - -## Prerequisites - -Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform. -Make sure the dependencies mentioned in [Dependencies]() section above are installed. - -## Build and Run - -### Windows -The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format: -``` -*_vs.sln - for Visual Studio -``` -Each individual sample has its own set of solution files in its directory: - -To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used. -> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details." - -## References (for more details) - diff --git a/Samples/5_Domain_Specific/fluidsD3D9/data/ref_fluidsD3D9.ppm b/Samples/5_Domain_Specific/fluidsD3D9/data/ref_fluidsD3D9.ppm deleted file mode 100644 index 333449c1..00000000 Binary files a/Samples/5_Domain_Specific/fluidsD3D9/data/ref_fluidsD3D9.ppm and /dev/null differ diff --git a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_lg.gif b/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_lg.gif deleted file mode 100644 index 0ae7f616..00000000 Binary files a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_lg.gif and /dev/null differ diff --git a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_md.gif b/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_md.gif deleted file mode 100644 index 4cb7e6ca..00000000 Binary files a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_md.gif and /dev/null differ diff --git a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_sm.gif b/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_sm.gif deleted file mode 100644 index 84b9057f..00000000 Binary files a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_sm.gif and /dev/null differ diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9.cpp b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9.cpp deleted file mode 100644 index 56b1a37c..00000000 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9.cpp +++ /dev/null @@ -1,829 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#pragma warning(disable : 4312) - -#include -#include -#include -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) -#define WINDOWS_LEAN_AND_MEAN -#include -#endif - -// including CUDA headers and helper functions -#include -#include -#include -#include - -// SDK helper functions -#include -#include -#include "fluidsD3D9_kernels.h" -#include -#include -using namespace DirectX; - -#define MAX_EPSILON 10 - -static char *SDK_name = "fluidsD3D9"; - -int *pArgc = NULL; -char **pArgv = NULL; - -// CUDA example code that implements the frequency space version of -// Jos Stam's paper 'Stable Fluids' in 2D. This application uses the -// CUDA FFT library (CUFFT) to perform velocity diffusion and to -// force non-divergence in the velocity field at each time step. It uses -// CUDA-OpenGL interoperability to update the particle field directly -// instead of doing a copy to system memory before drawing. Texture is -// used for automatic bilinear interpolation at the velocity advection step. - -HWND hWnd; // Window handle -LPDIRECT3D9EX g_pD3D = NULL; // Used to create the D3DDevice -unsigned int g_iAdapter = NULL; // Adapter -LPDIRECT3DDEVICE9EX g_pD3DDevice = NULL; // Rendering device -LPDIRECT3DVERTEXBUFFER9 g_pVB = NULL; // Buffer to hold particles -LPDIRECT3DTEXTURE9 g_pTexture = NULL; // Texture to render points - -struct cudaGraphicsResource *cuda_VB_resource; // handles D3D9-CUDA exchange - -HRESULT InitD3D9(HWND hWnd); -HRESULT InitD3D9RenderState(); -HRESULT InitCUDA(); -HRESULT InitCUFFT(); -HRESULT InitVertexBuffer(); -HRESULT FreeVertexBuffer(); -HRESULT InitPointTexture(); -HRESULT RestoreContextResources(); - -#define D3DFVF_CUSTOMVERTEX (D3DFVF_XYZ | D3DFVF_DIFFUSE) -void updateVB(void); -void initParticles(cData *p, int dx, int dy); - -// CUFFT plan handle -static cufftHandle g_planr2c; -static cufftHandle g_planc2r; -static cData *g_vxfield = NULL; -static cData *g_vyfield = NULL; - -cData *g_hvfield = NULL; -cData *g_dvfield = NULL; -static int wWidth = MAX(512, DIM); -static int wHeight = MAX(512, DIM); - -static int clicked = 0; -static int fpsCount = 0; -static int fpsLimit = 1; -StopWatchInterface *timer = NULL; - -// Particle data -static Vertex *g_mparticles = NULL; -static cData *g_particles = NULL; -static int lastx = 0, lasty = 0; - -// Texture pitch -// unsigned int g_tPitch = 0; -size_t g_tPitch = 0; - -D3DDISPLAYMODEEX g_d3ddm; -D3DPRESENT_PARAMETERS g_d3dpp; - -bool g_bWindowed = true; -bool g_bDeviceLost = false; -bool g_bPassed = true; -int g_iFrameToCompare = 100; -bool g_bQAAddTestForce = true; -char *ref_file = NULL; - -#define NAME_LEN 512 - -char device_name[NAME_LEN]; - -VOID Cleanup() { - // Unregister vertex buffer - FreeVertexBuffer(); - - deleteTexture(); - - // Free all host and device resources - free(g_hvfield); - free(g_particles); - cudaFree(g_dvfield); - cudaFree(g_vxfield); - cudaFree(g_vyfield); - - cufftDestroy(g_planr2c); - cufftDestroy(g_planc2r); - - if (g_pTexture != NULL) { - g_pTexture->Release(); - g_pTexture = NULL; - } - - if (g_pD3DDevice != NULL) { - g_pD3DDevice->Release(); - g_pD3DDevice = NULL; - } - - if (g_pD3D != NULL) { - g_pD3D->Release(); - g_pD3D = NULL; - } - - sdkDeleteTimer(&timer); -} - -LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) { - switch (msg) { - case WM_DESTROY: - Cleanup(); - PostQuitMessage(0); - exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE); - return 0; - - case WM_KEYDOWN: - switch (wParam) { - case 27: - Cleanup(); - PostQuitMessage(0); - break; - - case 0x52: - memset(g_hvfield, 0, sizeof(cData) * DS); - cudaMemcpy(g_dvfield, g_hvfield, sizeof(cData) * DS, - cudaMemcpyHostToDevice); - - initParticles(g_particles, DIM, DIM); - cudaGraphicsUnregisterResource(cuda_VB_resource); - - updateVB(); - - cudaGraphicsD3D9RegisterResource(&cuda_VB_resource, g_pVB, - cudaD3D9RegisterFlagsNone); - getLastCudaError("cudaGraphicsD3D9RegisterResource failed"); - break; - - default: - break; - } - - break; - - case WM_SIZE: - wWidth = LOWORD(lParam); - wHeight = HIWORD(lParam); - break; - - case WM_MOUSEMOVE: - if (wParam == MK_LBUTTON) { - clicked = 1; - } else { - clicked = 0; - } - - int x = LOWORD(lParam), y = HIWORD(lParam); - - // Convert motion coordinates to domain - float fx = (x / (float)wWidth); - float fy = (y / (float)wHeight); - int nx = (int)(fx * DIM); - int ny = (int)(fy * DIM); - - if (clicked && nx < DIM - FR && nx > FR - 1 && ny < DIM - FR && - ny > FR - 1) { - int ddx = LOWORD(lParam) - lastx; - int ddy = HIWORD(lParam) - lasty; - - fx = ddx / (float)wWidth; - fy = ddy / (float)wHeight; - int spy = ny - FR; - int spx = nx - FR; - addForces(g_dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, - FORCE * DT * fy, FR, g_tPitch); - lastx = x; - lasty = y; - } - - break; - } - - return DefWindowProc(hWnd, msg, wParam, lParam); -} - -HRESULT InitVertexBuffer() { - // Create the vertex buffer. - if (FAILED(g_pD3DDevice->CreateVertexBuffer(DS * sizeof(Vertex), 0, - D3DFVF_CUSTOMVERTEX, - D3DPOOL_DEFAULT, &g_pVB, NULL))) { - return E_FAIL; - } - - // Initialize the Vertex Buffer with the particles - updateVB(); - - cudaGraphicsD3D9RegisterResource(&cuda_VB_resource, g_pVB, - cudaD3D9RegisterFlagsNone); - getLastCudaError("cudaGraphicsD3D9RegisterResource failed"); - - return S_OK; -} - -HRESULT InitPointTexture() { - // Create the texture. - int width = 64; - int height = width; - - if (FAILED(g_pD3DDevice->CreateTexture( - width, height, 0, D3DUSAGE_AUTOGENMIPMAP | D3DUSAGE_DYNAMIC, - D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &g_pTexture, NULL))) { - return E_FAIL; - } - - // Fill in top level - D3DLOCKED_RECT rect; - - if (FAILED(g_pTexture->LockRect(0, &rect, 0, 0))) { - return E_FAIL; - } - - typedef unsigned int TexelType; - TexelType *texel = (TexelType *)rect.pBits; - - for (int y = -height / 2; y < height / 2; ++y) { - float yf = y + 0.5f; - TexelType *t = texel; - - for (int x = -width / 2; x < width / 2; ++x) { - float xf = x + 0.5f; - float radius = (float)width / 32; - float dist = sqrtf(xf * xf + yf * yf) / radius; - float n = 0.1f; - float value; - - if (dist < 1) { - value = 1 - 0.5f * powf(dist, n); - } else if (dist < 2) { - value = 0.5f * powf(2 - dist, n); - } else { - value = 0; - } - - value *= 75; - unsigned char *c = (unsigned char *)t; - c[0] = c[1] = c[2] = c[3] = (unsigned char)value; - ++t; - } - - texel += rect.Pitch / sizeof(TexelType); - } - - if (FAILED(g_pTexture->UnlockRect(0))) { - return E_FAIL; - } - - // Set sampler state - if (FAILED(g_pD3DDevice->SetSamplerState(0, D3DSAMP_MINFILTER, - D3DTEXF_LINEAR))) { - return E_FAIL; - } - - if (FAILED(g_pD3DDevice->SetSamplerState(0, D3DSAMP_MAGFILTER, - D3DTEXF_LINEAR))) { - return E_FAIL; - } - - return S_OK; -} - -//----------------------------------------------------------------------------- -// Name: FreeVertexBuffer() -// Desc: Free's the Vertex Buffer resource -//----------------------------------------------------------------------------- -HRESULT FreeVertexBuffer() { - if (g_pVB != NULL) { - // Unregister vertex buffer - cudaGraphicsUnregisterResource(cuda_VB_resource); - getLastCudaError("cudaGraphicsUnregisterResource failed"); - - g_pVB->Release(); - } - - return S_OK; -} - -void updateVB(void) { - Vertex *data = new Vertex[DS]; - g_pVB->Lock(0, DS * sizeof(Vertex), (void **)&data, 0); - - for (int i = 0; i < DS; i++) { - data[i].x = g_particles[i].x; - data[i].y = g_particles[i].y; - data[i].z = 0.f; - data[i].c = 0xff00ff00; - } - - g_pVB->Unlock(); -} - -HRESULT InitD3D9(HWND hWnd) { - // Create the D3D object. - if (S_OK != Direct3DCreate9Ex(D3D_SDK_VERSION, &g_pD3D)) { - return E_FAIL; - } - - D3DADAPTER_IDENTIFIER9 adapterId; - int device; - bool bDeviceFound = false; - printf("\n"); - - cudaError cuStatus; - - for (g_iAdapter = 0; g_iAdapter < g_pD3D->GetAdapterCount(); g_iAdapter++) { - HRESULT hr = g_pD3D->GetAdapterIdentifier(g_iAdapter, 0, &adapterId); - - if (FAILED(hr)) { - continue; - } - - // clear any errors we got while querying invalid compute devices - cuStatus = cudaGetLastError(); - cuStatus = cudaD3D9GetDevice(&device, adapterId.DeviceName); - printLastCudaError("cudaD3D9GetDevice failed"); // This prints and resets - // the cudaError to - // cudaSuccess - - printf("> Display Device #%d: \"%s\" %s Direct3D9\n", g_iAdapter, - adapterId.Description, - (cuStatus == cudaSuccess) ? "supports" : "does not support"); - - if (cudaSuccess == cuStatus) { - bDeviceFound = true; - STRCPY(device_name, NAME_LEN, adapterId.Description); - break; - } - } - - // we check to make sure we have found a cuda-compatible D3D device to work on - if (!bDeviceFound) { - printf("\nNo CUDA-compatible Direct3D9 device available\n"); - // Release the D3D device - g_pD3D->Release(); - exit(EXIT_SUCCESS); - } - - cudaGetDevice(&device); - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, device); - strcpy(device_name, deviceProp.name); - - RECT rc; - GetClientRect(hWnd, &rc); - g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL); - - // Set up the structure used to create the D3DDevice - D3DPRESENT_PARAMETERS d3dpp; - ZeroMemory(&d3dpp, sizeof(d3dpp)); - d3dpp.Windowed = TRUE; - d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD; - d3dpp.BackBufferFormat = g_d3ddm.Format; // D3DFMT_UNKNOWN; - - // Create the D3DDevice - if (FAILED(g_pD3D->CreateDeviceEx(g_iAdapter, D3DDEVTYPE_HAL, hWnd, - D3DCREATE_HARDWARE_VERTEXPROCESSING, &d3dpp, - NULL, &g_pD3DDevice))) { - return E_FAIL; - } else { - return S_OK; - } -} - -// Initialize the D3D Rendering State -HRESULT InitD3D9RenderState() { - // Set projection matrix - XMMATRIX matProj; - XMFLOAT4X4 matProjFloat; - matProj = XMMatrixOrthographicOffCenterLH(0, 1, 1, 0, 0, 1); - XMStoreFloat4x4(&matProjFloat, matProj); - g_pD3DDevice->SetTransform(D3DTS_PROJECTION, (D3DMATRIX *)&matProjFloat); - - // Turn off D3D lighting, since we are providing our own vertex colors - if (FAILED(g_pD3DDevice->SetRenderState(D3DRS_LIGHTING, FALSE))) { - return E_FAIL; - } - - return S_OK; -} - -HRESULT InitCUDA() { - printf("InitCUDA() g_pD3DDevice = %p\n", g_pD3DDevice); - - // Now we need to bind a CUDA context to the DX9 device - // This is the CUDA 2.0 DX9 interface (required for Windows XP and Vista) - cudaD3D9SetDirect3DDevice(g_pD3DDevice); - getLastCudaError("cudaD3D9SetDirect3DDevice failed"); - - return S_OK; -} - -//////////////////////////////////////////////////////////////////////////////// -//! RestoreContextResourcess -// - this function restores all of the CUDA/D3D resources and contexts -//////////////////////////////////////////////////////////////////////////////// -HRESULT RestoreContextResources() { - // Reinitialize D3D9 resources, CUDA resources/contexts - InitCUDA(); - InitD3D9RenderState(); - InitCUFFT(); - InitVertexBuffer(); - InitPointTexture(); - - return S_OK; -} - -//////////////////////////////////////////////////////////////////////////////// -//! DeviceLostHandler -// - this function handles reseting and initialization of the D3D device -// in the event this Device gets Lost -//////////////////////////////////////////////////////////////////////////////// -HRESULT DeviceLostHandler() { - HRESULT hr = S_OK; - - // test the cooperative level to see if it's okay - // to render - if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) { - // if the device was truly lost, (i.e., a fullscreen device just lost - // focus), wait - // until we g_et it back - if (hr == D3DERR_DEVICELOST) { - return S_OK; - } - - // eventually, we will g_et this return value, - // indicating that we can now reset the device - if (hr == D3DERR_DEVICENOTRESET) { - // if we are windowed, read the desktop mode and use the same format for - // the back buffer; this effectively turns off color conversion - - if (g_bWindowed) { - g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL); - g_d3dpp.BackBufferFormat = g_d3ddm.Format; - } - - // now try to reset the device - if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) { - return hr; - } else { - // This is a common function we use to restore all hardware - // resources/state - RestoreContextResources(); - - // we have acquired the device - g_bDeviceLost = false; - } - } - } - - return hr; -} - -HRESULT InitCUFFT() { - // You can only call CUDA D3D9 device has been bound to the CUDA - // context, otherwise it will not work - g_hvfield = (cData *)malloc(sizeof(cData) * DS); - memset(g_hvfield, 0, sizeof(cData) * DS); - - // Allocate and initialize device data - cudaMallocPitch((void **)&g_dvfield, &g_tPitch, sizeof(cData) * DIM, DIM); - - cudaMemcpy(g_dvfield, g_hvfield, sizeof(cData) * DS, cudaMemcpyHostToDevice); - - // Temporary complex velocity field data - cudaMalloc((void **)&g_vxfield, sizeof(cData) * PDS); - cudaMalloc((void **)&g_vyfield, sizeof(cData) * PDS); - - setupTexture(DIM, DIM); - - // Create particle array - g_particles = (cData *)malloc(sizeof(cData) * DS); - memset(g_particles, 0, sizeof(cData) * DS); - - initParticles(g_particles, DIM, DIM); - - // Create CUFFT transform plan configuration - cufftPlan2d(&g_planr2c, DIM, DIM, CUFFT_R2C); - cufftPlan2d(&g_planc2r, DIM, DIM, CUFFT_C2R); - - return S_OK; -} - -HRESULT Render(void) { - HRESULT hr = S_OK; - - // Normal case where CUDA Device is not lost - if (!g_bDeviceLost) { - sdkStartTimer(&timer); - - advectVelocity(g_dvfield, (float *)g_vxfield, (float *)g_vyfield, DIM, - RPADW, DIM, DT, g_tPitch); - { - // Forward FFT - cufftExecR2C(g_planr2c, (cufftReal *)g_vxfield, - (cufftComplex *)g_vxfield); - cufftExecR2C(g_planr2c, (cufftReal *)g_vyfield, - (cufftComplex *)g_vyfield); - - diffuseProject(g_vxfield, g_vyfield, CPADW, DIM, DT, VIS, g_tPitch); - - // Inverse FFT - cufftExecC2R(g_planc2r, (cufftComplex *)g_vxfield, - (cufftReal *)g_vxfield); - cufftExecC2R(g_planc2r, (cufftComplex *)g_vyfield, - (cufftReal *)g_vyfield); - } - updateVelocity(g_dvfield, (float *)g_vxfield, (float *)g_vyfield, DIM, - RPADW, DIM, g_tPitch); - - // Map D3D9 vertex buffer to CUDA - { - size_t num_bytes; - checkCudaErrors(cudaGraphicsMapResources(1, &cuda_VB_resource, 0)); - getLastCudaError("cudaGraphicsMapResources failed"); - // This gets a pointer from the Vertex Buffer - checkCudaErrors(cudaGraphicsResourceGetMappedPointer( - (void **)&g_mparticles, &num_bytes, cuda_VB_resource)); - getLastCudaError("cudaGraphicsResourceGetMappedPointer failed"); - - advectParticles(g_mparticles, g_dvfield, DIM, DIM, DT, g_tPitch); - - // Unmap vertex buffer - checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_VB_resource, 0)); - getLastCudaError("cudaGraphicsUnmapResource failed"); - } - - g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f, - 0); - g_pD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE); - g_pD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE); - g_pD3DDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE); - g_pD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE); - g_pD3DDevice->SetRenderState(D3DRS_POINTSPRITEENABLE, TRUE); - float size = 16; - g_pD3DDevice->SetRenderState(D3DRS_POINTSIZE, *((DWORD *)&size)); - g_pD3DDevice->SetTexture(0, g_pTexture); - - if (SUCCEEDED(g_pD3DDevice->BeginScene())) { - // Draw particles - g_pD3DDevice->SetStreamSource(0, g_pVB, 0, sizeof(Vertex)); - g_pD3DDevice->SetFVF(D3DFVF_CUSTOMVERTEX); - g_pD3DDevice->DrawPrimitive(D3DPT_POINTLIST, 0, DS); - - g_pD3DDevice->EndScene(); - } - - // Finish timing before swap buffers to avoid refresh sync - sdkStopTimer(&timer); - // Present the backbuffer contents to the display - hr = g_pD3DDevice->Present(NULL, NULL, NULL, NULL); - - if (hr == D3DERR_DEVICELOST) { - fprintf(stderr, "drawScene Present = %08x detected D3D DeviceLost\n", hr); - g_bDeviceLost = true; - - FreeVertexBuffer(); - } - - fpsCount++; - - if (fpsCount == fpsLimit) { - char fps[256]; - float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f); - sprintf(fps, "CUDA/D3D9 Stable Fluids (%d x %d): %3.1f fps", DIM, DIM, - ifps); - SetWindowText(hWnd, fps); - fpsCount = 0; - fpsLimit = (int)MAX(ifps, 1.f); - sdkResetTimer(&timer); - } - } else { - // Begin code to handle case where the D3D9 device is lost - if (FAILED(hr = DeviceLostHandler())) { - fprintf(stderr, "DeviceLostHandler FAILED returned %08x\n", hr); - return hr; - } - - fprintf(stderr, "Render DeviceLost handler\n"); - - // test the cooperative level to see if it's okay - // to render - if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) { - fprintf(stderr, - "TestCooperativeLevel = %08x failed, will attempt to reset\n", - hr); - - // if the device was truly lost, (i.e., a fullscreen device just lost - // focus), wait - // until we g_et it back - - if (hr == D3DERR_DEVICELOST) { - fprintf( - stderr, - "TestCooperativeLevel = %08x DeviceLost, will retry next call\n", - hr); - return S_OK; - } - - // eventually, we will g_et this return value, - // indicating that we can now reset the device - if (hr == D3DERR_DEVICENOTRESET) { - fprintf(stderr, - "TestCooperativeLevel = %08x will try to RESET the device\n", - hr); - // if we are windowed, read the desktop mode and use the same format for - // the back buffer; this effectively turns off color conversion - - if (g_bWindowed) { - g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL); - g_d3dpp.BackBufferFormat = g_d3ddm.Format; - } - - // now try to reset the device - if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) { - fprintf(stderr, "TestCooperativeLevel = %08x RESET device FAILED\n", - hr); - return hr; - } else { - fprintf(stderr, "TestCooperativeLevel = %08x RESET device SUCCESS!\n", - hr); - - // Reinitialize D3D9 resources, CUDA resources/contexts - RestoreContextResources(); - - fprintf(stderr, "TestCooperativeLevel = %08x INIT device SUCCESS!\n", - hr); - - // we have acquired the device - g_bDeviceLost = false; - } - } - } - } - - return hr; -} - -// very simple von neumann middle-square prng. can't use rand() in -qatest -// mode because its implementation varies across platforms which makes testing -// for consistency in the important parts of this program difficult. -float myrand(void) { - static int seed = 72191; - char sq[22]; - - if (ref_file) { - seed *= seed; - sprintf(sq, "%010d", seed); - // pull the middle 5 digits out of sq - sq[8] = 0; - seed = atoi(&sq[3]); - - return seed / 99999.f; - } else { - return rand() / (float)RAND_MAX; - } -} - -void initParticles(cData *p, int dx, int dy) { - int i, j; - - for (i = 0; i < dy; i++) { - for (j = 0; j < dx; j++) { - p[i * dx + j].x = (j + 0.5f + (myrand() - 0.5f)) / dx; - p[i * dx + j].y = (i + 0.5f + (myrand() - 0.5f)) / dy; - } - } -} - -int main(int argc, char **argv) { - pArgc = &argc; - pArgv = argv; - - printf("%s Starting...\n\n", argv[0]); - - printf( - "NOTE: The CUDA Samples are not meant for performance measurements. " - "Results may vary when GPU Boost is enabled.\n\n"); - - sdkCreateTimer(&timer); - sdkResetTimer(&timer); - - // command line options - // automated build testing harness - if (checkCmdLineFlag(argc, (const char **)argv, "file")) { - getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file); - } - - HINSTANCE hInst = GetModuleHandle(NULL); - // Register the window class - WNDCLASSEX wc = {sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L, - GetModuleHandle(NULL), NULL, NULL, NULL, NULL, - "fluidsD3D9", NULL}; - RegisterClassEx(&wc); - - // Create the application's window - int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME); - int yCaption = ::GetSystemMetrics(SM_CYCAPTION); - int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME); - hWnd = CreateWindow("fluidsD3D9", "CUDA/D3D9 Stable Fluids", - WS_OVERLAPPEDWINDOW, 100, 100, wWidth + 2 * xBorder, - wHeight + 2 * yBorder + yCaption, NULL, NULL, - wc.hInstance, NULL); - - if (SUCCEEDED(InitD3D9(hWnd)) && SUCCEEDED(InitCUDA()) && - SUCCEEDED(InitD3D9RenderState()) && SUCCEEDED(InitCUFFT()) && - SUCCEEDED(InitVertexBuffer()) && SUCCEEDED(InitPointTexture())) { - ShowWindow(hWnd, SW_SHOWDEFAULT); - UpdateWindow(hWnd); - - // Rendering loop - MSG msg; - ZeroMemory(&msg, sizeof(msg)); - - while (msg.message != WM_QUIT) { - if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) { - TranslateMessage(&msg); - DispatchMessage(&msg); - } else { - Render(); - - if (ref_file) { - for (int count = 0; count < g_iFrameToCompare; count++) { - // add in a little force so the automated testing is interesing. - int x = wWidth / (count + 1); - int y = wHeight / (count + 1); - float fx = (x / (float)wWidth); - float fy = (y / (float)wHeight); - int nx = (int)(fx * DIM); - int ny = (int)(fy * DIM); - - int ddx = 35; - int ddy = 35; - fx = ddx / (float)wWidth; - fy = ddy / (float)wHeight; - int spy = ny - FR; - int spx = nx - FR; - - addForces(g_dvfield, DIM, DIM, spx, spy, FORCE * DT * fx, - FORCE * DT * fy, FR, g_tPitch); - // g_bQAAddTestForce = false; // only add it once - - Render(); - } - - const char *cur_image_path = "qatest_fluidsD3D9.ppm"; - - // Save a reference of our current test run image - CheckRenderD3D9::BackbufferToPPM(g_pD3DDevice, cur_image_path); - - // compare to official reference image, printing PASS or FAIL. - g_bPassed = CheckRenderD3D9::PPMvsPPM(cur_image_path, ref_file, - argv[0], MAX_EPSILON, 0.30f); - - PostQuitMessage(0); - } - } - } - } - - UnregisterClass("fluidsD3D9", wc.hInstance); - - // - // and exit - // - printf("> %s running on %s exiting...\n", SDK_name, device_name); - exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE); -} diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.cu b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.cu deleted file mode 100644 index 78fc30ed..00000000 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.cu +++ /dev/null @@ -1,333 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "fluidsD3D9_kernels.h" - -// Texture object for reading velocity field -cudaTextureObject_t texObj; -static cudaArray *array = NULL; - -void setupTexture(int x, int y) { - cudaChannelFormatDesc desc = cudaCreateChannelDesc(); - - cudaMallocArray(&array, &desc, y, x); - getLastCudaError("cudaMalloc failed"); - - cudaResourceDesc texRes; - memset(&texRes, 0, sizeof(cudaResourceDesc)); - - texRes.resType = cudaResourceTypeArray; - texRes.res.array.array = array; - - cudaTextureDesc texDescr; - memset(&texDescr, 0, sizeof(cudaTextureDesc)); - - texDescr.normalizedCoords = false; - texDescr.filterMode = cudaFilterModeLinear; - texDescr.addressMode[0] = cudaAddressModeWrap; - texDescr.readMode = cudaReadModeElementType; - - checkCudaErrors(cudaCreateTextureObject(&texObj, &texRes, &texDescr, NULL)); -} - -void updateTexture(cData *data, size_t wib, size_t h, size_t pitch) { - checkCudaErrors(cudaMemcpy2DToArray(array, 0, 0, data, pitch, wib, h, - cudaMemcpyDeviceToDevice)); -} - -void deleteTexture(void) { - checkCudaErrors(cudaDestroyTextureObject(texObj)); - checkCudaErrors(cudaFreeArray(array)); -} - -// Note that these kernels are designed to work with arbitrary -// domain sizes, not just domains that are multiples of the tile -// size. Therefore, we have extra code that checks to make sure -// a given thread location falls within the domain boundaries in -// both X and Y. Also, the domain is covered by looping over -// multiple elements in the Y direction, while there is a one-to-one -// mapping between threads in X and the tile size in X. -// Nolan Goodnight 9/22/06 - -// This method adds constant force vectors to the velocity field -// stored in 'v' according to v(x,t+1) = v(x,t) + dt * f. -__global__ void addForces_k(cData *v, int dx, int dy, int spx, int spy, - float fx, float fy, int r, size_t pitch) { - int tx = threadIdx.x; - int ty = threadIdx.y; - cData *fj = (cData *)((char *)v + (ty + spy) * pitch) + tx + spx; - - cData vterm = *fj; - tx -= r; - ty -= r; - float s = 1.f / (1.f + tx * tx * tx * tx + ty * ty * ty * ty); - vterm.x += s * fx; - vterm.y += s * fy; - *fj = vterm; -} - -// This method performs the velocity advection step, where we -// trace velocity vectors back in time to update each grid cell. -// That is, v(x,t+1) = v(p(x,-dt),t). Here we perform bilinear -// interpolation in the velocity space. -__global__ void advectVelocity_k(cData *v, float *vx, float *vy, int dx, - int pdx, int dy, float dt, int lb, - cudaTextureObject_t texObject) { - int gtidx = blockIdx.x * blockDim.x + threadIdx.x; - int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb; - int p; - - cData vterm, ploc; - float vxterm, vyterm; - - // gtidx is the domain location in x for this thread - if (gtidx < dx) { - for (p = 0; p < lb; p++) { - // fi is the domain location in y for this thread - int fi = gtidy + p; - - if (fi < dy) { - int fj = fi * pdx + gtidx; - vterm = tex2D(texObject, (float)gtidx, (float)fi); - ploc.x = (gtidx + 0.5f) - (dt * vterm.x * dx); - ploc.y = (fi + 0.5f) - (dt * vterm.y * dy); - vterm = tex2D(texObject, ploc.x, ploc.y); - vxterm = vterm.x; - vyterm = vterm.y; - vx[fj] = vxterm; - vy[fj] = vyterm; - } - } - } -} - -// This method performs velocity diffusion and forces mass conservation -// in the frequency domain. The inputs 'vx' and 'vy' are complex-valued -// arrays holding the Fourier coefficients of the velocity field in -// X and Y. Diffusion in this space takes a simple form described as: -// v(k,t) = v(k,t) / (1 + visc * dt * k^2), where visc is the viscosity, -// and k is the wavenumber. The projection step forces the Fourier -// velocity vectors to be orthogonal to the vectors for each -// wavenumber: v(k,t) = v(k,t) - ((k dot v(k,t) * k) / k^2. -__global__ void diffuseProject_k(cData *vx, cData *vy, int dx, int dy, float dt, - float visc, int lb) { - int gtidx = blockIdx.x * blockDim.x + threadIdx.x; - int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb; - int p; - - cData xterm, yterm; - - // gtidx is the domain location in x for this thread - if (gtidx < dx) { - for (p = 0; p < lb; p++) { - // fi is the domain location in y for this thread - int fi = gtidy + p; - - if (fi < dy) { - int fj = fi * dx + gtidx; - xterm = vx[fj]; - yterm = vy[fj]; - - // Compute the index of the wavenumber based on the - // data order produced by a standard NN FFT. - int iix = gtidx; - int iiy = (fi > dy / 2) ? (fi - (dy)) : fi; - - // Velocity diffusion - float kk = (float)(iix * iix + iiy * iiy); // k^2 - float diff = 1.f / (1.f + visc * dt * kk); - xterm.x *= diff; - xterm.y *= diff; - yterm.x *= diff; - yterm.y *= diff; - - // Velocity projection - if (kk > 0.f) { - float rkk = 1.f / kk; - // Real portion of velocity projection - float rkp = (iix * xterm.x + iiy * yterm.x); - // Imaginary portion of velocity projection - float ikp = (iix * xterm.y + iiy * yterm.y); - xterm.x -= rkk * rkp * iix; - xterm.y -= rkk * ikp * iix; - yterm.x -= rkk * rkp * iiy; - yterm.y -= rkk * ikp * iiy; - } - - vx[fj] = xterm; - vy[fj] = yterm; - } - } - } -} - -// This method updates the velocity field 'v' using the two complex -// arrays from the previous step: 'vx' and 'vy'. Here we scale the -// real components by 1/(dx*dy) to account for an unnormalized FFT. -__global__ void updateVelocity_k(cData *v, float *vx, float *vy, int dx, - int pdx, int dy, int lb, size_t pitch) { - int gtidx = blockIdx.x * blockDim.x + threadIdx.x; - int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb; - int p; - - float vxterm, vyterm; - cData nvterm; - - // gtidx is the domain location in x for this thread - if (gtidx < dx) { - for (p = 0; p < lb; p++) { - // fi is the domain location in y for this thread - int fi = gtidy + p; - - if (fi < dy) { - int fjr = fi * pdx + gtidx; - vxterm = vx[fjr]; - vyterm = vy[fjr]; - - // Normalize the result of the inverse FFT - float scale = 1.f / (dx * dy); - nvterm.x = vxterm * scale; - nvterm.y = vyterm * scale; - - cData *fj = (cData *)((char *)v + fi * pitch) + gtidx; - *fj = nvterm; - } - } // If this thread is inside the domain in Y - } // If this thread is inside the domain in X -} - -// This method updates the particles by moving particle positions -// according to the velocity field and time step. That is, for each -// particle: p(t+1) = p(t) + dt * v(p(t)). -__global__ void advectParticles_k(Vertex *part, cData *v, int dx, int dy, - float dt, int lb, size_t pitch) { - int gtidx = blockIdx.x * blockDim.x + threadIdx.x; - int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb; - int p; - - // gtidx is the domain location in x for this thread - cData vterm; - Vertex pterm; - - if (gtidx < dx) { - for (p = 0; p < lb; p++) { - // fi is the domain location in y for this thread - int fi = gtidy + p; - - if (fi < dy) { - int fj = fi * dx + gtidx; - pterm = part[fj]; - - int xvi = ((int)(pterm.x * dx)); - int yvi = ((int)(pterm.y * dy)); - vterm = *((cData *)((char *)v + yvi * pitch) + xvi); - - pterm.x += dt * vterm.x; - pterm.x = pterm.x - (int)pterm.x; - pterm.x += 1.f; - pterm.x = pterm.x - (int)pterm.x; - pterm.y += dt * vterm.y; - pterm.y = pterm.y - (int)pterm.y; - pterm.y += 1.f; - pterm.y = pterm.y - (int)pterm.y; - - part[fj] = pterm; - } - } // If this thread is inside the domain in Y - } // If this thread is inside the domain in X -} - -extern "C" void addForces(cData *v, int dx, int dy, int spx, int spy, float fx, - float fy, int r, size_t tPitch) { - dim3 tids(2 * r + 1, 2 * r + 1); - - addForces_k<<<1, tids>>>(v, dx, dy, spx, spy, fx, fy, r, tPitch); - getLastCudaError("addForces_k failed."); -} - -extern "C" void advectVelocity(cData *v, float *vx, float *vy, int dx, int pdx, - int dy, float dt, size_t tPitch) { - dim3 grid((dx / TILEX) + (!(dx % TILEX) ? 0 : 1), - (dy / TILEY) + (!(dy % TILEY) ? 0 : 1)); - - dim3 tids(TIDSX, TIDSY); - - updateTexture(v, DIM * sizeof(cData), DIM, tPitch); - advectVelocity_k<<>>(v, vx, vy, dx, pdx, dy, dt, TILEY / TIDSY, - texObj); - - getLastCudaError("advectVelocity_k failed."); -} - -extern "C" void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt, - float visc, size_t tPitch) { - // Forward FFT - // cufftExecR2C(planr2c, (cufftReal*)vx, (cufftComplex*)vx); - // cufftExecR2C(planr2c, (cufftReal*)vy, (cufftComplex*)vy); - - uint3 grid = make_uint3((dx / TILEX) + (!(dx % TILEX) ? 0 : 1), - (dy / TILEY) + (!(dy % TILEY) ? 0 : 1), 1); - - uint3 tids = make_uint3(TIDSX, TIDSY, 1); - - diffuseProject_k<<>>(vx, vy, dx, dy, dt, visc, TILEY / TIDSY); - getLastCudaError("diffuseProject_k failed."); - - // Inverse FFT - // cufftExecC2R(planc2r, (cufftComplex*)vx, (cufftReal*)vx); - // cufftExecC2R(planc2r, (cufftComplex*)vy, (cufftReal*)vy); -} - -extern "C" void updateVelocity(cData *v, float *vx, float *vy, int dx, int pdx, - int dy, size_t tPitch) { - dim3 grid((dx / TILEX) + (!(dx % TILEX) ? 0 : 1), - (dy / TILEY) + (!(dy % TILEY) ? 0 : 1)); - - dim3 tids(TIDSX, TIDSY); - - updateVelocity_k<<>>(v, vx, vy, dx, pdx, dy, TILEY / TIDSY, - tPitch); - getLastCudaError("updateVelocity_k failed."); -} - -extern "C" void advectParticles(Vertex *p, cData *v, int dx, int dy, float dt, - size_t tPitch) { - dim3 grid((dx / TILEX) + (!(dx % TILEX) ? 0 : 1), - (dy / TILEY) + (!(dy % TILEY) ? 0 : 1)); - - dim3 tids(TIDSX, TIDSY); - - advectParticles_k<<>>(p, v, dx, dy, dt, TILEY / TIDSY, tPitch); - getLastCudaError("advectParticles_k failed."); -} diff --git a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.h b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.h deleted file mode 100644 index c57db90f..00000000 --- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.h +++ /dev/null @@ -1,109 +0,0 @@ -/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __STABLEFLUIDS_KERNELS_H_ -#define __STABLEFLUIDS_KERNELS_H_ - -#define DIM 512 // Square size of solver domain -#define DS (DIM * DIM) // Total domain size -#define CPADW (DIM / 2 + 1) // Padded width for real->complex in-place FFT -#define RPADW \ - (2 * (DIM / 2 + 1)) // Padded width for real->complex in-place FFT -#define PDS (DIM * CPADW) // Padded total domain size - -#define DT 0.09f // Delta T for interative solver -#define VIS 0.0025f // Viscosity constant -#define FORCE (5.8f * DIM) // Force scale factor -#define FR 4 // Force update radius - -#define TILEX 64 // Tile width -#define TILEY 64 // Tile height -#define TIDSX 64 // Tids in X -#define TIDSY 4 // Tids in Y - -typedef unsigned long DWORD; - -typedef struct vertex { - float x, y, z; - DWORD c; -} Vertex; - -// Vector data type used to velocity and force fields -typedef float2 cData; - -extern "C" void setupTexture(int x, int y); -extern "C" void updateTexture(cData *data, size_t w, size_t h, size_t pitch); -extern "C" void deleteTexture(void); - -// This method adds constant force vectors to the velocity field -// stored in 'v' according to v(x,t+1) = v(x,t) + dt * f. -__global__ void addForces_k(cData *v, int dx, int dy, int spx, int spy, - float fx, float fy, int r, size_t pitch); - -// This method performs the velocity advection step, where we -// trace velocity vectors back in time to update each grid cell. -// That is, v(x,t+1) = v(p(x,-dt),t). Here we perform bilinear -// interpolation in the velocity space. -__global__ void advectVelocity_k(cData *v, float *vx, float *vy, int dx, - int pdx, int dy, float dt, int lb, - cudaTextureObject_t tex); - -// This method performs velocity diffusion and forces mass conservation -// in the frequency domain. The inputs 'vx' and 'vy' are complex-valued -// arrays holding the Fourier coefficients of the velocity field in -// X and Y. Diffusion in this space takes a simple form described as: -// v(k,t) = v(k,t) / (1 + visc * dt * k^2), where visc is the viscosity, -// and k is the wavenumber. The projection step forces the Fourier -// velocity vectors to be orthogonal to the wave wave vectors for each -// wavenumber: v(k,t) = v(k,t) - ((k dot v(k,t) * k) / k^2. -__global__ void diffuseProject_k(cData *vx, cData *vy, int dx, int dy, float dt, - float visc, int lb); - -// This method updates the velocity field 'v' using the two complex -// arrays from the previous step: 'vx' and 'vy'. Here we scale the -// real components by 1/(dx*dy) to account for an unnormalized FFT. -__global__ void updateVelocity_k(cData *v, float *vx, float *vy, int dx, - int pdx, int dy, int lb, size_t pitch); - -// This method updates the particles by moving particle positions -// according to the velocity field and time step. That is, for each -// particle: p(t+1) = p(t) + dt * v(p(t)). -__global__ void advectParticles_k(Vertex *part, cData *v, int dx, int dy, - float dt, int lb, size_t pitch); - -extern "C" void addForces(cData *v, int dx, int dy, int spx, int spy, float fx, - float fy, int r, size_t tPitch); -extern "C" void advectVelocity(cData *v, float *vx, float *vy, int dx, int pdx, - int dy, float dt, size_t tPitch); -extern "C" void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt, - float visc, size_t tPitch); -extern "C" void updateVelocity(cData *v, float *vx, float *vy, int dx, int pdx, - int dy, size_t tPitch); -extern "C" void advectParticles(Vertex *p, cData *v, int dx, int dy, float dt, - size_t tPitch); - -#endif