Remove legacy fluidsD3D9

2025-04-14 20:21:21 +01:00 · 2024-12-13 15:37:58 -08:00 · 2024-12-13 15:37:58 -08:00 · c9794ff283
commit c9794ff283
parent 035dcfd357
10 changed files with 1 additions and 1366 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -17,6 +17,7 @@
        * `batchedLabelMarkersAndLabelCompressionNPP` demonstrating NPP features (reason: some functionality removed from library)
    * `5_Domain_Specific`
        * Legacy Direct3D 9 and 10 interoperability samples:
+            * `fluidsD3D9`
            * `simpleD3D10`
            * `simpleD3D10RenderTarget`
            * `simpleD3D10Texture`
--- a/Samples/5_Domain_Specific/fluidsD3D9/Makefile
+++ b/Samples/5_Domain_Specific/fluidsD3D9/Makefile
@ -1,46 +0,0 @@
-################################################################################
-# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#  * Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  * Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#  * Neither the name of NVIDIA CORPORATION nor the names of its
-#    contributors may be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
-# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-################################################################################
-#
-# Makefile project only supported on Mac OS X and Linux Platforms)
-#
-################################################################################
-
-# Target rules
-all: build
-
-build:
-	$(info >>> WARNING - fluidsD3D9 is not supported on Linux - waiving sample <<<)
-
-run: build
-
-testrun: build
-
-clean:
-
-clobber: clean
--- a/Samples/5_Domain_Specific/fluidsD3D9/README.md
+++ b/Samples/5_Domain_Specific/fluidsD3D9/README.md
@ -1,49 +0,0 @@
-# fluidsD3D9 - Fluids (Direct3D Version)
-
-## Description
-
-An example of fluid simulation using CUDA and CUFFT, with Direct3D 9 rendering.  A Direct3D Capable device is required.
-
-## Key Concepts
-
-Graphics Interop, CUFFT Library, Physically-Based Simulation
-
-## Supported SM Architectures
-
-[SM 5.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 5.3 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)
-
-## Supported OSes
-
-Windows
-
-## Supported CPU Architecture
-
-x86_64
-
-## CUDA APIs involved
-
-### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
-cudaGraphicsUnmapResources, cudaMemcpy, cudaMallocArray, cudaFreeArray, cudaFree, cudaMallocPitch, cudaGraphicsResourceGetMappedPointer, cudaGetLastError, cudaGraphicsMapResources, cudaDestroyTextureObject, cudaGetDevice, cudaCreateTextureObject, cudaGraphicsUnregisterResource, cudaMalloc, cudaGetDeviceProperties
-
-## Dependencies needed to build/run
-[DirectX](../../../README.md#directx)
-
-## Prerequisites
-
-Download and install the [CUDA Toolkit 12.5](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
-Make sure the dependencies mentioned in [Dependencies]() section above are installed.
-
-## Build and Run
-
-### Windows
-The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
-```
-*_vs<version>.sln - for Visual Studio <version>
-```
-Each individual sample has its own set of solution files in its directory:
-
-To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
-> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
-
-## References (for more details)
-
--- a/Samples/5_Domain_Specific/fluidsD3D9/data/ref_fluidsD3D9.ppm
+++ b/Samples/5_Domain_Specific/fluidsD3D9/data/ref_fluidsD3D9.ppm
--- a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_lg.gif
+++ b/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_lg.gif
--- a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_md.gif
+++ b/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_md.gif
--- a/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_sm.gif
+++ b/Samples/5_Domain_Specific/fluidsD3D9/doc/fluidsD3D9_sm.gif
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9.cpp
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9.cpp
@ -1,829 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of NVIDIA CORPORATION nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#pragma warning(disable : 4312)
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#define WINDOWS_LEAN_AND_MEAN
-#include <windows.h>
-#endif
-
-// including CUDA headers and helper functions
-#include <builtin_types.h>
-#include <cuda_runtime.h>
-#include <cufft.h>
-#include <cuda_d3d9_interop.h>
-
-// SDK helper functions
-#include <helper_cuda.h>
-#include <helper_functions.h>
-#include "fluidsD3D9_kernels.h"
-#include <rendercheck_d3d9.h>
-#include <DirectXMath.h>
-using namespace DirectX;
-
-#define MAX_EPSILON 10
-
-static char *SDK_name = "fluidsD3D9";
-
-int *pArgc = NULL;
-char **pArgv = NULL;
-
-// CUDA example code that implements the frequency space version of
-// Jos Stam's paper 'Stable Fluids' in 2D. This application uses the
-// CUDA FFT library (CUFFT) to perform velocity diffusion and to
-// force non-divergence in the velocity field at each time step. It uses
-// CUDA-OpenGL interoperability to update the particle field directly
-// instead of doing a copy to system memory before drawing. Texture is
-// used for automatic bilinear interpolation at the velocity advection step.
-
-HWND hWnd;                                // Window handle
-LPDIRECT3D9EX g_pD3D = NULL;              // Used to create the D3DDevice
-unsigned int g_iAdapter = NULL;           // Adapter
-LPDIRECT3DDEVICE9EX g_pD3DDevice = NULL;  // Rendering device
-LPDIRECT3DVERTEXBUFFER9 g_pVB = NULL;     // Buffer to hold particles
-LPDIRECT3DTEXTURE9 g_pTexture = NULL;     // Texture to render points
-
-struct cudaGraphicsResource *cuda_VB_resource;  // handles D3D9-CUDA exchange
-
-HRESULT InitD3D9(HWND hWnd);
-HRESULT InitD3D9RenderState();
-HRESULT InitCUDA();
-HRESULT InitCUFFT();
-HRESULT InitVertexBuffer();
-HRESULT FreeVertexBuffer();
-HRESULT InitPointTexture();
-HRESULT RestoreContextResources();
-
-#define D3DFVF_CUSTOMVERTEX (D3DFVF_XYZ | D3DFVF_DIFFUSE)
-void updateVB(void);
-void initParticles(cData *p, int dx, int dy);
-
-// CUFFT plan handle
-static cufftHandle g_planr2c;
-static cufftHandle g_planc2r;
-static cData *g_vxfield = NULL;
-static cData *g_vyfield = NULL;
-
-cData *g_hvfield = NULL;
-cData *g_dvfield = NULL;
-static int wWidth = MAX(512, DIM);
-static int wHeight = MAX(512, DIM);
-
-static int clicked = 0;
-static int fpsCount = 0;
-static int fpsLimit = 1;
-StopWatchInterface *timer = NULL;
-
-// Particle data
-static Vertex *g_mparticles = NULL;
-static cData *g_particles = NULL;
-static int lastx = 0, lasty = 0;
-
-// Texture pitch
-// unsigned int g_tPitch = 0;
-size_t g_tPitch = 0;
-
-D3DDISPLAYMODEEX g_d3ddm;
-D3DPRESENT_PARAMETERS g_d3dpp;
-
-bool g_bWindowed = true;
-bool g_bDeviceLost = false;
-bool g_bPassed = true;
-int g_iFrameToCompare = 100;
-bool g_bQAAddTestForce = true;
-char *ref_file = NULL;
-
-#define NAME_LEN 512
-
-char device_name[NAME_LEN];
-
-VOID Cleanup() {
-  // Unregister vertex buffer
-  FreeVertexBuffer();
-
-  deleteTexture();
-
-  // Free all host and device resources
-  free(g_hvfield);
-  free(g_particles);
-  cudaFree(g_dvfield);
-  cudaFree(g_vxfield);
-  cudaFree(g_vyfield);
-
-  cufftDestroy(g_planr2c);
-  cufftDestroy(g_planc2r);
-
-  if (g_pTexture != NULL) {
-    g_pTexture->Release();
-    g_pTexture = NULL;
-  }
-
-  if (g_pD3DDevice != NULL) {
-    g_pD3DDevice->Release();
-    g_pD3DDevice = NULL;
-  }
-
-  if (g_pD3D != NULL) {
-    g_pD3D->Release();
-    g_pD3D = NULL;
-  }
-
-  sdkDeleteTimer(&timer);
-}
-
-LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) {
-  switch (msg) {
-    case WM_DESTROY:
-      Cleanup();
-      PostQuitMessage(0);
-      exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
-      return 0;
-
-    case WM_KEYDOWN:
-      switch (wParam) {
-        case 27:
-          Cleanup();
-          PostQuitMessage(0);
-          break;
-
-        case 0x52:
-          memset(g_hvfield, 0, sizeof(cData) * DS);
-          cudaMemcpy(g_dvfield, g_hvfield, sizeof(cData) * DS,
-                     cudaMemcpyHostToDevice);
-
-          initParticles(g_particles, DIM, DIM);
-          cudaGraphicsUnregisterResource(cuda_VB_resource);
-
-          updateVB();
-
-          cudaGraphicsD3D9RegisterResource(&cuda_VB_resource, g_pVB,
-                                           cudaD3D9RegisterFlagsNone);
-          getLastCudaError("cudaGraphicsD3D9RegisterResource failed");
-          break;
-
-        default:
-          break;
-      }
-
-      break;
-
-    case WM_SIZE:
-      wWidth = LOWORD(lParam);
-      wHeight = HIWORD(lParam);
-      break;
-
-    case WM_MOUSEMOVE:
-      if (wParam == MK_LBUTTON) {
-        clicked = 1;
-      } else {
-        clicked = 0;
-      }
-
-      int x = LOWORD(lParam), y = HIWORD(lParam);
-
-      // Convert motion coordinates to domain
-      float fx = (x / (float)wWidth);
-      float fy = (y / (float)wHeight);
-      int nx = (int)(fx * DIM);
-      int ny = (int)(fy * DIM);
-
-      if (clicked && nx < DIM - FR && nx > FR - 1 && ny < DIM - FR &&
-          ny > FR - 1) {
-        int ddx = LOWORD(lParam) - lastx;
-        int ddy = HIWORD(lParam) - lasty;
-
-        fx = ddx / (float)wWidth;
-        fy = ddy / (float)wHeight;
-        int spy = ny - FR;
-        int spx = nx - FR;
-        addForces(g_dvfield, DIM, DIM, spx, spy, FORCE * DT * fx,
-                  FORCE * DT * fy, FR, g_tPitch);
-        lastx = x;
-        lasty = y;
-      }
-
-      break;
-  }
-
-  return DefWindowProc(hWnd, msg, wParam, lParam);
-}
-
-HRESULT InitVertexBuffer() {
-  // Create the vertex buffer.
-  if (FAILED(g_pD3DDevice->CreateVertexBuffer(DS * sizeof(Vertex), 0,
-                                              D3DFVF_CUSTOMVERTEX,
-                                              D3DPOOL_DEFAULT, &g_pVB, NULL))) {
-    return E_FAIL;
-  }
-
-  // Initialize the Vertex Buffer with the particles
-  updateVB();
-
-  cudaGraphicsD3D9RegisterResource(&cuda_VB_resource, g_pVB,
-                                   cudaD3D9RegisterFlagsNone);
-  getLastCudaError("cudaGraphicsD3D9RegisterResource failed");
-
-  return S_OK;
-}
-
-HRESULT InitPointTexture() {
-  // Create the texture.
-  int width = 64;
-  int height = width;
-
-  if (FAILED(g_pD3DDevice->CreateTexture(
-          width, height, 0, D3DUSAGE_AUTOGENMIPMAP | D3DUSAGE_DYNAMIC,
-          D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &g_pTexture, NULL))) {
-    return E_FAIL;
-  }
-
-  // Fill in top level
-  D3DLOCKED_RECT rect;
-
-  if (FAILED(g_pTexture->LockRect(0, &rect, 0, 0))) {
-    return E_FAIL;
-  }
-
-  typedef unsigned int TexelType;
-  TexelType *texel = (TexelType *)rect.pBits;
-
-  for (int y = -height / 2; y < height / 2; ++y) {
-    float yf = y + 0.5f;
-    TexelType *t = texel;
-
-    for (int x = -width / 2; x < width / 2; ++x) {
-      float xf = x + 0.5f;
-      float radius = (float)width / 32;
-      float dist = sqrtf(xf * xf + yf * yf) / radius;
-      float n = 0.1f;
-      float value;
-
-      if (dist < 1) {
-        value = 1 - 0.5f * powf(dist, n);
-      } else if (dist < 2) {
-        value = 0.5f * powf(2 - dist, n);
-      } else {
-        value = 0;
-      }
-
-      value *= 75;
-      unsigned char *c = (unsigned char *)t;
-      c[0] = c[1] = c[2] = c[3] = (unsigned char)value;
-      ++t;
-    }
-
-    texel += rect.Pitch / sizeof(TexelType);
-  }
-
-  if (FAILED(g_pTexture->UnlockRect(0))) {
-    return E_FAIL;
-  }
-
-  // Set sampler state
-  if (FAILED(g_pD3DDevice->SetSamplerState(0, D3DSAMP_MINFILTER,
-                                           D3DTEXF_LINEAR))) {
-    return E_FAIL;
-  }
-
-  if (FAILED(g_pD3DDevice->SetSamplerState(0, D3DSAMP_MAGFILTER,
-                                           D3DTEXF_LINEAR))) {
-    return E_FAIL;
-  }
-
-  return S_OK;
-}
-
-//-----------------------------------------------------------------------------
-// Name: FreeVertexBuffer()
-// Desc: Free's the Vertex Buffer resource
-//-----------------------------------------------------------------------------
-HRESULT FreeVertexBuffer() {
-  if (g_pVB != NULL) {
-    // Unregister vertex buffer
-    cudaGraphicsUnregisterResource(cuda_VB_resource);
-    getLastCudaError("cudaGraphicsUnregisterResource failed");
-
-    g_pVB->Release();
-  }
-
-  return S_OK;
-}
-
-void updateVB(void) {
-  Vertex *data = new Vertex[DS];
-  g_pVB->Lock(0, DS * sizeof(Vertex), (void **)&data, 0);
-
-  for (int i = 0; i < DS; i++) {
-    data[i].x = g_particles[i].x;
-    data[i].y = g_particles[i].y;
-    data[i].z = 0.f;
-    data[i].c = 0xff00ff00;
-  }
-
-  g_pVB->Unlock();
-}
-
-HRESULT InitD3D9(HWND hWnd) {
-  // Create the D3D object.
-  if (S_OK != Direct3DCreate9Ex(D3D_SDK_VERSION, &g_pD3D)) {
-    return E_FAIL;
-  }
-
-  D3DADAPTER_IDENTIFIER9 adapterId;
-  int device;
-  bool bDeviceFound = false;
-  printf("\n");
-
-  cudaError cuStatus;
-
-  for (g_iAdapter = 0; g_iAdapter < g_pD3D->GetAdapterCount(); g_iAdapter++) {
-    HRESULT hr = g_pD3D->GetAdapterIdentifier(g_iAdapter, 0, &adapterId);
-
-    if (FAILED(hr)) {
-      continue;
-    }
-
-    // clear any errors we got while querying invalid compute devices
-    cuStatus = cudaGetLastError();
-    cuStatus = cudaD3D9GetDevice(&device, adapterId.DeviceName);
-    printLastCudaError("cudaD3D9GetDevice failed");  // This prints and resets
-                                                     // the cudaError to
-                                                     // cudaSuccess
-
-    printf("> Display Device #%d: \"%s\" %s Direct3D9\n", g_iAdapter,
-           adapterId.Description,
-           (cuStatus == cudaSuccess) ? "supports" : "does not support");
-
-    if (cudaSuccess == cuStatus) {
-      bDeviceFound = true;
-      STRCPY(device_name, NAME_LEN, adapterId.Description);
-      break;
-    }
-  }
-
-  // we check to make sure we have found a cuda-compatible D3D device to work on
-  if (!bDeviceFound) {
-    printf("\nNo CUDA-compatible Direct3D9 device available\n");
-    // Release the D3D device
-    g_pD3D->Release();
-    exit(EXIT_SUCCESS);
-  }
-
-  cudaGetDevice(&device);
-  cudaDeviceProp deviceProp;
-  cudaGetDeviceProperties(&deviceProp, device);
-  strcpy(device_name, deviceProp.name);
-
-  RECT rc;
-  GetClientRect(hWnd, &rc);
-  g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
-
-  // Set up the structure used to create the D3DDevice
-  D3DPRESENT_PARAMETERS d3dpp;
-  ZeroMemory(&d3dpp, sizeof(d3dpp));
-  d3dpp.Windowed = TRUE;
-  d3dpp.SwapEffect = D3DSWAPEFFECT_DISCARD;
-  d3dpp.BackBufferFormat = g_d3ddm.Format;  // D3DFMT_UNKNOWN;
-
-  // Create the D3DDevice
-  if (FAILED(g_pD3D->CreateDeviceEx(g_iAdapter, D3DDEVTYPE_HAL, hWnd,
-                                    D3DCREATE_HARDWARE_VERTEXPROCESSING, &d3dpp,
-                                    NULL, &g_pD3DDevice))) {
-    return E_FAIL;
-  } else {
-    return S_OK;
-  }
-}
-
-// Initialize the D3D Rendering State
-HRESULT InitD3D9RenderState() {
-  // Set projection matrix
-  XMMATRIX matProj;
-  XMFLOAT4X4 matProjFloat;
-  matProj = XMMatrixOrthographicOffCenterLH(0, 1, 1, 0, 0, 1);
-  XMStoreFloat4x4(&matProjFloat, matProj);
-  g_pD3DDevice->SetTransform(D3DTS_PROJECTION, (D3DMATRIX *)&matProjFloat);
-
-  // Turn off D3D lighting, since we are providing our own vertex colors
-  if (FAILED(g_pD3DDevice->SetRenderState(D3DRS_LIGHTING, FALSE))) {
-    return E_FAIL;
-  }
-
-  return S_OK;
-}
-
-HRESULT InitCUDA() {
-  printf("InitCUDA() g_pD3DDevice = %p\n", g_pD3DDevice);
-
-  // Now we need to bind a CUDA context to the DX9 device
-  // This is the CUDA 2.0 DX9 interface (required for Windows XP and Vista)
-  cudaD3D9SetDirect3DDevice(g_pD3DDevice);
-  getLastCudaError("cudaD3D9SetDirect3DDevice failed");
-
-  return S_OK;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! RestoreContextResourcess
-//    - this function restores all of the CUDA/D3D resources and contexts
-////////////////////////////////////////////////////////////////////////////////
-HRESULT RestoreContextResources() {
-  // Reinitialize D3D9 resources, CUDA resources/contexts
-  InitCUDA();
-  InitD3D9RenderState();
-  InitCUFFT();
-  InitVertexBuffer();
-  InitPointTexture();
-
-  return S_OK;
-}
-
-////////////////////////////////////////////////////////////////////////////////
-//! DeviceLostHandler
-//    - this function handles reseting and initialization of the D3D device
-//      in the event this Device gets Lost
-////////////////////////////////////////////////////////////////////////////////
-HRESULT DeviceLostHandler() {
-  HRESULT hr = S_OK;
-
-  // test the cooperative level to see if it's okay
-  // to render
-  if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) {
-    // if the device was truly lost, (i.e., a fullscreen device just lost
-    // focus), wait
-    // until we g_et it back
-    if (hr == D3DERR_DEVICELOST) {
-      return S_OK;
-    }
-
-    // eventually, we will g_et this return value,
-    // indicating that we can now reset the device
-    if (hr == D3DERR_DEVICENOTRESET) {
-      // if we are windowed, read the desktop mode and use the same format for
-      // the back buffer; this effectively turns off color conversion
-
-      if (g_bWindowed) {
-        g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
-        g_d3dpp.BackBufferFormat = g_d3ddm.Format;
-      }
-
-      // now try to reset the device
-      if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) {
-        return hr;
-      } else {
-        // This is a common function we use to restore all hardware
-        // resources/state
-        RestoreContextResources();
-
-        // we have acquired the device
-        g_bDeviceLost = false;
-      }
-    }
-  }
-
-  return hr;
-}
-
-HRESULT InitCUFFT() {
-  // You can only call CUDA D3D9 device has been bound to the CUDA
-  // context, otherwise it will not work
-  g_hvfield = (cData *)malloc(sizeof(cData) * DS);
-  memset(g_hvfield, 0, sizeof(cData) * DS);
-
-  // Allocate and initialize device data
-  cudaMallocPitch((void **)&g_dvfield, &g_tPitch, sizeof(cData) * DIM, DIM);
-
-  cudaMemcpy(g_dvfield, g_hvfield, sizeof(cData) * DS, cudaMemcpyHostToDevice);
-
-  // Temporary complex velocity field data
-  cudaMalloc((void **)&g_vxfield, sizeof(cData) * PDS);
-  cudaMalloc((void **)&g_vyfield, sizeof(cData) * PDS);
-
-  setupTexture(DIM, DIM);
-
-  // Create particle array
-  g_particles = (cData *)malloc(sizeof(cData) * DS);
-  memset(g_particles, 0, sizeof(cData) * DS);
-
-  initParticles(g_particles, DIM, DIM);
-
-  // Create CUFFT transform plan configuration
-  cufftPlan2d(&g_planr2c, DIM, DIM, CUFFT_R2C);
-  cufftPlan2d(&g_planc2r, DIM, DIM, CUFFT_C2R);
-
-  return S_OK;
-}
-
-HRESULT Render(void) {
-  HRESULT hr = S_OK;
-
-  // Normal case where CUDA Device is not lost
-  if (!g_bDeviceLost) {
-    sdkStartTimer(&timer);
-
-    advectVelocity(g_dvfield, (float *)g_vxfield, (float *)g_vyfield, DIM,
-                   RPADW, DIM, DT, g_tPitch);
-    {
-      // Forward FFT
-      cufftExecR2C(g_planr2c, (cufftReal *)g_vxfield,
-                   (cufftComplex *)g_vxfield);
-      cufftExecR2C(g_planr2c, (cufftReal *)g_vyfield,
-                   (cufftComplex *)g_vyfield);
-
-      diffuseProject(g_vxfield, g_vyfield, CPADW, DIM, DT, VIS, g_tPitch);
-
-      // Inverse FFT
-      cufftExecC2R(g_planc2r, (cufftComplex *)g_vxfield,
-                   (cufftReal *)g_vxfield);
-      cufftExecC2R(g_planc2r, (cufftComplex *)g_vyfield,
-                   (cufftReal *)g_vyfield);
-    }
-    updateVelocity(g_dvfield, (float *)g_vxfield, (float *)g_vyfield, DIM,
-                   RPADW, DIM, g_tPitch);
-
-    // Map D3D9 vertex buffer to CUDA
-    {
-      size_t num_bytes;
-      checkCudaErrors(cudaGraphicsMapResources(1, &cuda_VB_resource, 0));
-      getLastCudaError("cudaGraphicsMapResources failed");
-      // This gets a pointer from the Vertex Buffer
-      checkCudaErrors(cudaGraphicsResourceGetMappedPointer(
-          (void **)&g_mparticles, &num_bytes, cuda_VB_resource));
-      getLastCudaError("cudaGraphicsResourceGetMappedPointer failed");
-
-      advectParticles(g_mparticles, g_dvfield, DIM, DIM, DT, g_tPitch);
-
-      // Unmap vertex buffer
-      checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_VB_resource, 0));
-      getLastCudaError("cudaGraphicsUnmapResource failed");
-    }
-
-    g_pD3DDevice->Clear(0, NULL, D3DCLEAR_TARGET, D3DCOLOR_XRGB(0, 0, 0), 1.0f,
-                        0);
-    g_pD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, FALSE);
-    g_pD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
-    g_pD3DDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE);
-    g_pD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE);
-    g_pD3DDevice->SetRenderState(D3DRS_POINTSPRITEENABLE, TRUE);
-    float size = 16;
-    g_pD3DDevice->SetRenderState(D3DRS_POINTSIZE, *((DWORD *)&size));
-    g_pD3DDevice->SetTexture(0, g_pTexture);
-
-    if (SUCCEEDED(g_pD3DDevice->BeginScene())) {
-      // Draw particles
-      g_pD3DDevice->SetStreamSource(0, g_pVB, 0, sizeof(Vertex));
-      g_pD3DDevice->SetFVF(D3DFVF_CUSTOMVERTEX);
-      g_pD3DDevice->DrawPrimitive(D3DPT_POINTLIST, 0, DS);
-
-      g_pD3DDevice->EndScene();
-    }
-
-    // Finish timing before swap buffers to avoid refresh sync
-    sdkStopTimer(&timer);
-    // Present the backbuffer contents to the display
-    hr = g_pD3DDevice->Present(NULL, NULL, NULL, NULL);
-
-    if (hr == D3DERR_DEVICELOST) {
-      fprintf(stderr, "drawScene Present = %08x detected D3D DeviceLost\n", hr);
-      g_bDeviceLost = true;
-
-      FreeVertexBuffer();
-    }
-
-    fpsCount++;
-
-    if (fpsCount == fpsLimit) {
-      char fps[256];
-      float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
-      sprintf(fps, "CUDA/D3D9 Stable Fluids (%d x %d): %3.1f fps", DIM, DIM,
-              ifps);
-      SetWindowText(hWnd, fps);
-      fpsCount = 0;
-      fpsLimit = (int)MAX(ifps, 1.f);
-      sdkResetTimer(&timer);
-    }
-  } else {
-    // Begin code to handle case where the D3D9 device is lost
-    if (FAILED(hr = DeviceLostHandler())) {
-      fprintf(stderr, "DeviceLostHandler FAILED returned %08x\n", hr);
-      return hr;
-    }
-
-    fprintf(stderr, "Render DeviceLost handler\n");
-
-    // test the cooperative level to see if it's okay
-    // to render
-    if (FAILED(hr = g_pD3DDevice->TestCooperativeLevel())) {
-      fprintf(stderr,
-              "TestCooperativeLevel = %08x failed, will attempt to reset\n",
-              hr);
-
-      // if the device was truly lost, (i.e., a fullscreen device just lost
-      // focus), wait
-      // until we g_et it back
-
-      if (hr == D3DERR_DEVICELOST) {
-        fprintf(
-            stderr,
-            "TestCooperativeLevel = %08x DeviceLost, will retry next call\n",
-            hr);
-        return S_OK;
-      }
-
-      // eventually, we will g_et this return value,
-      // indicating that we can now reset the device
-      if (hr == D3DERR_DEVICENOTRESET) {
-        fprintf(stderr,
-                "TestCooperativeLevel = %08x will try to RESET the device\n",
-                hr);
-        // if we are windowed, read the desktop mode and use the same format for
-        // the back buffer; this effectively turns off color conversion
-
-        if (g_bWindowed) {
-          g_pD3D->GetAdapterDisplayModeEx(g_iAdapter, &g_d3ddm, NULL);
-          g_d3dpp.BackBufferFormat = g_d3ddm.Format;
-        }
-
-        // now try to reset the device
-        if (FAILED(hr = g_pD3DDevice->Reset(&g_d3dpp))) {
-          fprintf(stderr, "TestCooperativeLevel = %08x RESET device FAILED\n",
-                  hr);
-          return hr;
-        } else {
-          fprintf(stderr, "TestCooperativeLevel = %08x RESET device SUCCESS!\n",
-                  hr);
-
-          // Reinitialize D3D9 resources, CUDA resources/contexts
-          RestoreContextResources();
-
-          fprintf(stderr, "TestCooperativeLevel = %08x INIT device SUCCESS!\n",
-                  hr);
-
-          // we have acquired the device
-          g_bDeviceLost = false;
-        }
-      }
-    }
-  }
-
-  return hr;
-}
-
-// very simple von neumann middle-square prng.  can't use rand() in -qatest
-// mode because its implementation varies across platforms which makes testing
-// for consistency in the important parts of this program difficult.
-float myrand(void) {
-  static int seed = 72191;
-  char sq[22];
-
-  if (ref_file) {
-    seed *= seed;
-    sprintf(sq, "%010d", seed);
-    // pull the middle 5 digits out of sq
-    sq[8] = 0;
-    seed = atoi(&sq[3]);
-
-    return seed / 99999.f;
-  } else {
-    return rand() / (float)RAND_MAX;
-  }
-}
-
-void initParticles(cData *p, int dx, int dy) {
-  int i, j;
-
-  for (i = 0; i < dy; i++) {
-    for (j = 0; j < dx; j++) {
-      p[i * dx + j].x = (j + 0.5f + (myrand() - 0.5f)) / dx;
-      p[i * dx + j].y = (i + 0.5f + (myrand() - 0.5f)) / dy;
-    }
-  }
-}
-
-int main(int argc, char **argv) {
-  pArgc = &argc;
-  pArgv = argv;
-
-  printf("%s Starting...\n\n", argv[0]);
-
-  printf(
-      "NOTE: The CUDA Samples are not meant for performance measurements. "
-      "Results may vary when GPU Boost is enabled.\n\n");
-
-  sdkCreateTimer(&timer);
-  sdkResetTimer(&timer);
-
-  // command line options
-  // automated build testing harness
-  if (checkCmdLineFlag(argc, (const char **)argv, "file")) {
-    getCmdLineArgumentString(argc, (const char **)argv, "file", &ref_file);
-  }
-
-  HINSTANCE hInst = GetModuleHandle(NULL);
-  // Register the window class
-  WNDCLASSEX wc = {sizeof(WNDCLASSEX),    CS_CLASSDC, MsgProc, 0L,   0L,
-                   GetModuleHandle(NULL), NULL,       NULL,    NULL, NULL,
-                   "fluidsD3D9",          NULL};
-  RegisterClassEx(&wc);
-
-  // Create the application's window
-  int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME);
-  int yCaption = ::GetSystemMetrics(SM_CYCAPTION);
-  int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME);
-  hWnd = CreateWindow("fluidsD3D9", "CUDA/D3D9 Stable Fluids",
-                      WS_OVERLAPPEDWINDOW, 100, 100, wWidth + 2 * xBorder,
-                      wHeight + 2 * yBorder + yCaption, NULL, NULL,
-                      wc.hInstance, NULL);
-
-  if (SUCCEEDED(InitD3D9(hWnd)) && SUCCEEDED(InitCUDA()) &&
-      SUCCEEDED(InitD3D9RenderState()) && SUCCEEDED(InitCUFFT()) &&
-      SUCCEEDED(InitVertexBuffer()) && SUCCEEDED(InitPointTexture())) {
-    ShowWindow(hWnd, SW_SHOWDEFAULT);
-    UpdateWindow(hWnd);
-
-    // Rendering loop
-    MSG msg;
-    ZeroMemory(&msg, sizeof(msg));
-
-    while (msg.message != WM_QUIT) {
-      if (PeekMessage(&msg, NULL, 0U, 0U, PM_REMOVE)) {
-        TranslateMessage(&msg);
-        DispatchMessage(&msg);
-      } else {
-        Render();
-
-        if (ref_file) {
-          for (int count = 0; count < g_iFrameToCompare; count++) {
-            // add in a little force so the automated testing is interesing.
-            int x = wWidth / (count + 1);
-            int y = wHeight / (count + 1);
-            float fx = (x / (float)wWidth);
-            float fy = (y / (float)wHeight);
-            int nx = (int)(fx * DIM);
-            int ny = (int)(fy * DIM);
-
-            int ddx = 35;
-            int ddy = 35;
-            fx = ddx / (float)wWidth;
-            fy = ddy / (float)wHeight;
-            int spy = ny - FR;
-            int spx = nx - FR;
-
-            addForces(g_dvfield, DIM, DIM, spx, spy, FORCE * DT * fx,
-                      FORCE * DT * fy, FR, g_tPitch);
-            // g_bQAAddTestForce = false; // only add it once
-
-            Render();
-          }
-
-          const char *cur_image_path = "qatest_fluidsD3D9.ppm";
-
-          // Save a reference of our current test run image
-          CheckRenderD3D9::BackbufferToPPM(g_pD3DDevice, cur_image_path);
-
-          // compare to official reference image, printing PASS or FAIL.
-          g_bPassed = CheckRenderD3D9::PPMvsPPM(cur_image_path, ref_file,
-                                                argv[0], MAX_EPSILON, 0.30f);
-
-          PostQuitMessage(0);
-        }
-      }
-    }
-  }
-
-  UnregisterClass("fluidsD3D9", wc.hInstance);
-
-  //
-  // and exit
-  //
-  printf("> %s running on %s exiting...\n", SDK_name, device_name);
-  exit(g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE);
-}
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.cu
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.cu
@ -1,333 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of NVIDIA CORPORATION nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <cuda.h>
-#include <builtin_types.h>
-#include <cufft.h>
-#include <cuda_runtime.h>
-#include <helper_cuda.h>
-#include "fluidsD3D9_kernels.h"
-
-// Texture object for reading velocity field
-cudaTextureObject_t texObj;
-static cudaArray *array = NULL;
-
-void setupTexture(int x, int y) {
-  cudaChannelFormatDesc desc = cudaCreateChannelDesc<float2>();
-
-  cudaMallocArray(&array, &desc, y, x);
-  getLastCudaError("cudaMalloc failed");
-
-  cudaResourceDesc texRes;
-  memset(&texRes, 0, sizeof(cudaResourceDesc));
-
-  texRes.resType = cudaResourceTypeArray;
-  texRes.res.array.array = array;
-
-  cudaTextureDesc texDescr;
-  memset(&texDescr, 0, sizeof(cudaTextureDesc));
-
-  texDescr.normalizedCoords = false;
-  texDescr.filterMode = cudaFilterModeLinear;
-  texDescr.addressMode[0] = cudaAddressModeWrap;
-  texDescr.readMode = cudaReadModeElementType;
-
-  checkCudaErrors(cudaCreateTextureObject(&texObj, &texRes, &texDescr, NULL));
-}
-
-void updateTexture(cData *data, size_t wib, size_t h, size_t pitch) {
-  checkCudaErrors(cudaMemcpy2DToArray(array, 0, 0, data, pitch, wib, h,
-                                      cudaMemcpyDeviceToDevice));
-}
-
-void deleteTexture(void) {
-  checkCudaErrors(cudaDestroyTextureObject(texObj));
-  checkCudaErrors(cudaFreeArray(array));
-}
-
-// Note that these kernels are designed to work with arbitrary
-// domain sizes, not just domains that are multiples of the tile
-// size. Therefore, we have extra code that checks to make sure
-// a given thread location falls within the domain boundaries in
-// both X and Y. Also, the domain is covered by looping over
-// multiple elements in the Y direction, while there is a one-to-one
-// mapping between threads in X and the tile size in X.
-// Nolan Goodnight 9/22/06
-
-// This method adds constant force vectors to the velocity field
-// stored in 'v' according to v(x,t+1) = v(x,t) + dt * f.
-__global__ void addForces_k(cData *v, int dx, int dy, int spx, int spy,
-                            float fx, float fy, int r, size_t pitch) {
-  int tx = threadIdx.x;
-  int ty = threadIdx.y;
-  cData *fj = (cData *)((char *)v + (ty + spy) * pitch) + tx + spx;
-
-  cData vterm = *fj;
-  tx -= r;
-  ty -= r;
-  float s = 1.f / (1.f + tx * tx * tx * tx + ty * ty * ty * ty);
-  vterm.x += s * fx;
-  vterm.y += s * fy;
-  *fj = vterm;
-}
-
-// This method performs the velocity advection step, where we
-// trace velocity vectors back in time to update each grid cell.
-// That is, v(x,t+1) = v(p(x,-dt),t). Here we perform bilinear
-// interpolation in the velocity space.
-__global__ void advectVelocity_k(cData *v, float *vx, float *vy, int dx,
-                                 int pdx, int dy, float dt, int lb,
-                                 cudaTextureObject_t texObject) {
-  int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
-  int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
-  int p;
-
-  cData vterm, ploc;
-  float vxterm, vyterm;
-
-  // gtidx is the domain location in x for this thread
-  if (gtidx < dx) {
-    for (p = 0; p < lb; p++) {
-      // fi is the domain location in y for this thread
-      int fi = gtidy + p;
-
-      if (fi < dy) {
-        int fj = fi * pdx + gtidx;
-        vterm = tex2D<cData>(texObject, (float)gtidx, (float)fi);
-        ploc.x = (gtidx + 0.5f) - (dt * vterm.x * dx);
-        ploc.y = (fi + 0.5f) - (dt * vterm.y * dy);
-        vterm = tex2D<cData>(texObject, ploc.x, ploc.y);
-        vxterm = vterm.x;
-        vyterm = vterm.y;
-        vx[fj] = vxterm;
-        vy[fj] = vyterm;
-      }
-    }
-  }
-}
-
-// This method performs velocity diffusion and forces mass conservation
-// in the frequency domain. The inputs 'vx' and 'vy' are complex-valued
-// arrays holding the Fourier coefficients of the velocity field in
-// X and Y. Diffusion in this space takes a simple form described as:
-// v(k,t) = v(k,t) / (1 + visc * dt * k^2), where visc is the viscosity,
-// and k is the wavenumber. The projection step forces the Fourier
-// velocity vectors to be orthogonal to the vectors for each
-// wavenumber: v(k,t) = v(k,t) - ((k dot v(k,t) * k) / k^2.
-__global__ void diffuseProject_k(cData *vx, cData *vy, int dx, int dy, float dt,
-                                 float visc, int lb) {
-  int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
-  int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
-  int p;
-
-  cData xterm, yterm;
-
-  // gtidx is the domain location in x for this thread
-  if (gtidx < dx) {
-    for (p = 0; p < lb; p++) {
-      // fi is the domain location in y for this thread
-      int fi = gtidy + p;
-
-      if (fi < dy) {
-        int fj = fi * dx + gtidx;
-        xterm = vx[fj];
-        yterm = vy[fj];
-
-        // Compute the index of the wavenumber based on the
-        // data order produced by a standard NN FFT.
-        int iix = gtidx;
-        int iiy = (fi > dy / 2) ? (fi - (dy)) : fi;
-
-        // Velocity diffusion
-        float kk = (float)(iix * iix + iiy * iiy);  // k^2
-        float diff = 1.f / (1.f + visc * dt * kk);
-        xterm.x *= diff;
-        xterm.y *= diff;
-        yterm.x *= diff;
-        yterm.y *= diff;
-
-        // Velocity projection
-        if (kk > 0.f) {
-          float rkk = 1.f / kk;
-          // Real portion of velocity projection
-          float rkp = (iix * xterm.x + iiy * yterm.x);
-          // Imaginary portion of velocity projection
-          float ikp = (iix * xterm.y + iiy * yterm.y);
-          xterm.x -= rkk * rkp * iix;
-          xterm.y -= rkk * ikp * iix;
-          yterm.x -= rkk * rkp * iiy;
-          yterm.y -= rkk * ikp * iiy;
-        }
-
-        vx[fj] = xterm;
-        vy[fj] = yterm;
-      }
-    }
-  }
-}
-
-// This method updates the velocity field 'v' using the two complex
-// arrays from the previous step: 'vx' and 'vy'. Here we scale the
-// real components by 1/(dx*dy) to account for an unnormalized FFT.
-__global__ void updateVelocity_k(cData *v, float *vx, float *vy, int dx,
-                                 int pdx, int dy, int lb, size_t pitch) {
-  int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
-  int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
-  int p;
-
-  float vxterm, vyterm;
-  cData nvterm;
-
-  // gtidx is the domain location in x for this thread
-  if (gtidx < dx) {
-    for (p = 0; p < lb; p++) {
-      // fi is the domain location in y for this thread
-      int fi = gtidy + p;
-
-      if (fi < dy) {
-        int fjr = fi * pdx + gtidx;
-        vxterm = vx[fjr];
-        vyterm = vy[fjr];
-
-        // Normalize the result of the inverse FFT
-        float scale = 1.f / (dx * dy);
-        nvterm.x = vxterm * scale;
-        nvterm.y = vyterm * scale;
-
-        cData *fj = (cData *)((char *)v + fi * pitch) + gtidx;
-        *fj = nvterm;
-      }
-    }  // If this thread is inside the domain in Y
-  }    // If this thread is inside the domain in X
-}
-
-// This method updates the particles by moving particle positions
-// according to the velocity field and time step. That is, for each
-// particle: p(t+1) = p(t) + dt * v(p(t)).
-__global__ void advectParticles_k(Vertex *part, cData *v, int dx, int dy,
-                                  float dt, int lb, size_t pitch) {
-  int gtidx = blockIdx.x * blockDim.x + threadIdx.x;
-  int gtidy = blockIdx.y * (lb * blockDim.y) + threadIdx.y * lb;
-  int p;
-
-  // gtidx is the domain location in x for this thread
-  cData vterm;
-  Vertex pterm;
-
-  if (gtidx < dx) {
-    for (p = 0; p < lb; p++) {
-      // fi is the domain location in y for this thread
-      int fi = gtidy + p;
-
-      if (fi < dy) {
-        int fj = fi * dx + gtidx;
-        pterm = part[fj];
-
-        int xvi = ((int)(pterm.x * dx));
-        int yvi = ((int)(pterm.y * dy));
-        vterm = *((cData *)((char *)v + yvi * pitch) + xvi);
-
-        pterm.x += dt * vterm.x;
-        pterm.x = pterm.x - (int)pterm.x;
-        pterm.x += 1.f;
-        pterm.x = pterm.x - (int)pterm.x;
-        pterm.y += dt * vterm.y;
-        pterm.y = pterm.y - (int)pterm.y;
-        pterm.y += 1.f;
-        pterm.y = pterm.y - (int)pterm.y;
-
-        part[fj] = pterm;
-      }
-    }  // If this thread is inside the domain in Y
-  }    // If this thread is inside the domain in X
-}
-
-extern "C" void addForces(cData *v, int dx, int dy, int spx, int spy, float fx,
-                          float fy, int r, size_t tPitch) {
-  dim3 tids(2 * r + 1, 2 * r + 1);
-
-  addForces_k<<<1, tids>>>(v, dx, dy, spx, spy, fx, fy, r, tPitch);
-  getLastCudaError("addForces_k failed.");
-}
-
-extern "C" void advectVelocity(cData *v, float *vx, float *vy, int dx, int pdx,
-                               int dy, float dt, size_t tPitch) {
-  dim3 grid((dx / TILEX) + (!(dx % TILEX) ? 0 : 1),
-            (dy / TILEY) + (!(dy % TILEY) ? 0 : 1));
-
-  dim3 tids(TIDSX, TIDSY);
-
-  updateTexture(v, DIM * sizeof(cData), DIM, tPitch);
-  advectVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, dt, TILEY / TIDSY,
-                                   texObj);
-
-  getLastCudaError("advectVelocity_k failed.");
-}
-
-extern "C" void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt,
-                               float visc, size_t tPitch) {
-  // Forward FFT
-  //    cufftExecR2C(planr2c, (cufftReal*)vx, (cufftComplex*)vx);
-  //    cufftExecR2C(planr2c, (cufftReal*)vy, (cufftComplex*)vy);
-
-  uint3 grid = make_uint3((dx / TILEX) + (!(dx % TILEX) ? 0 : 1),
-                          (dy / TILEY) + (!(dy % TILEY) ? 0 : 1), 1);
-
-  uint3 tids = make_uint3(TIDSX, TIDSY, 1);
-
-  diffuseProject_k<<<grid, tids>>>(vx, vy, dx, dy, dt, visc, TILEY / TIDSY);
-  getLastCudaError("diffuseProject_k failed.");
-
-  // Inverse FFT
-  //    cufftExecC2R(planc2r, (cufftComplex*)vx, (cufftReal*)vx);
-  //    cufftExecC2R(planc2r, (cufftComplex*)vy, (cufftReal*)vy);
-}
-
-extern "C" void updateVelocity(cData *v, float *vx, float *vy, int dx, int pdx,
-                               int dy, size_t tPitch) {
-  dim3 grid((dx / TILEX) + (!(dx % TILEX) ? 0 : 1),
-            (dy / TILEY) + (!(dy % TILEY) ? 0 : 1));
-
-  dim3 tids(TIDSX, TIDSY);
-
-  updateVelocity_k<<<grid, tids>>>(v, vx, vy, dx, pdx, dy, TILEY / TIDSY,
-                                   tPitch);
-  getLastCudaError("updateVelocity_k failed.");
-}
-
-extern "C" void advectParticles(Vertex *p, cData *v, int dx, int dy, float dt,
-                                size_t tPitch) {
-  dim3 grid((dx / TILEX) + (!(dx % TILEX) ? 0 : 1),
-            (dy / TILEY) + (!(dy % TILEY) ? 0 : 1));
-
-  dim3 tids(TIDSX, TIDSY);
-
-  advectParticles_k<<<grid, tids>>>(p, v, dx, dy, dt, TILEY / TIDSY, tPitch);
-  getLastCudaError("advectParticles_k failed.");
-}
--- a/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.h
+++ b/Samples/5_Domain_Specific/fluidsD3D9/fluidsD3D9_kernels.h
@ -1,109 +0,0 @@
-/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *  * Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *  * Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *  * Neither the name of NVIDIA CORPORATION nor the names of its
- *    contributors may be used to endorse or promote products derived
- *    from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef __STABLEFLUIDS_KERNELS_H_
-#define __STABLEFLUIDS_KERNELS_H_
-
-#define DIM 512              // Square size of solver domain
-#define DS (DIM * DIM)       // Total domain size
-#define CPADW (DIM / 2 + 1)  // Padded width for real->complex in-place FFT
-#define RPADW \
-  (2 * (DIM / 2 + 1))      // Padded width for real->complex in-place FFT
-#define PDS (DIM * CPADW)  // Padded total domain size
-
-#define DT 0.09f            // Delta T for interative solver
-#define VIS 0.0025f         // Viscosity constant
-#define FORCE (5.8f * DIM)  // Force scale factor
-#define FR 4                // Force update radius
-
-#define TILEX 64  // Tile width
-#define TILEY 64  // Tile height
-#define TIDSX 64  // Tids in X
-#define TIDSY 4   // Tids in Y
-
-typedef unsigned long DWORD;
-
-typedef struct vertex {
-  float x, y, z;
-  DWORD c;
-} Vertex;
-
-// Vector data type used to velocity and force fields
-typedef float2 cData;
-
-extern "C" void setupTexture(int x, int y);
-extern "C" void updateTexture(cData *data, size_t w, size_t h, size_t pitch);
-extern "C" void deleteTexture(void);
-
-// This method adds constant force vectors to the velocity field
-// stored in 'v' according to v(x,t+1) = v(x,t) + dt * f.
-__global__ void addForces_k(cData *v, int dx, int dy, int spx, int spy,
-                            float fx, float fy, int r, size_t pitch);
-
-// This method performs the velocity advection step, where we
-// trace velocity vectors back in time to update each grid cell.
-// That is, v(x,t+1) = v(p(x,-dt),t). Here we perform bilinear
-// interpolation in the velocity space.
-__global__ void advectVelocity_k(cData *v, float *vx, float *vy, int dx,
-                                 int pdx, int dy, float dt, int lb,
-                                 cudaTextureObject_t tex);
-
-// This method performs velocity diffusion and forces mass conservation
-// in the frequency domain. The inputs 'vx' and 'vy' are complex-valued
-// arrays holding the Fourier coefficients of the velocity field in
-// X and Y. Diffusion in this space takes a simple form described as:
-// v(k,t) = v(k,t) / (1 + visc * dt * k^2), where visc is the viscosity,
-// and k is the wavenumber. The projection step forces the Fourier
-// velocity vectors to be orthogonal to the wave wave vectors for each
-// wavenumber: v(k,t) = v(k,t) - ((k dot v(k,t) * k) / k^2.
-__global__ void diffuseProject_k(cData *vx, cData *vy, int dx, int dy, float dt,
-                                 float visc, int lb);
-
-// This method updates the velocity field 'v' using the two complex
-// arrays from the previous step: 'vx' and 'vy'. Here we scale the
-// real components by 1/(dx*dy) to account for an unnormalized FFT.
-__global__ void updateVelocity_k(cData *v, float *vx, float *vy, int dx,
-                                 int pdx, int dy, int lb, size_t pitch);
-
-// This method updates the particles by moving particle positions
-// according to the velocity field and time step. That is, for each
-// particle: p(t+1) = p(t) + dt * v(p(t)).
-__global__ void advectParticles_k(Vertex *part, cData *v, int dx, int dy,
-                                  float dt, int lb, size_t pitch);
-
-extern "C" void addForces(cData *v, int dx, int dy, int spx, int spy, float fx,
-                          float fy, int r, size_t tPitch);
-extern "C" void advectVelocity(cData *v, float *vx, float *vy, int dx, int pdx,
-                               int dy, float dt, size_t tPitch);
-extern "C" void diffuseProject(cData *vx, cData *vy, int dx, int dy, float dt,
-                               float visc, size_t tPitch);
-extern "C" void updateVelocity(cData *v, float *vx, float *vy, int dx, int pdx,
-                               int dy, size_t tPitch);
-extern "C" void advectParticles(Vertex *p, cData *v, int dx, int dy, float dt,
-                                size_t tPitch);
-
-#endif