Compare commits

...

3 Commits

Author SHA1 Message Date
ROOZBEH
24ee6968dc
Merge 5748bf69dabd81be7d0993c30fd1996cc19c5eba into 3e8f91d1a116060d3fedfe856f3721db970de030 2025-02-27 10:53:42 +01:00
XSShawnZeng
3e8f91d1a1
Several small bug fixes for Windows platforms
* Enhancement for GLFW include and lib search

* Fixing issue #321: A potential bug in memMapIPCDrv/memMapIpc.cpp

* Update CMakeLists.txt for the sample 0_Introduction/template

* Copy .dll to correct dir for 5_Domain_Specific/Mandelbrot

* Fix typo

* Update changelog for cudaNvSciBufMultiplanar
2025-02-26 08:23:39 -08:00
rkarimi
5748bf69da Modified the binomial options code to support American Options as well.
Note that the Black-Scholes method can only be used for the European options. For validating the GPU results the computed prices are only compared to the CPU version of binomial options algorithm.
2024-12-19 22:09:22 +00:00
8 changed files with 98 additions and 19 deletions

View File

@@ -36,6 +36,7 @@
* `cuDLALayerwiseStatsHybrid`
* `cuDLALayerwiseStatsStandalone`
* `cuDLAStandaloneMode`
* `cudaNvSciBufMultiplanar`
* `cudaNvSciNvMedia`
* `fluidsGLES`
* `nbody_opengles`

View File

@@ -55,6 +55,7 @@ add_subdirectory(simpleTexture3D)
add_subdirectory(simpleTextureDrv)
add_subdirectory(simpleVoteIntrinsics)
add_subdirectory(simpleZeroCopy)
add_subdirectory(template)
add_subdirectory(systemWideAtomics)
add_subdirectory(vectorAdd)
add_subdirectory(vectorAddDrv)

View File

@@ -20,7 +20,7 @@ include_directories(../../../Common)
# Source file
# Add target for template
add_executable(template template.cu)
add_executable(template template.cu template_cpu.cpp)
target_compile_options(template PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)

View File

@@ -65,14 +65,14 @@ target_compile_features(Mandelbrot PRIVATE cxx_std_17 cuda_std_17)
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/freeglut.dll
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>
)
add_custom_command(TARGET Mandelbrot
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy
${CMAKE_CURRENT_SOURCE_DIR}/../../../bin/win64/$<CONFIGURATION>/glew64.dll
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_BINARY_DIR}/$<CONFIGURATION>
)
endif()

View File

@@ -52,13 +52,14 @@ extern "C" void BlackScholesCall(real &callResult, TOptionData optionData);
// Process single option on CPU
// Note that CPU code is for correctness testing only and not for benchmarking.
////////////////////////////////////////////////////////////////////////////////
extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData);
extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData,
option_t option_type);
////////////////////////////////////////////////////////////////////////////////
// Process an array of OptN options on GPU
////////////////////////////////////////////////////////////////////////////////
extern "C" void binomialOptionsGPU(real *callValue, TOptionData *optionData,
int optN);
int optN, option_t option_type);
////////////////////////////////////////////////////////////////////////////////
// Helper function, returning uniformly distributed
@@ -103,12 +104,14 @@ int main(int argc, char **argv) {
BlackScholesCall(callValueBS[i], optionData[i]);
}
printf("Running GPU binomial tree...\n");
option_t option_type = EU;
printf("Running GPU binomial tree (EU)...\n");
checkCudaErrors(cudaDeviceSynchronize());
sdkResetTimer(&hTimer);
sdkStartTimer(&hTimer);
binomialOptionsGPU(callValueGPU, optionData, OPT_N);
binomialOptionsGPU(callValueGPU, optionData, OPT_N, option_type);
checkCudaErrors(cudaDeviceSynchronize());
sdkStopTimer(&hTimer);
@@ -118,13 +121,13 @@ int main(int argc, char **argv) {
printf("binomialOptionsGPU() time: %f msec\n", gpuTime);
printf("Options per second : %f \n", OPT_N / (gpuTime * 0.001));
printf("Running CPU binomial tree...\n");
printf("Running CPU binomial tree (EU)...\n");
for (i = 0; i < OPT_N; i++) {
binomialOptionsCPU(callValueCPU[i], optionData[i]);
binomialOptionsCPU(callValueCPU[i], optionData[i], option_type);
}
printf("Comparing the results...\n");
printf("Comparing the results (EU)...\n");
sumDelta = 0;
sumRef = 0;
printf("GPU binomial vs. Black-Scholes\n");
@@ -170,6 +173,49 @@ int main(int argc, char **argv) {
printf("Avg. diff: %E\n", (double)(sumDelta / (real)OPT_N));
}
if (errorVal > 5e-4) {
printf("Test failed!\n");
exit(EXIT_FAILURE);
}
option_type = NA;
printf("\nRunning GPU binomial tree (NA)...\n");
checkCudaErrors(cudaDeviceSynchronize());
sdkResetTimer(&hTimer);
sdkStartTimer(&hTimer);
binomialOptionsGPU(callValueGPU, optionData, OPT_N, option_type);
checkCudaErrors(cudaDeviceSynchronize());
sdkStopTimer(&hTimer);
gpuTime = sdkGetTimerValue(&hTimer);
printf("Options count : %i \n", OPT_N);
printf("Time steps : %i \n", NUM_STEPS);
printf("binomialOptionsGPU() time: %f msec\n", gpuTime);
printf("Options per second : %f \n", OPT_N / (gpuTime * 0.001));
printf("Running CPU binomial tree (NA)...\n");
for (i = 0; i < OPT_N; i++) {
binomialOptionsCPU(callValueCPU[i], optionData[i], option_type);
}
printf("CPU binomial vs. GPU binomial\n");
sumDelta = 0;
sumRef = 0;
for (i = 0; i < OPT_N; i++) {
sumDelta += fabs(callValueGPU[i] - callValueCPU[i]);
sumRef += callValueCPU[i];
}
if (sumRef > 1E-5) {
printf("L1 norm: %E\n", errorVal = sumDelta / sumRef);
} else {
printf("Avg. diff: %E\n", (double)(sumDelta / (real)OPT_N));
}
printf("Shutting down...\n");
sdkDeleteTimer(&hTimer);

View File

@@ -41,6 +41,15 @@ typedef struct {
real V;
} TOptionData;
////////////////////////////////////////////////////////////////////////////////
// Option types
////////////////////////////////////////////////////////////////////////////////
enum option_t
{
NA = 0,
EU,
};
////////////////////////////////////////////////////////////////////////////////
// Global parameters
////////////////////////////////////////////////////////////////////////////////

View File

@@ -78,7 +78,8 @@ static real expiryCallValue(real S, real X, real vDt, int i) {
return (d > (real)0) ? d : (real)0;
}
extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData) {
extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData,
option_t option_type) {
static real Call[NUM_STEPS + 1];
const real S = optionData.S;
@@ -112,9 +113,18 @@ extern "C" void binomialOptionsCPU(real &callResult, TOptionData optionData) {
////////////////////////////////////////////////////////////////////////
// Walk backwards up binomial tree
////////////////////////////////////////////////////////////////////////
for (int i = NUM_STEPS; i > 0; i--)
for (int j = 0; j <= i - 1; j++)
Call[j] = puByDf * Call[j + 1] + pdByDf * Call[j];
for (int i = NUM_STEPS; i > 0; i--) {
for (int j = 0; j <= i - 1; j++) {
real continuation_value = puByDf * Call[j + 1] + pdByDf * Call[j];
if(option_type == NA){
real fwd = S * exp((2*j-i) * vDt);
real exercise_value = (fwd - X) > (real)0 ? (fwd - X) : (real)0;
Call[j] = exercise_value > continuation_value ? exercise_value : continuation_value;
} else if (option_type == EU) {
Call[j] = continuation_value;
}
}
}
callResult = (real)Call[0];
}

View File

@@ -74,7 +74,7 @@ __device__ inline double expiryCallValue(double S, double X, double vDt,
#error Bad constants
#endif
__global__ void binomialOptionsKernel() {
__global__ void binomialOptionsKernel(option_t option_type) {
// Handle to thread block group
cg::thread_block cta = cg::this_thread_block();
__shared__ real call_exchange[THREADBLOCK_SIZE + 1];
@@ -105,8 +105,20 @@ __global__ void binomialOptionsKernel() {
if (i > final_it) {
#pragma unroll
for (int j = 0; j < ELEMS_PER_THREAD; ++j)
call[j] = puByDf * call[j + 1] + pdByDf * call[j];
for (int j = 0; j < ELEMS_PER_THREAD; ++j) {
real continuation_value = puByDf * call[j + 1] + pdByDf * call[j];
if(option_type == NA){
#ifndef DOUBLE_PRECISION
real fwd = S*__expf(vDt * (2*(tid * ELEMS_PER_THREAD + j) - i));
#else
real fwd = S*exp(vDt * (2*(tid * ELEMS_PER_THREAD + j) - i));
#endif
real exercise_value = ((fwd - X) > (real)0) ? (fwd - X) : (real)0;
call[j] = exercise_value > continuation_value ? exercise_value : continuation_value;
} else if (option_type == EU){
call[j] = continuation_value;
}
}
}
}
@@ -119,7 +131,7 @@ __global__ void binomialOptionsKernel() {
// Host-side interface to GPU binomialOptions
////////////////////////////////////////////////////////////////////////////////
extern "C" void binomialOptionsGPU(real *callValue, TOptionData *optionData,
int optN) {
int optN, option_t option_type) {
__TOptionData h_OptionData[MAX_OPTIONS];
for (int i = 0; i < optN; i++) {
@@ -150,7 +162,7 @@ extern "C" void binomialOptionsGPU(real *callValue, TOptionData *optionData,
checkCudaErrors(cudaMemcpyToSymbol(d_OptionData, h_OptionData,
optN * sizeof(__TOptionData)));
binomialOptionsKernel<<<optN, THREADBLOCK_SIZE>>>();
binomialOptionsKernel<<<optN, THREADBLOCK_SIZE>>>(option_type);
getLastCudaError("binomialOptionsKernel() execution failed.\n");
checkCudaErrors(
cudaMemcpyFromSymbol(callValue, d_CallValue, optN * sizeof(real)));