mirror of
https://github.com/NVIDIA/cuda-samples.git
synced 2025-04-10 18:22:11 +01:00
Merge pull request #10 from XSShawnZeng/Tegra_Samples_Cmake_Transition
Add Tegra sample cudaNvSciBufMultiplanar
This commit is contained in:
commit
1a466282da
@ -1,4 +1,5 @@
|
||||
# Tegra platform samples: one subdirectory per sample, configured in the order listed.
foreach(tegra_sample IN ITEMS
        cudaNvSciNvMedia
        cudaNvSciBufMultiplanar
        cuDLAErrorReporting
        cuDLAHybridMode
        cuDLALayerwiseStatsHybrid)
    add_subdirectory(${tegra_sample})
endforeach()
|
||||
|
18
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
vendored
Normal file
18
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Linux",
|
||||
"includePath": [
|
||||
"${workspaceFolder}/**",
|
||||
"${workspaceFolder}/../../../Common"
|
||||
],
|
||||
"defines": [],
|
||||
"compilerPath": "/usr/local/cuda/bin/nvcc",
|
||||
"cStandard": "gnu17",
|
||||
"cppStandard": "gnu++14",
|
||||
"intelliSenseMode": "linux-gcc-x64",
|
||||
"configurationProvider": "ms-vscode.makefile-tools"
|
||||
}
|
||||
],
|
||||
"version": 4
|
||||
}
|
7
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
vendored
Normal file
7
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
{
|
||||
"recommendations": [
|
||||
"nvidia.nsight-vscode-edition",
|
||||
"ms-vscode.cpptools",
|
||||
"ms-vscode.makefile-tools"
|
||||
]
|
||||
}
|
10
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
vendored
Normal file
10
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
vendored
Normal file
@ -0,0 +1,10 @@
|
||||
{
|
||||
"configurations": [
|
||||
{
|
||||
"name": "CUDA C++: Launch",
|
||||
"type": "cuda-gdb",
|
||||
"request": "launch",
|
||||
"program": "${workspaceFolder}/cudaNvSciBufMultiplanar"
|
||||
}
|
||||
]
|
||||
}
|
15
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
vendored
Normal file
15
Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
{
|
||||
"version": "2.0.0",
|
||||
"tasks": [
|
||||
{
|
||||
"label": "sample",
|
||||
"type": "shell",
|
||||
"command": "make dbg=1",
|
||||
"problemMatcher": ["$nvcc"],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
"isDefault": true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
@ -0,0 +1,74 @@
|
||||
# Build script for the cudaNvSciBufMultiplanar sample.
# The sample is only configured on Linux and only when the NvSciBuf/NvSciSync
# libraries and headers can be located; otherwise a status message is printed
# and no target is created.
cmake_minimum_required(VERSION 3.20)

list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../../cmake/Modules")

project(cudaNvSciBufMultiplanar LANGUAGES C CXX CUDA)

find_package(CUDAToolkit REQUIRED)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

set(CMAKE_CUDA_ARCHITECTURES 53 61 70 72 75 80 86 87 90)
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G")  # enable cuda-gdb (expensive)
endif()

if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
    # Find the NVSCI libraries
    # use CMAKE_LIBRARY_PATH so that users can also specify the NVSCI lib path in cmake command
    set(CMAKE_LIBRARY_PATH "/usr/lib" ${CMAKE_LIBRARY_PATH})

    # CMAKE_LIBRARY_PATH is a list. Glob each entry separately: expanding the
    # list unquoted inside a single glob expression would attach the
    # "/*/lib....so" suffix to the last entry only.
    set(NVSCIBUF_LIB "")
    set(NVSCISYNC_LIB "")
    foreach(nvsci_lib_dir IN LISTS CMAKE_LIBRARY_PATH)
        file(GLOB_RECURSE nvscibuf_matches "${nvsci_lib_dir}/*/libnvscibuf.so")
        file(GLOB_RECURSE nvscisync_matches "${nvsci_lib_dir}/*/libnvscisync.so")
        list(APPEND NVSCIBUF_LIB ${nvscibuf_matches})
        list(APPEND NVSCISYNC_LIB ${nvscisync_matches})
    endforeach()
    list(REMOVE_DUPLICATES NVSCIBUF_LIB)
    list(REMOVE_DUPLICATES NVSCISYNC_LIB)

    # Find the NVSCI header files
    # use CMAKE_INCLUDE_PATH so that users can also specify the NVSCI include path in cmake command
    set(CMAKE_INCLUDE_PATH "/usr/include" ${CMAKE_INCLUDE_PATH})
    find_path(NVSCIBUF_INCLUDE_DIR nvscibuf.h PATHS ${CMAKE_INCLUDE_PATH})
    find_path(NVSCISYNC_INCLUDE_DIR nvscisync.h PATHS ${CMAKE_INCLUDE_PATH})

    if(NVSCIBUF_LIB AND NVSCISYNC_LIB AND NVSCIBUF_INCLUDE_DIR AND NVSCISYNC_INCLUDE_DIR)
        message(STATUS "FOUND NVSCI libs: ${NVSCIBUF_LIB} ${NVSCISYNC_LIB}")
        message(STATUS "Using NVSCI headers path: ${NVSCIBUF_INCLUDE_DIR} ${NVSCISYNC_INCLUDE_DIR}")

        # Add target for cudaNvSciBufMultiplanar
        add_executable(cudaNvSciBufMultiplanar imageKernels.cu cudaNvSciBufMultiplanar.cpp main.cpp)

        target_compile_options(cudaNvSciBufMultiplanar PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)

        target_compile_features(cudaNvSciBufMultiplanar PRIVATE cxx_std_17 cuda_std_17)

        set_target_properties(cudaNvSciBufMultiplanar PROPERTIES CUDA_SEPARABLE_COMPILATION ON)

        # Shared sample helpers (Common), CUDA toolkit headers and NVSCI headers.
        target_include_directories(cudaNvSciBufMultiplanar PRIVATE
            "${CMAKE_CURRENT_SOURCE_DIR}/../../../../Common"
            ${CUDAToolkit_INCLUDE_DIRS}
            ${NVSCIBUF_INCLUDE_DIR}
            ${NVSCISYNC_INCLUDE_DIR}
        )

        target_link_libraries(cudaNvSciBufMultiplanar PRIVATE
            CUDA::cuda_driver
            ${NVSCIBUF_LIB}
            ${NVSCISYNC_LIB}
        )

        # Copy yuv_planar_img1.yuv to the output directory
        add_custom_command(TARGET cudaNvSciBufMultiplanar POST_BUILD
            COMMAND ${CMAKE_COMMAND} -E copy_if_different
            "${CMAKE_CURRENT_SOURCE_DIR}/yuv_planar_img1.yuv"
            "${CMAKE_CURRENT_BINARY_DIR}/yuv_planar_img1.yuv"
            VERBATIM
        )

        # Specify additional clean files
        set_target_properties(cudaNvSciBufMultiplanar PROPERTIES
            ADDITIONAL_CLEAN_FILES "image_out.yuv"
        )
    else()
        message(STATUS "NvSCI not found - will not build sample 'cudaNvSciBufMultiplanar'")
    endif()
else()
    message(STATUS "Will not build sample cudaNvSciBufMultiplanar - requires Linux OS")
endif()
|
@ -0,0 +1,64 @@
|
||||
# cudaNvSciBufMultiplanar - CUDA NvSciBufMultiplanar Image Samples
|
||||
|
||||
## Description
|
||||
|
||||
This sample demonstrates CUDA-NvSciBuf interop for multiplanar images. A YUV 420 multiplanar image is bit-flipped on the host and copied into a buffer that is allocated with the NvSciBuf APIs and imported into CUDA with CUDA External Resource Interoperability. A CUDA surface is created from the corresponding mapped CUDA array, and the bits are flipped a second time on the surface. The result is copied back to a YUV image, which is compared against the input.
|
||||
|
||||
## Key Concepts
|
||||
|
||||
CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
|
||||
|
||||
## Supported SM Architectures
|
||||
|
||||
[SM 6.0 ](https://developer.nvidia.com/cuda-gpus) [SM 6.1 ](https://developer.nvidia.com/cuda-gpus) [SM 7.0 ](https://developer.nvidia.com/cuda-gpus) [SM 7.2 ](https://developer.nvidia.com/cuda-gpus) [SM 7.5 ](https://developer.nvidia.com/cuda-gpus) [SM 8.0 ](https://developer.nvidia.com/cuda-gpus) [SM 8.6 ](https://developer.nvidia.com/cuda-gpus) [SM 8.7 ](https://developer.nvidia.com/cuda-gpus) [SM 8.9 ](https://developer.nvidia.com/cuda-gpus) [SM 9.0 ](https://developer.nvidia.com/cuda-gpus) [SM 10.0 ](https://developer.nvidia.com/cuda-gpus) [SM 10.1 ](https://developer.nvidia.com/cuda-gpus) [SM 12.0 ](https://developer.nvidia.com/cuda-gpus)
|
||||
|
||||
## Supported OSes
|
||||
|
||||
Linux
|
||||
|
||||
## Supported CPU Architecture
|
||||
|
||||
aarch64
|
||||
|
||||
## CUDA APIs involved
|
||||
|
||||
### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
|
||||
cudaDeviceGetAttribute, cudaNvSciBufMultiplanar, cudaDestroyExternalMemory, cuDriverGetVersion, cuDeviceGetUuid, cudaSetDevice, cudaGetMipmappedArrayLevel, cudaFreeMipmappedArray, cudaImportExternalMemory, cudaCreateChannelDesc, cudaExternalMemoryGetMappedMipmappedArray, cuCtxSynchronize, cudaMemcpy2DToArray, cudaMemcpy2DFromArray
|
||||
|
||||
## Dependencies needed to build/run
|
||||
[NVSCI](../../../README.md#nvsci)
|
||||
|
||||
## Prerequisites
|
||||
|
||||
Download and install the [CUDA Toolkit 12.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
|
||||
Make sure the dependencies mentioned in the [Dependencies](#dependencies-needed-to-buildrun) section above are installed.
|
||||
|
||||
## Build and Run
|
||||
|
||||
### Linux
|
||||
The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
|
||||
```
|
||||
$ cd <sample_dir>
|
||||
$ make
|
||||
```
|
||||
The samples makefiles can take advantage of certain options:
|
||||
* **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are aarch64.
|
||||
By default, TARGET_ARCH is set to HOST_ARCH. On an x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
|
||||
`$ make TARGET_ARCH=aarch64` <br/>
|
||||
See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
|
||||
* **dbg=1** - build with debug symbols
|
||||
```
|
||||
$ make dbg=1
|
||||
```
|
||||
* **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
|
||||
```
|
||||
$ make SMS="50 60"
|
||||
```
|
||||
|
||||
* **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
|
||||
```
|
||||
$ make HOST_COMPILER=g++
|
||||
```
|
||||
|
||||
## References (for more details)
|
||||
|
@ -0,0 +1,435 @@
|
||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include "cudaNvSciBufMultiplanar.h"
|
||||
|
||||
NvSciBufModule module;
|
||||
NvSciBufObj buffObj;
|
||||
CUuuid uuid;
|
||||
|
||||
// Inverts every bit of the first `size` bytes starting at pBuff, in place.
void flipBits(uint8_t *pBuff, uint32_t size) {
    uint8_t *const pEnd = pBuff + size;
    for (uint8_t *pByte = pBuff; pByte != pEnd; ++pByte) {
        *pByte = static_cast<uint8_t>(~(*pByte));
    }
}
|
||||
|
||||
// Compare input and generated image files.
// Both files are read byte by byte until the first difference or until both
// reach end-of-file; a SUCCESS or FAILURE line is printed to stdout.
// The process exits with EXIT_FAILURE if either file cannot be opened.
void compareFiles(std::string &path1, std::string &path2) {
    FILE *inFile = fopen(path1.c_str(), "rb");
    FILE *outFile = fopen(path2.c_str(), "rb");

    if (inFile == NULL) {
        printf("File %s open failed in %s line %d\n", path1.c_str(), __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }
    if (outFile == NULL) {
        printf("File %s open failed in %s line %d\n", path2.c_str(), __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }

    bool identical = true;
    for (;;) {
        const int byteIn = getc(inFile);
        const int byteOut = getc(outFile);

        if (byteIn != byteOut) {
            // Covers differing bytes as well as one file ending before the other.
            identical = false;
            break;
        }
        if (byteIn == EOF) {
            // Both streams ended at the same position without a mismatch.
            break;
        }
    }

    if (!identical) {
        printf("Input file : %s and output file : %s match FAILURE\n", path1.c_str(), path2.c_str());
    }
    else {
        printf("Input file : %s and output file : %s match SUCCESS\n", path1.c_str(), path2.c_str());
    }

    fclose(inFile);
    fclose(outFile);
}
|
||||
|
||||
// Prepares the caller for a run: clears the output attribute-list handle and
// creates this caller's own attribute list on the file-level NvSciBuf `module`.
// Exits the process (via checkNvSciErrors) if the list cannot be created.
void Caller::init() {
    attrListOut = NULL;
    checkNvSciErrors(NvSciBufAttrListCreate(module, &attrList));
}
|
||||
|
||||
// Releases what this caller owns: first its attribute list, then the CUDA
// external-memory handle imported from the NvSciBuf object.
void Caller::deinit()
{
    // The attribute list created in init() is no longer needed.
    NvSciBufAttrListFree(attrList);

    // Drop the CUDA-side import of the shared buffer.
    checkCudaErrors(cudaDestroyExternalMemory(extMem));
}
|
||||
|
||||
// Set NvSciBufImage attribute values in the attribute list
|
||||
void Caller::setAttrListImageMultiPlanes(int imageWidth, int imageHeight) {
|
||||
NvSciBufType bufType = NvSciBufType_Image;
|
||||
NvSciBufAttrValImageLayoutType layout = NvSciBufImage_BlockLinearType;
|
||||
bool cpuAccessFlag = false;
|
||||
NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite;
|
||||
NvSciRmGpuId gpuid;
|
||||
bool vpr = false;
|
||||
int32_t planeCount = PLANAR_NUM_PLANES;
|
||||
int drvVersion;
|
||||
// Dimensions of the imported image in the YUV 420 planar format
|
||||
int32_t planeWidths[] = {imageWidth, imageWidth/2, imageWidth/2};
|
||||
int32_t planeHeights[] = {imageHeight, imageHeight/2, imageHeight/2};
|
||||
NvSciBufAttrKeyValuePair keyPair;
|
||||
NvSciBufAttrKeyValuePair pairArray[ATTR_SIZE];
|
||||
|
||||
NvSciBufAttrValColorFmt planeColorFmts[] =
|
||||
{ NvSciColor_Y8, NvSciColor_V8, NvSciColor_U8 };
|
||||
NvSciBufAttrValImageScanType planeScanType[] =
|
||||
{ NvSciBufScan_ProgressiveType };
|
||||
|
||||
memcpy(&gpuid.bytes, &uuid.bytes, sizeof(uuid.bytes));
|
||||
|
||||
NvSciBufAttrKeyValuePair imgBuffAttrsArr[] = {
|
||||
{ NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType) },
|
||||
{ NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccessFlag,
|
||||
sizeof(cpuAccessFlag) },
|
||||
{ NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm) },
|
||||
{ NvSciBufGeneralAttrKey_GpuId, &gpuid, sizeof(gpuid) },
|
||||
{ NvSciBufImageAttrKey_Layout, &layout, sizeof(layout) },
|
||||
{ NvSciBufImageAttrKey_VprFlag, &vpr, sizeof(vpr) },
|
||||
{ NvSciBufImageAttrKey_PlaneCount, &planeCount, sizeof(planeCount) },
|
||||
{ NvSciBufImageAttrKey_PlaneColorFormat, planeColorFmts,
|
||||
sizeof(planeColorFmts) },
|
||||
{ NvSciBufImageAttrKey_PlaneWidth, planeWidths, sizeof(planeWidths) },
|
||||
{ NvSciBufImageAttrKey_PlaneHeight, planeHeights,
|
||||
sizeof(planeHeights) },
|
||||
{ NvSciBufImageAttrKey_PlaneScanType, planeScanType,
|
||||
sizeof(planeScanType) },
|
||||
};
|
||||
|
||||
std::vector<NvSciBufAttrKeyValuePair> imgBuffAttrsVec(imgBuffAttrsArr,
|
||||
imgBuffAttrsArr+(sizeof(imgBuffAttrsArr)/sizeof(imgBuffAttrsArr[0])));
|
||||
|
||||
memset(pairArray, 0, sizeof(NvSciBufAttrKeyValuePair) * imgBuffAttrsVec.size());
|
||||
std::copy(imgBuffAttrsVec.begin(), imgBuffAttrsVec.end(), pairArray);
|
||||
checkNvSciErrors(NvSciBufAttrListSetAttrs(attrList, pairArray, imgBuffAttrsVec.size()));
|
||||
}
|
||||
|
||||
// Stores the image dimensions, selects the first entry of deviceIds as the CUDA
// device, opens the file-level NvSciBuf module and initialises CUDA on that
// device. Exits the process if deviceIds is empty or any of these steps fails.
cudaNvSciBufMultiplanar::cudaNvSciBufMultiplanar(size_t width, size_t height, std::vector<int> &deviceIds)
    : imageWidth(width),
      imageHeight(height) {
    // Indexing an empty vector is undefined behaviour; fail with a clear message instead.
    if (deviceIds.empty()) {
        printf("No CUDA device id supplied in %s line %d\n", __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }
    mCudaDeviceId = deviceIds[0];
    attrListReconciled = NULL;
    attrListConflict = NULL;
    checkNvSciErrors(NvSciBufModuleOpen(&module));
    initCuda(mCudaDeviceId);
}
|
||||
|
||||
void cudaNvSciBufMultiplanar::initCuda(int devId) {
|
||||
int major = 0, minor = 0, drvVersion;
|
||||
NvSciRmGpuId gpuid;
|
||||
|
||||
checkCudaErrors(cudaSetDevice(mCudaDeviceId));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(
|
||||
&major, cudaDevAttrComputeCapabilityMajor, mCudaDeviceId));
|
||||
checkCudaErrors(cudaDeviceGetAttribute(
|
||||
&minor, cudaDevAttrComputeCapabilityMinor, mCudaDeviceId));
|
||||
printf(
|
||||
"[cudaNvSciBufMultiplanar] GPU Device %d: \"%s\" with compute capability "
|
||||
"%d.%d\n\n",
|
||||
mCudaDeviceId, _ConvertSMVer2ArchName(major, minor), major, minor);
|
||||
|
||||
checkCudaDrvErrors(cuDriverGetVersion(&drvVersion));
|
||||
|
||||
if (drvVersion <= 11030) {
|
||||
checkCudaDrvErrors(cuDeviceGetUuid(&uuid, devId));
|
||||
} else {
|
||||
checkCudaDrvErrors(cuDeviceGetUuid_v2(&uuid, devId));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
Caller1 flips a YUV image which is allocated to nvscibuf APIs and copied into CUDA Array.
|
||||
It is mapped to CUDA surface and bit flip is done. Caller2 in the same thread copies
|
||||
CUDA Array to a YUV image file. The original image is compared with the double bit
|
||||
flipped image.
|
||||
*/
|
||||
void cudaNvSciBufMultiplanar::runCudaNvSciBufPlanar(std::string &imageFilename, std::string &imageFilenameOut) {
|
||||
cudaArray_t levelArray1[PLANAR_NUM_PLANES];
|
||||
cudaArray_t levelArray2[PLANAR_NUM_PLANES];
|
||||
Caller caller1;
|
||||
Caller caller2;
|
||||
|
||||
int numPlanes = PLANAR_NUM_PLANES;
|
||||
caller1.init();
|
||||
caller2.init();
|
||||
|
||||
// Set NvSciBufImage attribute values in the attribute list
|
||||
caller1.setAttrListImageMultiPlanes(imageWidth, imageHeight);
|
||||
caller2.setAttrListImageMultiPlanes(imageWidth, imageHeight);
|
||||
|
||||
// Reconcile attribute lists and allocate NvSciBuf object
|
||||
reconcileAttrList(&caller1.attrList, &caller2.attrList);
|
||||
caller1.copyExtMemToMultiPlanarArrays();
|
||||
for (int i = 0; i < numPlanes; i++) {
|
||||
checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray1[i], caller1.multiPlanarArray[i], 0));
|
||||
}
|
||||
caller1.copyYUVToCudaArrayAndFlipBits(imageFilename, levelArray1);
|
||||
|
||||
caller2.copyExtMemToMultiPlanarArrays();
|
||||
for (int i = 0; i < numPlanes; i++) {
|
||||
checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray2[i], caller2.multiPlanarArray[i], 0));
|
||||
}
|
||||
// Maps cudaArray to surface memory and launches a kernel to flip bits
|
||||
launchFlipSurfaceBitsKernel(levelArray2, caller2.multiPlanarWidth, caller2.multiPlanarHeight, numPlanes);
|
||||
|
||||
// Synchronization can be done using nvSciSync when non CUDA callers and cross-process signaler-waiter
|
||||
// applications are involved. Please refer to the cudaNvSci sample library for more details.
|
||||
checkCudaDrvErrors(cuCtxSynchronize());
|
||||
printf("Bit flip of the surface memory done\n");
|
||||
|
||||
caller2.copyCudaArrayToYUV(imageFilenameOut, levelArray2);
|
||||
compareFiles(imageFilename, imageFilenameOut);
|
||||
|
||||
// Release memory
|
||||
printf("Releasing memory\n");
|
||||
for (int i = 0; i < numPlanes; i++) {
|
||||
checkCudaErrors(cudaFreeMipmappedArray(caller1.multiPlanarArray[i]));
|
||||
checkCudaErrors(cudaFreeMipmappedArray(caller2.multiPlanarArray[i]));
|
||||
}
|
||||
tearDown(&caller1, &caller2);
|
||||
}
|
||||
|
||||
// Map NvSciBufObj to cudaMipmappedArray
|
||||
void Caller::copyExtMemToMultiPlanarArrays() {
|
||||
checkNvSciErrors(NvSciBufObjGetAttrList(buffObj, &attrListOut));
|
||||
memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * PLANE_ATTR_SIZE);
|
||||
cudaExternalMemoryHandleDesc memHandleDesc;
|
||||
cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
|
||||
cudaChannelFormatDesc desc = {0};
|
||||
cudaExtent extent = {0};
|
||||
|
||||
pairArrayOut[PLANE_SIZE].key = NvSciBufImageAttrKey_Size; // Datatype: @c uint64_t
|
||||
pairArrayOut[PLANE_ALIGNED_SIZE].key = NvSciBufImageAttrKey_PlaneAlignedSize; // Datatype: @c uint64_t[]
|
||||
pairArrayOut[PLANE_OFFSET].key = NvSciBufImageAttrKey_PlaneOffset; // Datatype: @c uint64_t[]
|
||||
pairArrayOut[PLANE_HEIGHT].key = NvSciBufImageAttrKey_PlaneHeight; // Datatype: @c uint32_t[]
|
||||
pairArrayOut[PLANE_WIDTH].key = NvSciBufImageAttrKey_PlaneWidth; // Datatype: @c int32_t[]
|
||||
pairArrayOut[PLANE_CHANNEL_COUNT].key = NvSciBufImageAttrKey_PlaneChannelCount; // Datatype: @c uint8_t
|
||||
pairArrayOut[PLANE_BITS_PER_PIXEL].key = NvSciBufImageAttrKey_PlaneBitsPerPixel;// Datatype: @c uint32_t[]
|
||||
pairArrayOut[PLANE_COUNT].key = NvSciBufImageAttrKey_PlaneCount; // Datatype: @c uint32_t
|
||||
checkNvSciErrors(NvSciBufAttrListGetAttrs(attrListOut, pairArrayOut, (PLANE_ATTR_SIZE)));
|
||||
|
||||
uint64_t size = *(uint64_t*)pairArrayOut[PLANE_SIZE].value;
|
||||
uint64_t *planeAlignedSize = (uint64_t*)pairArrayOut[PLANE_ALIGNED_SIZE].value;
|
||||
int32_t *planeWidth = (int32_t*)pairArrayOut[PLANE_WIDTH].value;
|
||||
int32_t *planeHeight = (int32_t*)pairArrayOut[PLANE_HEIGHT].value;
|
||||
uint64_t *planeOffset = (uint64_t*)pairArrayOut[PLANE_OFFSET].value;
|
||||
uint8_t planeChannelCount = *(uint8_t*)pairArrayOut[PLANE_CHANNEL_COUNT].value;
|
||||
uint32_t *planeBitsPerPixel = (uint32_t*)pairArrayOut[PLANE_BITS_PER_PIXEL].value;
|
||||
uint32_t planeCount = *(uint32_t*)pairArrayOut[PLANE_COUNT].value;
|
||||
|
||||
numPlanes = planeCount;
|
||||
|
||||
for (int i = 0; i < numPlanes; i++) {
|
||||
multiPlanarWidth[i] = planeWidth[i];
|
||||
multiPlanarHeight[i] = planeHeight[i];
|
||||
}
|
||||
|
||||
memset(&memHandleDesc, 0, sizeof(memHandleDesc));
|
||||
memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
|
||||
memHandleDesc.handle.nvSciBufObject = buffObj;
|
||||
memHandleDesc.size = size;
|
||||
checkCudaErrors(cudaImportExternalMemory(&extMem, &memHandleDesc));
|
||||
|
||||
desc = cudaCreateChannelDesc(planeBitsPerPixel[0], 0, 0, 0, cudaChannelFormatKindUnsigned);
|
||||
memset(&mipmapDesc, 0, sizeof(mipmapDesc));
|
||||
mipmapDesc.numLevels = 1;
|
||||
|
||||
for (int i = 0; i < numPlanes; i++) {
|
||||
memset(&extent, 0, sizeof(extent));
|
||||
extent.width = planeWidth[i];
|
||||
extent.height = planeHeight[i];
|
||||
extent.depth = 0;
|
||||
mipmapDesc.offset = planeOffset[i];
|
||||
mipmapDesc.formatDesc = desc;
|
||||
mipmapDesc.extent = extent;
|
||||
mipmapDesc.flags = cudaArraySurfaceLoadStore;;
|
||||
checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&multiPlanarArray[i], extMem, &mipmapDesc));
|
||||
}
|
||||
}
|
||||
|
||||
// Reconciles the two callers' attribute lists into `attrListReconciled` and
// allocates the shared NvSciBuf object (file-level `buffObj`) from the result.
//
// attrList1 / attrList2: the unreconciled attribute lists of the two callers.
// Exits the process if reconciliation or allocation fails, or if the resulting
// list is not reported as reconciled.
void cudaNvSciBufMultiplanar::reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2) {
    attrList[0] = *attrList1;
    attrList[1] = *attrList2;
    bool isReconciled = false;

    checkNvSciErrors(NvSciBufAttrListReconcile(attrList, 2, &attrListReconciled, &attrListConflict));
    checkNvSciErrors(NvSciBufAttrListIsReconciled(attrListReconciled, &isReconciled));
    // The flag was previously queried but never inspected; do not allocate from an unreconciled list.
    if (!isReconciled) {
        printf("NvSciBufAttrList is not reconciled in %s line %d\n", __FILE__, __LINE__);
        exit(EXIT_FAILURE);
    }
    checkNvSciErrors(NvSciBufObjAlloc(attrListReconciled, &buffObj));
    printf("NvSciBufAttrList reconciled\n");
}
|
||||
|
||||
// YUV 420 image is flipped and copied to cuda Array which is mapped to nvsciBuf
|
||||
void Caller::copyYUVToCudaArrayAndFlipBits(std::string &path, cudaArray_t *cudaArr) {
|
||||
FILE *fp = NULL;
|
||||
uint8_t *pYBuff, *pUBuff, *pVBuff, *pChroma;
|
||||
uint8_t *pBuff = NULL;
|
||||
uint32_t uvOffset[numPlanes] = {0}, copyWidthInBytes[numPlanes] = {0}, copyHeight[numPlanes] = {0};
|
||||
uint32_t width = multiPlanarWidth[0];
|
||||
uint32_t height = multiPlanarHeight[0];
|
||||
|
||||
fp = fopen(path.c_str(), "rb");
|
||||
if (!fp) {
|
||||
printf("CudaProducer: Error opening file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
pBuff = (uint8_t*)malloc((width * height * PLANAR_CHROMA_WIDTH_ORDER * PLANAR_CHROMA_HEIGHT_ORDER) * sizeof(unsigned char));
|
||||
if (!pBuff) {
|
||||
printf("CudaProducer: Failed to allocate image buffer in %s line %d\n", __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
// Y V U order in the buffer. Fully planar formats use
|
||||
// three planes to store the Y, Cb and Cr components separately.
|
||||
pYBuff = pBuff;
|
||||
pVBuff = pYBuff + width * height;
|
||||
pUBuff = pVBuff + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER);
|
||||
for (uint32_t i = 0; i < height; i++) {
|
||||
if (fread(pYBuff, width, 1, fp) != 1) {
|
||||
printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
flipBits(pYBuff, width);
|
||||
pYBuff += width;
|
||||
}
|
||||
|
||||
pChroma = pVBuff;
|
||||
for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
|
||||
if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
|
||||
printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
flipBits(pChroma, width);
|
||||
pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||
}
|
||||
|
||||
pChroma = pUBuff;
|
||||
for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
|
||||
if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
|
||||
printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
flipBits(pChroma, width);
|
||||
pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||
}
|
||||
uvOffset[0] = 0;
|
||||
copyHeight[0] = height;
|
||||
copyHeight[1] = height / PLANAR_CHROMA_HEIGHT_ORDER;
|
||||
copyHeight[2] = height / PLANAR_CHROMA_HEIGHT_ORDER;
|
||||
copyWidthInBytes[0] = width;
|
||||
// Width of the second and third planes is half of the first plane.
|
||||
copyWidthInBytes[1] = width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||
copyWidthInBytes[2] = width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||
uvOffset[1] = width * height;
|
||||
uvOffset[2] = uvOffset[1] + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER);
|
||||
for (int i = 0; i < numPlanes; i++) {
|
||||
checkCudaDrvErrors(cuCtxSynchronize());
|
||||
checkCudaErrors(cudaMemcpy2DToArray(
|
||||
cudaArr[i], 0, 0, (void *)(pBuff + uvOffset[i]), copyWidthInBytes[i],
|
||||
copyWidthInBytes[i], copyHeight[i],
|
||||
cudaMemcpyHostToDevice));
|
||||
}
|
||||
|
||||
if (fp) {
|
||||
fclose(fp);
|
||||
fp = NULL;
|
||||
}
|
||||
if (pBuff) {
|
||||
free(pBuff);
|
||||
pBuff = NULL;
|
||||
}
|
||||
printf("Image %s copied to CUDA Array and bit flip done\n", path.c_str());
|
||||
}
|
||||
|
||||
// Copy Cuda Array in YUV 420 format to a file
|
||||
void Caller::copyCudaArrayToYUV(std::string &path, cudaArray_t *cudaArr) {
|
||||
FILE *fp = NULL;
|
||||
int bufferSize;
|
||||
uint32_t width = multiPlanarWidth[0];
|
||||
uint32_t height = multiPlanarHeight[0];
|
||||
uint32_t copyWidthInBytes=0, copyHeight=0;
|
||||
uint8_t *pCudaCopyMem = NULL;
|
||||
|
||||
fp = fopen(path.c_str(), "wb+");
|
||||
if (!fp) {
|
||||
printf("WriteFrame: file open failed %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
for (int i = 0; i < numPlanes; i++) {
|
||||
if (i == 0) {
|
||||
bufferSize = width * height;
|
||||
copyWidthInBytes = width;
|
||||
copyHeight = height;
|
||||
|
||||
pCudaCopyMem = (uint8_t *)malloc(bufferSize);
|
||||
if (pCudaCopyMem == NULL) {
|
||||
printf("pCudaCopyMem malloc failed in %s line %d\n", __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
else {
|
||||
bufferSize = ((height / PLANAR_CHROMA_HEIGHT_ORDER) * (width / PLANAR_CHROMA_WIDTH_ORDER));
|
||||
copyWidthInBytes = width / PLANAR_CHROMA_WIDTH_ORDER;
|
||||
copyHeight = height / PLANAR_CHROMA_HEIGHT_ORDER;
|
||||
}
|
||||
memset(pCudaCopyMem, 0, bufferSize);
|
||||
|
||||
checkCudaErrors(cudaMemcpy2DFromArray(
|
||||
(void *)pCudaCopyMem, copyWidthInBytes, cudaArr[i], 0, 0,
|
||||
copyWidthInBytes, copyHeight,
|
||||
cudaMemcpyDeviceToHost));
|
||||
|
||||
checkCudaDrvErrors(cuCtxSynchronize());
|
||||
|
||||
if (fwrite(pCudaCopyMem, bufferSize, 1, fp) != 1) {
|
||||
printf("Cuda consumer: output file write failed in %s line %d\n", __FILE__, __LINE__);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
}
|
||||
printf("Output file : %s saved\n", path.c_str());
|
||||
|
||||
if (fp) {
|
||||
fclose(fp);
|
||||
fp = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// Releases the per-run NvSci resources: both callers' state, the shared buffer
// object (file-level `buffObj`), and the attribute lists produced by
// reconcileAttrList().
//
// caller1 / caller2: the callers used for the run; deinit() is invoked on each.
void cudaNvSciBufMultiplanar::tearDown(Caller *caller1, Caller *caller2) {
    caller1->deinit();
    caller2->deinit();
    NvSciBufObjFree(buffObj);

    // The reconciled/conflict lists were previously never released.
    // Handles are reset so a repeated call does not free them twice.
    if (attrListReconciled != NULL) {
        NvSciBufAttrListFree(attrListReconciled);
        attrListReconciled = NULL;
    }
    if (attrListConflict != NULL) {
        NvSciBufAttrListFree(attrListConflict);
        attrListConflict = NULL;
    }
}
|
@ -0,0 +1,124 @@
|
||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef CUDA_NVSCIBUF_MULTIPLANAR_H
|
||||
#define CUDA_NVSCIBUF_MULTIPLANAR_H
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
#include <nvscibuf.h>
|
||||
#include <vector>
|
||||
#include <cuda.h>
|
||||
#include <helper_cuda.h>
|
||||
|
||||
#define PLANAR_NUM_PLANES 3
|
||||
#define PLANAR_CHROMA_WIDTH_ORDER 2
|
||||
#define PLANAR_CHROMA_HEIGHT_ORDER 2
|
||||
|
||||
#define ATTR_SIZE 20
|
||||
#define DEFAULT_GPU 0
|
||||
|
||||
// Evaluates an NvSci call once; if it does not return NvSciError_Success, prints
// the file, line, returned code and expected code to stdout, flushes stdout and
// terminates the process with EXIT_FAILURE.
#define checkNvSciErrors(call)                                              \
    do {                                                                    \
        const NvSciError nvSciStatus_ = call;                               \
        if (nvSciStatus_ != NvSciError_Success) {                           \
            printf("NVSCI call in file '%s' in line %i returned"            \
                   " %d, expected %d\n",                                    \
                   __FILE__, __LINE__, nvSciStatus_, NvSciError_Success);   \
            fflush(stdout);                                                 \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)
|
||||
|
||||
// Evaluates a CUDA driver API call once; if it does not return CUDA_SUCCESS,
// prints the error code, its description, the file and the line to stdout and
// terminates the process with EXIT_FAILURE.
// The result is kept in a macro-specific local so that it cannot collide with
// an `err` variable used inside the caller's expression. A fallback text is
// printed when the driver provides no description, so NULL never reaches "%s".
#define checkCudaDrvErrors(call)                                            \
    do {                                                                    \
        const CUresult cuDrvStatus_ = call;                                 \
        if (CUDA_SUCCESS != cuDrvStatus_) {                                 \
            const char *cuDrvErrorStr_ = NULL;                              \
            if (cuGetErrorString(cuDrvStatus_, &cuDrvErrorStr_) !=          \
                    CUDA_SUCCESS ||                                         \
                cuDrvErrorStr_ == NULL) {                                   \
                cuDrvErrorStr_ = "unknown error";                           \
            }                                                               \
            printf(                                                         \
                "checkCudaDrvErrors() Driver API error"                     \
                " = %04d \"%s\" from file <%s>, "                           \
                "line %i.\n",                                               \
                cuDrvStatus_, cuDrvErrorStr_, __FILE__, __LINE__);          \
            exit(EXIT_FAILURE);                                             \
        }                                                                   \
    } while (0)
|
||||
|
||||
// Flips the bits of every plane through CUDA surface access.
// Defined outside this header (presumably in imageKernels.cu - confirm).
//   levelArray        - one CUDA array per plane
//   multiPlanarWidth  - per-plane widths
//   multiPlanarHeight - per-plane heights
//   numPlanes         - number of entries in the three arrays above
extern void launchFlipSurfaceBitsKernel(
    cudaArray_t *levelArray,
    int32_t *multiPlanarWidth,
    int32_t *multiPlanarHeight,
    int numPlanes);
|
||||
|
||||
class Caller {
|
||||
private:
|
||||
NvSciBufAttrList attrListOut;
|
||||
NvSciBufAttrKeyValuePair pairArrayOut[ATTR_SIZE];
|
||||
cudaExternalMemory_t extMem;
|
||||
int32_t numPlanes;
|
||||
public:
|
||||
NvSciBufAttrList attrList;
|
||||
cudaMipmappedArray_t multiPlanarArray[PLANAR_NUM_PLANES];
|
||||
int32_t multiPlanarWidth[PLANAR_NUM_PLANES];
|
||||
int32_t multiPlanarHeight[PLANAR_NUM_PLANES];
|
||||
|
||||
void init();
|
||||
void deinit();
|
||||
void copyExtMemToMultiPlanarArrays();
|
||||
void copyYUVToCudaArrayAndFlipBits(std::string &image_filename, cudaArray_t *yuvPlanes);
|
||||
void copyCudaArrayToYUV(std::string &image_filename, cudaArray_t *yuvPlanes);
|
||||
void setAttrListImageMultiPlanes(int imageWidth, int imageHeight);
|
||||
};
|
||||
|
||||
|
||||
class cudaNvSciBufMultiplanar {
|
||||
private:
|
||||
size_t imageWidth;
|
||||
size_t imageHeight;
|
||||
int mCudaDeviceId;
|
||||
int deviceCnt;
|
||||
NvSciBufAttrList attrList[2];
|
||||
NvSciBufAttrList attrListReconciled;
|
||||
NvSciBufAttrList attrListConflict;
|
||||
public:
|
||||
cudaNvSciBufMultiplanar(size_t imageWidth, size_t imageHeight, std::vector<int> &deviceIds);
|
||||
void initCuda(int devId);
|
||||
void reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2);
|
||||
void runCudaNvSciBufPlanar(std::string &image_filename, std::string &image_filename_out);
|
||||
void tearDown(Caller *caller1, Caller *caller2);
|
||||
};
|
||||
|
||||
// Symbolic indices for per-image NvSciBuf attributes. The values are the
// consecutive integers the compiler would assign implicitly, written out
// here for readability; PLANE_ATTR_SIZE is the number of entries before it.
// NOTE(review): presumably used to index an attribute key/value array --
// confirm against the implementation file.
enum NvSciBufImageAttributes {
    PLANE_SIZE           = 0,
    PLANE_ALIGNED_SIZE   = 1,
    PLANE_OFFSET         = 2,
    PLANE_HEIGHT         = 3,
    PLANE_WIDTH          = 4,
    PLANE_CHANNEL_COUNT  = 5,
    PLANE_BITS_PER_PIXEL = 6,
    PLANE_COUNT          = 7,
    PLANE_ATTR_SIZE      = 8
};
|
||||
|
||||
#endif // CUDA_NVSCIBUF_MULTIPLANAR_H
|
@ -0,0 +1,64 @@
|
||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <cuda.h>
|
||||
#include <helper_cuda.h>
|
||||
|
||||
// Kernel: each thread handles one (x, y) position of a 2D surface, reading a
// single `char` element, inverting all of its bits and writing it back to the
// same position of the same surface.
//   surfObj - surface object that is both read and written
//   width   - number of positions to process along x
//   height  - number of positions to process along y
static __global__ void flipSurfaceBits(cudaSurfaceObject_t surfObj, int width, int height)
{
    const unsigned int col = blockIdx.x * blockDim.x + threadIdx.x;
    const unsigned int row = blockIdx.y * blockDim.y + threadIdx.y;

    // The grid is rounded up to whole blocks by the launcher, so threads
    // that fall outside the requested extent have nothing to do.
    if (col >= width || row >= height) {
        return;
    }

    char texel;
    surf2Dread(&texel, surfObj, col, row);
    surf2Dwrite(static_cast<char>(~texel), surfObj, col, row);
}
|
||||
|
||||
// Binds each plane's CUDA array to a surface object and runs flipSurfaceBits
// over it.
//
//   levelArray        - array of `numPlanes` CUDA arrays, one per plane
//   multiPlanarWidth  - array of `numPlanes` plane widths
//   multiPlanarHeight - array of `numPlanes` plane heights
//   numPlanes         - number of planes to process; <= 0 processes nothing
//
// Any CUDA runtime failure is reported through checkCudaErrors. The function
// returns only after every launched kernel has finished, because each surface
// object is destroyed before moving on to the next plane.
void launchFlipSurfaceBitsKernel(cudaArray_t *levelArray,
                                 int32_t     *multiPlanarWidth,
                                 int32_t     *multiPlanarHeight,
                                 int          numPlanes)
{
    const dim3 threadsPerBlock(16, 16);

    for (int i = 0; i < numPlanes; i++) {
        const int32_t planeWidth  = multiPlanarWidth[i];
        const int32_t planeHeight = multiPlanarHeight[i];

        // An empty plane would yield a zero-sized grid, which is an invalid
        // launch configuration; there is nothing to flip in that case.
        if (planeWidth <= 0 || planeHeight <= 0) {
            continue;
        }

        cudaResourceDesc resDesc = {};
        resDesc.resType          = cudaResourceTypeArray;
        resDesc.res.array.array  = levelArray[i];

        // A single object per iteration replaces the former variable-length
        // array, which is not standard C++ and cannot portably be initialized.
        cudaSurfaceObject_t surfObject = 0;
        checkCudaErrors(cudaCreateSurfaceObject(&surfObject, &resDesc));

        // Round the grid up so partially filled edge blocks are covered.
        const dim3 numBlocks((planeWidth + threadsPerBlock.x - 1) / threadsPerBlock.x,
                             (planeHeight + threadsPerBlock.y - 1) / threadsPerBlock.y);

        flipSurfaceBits<<<numBlocks, threadsPerBlock>>>(surfObject, planeWidth, planeHeight);

        // Surface launch-configuration errors, then wait for completion so
        // the surface object can be released without racing the kernel.
        checkCudaErrors(cudaGetLastError());
        checkCudaErrors(cudaDeviceSynchronize());
        checkCudaErrors(cudaDestroySurfaceObject(surfObject));
    }
}
|
||||
|
@ -0,0 +1,72 @@
|
||||
/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* * Neither the name of NVIDIA CORPORATION nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
|
||||
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
|
||||
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
#include <cuda.h>
|
||||
#include <vector>
|
||||
#include "cudaNvSciBufMultiplanar.h"
|
||||
#include <helper_image.h>
|
||||
|
||||
#define MAX_FILE_SIZE 100
|
||||
|
||||
int main(int argc, const char **argv) {
|
||||
int numOfGPUs = 0;
|
||||
std::vector<int> deviceIds;
|
||||
(cudaGetDeviceCount(&numOfGPUs));
|
||||
|
||||
printf("%d GPUs found\n", numOfGPUs);
|
||||
if (!numOfGPUs) {
|
||||
exit(EXIT_WAIVED);
|
||||
} else {
|
||||
for (int devID = 0; devID < numOfGPUs; devID++) {
|
||||
int major = 0, minor = 0;
|
||||
(cudaDeviceGetAttribute(
|
||||
&major, cudaDevAttrComputeCapabilityMajor, devID));
|
||||
(cudaDeviceGetAttribute(
|
||||
&minor, cudaDevAttrComputeCapabilityMinor, devID));
|
||||
if (major >= 6) {
|
||||
deviceIds.push_back(devID);
|
||||
}
|
||||
}
|
||||
if (deviceIds.size() == 0) {
|
||||
printf(
|
||||
"cudaNvSciBufMultiplanar requires one or more GPUs of Pascal(SM 6.0) or higher "
|
||||
"archs\nWaiving..\n");
|
||||
exit(EXIT_WAIVED);
|
||||
}
|
||||
}
|
||||
|
||||
std::string image_filename = sdkFindFilePath("yuv_planar_img1.yuv", argv[0]);
|
||||
std::string image_filename_out = "image_out.yuv";
|
||||
uint32_t imageWidth = 720;
|
||||
uint32_t imageHeight = 480;
|
||||
|
||||
printf("input image %s , width = %d, height = %d\n", image_filename.c_str(), imageWidth, imageHeight);
|
||||
|
||||
cudaNvSciBufMultiplanar cudaNvSciBufMultiplanarApp(imageWidth, imageHeight, deviceIds);
|
||||
cudaNvSciBufMultiplanarApp.runCudaNvSciBufPlanar(image_filename, image_filename_out);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user