Merge pull request #10 from XSShawnZeng/Tegra_Samples_Cmake_Transition

Add Tegra sample cudaNvSciBufMultiplanar
2025-04-10 18:22:11 +01:00 · 2025-01-16 09:01:44 -08:00 · 2025-01-16 09:01:44 -08:00 · 1a466282da
commit 1a466282da
parent b518bfe9be 545194e7aa
12 changed files with 884 additions and 0 deletions
--- a/Samples/8_Platform_Specific/Tegra/CMakeLists.txt
+++ b/Samples/8_Platform_Specific/Tegra/CMakeLists.txt
@ -1,4 +1,5 @@
 add_subdirectory(cudaNvSciNvMedia)
+add_subdirectory(cudaNvSciBufMultiplanar)
 add_subdirectory(cuDLAErrorReporting)
 add_subdirectory(cuDLAHybridMode)
 add_subdirectory(cuDLALayerwiseStatsHybrid)
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/c_cpp_properties.json
@ -0,0 +1,18 @@
+{
+    "configurations": [
+        {
+            "name": "Linux",
+            "includePath": [
+                "${workspaceFolder}/**",
+                "${workspaceFolder}/../../../Common"
+            ],
+            "defines": [],
+            "compilerPath": "/usr/local/cuda/bin/nvcc",
+            "cStandard": "gnu17",
+            "cppStandard": "gnu++14",
+            "intelliSenseMode": "linux-gcc-x64",
+            "configurationProvider": "ms-vscode.makefile-tools"
+        }
+    ],
+    "version": 4
+}
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/extensions.json
@ -0,0 +1,7 @@
+{
+    "recommendations": [
+        "nvidia.nsight-vscode-edition",
+        "ms-vscode.cpptools",
+        "ms-vscode.makefile-tools"
+    ]
+}
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/launch.json
@ -0,0 +1,10 @@
+{
+    "configurations": [
+        {
+            "name": "CUDA C++: Launch",
+            "type": "cuda-gdb",
+            "request": "launch",
+            "program": "${workspaceFolder}/cudaNvSciBufMultiplanar"
+        }
+    ]
+}
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/.vscode/tasks.json
@ -0,0 +1,15 @@
+{
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "sample",
+            "type": "shell",
+            "command": "make dbg=1",
+            "problemMatcher": ["$nvcc"],
+            "group": {
+                "kind": "build",
+                "isDefault": true
+            }
+        }
+    ]
+}
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/CMakeLists.txt
@ -0,0 +1,74 @@
+cmake_minimum_required(VERSION 3.20)
+
+list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../../../../cmake/Modules")
+
+project(cudaNvSciBufMultiplanar LANGUAGES C CXX CUDA)
+
+find_package(CUDAToolkit REQUIRED)
+
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+set(CMAKE_CUDA_ARCHITECTURES 53 61 70 72 75 80 86 87 90)
+if(CMAKE_BUILD_TYPE STREQUAL "Debug")
+    # set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G")  # enable cuda-gdb (expensive)
+endif()
+
+# Include directories and libraries
+include_directories(../../../../Common)
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+    # Find the NVSCI libraries
+    # use CMAKE_LIBRARY_PATH so that users can also specify the NVSCI lib path in cmake command
+    set(CMAKE_LIBRARY_PATH "/usr/lib" ${CMAKE_LIBRARY_PATH})
+    file(GLOB_RECURSE NVSCIBUF_LIB
+         ${CMAKE_LIBRARY_PATH}/*/libnvscibuf.so
+    )
+    file(GLOB_RECURSE NVSCISYNC_LIB
+         ${CMAKE_LIBRARY_PATH}/*/libnvscisync.so
+    )
+
+    # Find the NVSCI header files
+    # use CMAKE_INCLUDE_PATH so that users can also specify the NVSCI include path in cmake command
+    set(CMAKE_INCLUDE_PATH "/usr/include" ${CMAKE_LIBRARY_PATH})
+    find_path(NVSCIBUF_INCLUDE_DIR nvscibuf.h PATHS ${CMAKE_INCLUDE_PATH})
+    find_path(NVSCISYNC_INCLUDE_DIR nvscisync.h PATHS ${CMAKE_INCLUDE_PATH})
+
+    if(NVSCIBUF_LIB AND NVSCISYNC_LIB AND NVSCIBUF_INCLUDE_DIR AND NVSCISYNC_INCLUDE_DIR)
+        message(STATUS "FOUND NVSCI libs: ${NVSCIBUF_LIB} ${NVSCISYNC_LIB}")
+        message(STATUS "Using NVSCI headers path: ${NVSCIBUF_INCLUDE_DIR} ${NVSCIBUF_INCLUDE_DIR}")
+        # Source file
+        # Add target for cudaNvSciBufMultiplanar
+        add_executable(cudaNvSciBufMultiplanar imageKernels.cu cudaNvSciBufMultiplanar.cpp main.cpp)
+
+        target_compile_options(cudaNvSciBufMultiplanar PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:--extended-lambda>)
+
+        target_compile_features(cudaNvSciBufMultiplanar PRIVATE cxx_std_17 cuda_std_17)
+
+        set_target_properties(cudaNvSciBufMultiplanar PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
+
+        target_include_directories(cudaNvSciBufMultiplanar PUBLIC
+            ${CUDAToolkit_INCLUDE_DIRS}
+            ${NVSCIBUF_INCLUDE_DIR}
+            ${NVSCISYNC_INCLUDE_DIR}
+        )
+
+        target_link_libraries(cudaNvSciBufMultiplanar
+            CUDA::cuda_driver
+            ${NVSCIBUF_LIB}
+            ${NVSCISYNC_LIB}
+        )
+        # Copy yuv_planar_img1.yuv to the output directory
+        add_custom_command(TARGET cudaNvSciBufMultiplanar POST_BUILD
+            COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            ${CMAKE_CURRENT_SOURCE_DIR}/yuv_planar_img1.yuv ${CMAKE_CURRENT_BINARY_DIR}/yuv_planar_img1.yuv
+        )
+        # Specify additional clean files
+        set_target_properties(cudaNvSciBufMultiplanar PROPERTIES
+            ADDITIONAL_CLEAN_FILES "image_out.yuv"
+        )
+    else()
+        message(STATUS "NvSCI not found - will not build sample 'cudaNvSciBufMultiplanar'")
+    endif()
+else()
+    message(STATUS "Will not build sample cudaNvSciBufMultiplanar - requires Linux OS")
+endif()
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/README.md
@ -0,0 +1,64 @@
+# cudaNvSciBufMultiplanar - CUDA NvSciBufMultiplanar Image Samples
+
+## Description
+
+This sample demonstrates CUDA-NvSciBuf Interop for Multiplanar images. A YUV 420 multiplanar image is flipped and allocated using NvSciBuf APIs and imported into CUDA with CUDA External Resource Interoperability. A CUDA surface is created from the corresponding mapped CUDA array and again bit flipping is performed on the surface. The result is copied back to a YUV image which is compared against the input.
+
+## Key Concepts
+
+CUDA NvSci Interop, Data Parallel Algorithms, Image Processing
+
+## Supported SM Architectures
+
+[SM 6.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 6.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.2 ](https://developer.nvidia.com/cuda-gpus)  [SM 7.5 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.6 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.7 ](https://developer.nvidia.com/cuda-gpus)  [SM 8.9 ](https://developer.nvidia.com/cuda-gpus)  [SM 9.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 10.0 ](https://developer.nvidia.com/cuda-gpus)  [SM 10.1 ](https://developer.nvidia.com/cuda-gpus)  [SM 12.0 ](https://developer.nvidia.com/cuda-gpus)
+
+## Supported OSes
+
+Linux
+
+## Supported CPU Architecture
+
+aarch64
+
+## CUDA APIs involved
+
+### [CUDA Runtime API](http://docs.nvidia.com/cuda/cuda-runtime-api/index.html)
+cudaDeviceGetAttribute, cudaNvSciBufMultiplanar, cudaDestroyExternalMemory, cuDriverGetVersion, cuDeviceGetUuid, cudaSetDevice, cudaGetMipmappedArrayLevel, cudaFreeMipmappedArray, cudaImportExternalMemory, cudaCreateChannelDesc, cudaExternalMemoryGetMappedMipmappedArray, cuCtxSynchronize, cudaMemcpy2DToArray, cudaMemcpy2DFromArray
+
+## Dependencies needed to build/run
+[NVSCI](../../../README.md#nvsci)
+
+## Prerequisites
+
+Download and install the [CUDA Toolkit 12.8](https://developer.nvidia.com/cuda-downloads) for your corresponding platform.
+Make sure the dependencies mentioned in [Dependencies]() section above are installed.
+
+## Build and Run
+
+### Linux
+The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
+```
+$ cd <sample_dir>
+$ make
+```
+The samples makefiles can take advantage of certain options:
+*  **TARGET_ARCH=<arch>** - cross-compile targeting a specific architecture. Allowed architectures are aarch64.
+    By default, TARGET_ARCH is set to HOST_ARCH. On a x86_64 machine, not setting TARGET_ARCH is the equivalent of setting TARGET_ARCH=x86_64.<br/>
+`$ make TARGET_ARCH=aarch64` <br/>
+    See [here](http://docs.nvidia.com/cuda/cuda-samples/index.html#cross-samples) for more details.
+*   **dbg=1** - build with debug symbols
+    ```
+    $ make dbg=1
+    ```
+*   **SMS="A B ..."** - override the SM architectures for which the sample will be built, where `"A B ..."` is a space-delimited list of SM architectures. For example, to generate SASS for SM 50 and SM 60, use `SMS="50 60"`.
+    ```
+    $ make SMS="50 60"
+    ```
+
+*  **HOST_COMPILER=<host_compiler>** - override the default g++ host compiler. See the [Linux Installation Guide](http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements) for a list of supported host compilers.
+```
+    $ make HOST_COMPILER=g++
+```
+
+## References (for more details)
+
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.cpp
@ -0,0 +1,435 @@
+/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include "cudaNvSciBufMultiplanar.h"
+
+NvSciBufModule module;
+NvSciBufObj buffObj;
+CUuuid uuid;
+
+void flipBits(uint8_t *pBuff, uint32_t size) {
+    for (uint32_t i = 0; i < size; i++) {
+        pBuff[i] = (~pBuff[i]);
+    }
+}
+
+// Compare input and generated image files
+void compareFiles(std::string &path1, std::string &path2) {  
+    bool result = true;
+    FILE *fp1, *fp2;
+    int ch1, ch2;
+    
+    fp1 = fopen(path1.c_str(), "rb");
+    fp2 = fopen(path2.c_str(), "rb");
+    if (!fp1) {
+        result = false;
+        printf("File %s open failed in %s line %d\n", path1.c_str(), __FILE__, __LINE__);
+        exit(EXIT_FAILURE); 
+    }
+    if (!fp2) {
+        result = false;
+        printf("File %s open failed in %s line %d\n", path2.c_str(), __FILE__, __LINE__);
+        exit(EXIT_FAILURE);
+    }
+   
+    do  {       
+        ch1 = getc(fp1); 
+        ch2 = getc(fp2);
+
+        if (ch1 != ch2) { 
+            result = false; 
+            break;
+        } 
+    } while(ch1 != EOF && ch2 != EOF);
+
+    if (result) {
+        printf("Input file : %s and output file : %s match SUCCESS\n", path1.c_str(), path2.c_str());
+    } 
+    else {
+        printf("Input file : %s and output file : %s match FAILURE\n", path1.c_str(), path2.c_str());
+    }
+  
+    if (fp1) {
+        fclose(fp1);
+    }
+    if (fp2) {
+        fclose(fp2);
+    }
+} 
+
+void Caller::init() {
+    checkNvSciErrors(NvSciBufAttrListCreate(module, &attrList));
+    attrListOut = NULL;
+}
+
+void Caller::deinit() {
+    NvSciBufAttrListFree(attrList);
+    checkCudaErrors(cudaDestroyExternalMemory(extMem));
+}
+
+// Set NvSciBufImage attribute values in the attribute list  
+void Caller::setAttrListImageMultiPlanes(int imageWidth, int imageHeight) {
+    NvSciBufType bufType = NvSciBufType_Image;
+    NvSciBufAttrValImageLayoutType layout = NvSciBufImage_BlockLinearType;
+    bool cpuAccessFlag = false;
+    NvSciBufAttrValAccessPerm perm = NvSciBufAccessPerm_ReadWrite;
+    NvSciRmGpuId gpuid;
+    bool vpr = false;
+    int32_t planeCount = PLANAR_NUM_PLANES;
+    int drvVersion;
+    // Dimensions of the imported image in the YUV 420 planar format
+    int32_t planeWidths[] = {imageWidth, imageWidth/2, imageWidth/2};
+    int32_t planeHeights[] = {imageHeight, imageHeight/2, imageHeight/2};
+    NvSciBufAttrKeyValuePair keyPair;
+    NvSciBufAttrKeyValuePair pairArray[ATTR_SIZE];
+ 
+    NvSciBufAttrValColorFmt planeColorFmts[] =
+            { NvSciColor_Y8, NvSciColor_V8, NvSciColor_U8 };
+    NvSciBufAttrValImageScanType planeScanType[] =
+            { NvSciBufScan_ProgressiveType };
+ 
+    memcpy(&gpuid.bytes, &uuid.bytes, sizeof(uuid.bytes));
+ 
+    NvSciBufAttrKeyValuePair imgBuffAttrsArr[] = {
+        { NvSciBufGeneralAttrKey_Types, &bufType, sizeof(bufType) },
+        { NvSciBufGeneralAttrKey_NeedCpuAccess, &cpuAccessFlag,
+            sizeof(cpuAccessFlag) },
+        { NvSciBufGeneralAttrKey_RequiredPerm, &perm, sizeof(perm) },
+        { NvSciBufGeneralAttrKey_GpuId, &gpuid, sizeof(gpuid) },
+        { NvSciBufImageAttrKey_Layout, &layout, sizeof(layout) },
+        { NvSciBufImageAttrKey_VprFlag, &vpr, sizeof(vpr) },
+        { NvSciBufImageAttrKey_PlaneCount, &planeCount, sizeof(planeCount) },
+        { NvSciBufImageAttrKey_PlaneColorFormat, planeColorFmts,
+            sizeof(planeColorFmts) },
+        { NvSciBufImageAttrKey_PlaneWidth, planeWidths, sizeof(planeWidths) },
+        { NvSciBufImageAttrKey_PlaneHeight, planeHeights,
+            sizeof(planeHeights) },
+        { NvSciBufImageAttrKey_PlaneScanType, planeScanType,
+            sizeof(planeScanType) },
+    };
+ 
+    std::vector<NvSciBufAttrKeyValuePair> imgBuffAttrsVec(imgBuffAttrsArr,
+                            imgBuffAttrsArr+(sizeof(imgBuffAttrsArr)/sizeof(imgBuffAttrsArr[0])));
+  
+    memset(pairArray, 0, sizeof(NvSciBufAttrKeyValuePair) * imgBuffAttrsVec.size());
+    std::copy(imgBuffAttrsVec.begin(), imgBuffAttrsVec.end(), pairArray);
+    checkNvSciErrors(NvSciBufAttrListSetAttrs(attrList, pairArray, imgBuffAttrsVec.size()));
+}
+
+cudaNvSciBufMultiplanar::cudaNvSciBufMultiplanar(size_t width, size_t height, std::vector<int> &deviceIds)
+    : imageWidth(width),
+      imageHeight(height) {
+        mCudaDeviceId =  deviceIds[0];
+        attrListReconciled = NULL;
+        attrListConflict = NULL;
+        checkNvSciErrors(NvSciBufModuleOpen(&module));
+        initCuda(mCudaDeviceId);
+    }
+
+void cudaNvSciBufMultiplanar::initCuda(int devId) {
+    int major = 0, minor = 0, drvVersion;
+    NvSciRmGpuId gpuid;
+
+    checkCudaErrors(cudaSetDevice(mCudaDeviceId));
+    checkCudaErrors(cudaDeviceGetAttribute(
+        &major, cudaDevAttrComputeCapabilityMajor, mCudaDeviceId));
+    checkCudaErrors(cudaDeviceGetAttribute(
+        &minor, cudaDevAttrComputeCapabilityMinor, mCudaDeviceId));
+    printf(
+        "[cudaNvSciBufMultiplanar] GPU Device %d: \"%s\" with compute capability "
+        "%d.%d\n\n",
+        mCudaDeviceId, _ConvertSMVer2ArchName(major, minor), major, minor);
+
+    checkCudaDrvErrors(cuDriverGetVersion(&drvVersion));
+
+    if (drvVersion <= 11030) {
+        checkCudaDrvErrors(cuDeviceGetUuid(&uuid, devId));
+    } else {
+        checkCudaDrvErrors(cuDeviceGetUuid_v2(&uuid, devId));
+    }
+}
+
+/* 
+Caller1 flips a YUV image which is allocated to nvscibuf APIs and copied into CUDA Array.
+It is mapped to CUDA surface and bit flip is done. Caller2 in the same thread copies 
+CUDA Array to a YUV image file. The original image is compared with the double bit
+flipped image.
+*/
+void cudaNvSciBufMultiplanar::runCudaNvSciBufPlanar(std::string &imageFilename, std::string &imageFilenameOut) {
+    cudaArray_t levelArray1[PLANAR_NUM_PLANES];
+    cudaArray_t levelArray2[PLANAR_NUM_PLANES];
+    Caller caller1;
+    Caller caller2;
+
+    int numPlanes = PLANAR_NUM_PLANES;
+    caller1.init();
+    caller2.init();
+
+    // Set NvSciBufImage attribute values in the attribute list
+    caller1.setAttrListImageMultiPlanes(imageWidth, imageHeight);
+    caller2.setAttrListImageMultiPlanes(imageWidth, imageHeight);
+    
+    // Reconcile attribute lists and allocate NvSciBuf object
+    reconcileAttrList(&caller1.attrList, &caller2.attrList); 
+    caller1.copyExtMemToMultiPlanarArrays();
+    for (int i = 0; i < numPlanes; i++) {
+        checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray1[i], caller1.multiPlanarArray[i], 0)); 
+    }
+    caller1.copyYUVToCudaArrayAndFlipBits(imageFilename, levelArray1);
+    
+    caller2.copyExtMemToMultiPlanarArrays();
+    for (int i = 0; i < numPlanes; i++) {
+        checkCudaErrors(cudaGetMipmappedArrayLevel(&levelArray2[i], caller2.multiPlanarArray[i], 0)); 
+    }
+    // Maps cudaArray to surface memory and launches a kernel to flip bits
+    launchFlipSurfaceBitsKernel(levelArray2, caller2.multiPlanarWidth, caller2.multiPlanarHeight, numPlanes);
+    
+    // Synchronization can be done using nvSciSync when non CUDA callers and cross-process signaler-waiter 
+    // applications are involved. Please refer to the cudaNvSci sample library for more details.
+    checkCudaDrvErrors(cuCtxSynchronize());
+    printf("Bit flip of the surface memory done\n");
+
+    caller2.copyCudaArrayToYUV(imageFilenameOut, levelArray2);
+    compareFiles(imageFilename, imageFilenameOut);
+
+    // Release memory
+    printf("Releasing memory\n");
+    for (int i = 0; i < numPlanes; i++) {
+        checkCudaErrors(cudaFreeMipmappedArray(caller1.multiPlanarArray[i]));
+        checkCudaErrors(cudaFreeMipmappedArray(caller2.multiPlanarArray[i]));
+    }
+    tearDown(&caller1, &caller2);
+}
+
+// Map NvSciBufObj to cudaMipmappedArray
+void Caller::copyExtMemToMultiPlanarArrays() {
+    checkNvSciErrors(NvSciBufObjGetAttrList(buffObj, &attrListOut));
+    memset(pairArrayOut, 0, sizeof(NvSciBufAttrKeyValuePair) * PLANE_ATTR_SIZE);
+    cudaExternalMemoryHandleDesc memHandleDesc;
+    cudaExternalMemoryMipmappedArrayDesc mipmapDesc = {0};
+    cudaChannelFormatDesc desc = {0};
+    cudaExtent extent = {0};
+ 
+    pairArrayOut[PLANE_SIZE].key   = NvSciBufImageAttrKey_Size;                       // Datatype: @c uint64_t
+    pairArrayOut[PLANE_ALIGNED_SIZE].key   = NvSciBufImageAttrKey_PlaneAlignedSize;   // Datatype: @c uint64_t[]
+    pairArrayOut[PLANE_OFFSET].key   = NvSciBufImageAttrKey_PlaneOffset;              // Datatype: @c uint64_t[]
+    pairArrayOut[PLANE_HEIGHT].key   = NvSciBufImageAttrKey_PlaneHeight;              // Datatype: @c uint32_t[]
+    pairArrayOut[PLANE_WIDTH].key   = NvSciBufImageAttrKey_PlaneWidth;                // Datatype: @c int32_t[]
+    pairArrayOut[PLANE_CHANNEL_COUNT].key   = NvSciBufImageAttrKey_PlaneChannelCount; // Datatype: @c uint8_t
+    pairArrayOut[PLANE_BITS_PER_PIXEL].key   = NvSciBufImageAttrKey_PlaneBitsPerPixel;// Datatype: @c uint32_t[]
+    pairArrayOut[PLANE_COUNT].key   = NvSciBufImageAttrKey_PlaneCount;                // Datatype: @c uint32_t
+    checkNvSciErrors(NvSciBufAttrListGetAttrs(attrListOut, pairArrayOut, (PLANE_ATTR_SIZE)));  
+ 
+    uint64_t size = *(uint64_t*)pairArrayOut[PLANE_SIZE].value;
+    uint64_t *planeAlignedSize = (uint64_t*)pairArrayOut[PLANE_ALIGNED_SIZE].value;
+    int32_t *planeWidth = (int32_t*)pairArrayOut[PLANE_WIDTH].value;
+    int32_t *planeHeight = (int32_t*)pairArrayOut[PLANE_HEIGHT].value;
+    uint64_t *planeOffset = (uint64_t*)pairArrayOut[PLANE_OFFSET].value;
+    uint8_t planeChannelCount = *(uint8_t*)pairArrayOut[PLANE_CHANNEL_COUNT].value;
+    uint32_t *planeBitsPerPixel = (uint32_t*)pairArrayOut[PLANE_BITS_PER_PIXEL].value;
+    uint32_t planeCount = *(uint32_t*)pairArrayOut[PLANE_COUNT].value;
+ 
+    numPlanes = planeCount;
+ 
+    for (int i = 0; i < numPlanes; i++) {
+        multiPlanarWidth[i] = planeWidth[i];
+        multiPlanarHeight[i] = planeHeight[i];
+    }
+   
+    memset(&memHandleDesc, 0, sizeof(memHandleDesc));
+    memHandleDesc.type = cudaExternalMemoryHandleTypeNvSciBuf;
+    memHandleDesc.handle.nvSciBufObject = buffObj;
+    memHandleDesc.size = size;
+    checkCudaErrors(cudaImportExternalMemory(&extMem, &memHandleDesc));
+    
+    desc = cudaCreateChannelDesc(planeBitsPerPixel[0], 0, 0, 0, cudaChannelFormatKindUnsigned);
+    memset(&mipmapDesc, 0, sizeof(mipmapDesc));
+    mipmapDesc.numLevels = 1; 
+
+    for (int i = 0; i < numPlanes; i++) {    
+        memset(&extent, 0, sizeof(extent));
+        extent.width = planeWidth[i];
+        extent.height = planeHeight[i];
+        extent.depth = 0;
+        mipmapDesc.offset = planeOffset[i]; 
+        mipmapDesc.formatDesc = desc;
+        mipmapDesc.extent = extent;
+        mipmapDesc.flags = cudaArraySurfaceLoadStore;;
+        checkCudaErrors(cudaExternalMemoryGetMappedMipmappedArray(&multiPlanarArray[i], extMem, &mipmapDesc));
+    }
+}
+
+void cudaNvSciBufMultiplanar::reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2) {
+    attrList[0] = *attrList1;
+    attrList[1] = *attrList2;
+    bool isReconciled = false;
+
+    checkNvSciErrors(NvSciBufAttrListReconcile(attrList, 2, &attrListReconciled, &attrListConflict));
+    checkNvSciErrors(NvSciBufAttrListIsReconciled(attrListReconciled, &isReconciled));
+    checkNvSciErrors(NvSciBufObjAlloc(attrListReconciled, &buffObj));
+    printf("NvSciBufAttrList reconciled\n");
+}
+
+// YUV 420 image is flipped and copied to cuda Array which is mapped to nvsciBuf
+void Caller::copyYUVToCudaArrayAndFlipBits(std::string &path, cudaArray_t *cudaArr) {
+    FILE *fp = NULL;
+    uint8_t *pYBuff, *pUBuff, *pVBuff, *pChroma;
+    uint8_t *pBuff = NULL; 
+    uint32_t uvOffset[numPlanes] = {0}, copyWidthInBytes[numPlanes] = {0}, copyHeight[numPlanes] = {0};
+    uint32_t width = multiPlanarWidth[0];
+    uint32_t height = multiPlanarHeight[0];
+
+    fp = fopen(path.c_str(), "rb");
+    if (!fp) {
+        printf("CudaProducer: Error opening file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
+        exit(EXIT_FAILURE); 
+    }
+    pBuff = (uint8_t*)malloc((width * height * PLANAR_CHROMA_WIDTH_ORDER * PLANAR_CHROMA_HEIGHT_ORDER) * sizeof(unsigned char));
+    if (!pBuff) {
+        printf("CudaProducer: Failed to allocate image buffer in %s line %d\n", __FILE__, __LINE__);
+        exit(EXIT_FAILURE); 
+    }
+    // Y V U order in the buffer. Fully planar formats use 
+    // three planes to store the Y, Cb and Cr components separately.
+    pYBuff = pBuff; 
+    pVBuff = pYBuff + width * height;
+    pUBuff = pVBuff + (width / PLANAR_CHROMA_WIDTH_ORDER)  * (height / PLANAR_CHROMA_HEIGHT_ORDER);
+    for (uint32_t i = 0; i < height; i++) {
+        if (fread(pYBuff, width, 1, fp) != 1) {
+            printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
+            exit(EXIT_FAILURE); 
+        }
+        flipBits(pYBuff, width);
+        pYBuff += width;
+    }
+ 
+    pChroma = pVBuff;
+    for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
+        if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
+            printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
+            exit(EXIT_FAILURE);
+        }
+        flipBits(pChroma, width);
+        pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
+    }
+ 
+    pChroma = pUBuff;
+    for (uint32_t i = 0; i < height / PLANAR_CHROMA_HEIGHT_ORDER; i++) {
+        if (fread(pChroma, width / PLANAR_CHROMA_WIDTH_ORDER, 1, fp) != 1) {
+            printf("ReadYUVFrame: Error reading file: %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
+            exit(EXIT_FAILURE);
+        }
+        flipBits(pChroma, width);
+        pChroma += width / PLANAR_CHROMA_WIDTH_ORDER;
+    }
+    uvOffset[0] = 0;
+    copyHeight[0] = height;
+    copyHeight[1] = height / PLANAR_CHROMA_HEIGHT_ORDER;
+    copyHeight[2] = height / PLANAR_CHROMA_HEIGHT_ORDER;
+    copyWidthInBytes[0] = width;
+    // Width of the second and third planes is half of the first plane.  
+    copyWidthInBytes[1] = width / PLANAR_CHROMA_WIDTH_ORDER;        
+    copyWidthInBytes[2] = width / PLANAR_CHROMA_WIDTH_ORDER;        
+    uvOffset[1] = width * height;
+    uvOffset[2] = uvOffset[1] + (width / PLANAR_CHROMA_WIDTH_ORDER) * (height / PLANAR_CHROMA_HEIGHT_ORDER);
+    for (int i = 0; i < numPlanes; i++) {
+        checkCudaDrvErrors(cuCtxSynchronize());
+        checkCudaErrors(cudaMemcpy2DToArray(
+        cudaArr[i], 0, 0, (void *)(pBuff + uvOffset[i]), copyWidthInBytes[i],
+        copyWidthInBytes[i], copyHeight[i],
+        cudaMemcpyHostToDevice));
+    }
+
+    if (fp) {
+        fclose(fp);
+        fp = NULL;
+    }
+    if (pBuff) {
+        free(pBuff);
+        pBuff = NULL;
+    }
+    printf("Image %s copied to CUDA Array and bit flip done\n", path.c_str());
+}
+
+// Copy Cuda Array in YUV 420 format to a file 
+void Caller::copyCudaArrayToYUV(std::string &path, cudaArray_t *cudaArr) {
+    FILE *fp = NULL;
+    int bufferSize;
+    uint32_t width = multiPlanarWidth[0];
+    uint32_t height = multiPlanarHeight[0];
+    uint32_t copyWidthInBytes=0, copyHeight=0;
+    uint8_t *pCudaCopyMem = NULL;
+ 
+    fp = fopen(path.c_str(), "wb+");
+    if (!fp) {
+        printf("WriteFrame: file open failed %s in %s line %d\n", path.c_str(), __FILE__, __LINE__);
+        exit(EXIT_FAILURE);
+    }
+ 
+    for (int i = 0; i < numPlanes; i++) {
+        if (i == 0) {
+            bufferSize = width * height;
+            copyWidthInBytes = width;
+            copyHeight = height;
+ 
+            pCudaCopyMem = (uint8_t *)malloc(bufferSize);
+            if (pCudaCopyMem == NULL) {
+                printf("pCudaCopyMem malloc failed in %s line %d\n", __FILE__, __LINE__);
+                exit(EXIT_FAILURE);
+            }
+        } 
+        else {
+            bufferSize = ((height / PLANAR_CHROMA_HEIGHT_ORDER) * (width / PLANAR_CHROMA_WIDTH_ORDER));
+            copyWidthInBytes = width / PLANAR_CHROMA_WIDTH_ORDER;
+            copyHeight = height / PLANAR_CHROMA_HEIGHT_ORDER;
+        }
+        memset(pCudaCopyMem, 0, bufferSize);
+ 
+        checkCudaErrors(cudaMemcpy2DFromArray(
+        (void *)pCudaCopyMem, copyWidthInBytes, cudaArr[i], 0, 0, 
+        copyWidthInBytes, copyHeight,
+        cudaMemcpyDeviceToHost));
+
+        checkCudaDrvErrors(cuCtxSynchronize());
+       
+        if (fwrite(pCudaCopyMem, bufferSize, 1, fp) != 1) {
+            printf("Cuda consumer: output file write failed in %s line %d\n", __FILE__, __LINE__);
+             exit(EXIT_FAILURE);
+        }  
+    }
+    printf("Output file : %s saved\n", path.c_str());
+    
+    if (fp) {
+        fclose(fp);
+        fp = NULL;
+    }
+} 
+
+void cudaNvSciBufMultiplanar::tearDown(Caller *caller1, Caller *caller2) {
+    caller1->deinit();
+    caller2->deinit();
+    NvSciBufObjFree(buffObj);
+}
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/cudaNvSciBufMultiplanar.h
@ -0,0 +1,124 @@
+/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef CUDA_NVSCIBUF_MULTIPLANAR_H
+#define CUDA_NVSCIBUF_MULTIPLANAR_H
+
+#include <cuda_runtime.h>
+#include <nvscibuf.h>
+#include <vector>
+#include <cuda.h>
+#include <helper_cuda.h>
+
+#define PLANAR_NUM_PLANES 3
+#define PLANAR_CHROMA_WIDTH_ORDER 2
+#define PLANAR_CHROMA_HEIGHT_ORDER 2
+ 
+#define ATTR_SIZE 20
+#define DEFAULT_GPU 0
+
+#define checkNvSciErrors(call)                              \
+  do {                                                      \
+    NvSciError _status = call;                              \
+    if (NvSciError_Success != _status) {                    \
+      printf(                                               \
+          "NVSCI call in file '%s' in line %i returned"     \
+          " %d, expected %d\n",                             \
+          __FILE__, __LINE__, _status, NvSciError_Success); \
+      fflush(stdout);                                       \
+      exit(EXIT_FAILURE);                                   \
+    }                                                       \
+  } while (0)
+
+#define checkCudaDrvErrors(call)                           \
+  do {                                                     \
+    CUresult err = call;                                   \
+    if (CUDA_SUCCESS != err) {                             \
+      const char *errorStr = NULL;                         \
+      cuGetErrorString(err, &errorStr);                    \
+      printf(                                              \
+              "checkCudaDrvErrors() Driver API error"      \
+              " = %04d \"%s\" from file <%s>, "            \
+              "line %i.\n",                                \
+              err, errorStr, __FILE__, __LINE__);          \
+      exit(EXIT_FAILURE);                                  \
+    }                                                      \
+  } while (0)
+
+extern void launchFlipSurfaceBitsKernel(cudaArray_t *levelArray, int32_t *multiPlanarWidth,
+ int32_t *multiPlanarHeight, int numPlanes);
+
+class Caller {
+private:
+    NvSciBufAttrList attrListOut;
+    NvSciBufAttrKeyValuePair pairArrayOut[ATTR_SIZE];
+    cudaExternalMemory_t extMem;
+    int32_t numPlanes;
+public:
+    NvSciBufAttrList attrList;
+    cudaMipmappedArray_t multiPlanarArray[PLANAR_NUM_PLANES];
+    int32_t multiPlanarWidth[PLANAR_NUM_PLANES];
+    int32_t multiPlanarHeight[PLANAR_NUM_PLANES];
+
+    void init();
+    void deinit();
+    void copyExtMemToMultiPlanarArrays();
+    void copyYUVToCudaArrayAndFlipBits(std::string &image_filename, cudaArray_t *yuvPlanes);
+    void copyCudaArrayToYUV(std::string &image_filename, cudaArray_t *yuvPlanes);
+    void setAttrListImageMultiPlanes(int imageWidth, int imageHeight);
+};
+
+
+class cudaNvSciBufMultiplanar {
+private:
+    size_t imageWidth;
+    size_t imageHeight;
+    int mCudaDeviceId;
+    int deviceCnt;
+    NvSciBufAttrList attrList[2];
+    NvSciBufAttrList attrListReconciled;
+    NvSciBufAttrList attrListConflict;
+public:
+    cudaNvSciBufMultiplanar(size_t imageWidth, size_t imageHeight, std::vector<int> &deviceIds);
+    void initCuda(int devId);
+    void reconcileAttrList(NvSciBufAttrList *attrList1, NvSciBufAttrList *attrList2);
+    void runCudaNvSciBufPlanar(std::string &image_filename, std::string &image_filename_out);
+    void tearDown(Caller *caller1, Caller *caller2);
+};
+
+enum NvSciBufImageAttributes {
+    PLANE_SIZE,  
+    PLANE_ALIGNED_SIZE,
+    PLANE_OFFSET,
+    PLANE_HEIGHT, 
+    PLANE_WIDTH, 
+    PLANE_CHANNEL_COUNT, 
+    PLANE_BITS_PER_PIXEL,
+    PLANE_COUNT,
+    PLANE_ATTR_SIZE
+};
+
+#endif  // CUDA_NVSCIBUF_MULTIPLANAR_H
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/imageKernels.cu
@ -0,0 +1,64 @@
+/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <cuda.h>
+#include <helper_cuda.h>
+
+static __global__ void flipSurfaceBits(cudaSurfaceObject_t surfObj, int width, int height) {
+    char data;
+    unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
+    unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
+    if (x < width && y < height) {
+        // Read from input surface
+        surf2Dread(&data,  surfObj, x, y);
+        // Write to output surface
+        data = ~data;
+        surf2Dwrite(data, surfObj, x, y);
+    }
+}
+
+// Copy cudaArray to surface memory and launch the CUDA kernel
+void launchFlipSurfaceBitsKernel(
+    cudaArray_t *levelArray, 
+    int32_t *multiPlanarWidth, 
+    int32_t *multiPlanarHeight, 
+    int numPlanes) {
+
+    cudaSurfaceObject_t surfObject[numPlanes] = {0};
+    cudaResourceDesc resDesc;
+    
+    for (int i = 0; i < numPlanes; i++) { 
+        memset(&resDesc, 0, sizeof(resDesc));
+        resDesc.resType = cudaResourceTypeArray;
+        resDesc.res.array.array = levelArray[i];
+        checkCudaErrors(cudaCreateSurfaceObject(&surfObject[i], &resDesc));
+        dim3 threadsperBlock(16, 16);
+        dim3 numBlocks((multiPlanarWidth[i] + threadsperBlock.x - 1) / threadsperBlock.x,
+                (multiPlanarHeight[i] + threadsperBlock.y - 1) / threadsperBlock.y);
+        flipSurfaceBits<<<numBlocks, threadsperBlock>>>(surfObject[i], multiPlanarWidth[i], multiPlanarHeight[i]);
+    }
+}
+
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/main.cpp
@ -0,0 +1,72 @@
+/* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <cuda.h>
+#include <vector>
+#include "cudaNvSciBufMultiplanar.h"
+#include <helper_image.h>
+
+#define MAX_FILE_SIZE 100
+
+int main(int argc, const char **argv) {
+  int numOfGPUs = 0;
+  std::vector<int> deviceIds;
+  (cudaGetDeviceCount(&numOfGPUs));
+
+  printf("%d GPUs found\n", numOfGPUs);
+  if (!numOfGPUs) {
+    exit(EXIT_WAIVED);
+  } else {
+    for (int devID = 0; devID < numOfGPUs; devID++) {
+      int major = 0, minor = 0;
+      (cudaDeviceGetAttribute(
+          &major, cudaDevAttrComputeCapabilityMajor, devID));
+      (cudaDeviceGetAttribute(
+          &minor, cudaDevAttrComputeCapabilityMinor, devID));
+      if (major >= 6) {
+        deviceIds.push_back(devID);
+      }
+    }
+    if (deviceIds.size() == 0) {
+      printf(
+          "cudaNvSciBufMultiplanar requires one or more GPUs of Pascal(SM 6.0) or higher "
+          "archs\nWaiving..\n");
+      exit(EXIT_WAIVED);
+    }
+  }
+
+  std::string image_filename = sdkFindFilePath("yuv_planar_img1.yuv", argv[0]);
+  std::string image_filename_out = "image_out.yuv";
+  uint32_t imageWidth = 720;
+  uint32_t imageHeight = 480;
+
+  printf("input image %s , width = %d, height = %d\n", image_filename.c_str(), imageWidth, imageHeight);
+
+  cudaNvSciBufMultiplanar cudaNvSciBufMultiplanarApp(imageWidth, imageHeight, deviceIds);
+  cudaNvSciBufMultiplanarApp.runCudaNvSciBufPlanar(image_filename, image_filename_out);
+
+  return EXIT_SUCCESS;
+}
--- a/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv
+++ b/Samples/8_Platform_Specific/Tegra/cudaNvSciBufMultiplanar/yuv_planar_img1.yuv