From 0bce11f7a506b0f8908b36c59cbd5d6a67b315e3 Mon Sep 17 00:00:00 2001
From: mdoijade <mdoijade@nvidia.com>
Date: Wed, 11 Apr 2018 00:09:13 +0530
Subject: [PATCH] -- Update README.md with changes for repo link and other
 updates -- remove MDCG support on windows -- integrate fixes to nvrtc samples
 makefile and driver api samples makefiles

---
 README.md                                     | 43 +++++++++++--------
 .../NsightEclipse.xml                         |  3 --
 .../conjugateGradientMultiDeviceCG/README.md  | 12 +-----
 Samples/matrixMulDrv/Makefile                 |  5 ++-
 Samples/vectorAdd_nvrtc/Makefile              | 11 ++---
 5 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index edb41487..39636d03 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ For system requirements and installation instructions of cuda toolkit, please re
 
 Using git clone the repository of CUDA Samples using the command below.
 ```
-git clone <GIT_REPO_CLONE_LINK>
+git clone https://github.com/NVIDIA/cuda-samples.git
 ```
 
 Without using git the easiest way to use these samples is to download the zip file containing the current version by clicking the "Download ZIP" button on the repo page. You can then unzip the entire archive and use the samples.
@@ -108,22 +108,22 @@ The samples makefiles can take advantage of certain options:
 ### Samples by OS
 
 #### Linux
-**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** |
+**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[shfl_scan](./Samples/shfl_scan)** | **[deviceQuery](./Samples/deviceQuery)** |
 ---|---|---|---|
-**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
-**[matrixMul](./Samples/matrixMul)** | **[shfl_scan](./Samples/shfl_scan)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[deviceQuery](./Samples/deviceQuery)** |
+**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
+**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[matrixMul](./Samples/matrixMul)** |
 
 #### Windows
-**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** |
+**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[conjugateGradientMultiBlockCG](./Samples/conjugateGradientMultiBlockCG)** | **[shfl_scan](./Samples/shfl_scan)** | **[deviceQuery](./Samples/deviceQuery)** |
 ---|---|---|---|
-**[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[conjugateGradientMultiDeviceCG](./Samples/conjugateGradientMultiDeviceCG)** |
-**[matrixMul](./Samples/matrixMul)** | **[shfl_scan](./Samples/shfl_scan)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[deviceQuery](./Samples/deviceQuery)** |
+**[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** |
+**[cudaTensorCoreGemm](./Samples/cudaTensorCoreGemm)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[matrixMul](./Samples/matrixMul)** |
 
 #### Mac OSX
-**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[matrixMulDrv](./Samples/matrixMulDrv)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** | **[simpleCUFFT](./Samples/simpleCUFFT)** |
+**[warpAggregatedAtomicsCG](./Samples/warpAggregatedAtomicsCG)** | **[shfl_scan](./Samples/shfl_scan)** | **[deviceQuery](./Samples/deviceQuery)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
 ---|---|---|---|
-**[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[matrixMul](./Samples/matrixMul)** | **[shfl_scan](./Samples/shfl_scan)** | **[simpleVoteIntrinsics](./Samples/simpleVoteIntrinsics)** |
-**[deviceQuery](./Samples/deviceQuery)** |
+**[matrixMulDrv](./Samples/matrixMulDrv)** | **[simpleCUFFT](./Samples/simpleCUFFT)** | **[simpleCUBLAS](./Samples/simpleCUBLAS)** | **[vectorAdd_nvrtc](./Samples/vectorAdd_nvrtc)** |
+**[matrixMul](./Samples/matrixMul)** |
 
 ## Dependencies
 
@@ -193,13 +193,17 @@ These CUDA features are needed by some CUDA samples. They are provided by either
 
 CUFFT Callback Routines are user-supplied kernel routines that CUFFT will call when loading or storing data. These callback routines are only available on Linux x86_64 and ppc64le systems.
 
-#### CUDA Dynamic Paralellism
+#### CUDA Dynamic Parallelism
 
-CDP (CUDA Dynamic Paralellism) allows kernels to be launched from threads running on the GPU. CDP is only available on GPUs with SM architecture of 3.5 or above.
+CDP (CUDA Dynamic Parallelism) allows kernels to be launched from threads running on the GPU. CDP is only available on GPUs with SM architecture of 3.5 or above.
 
 #### Multi-block Cooperative Groups
 
-Multi Block Cooperative Groups(MBCG) extends Cooperative Groups and the CUDA programming model to express inter-thread-block synchronization. MBCG is available on GPUs with Pascal and higher architecture on Linux systems.
+Multi Block Cooperative Groups (MBCG) extends Cooperative Groups and the CUDA programming model to express inter-thread-block synchronization. MBCG is available on GPUs with Pascal and higher architecture.
+
+#### Multi-Device Cooperative Groups
+
+Multi Device Cooperative Groups extends Cooperative Groups and the CUDA programming model, enabling thread blocks executing on multiple GPUs to cooperate and synchronize as they execute. This feature is available on GPUs with Pascal and higher architecture.
 
 #### CUBLAS
 
@@ -231,16 +235,12 @@ NPP (NVIDIA Performance Primitives) provides GPU-accelerated image, video, and s
 
 #### NVGRAPH
 
-NVGRAPH is a GPU-accelerated graph analytics library..
+NVGRAPH is a GPU-accelerated graph analytics library.
 
 #### NVRTC
 
 NVRTC (CUDA RunTime Compilation) is a runtime compilation library for CUDA C++.
 
-#### NVCUVID
-
-NVCUVID (NVIDIA CUDA Video Decoder) provides GPU-accelerated video decoding capabilities.
-
 #### Stream Priorities
 
 Stream Priorities allows the creation of streams with specified priorities. Stream Priorities is only available on GPUs with SM architecture of 3.5 or above.
@@ -259,7 +259,7 @@ NVCC support of [C++11 features](https://en.wikipedia.org/wiki/C++11).
 
 ## Contributors Guide
 
-We welcome your input on issues and suggestions for new samples. At this time we are not accepting contributions from the public, check back here as we evolve our contribution model.
+We welcome your input on issues and suggestions for samples. At this time we are not accepting contributions from the public; check back here as we evolve our contribution model.
 
 We use Google C++ Style Guide for all the sources https://google.github.io/styleguide/cppguide.html
 
@@ -267,3 +267,8 @@ We use Google C++ Style Guide for all the sources https://google.github.io/style
 
 Answers to frequently asked questions about CUDA can be found at http://developer.nvidia.com/cuda-faq and in the [CUDA Toolkit Release Notes](http://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html).
 
+## References
+
+*   [CUDA Programming Guide](http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html)
+*   [Accelerated Computing Blog](https://devblogs.nvidia.com/category/accelerated-computing/)
+
diff --git a/Samples/conjugateGradientMultiDeviceCG/NsightEclipse.xml b/Samples/conjugateGradientMultiDeviceCG/NsightEclipse.xml
index b1f46c75..7f60f0fc 100644
--- a/Samples/conjugateGradientMultiDeviceCG/NsightEclipse.xml
+++ b/Samples/conjugateGradientMultiDeviceCG/NsightEclipse.xml
@@ -58,9 +58,6 @@
       <arch>ppc64le</arch>
       <platform>linux</platform>
     </env>
-    <env>
-      <platform>windows</platform>
-    </env>
   </supported_envs>
   <supported_sm_architectures>
     <from>6.0</from>
diff --git a/Samples/conjugateGradientMultiDeviceCG/README.md b/Samples/conjugateGradientMultiDeviceCG/README.md
index e1c69813..41bf4518 100644
--- a/Samples/conjugateGradientMultiDeviceCG/README.md
+++ b/Samples/conjugateGradientMultiDeviceCG/README.md
@@ -14,7 +14,7 @@ Unified Memory, Linear Algebra, Cooperative Groups, MultiDevice Cooperative Grou
 
 ## Supported OSes
 
-Linux, Windows
+Linux
 
 ## Supported CPU Architecture
 
@@ -35,16 +35,6 @@ Make sure the dependencies mentioned in [Dependencies]() section above are insta
 
 ## Build and Run
 
-### Windows
-The Windows samples are built using the Visual Studio IDE. Solution files (.sln) are provided for each supported version of Visual Studio, using the format:
-```
-*_vs<version>.sln - for Visual Studio <version>
-```
-Each individual sample has its own set of solution files in its directory:
-
-To build/examine all the samples at once, the complete solution files should be used. To build/examine a single sample, the individual sample solution files should be used.
-> **Note:** Some samples require that the Microsoft DirectX SDK (June 2010 or newer) be installed and that the VC++ directory paths are properly set up (**Tools > Options...**). Check DirectX Dependencies section for details."
-
 ### Linux
 The Linux samples are built using makefiles. To use the makefiles, change the current directory to the sample directory you wish to build, and run make:
 ```
diff --git a/Samples/matrixMulDrv/Makefile b/Samples/matrixMulDrv/Makefile
index e3a46b92..fe6bc157 100644
--- a/Samples/matrixMulDrv/Makefile
+++ b/Samples/matrixMulDrv/Makefile
@@ -310,9 +310,10 @@ else
   ifeq ("$(CUDALIB)","")
     $(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed.  Please re-install the driver. <<<)
     SAMPLE_ENABLED := 0
+  else
+    CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
+    LIBRARIES += -L$(CUDALIB) -lcuda
   endif
-
-  LIBRARIES += -lcuda
 endif
 
 ifeq ($(SAMPLE_ENABLED),0)
diff --git a/Samples/vectorAdd_nvrtc/Makefile b/Samples/vectorAdd_nvrtc/Makefile
index 05995484..08ab2bd2 100644
--- a/Samples/vectorAdd_nvrtc/Makefile
+++ b/Samples/vectorAdd_nvrtc/Makefile
@@ -257,11 +257,7 @@ LIBRARIES :=
 
 # libNVRTC specific libraries
 ifeq ($(TARGET_OS),darwin)
- LDFLAGS += -L$(CUDA_PATH)/lib -framework CUDA
-else ifeq ($(TARGET_ARCH),x86_64)
- LDFLAGS += -L$(CUDA_PATH)/lib64/stubs -L$(CUDA_PATH)/lib64
-else ifeq ($(TARGET_ARCH),ppc64le)
- LDFLAGS += -L$(CUDA_PATH)/targets/ppc64le-linux/lib/stubs -L$(CUDA_PATH)/targets/ppc64le-linux/lib
+ LDFLAGS += -L$(CUDA_PATH)/lib -F/Library/Frameworks -framework CUDA
 endif
 
 ifeq ($(TARGET_OS),darwin)
@@ -304,9 +300,10 @@ else
   ifeq ("$(CUDALIB)","")
     $(info >>> WARNING - libcuda.so not found, CUDA Driver is not installed.  Please re-install the driver. <<<)
     SAMPLE_ENABLED := 0
+  else
+    CUDALIB := $(shell echo $(CUDALIB) | sed "s/ .*//" | sed "s/\/libcuda.so//" )
+    LIBRARIES += -L$(CUDALIB) -lcuda
   endif
-
-  LIBRARIES += -lcuda
 endif
 
 INCLUDES += -I$(CUDA_PATH)/include