Update source code for cudaGraphPerfScaling

This commit is contained in:
shawnz 2025-02-12 12:22:55 +08:00
parent 93f1c78c5b
commit 24a617c043

View File

@@ -350,8 +350,10 @@ int main(int argc, char **argv)
cudaFree(0); cudaFree(0);
cudaMallocHost(&hostData, sizeof(*hostData)); cudaMallocHost(&hostData, sizeof(*hostData));
stream.resize(width); int numStreams = width;
for (int i = 0; i < width; i++) if (numStreams == 1) numStreams = 2; // demo needs two streams even if capture only needs 1.
stream.resize(numStreams);
for (int i = 0; i < numStreams; i++)
{ {
cudaStreamCreate(&stream[i]); cudaStreamCreate(&stream[i]);
} }
@@ -386,7 +388,7 @@ int main(int argc, char **argv)
if (!(outputFmt & 6)) { if (!(outputFmt & 6)) {
printf("skipping trials since no output is expected\n"); printf("skipping trials since no output is expected\n");
return EXIT_FAILURE; return 1;
} }
std::vector<double> metricTotal; std::vector<double> metricTotal;
@@ -429,6 +431,9 @@ int main(int argc, char **argv)
length += stride; length += stride;
} }
printf("\n"); cudaFreeHost(hostData);
}
printf("\n");
printf("Test passed\n");
return 0;
}