Unverified Commit 5901ce0e authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

Adding direct destination mem validation, env var refactor (#19)

parent e6f64e97
# Changelog for TransferBench # Changelog for TransferBench
## v1.18
### Added
- Adding ability to validate GPU destination memory directly without going through CPU staging buffer (VALIDATE_DIRECT)
- NOTE: This will only work on AMD devices with large-BAR access enabled and may slow things down considerably
### Changed
- Refactored how environment variables are displayed
- Mismatch checking stops after the first detected error within an array instead of listing all mismatched elements
## v1.17 ## v1.17
### Added ### Added
- Allow switch to GFX kernel for source array initialization (USE_PREP_KERNEL) - Allow switch to GFX kernel for source array initialization (USE_PREP_KERNEL)
......
...@@ -1215,7 +1215,17 @@ void RunPeerToPeerBenchmarks(EnvVars const& ev, size_t N) ...@@ -1215,7 +1215,17 @@ void RunPeerToPeerBenchmarks(EnvVars const& ev, size_t N)
ev.useRemoteRead ? "Local" : "Remote", ev.useRemoteRead ? "Local" : "Remote",
ev.useDmaCopy ? "DMA" : "GFX"); ev.useDmaCopy ? "DMA" : "GFX");
printf("%10s", "SRC\\DST"); if (isBidirectional)
{
printf("%12s", "SRC\\DST");
}
else
{
if (ev.useRemoteRead)
printf("%12s", "SRC\\EXE+DST");
else
printf("%12s", "SRC+EXE\\DST");
}
for (int i = 0; i < numCpus; i++) printf("%7s %02d", "CPU", i); for (int i = 0; i < numCpus; i++) printf("%7s %02d", "CPU", i);
for (int i = 0; i < numGpus; i++) printf("%7s %02d", "GPU", i); for (int i = 0; i < numGpus; i++) printf("%7s %02d", "GPU", i);
printf("\n"); printf("\n");
...@@ -1228,7 +1238,7 @@ void RunPeerToPeerBenchmarks(EnvVars const& ev, size_t N) ...@@ -1228,7 +1238,7 @@ void RunPeerToPeerBenchmarks(EnvVars const& ev, size_t N)
int const srcIndex = (srcType == MEM_CPU ? src : src - numCpus); int const srcIndex = (srcType == MEM_CPU ? src : src - numCpus);
if (!ev.outputToCsv) if (!ev.outputToCsv)
printf("%7s %02d", (srcType == MEM_CPU) ? "CPU" : "GPU", srcIndex); printf("%9s %02d", (srcType == MEM_CPU) ? "CPU" : "GPU", srcIndex);
for (int dst = 0; dst < numDevices; dst++) for (int dst = 0; dst < numDevices; dst++)
{ {
...@@ -1482,7 +1492,7 @@ void Transfer::ValidateDst(EnvVars const& ev) ...@@ -1482,7 +1492,7 @@ void Transfer::ValidateDst(EnvVars const& ev)
for (int dstIdx = 0; dstIdx < this->numDsts; ++dstIdx) for (int dstIdx = 0; dstIdx < this->numDsts; ++dstIdx)
{ {
float* output; float* output;
if (IsCpuType(this->dstType[dstIdx])) if (IsCpuType(this->dstType[dstIdx]) || ev.validateDirect)
{ {
output = this->dstMem[dstIdx] + initOffset; output = this->dstMem[dstIdx] + initOffset;
} }
...@@ -1525,6 +1535,8 @@ void Transfer::ValidateDst(EnvVars const& ev) ...@@ -1525,6 +1535,8 @@ void Transfer::ValidateDst(EnvVars const& ev)
this->DstToStr().c_str()); this->DstToStr().c_str());
if (!ev.continueOnError) if (!ev.continueOnError)
exit(1); exit(1);
else
break;
} }
} }
} }
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment