Unverified Commit 633e3b91 authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

Src validate direct (#20)

* Validating source directly, extra nullptr deallocation checks
parent 5901ce0e
# Changelog for TransferBench # Changelog for TransferBench
## v1.19
### Added
- VALIDATE_DIRECT now also applies to source memory array checking
- Adding null memory pointer check prior to deallocation
## v1.18 ## v1.18
### Added ### Added
- Adding ability to validate GPU destination memory directly without going through CPU staging buffer (VALIDATE_DIRECT) - Adding ability to validate GPU destination memory directly without going through CPU staging buffer (VALIDATE_DIRECT)
......
...@@ -940,7 +940,7 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt ...@@ -940,7 +940,7 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt
printf("[ERROR] Unable to allocate 0 bytes\n"); printf("[ERROR] Unable to allocate 0 bytes\n");
exit(1); exit(1);
} }
*memPtr = nullptr;
if (IsCpuType(memType)) if (IsCpuType(memType))
{ {
// Set numa policy prior to call to hipHostMalloc // Set numa policy prior to call to hipHostMalloc
...@@ -1000,6 +1000,7 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt ...@@ -1000,6 +1000,7 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt
#endif #endif
} }
HIP_CALL(hipMemset(*memPtr, 0, numBytes)); HIP_CALL(hipMemset(*memPtr, 0, numBytes));
HIP_CALL(hipDeviceSynchronize());
} }
else else
{ {
...@@ -1012,14 +1013,29 @@ void DeallocateMemory(MemType memType, void* memPtr, size_t const bytes) ...@@ -1012,14 +1013,29 @@ void DeallocateMemory(MemType memType, void* memPtr, size_t const bytes)
{ {
if (memType == MEM_CPU || memType == MEM_CPU_FINE) if (memType == MEM_CPU || memType == MEM_CPU_FINE)
{ {
if (memPtr == nullptr)
{
printf("[ERROR] Attempting to free null CPU pointer for %lu bytes. Skipping hipHostFree\n", bytes);
return;
}
HIP_CALL(hipHostFree(memPtr)); HIP_CALL(hipHostFree(memPtr));
} }
else if (memType == MEM_CPU_UNPINNED) else if (memType == MEM_CPU_UNPINNED)
{ {
if (memPtr == nullptr)
{
printf("[ERROR] Attempting to free null unpinned CPU pointer for %lu bytes. Skipping numa_free\n", bytes);
return;
}
numa_free(memPtr, bytes); numa_free(memPtr, bytes);
} }
else if (memType == MEM_GPU || memType == MEM_GPU_FINE) else if (memType == MEM_GPU || memType == MEM_GPU_FINE)
{ {
if (memPtr == nullptr)
{
printf("[ERROR] Attempting to free null GPU pointer for %lu bytes. Skipping hipFree\n", bytes);
return;
}
HIP_CALL(hipFree(memPtr)); HIP_CALL(hipFree(memPtr));
} }
} }
...@@ -1434,34 +1450,54 @@ bool Transfer::PrepareSrc(EnvVars const& ev) ...@@ -1434,34 +1450,54 @@ bool Transfer::PrepareSrc(EnvVars const& ev)
std::vector<float> reference(N); std::vector<float> reference(N);
for (int srcIdx = 0; srcIdx < this->numSrcs; ++srcIdx) for (int srcIdx = 0; srcIdx < this->numSrcs; ++srcIdx)
{ {
PrepareReference(ev, reference, srcIdx); float* srcPtr = this->srcMem[srcIdx] + initOffset;
if (ev.usePrepSrcKernel && IsGpuType(this->srcType[srcIdx]))
// Initialize source memory array with reference pattern
if (IsGpuType(this->srcType[srcIdx]))
{ {
int const srcIndex = RemappedIndex(this->srcIndex[srcIdx], false); int const deviceIdx = RemappedIndex(this->srcIndex[srcIdx], false);
HIP_CALL(hipSetDevice(srcIndex)); HIP_CALL(hipSetDevice(deviceIdx));
PrepSrcDataKernel<<<32, BLOCKSIZE>>>(this->srcMem[srcIdx] + initOffset, N, srcIdx); if (ev.usePrepSrcKernel)
{
PrepSrcDataKernel<<<32, BLOCKSIZE>>>(srcPtr, N, srcIdx);
}
else
{
PrepareReference(ev, reference, srcIdx);
HIP_CALL(hipMemcpy(srcPtr, reference.data(), this->numBytesActual, hipMemcpyDefault));
}
HIP_CALL(hipDeviceSynchronize()); HIP_CALL(hipDeviceSynchronize());
} }
else else if (IsCpuType(this->srcType[srcIdx]))
{ {
HIP_CALL(hipMemcpy(this->srcMem[srcIdx] + initOffset, reference.data(), this->numBytesActual, hipMemcpyDefault)); PrepareReference(ev, reference, srcIdx);
memcpy(srcPtr, reference.data(), this->numBytesActual);
} }
// Perform check just to make sure that data has been copied properly // Perform check just to make sure that data has been copied properly
float* srcCheckPtr = srcPtr;
std::vector<float> srcCopy(N); std::vector<float> srcCopy(N);
HIP_CALL(hipMemcpy(srcCopy.data(), this->srcMem[srcIdx] + initOffset, this->numBytesActual, hipMemcpyDefault)); if (IsGpuType(this->srcType[srcIdx]))
{
if (!ev.validateDirect)
{
HIP_CALL(hipMemcpy(srcCopy.data(), srcPtr, this->numBytesActual, hipMemcpyDefault));
HIP_CALL(hipDeviceSynchronize());
srcCheckPtr = srcCopy.data();
}
}
for (size_t i = 0; i < N; ++i) for (size_t i = 0; i < N; ++i)
{ {
if (reference[i] != srcCopy[i]) if (reference[i] != srcCheckPtr[i])
{ {
printf("\n[ERROR] Unexpected mismatch at index %lu of source array %d:\n", i, srcIdx); printf("\n[ERROR] Unexpected mismatch at index %lu of source array %d:\n", i, srcIdx);
#if !defined(__NVCC__) #if !defined(__NVCC__)
float const val = this->srcMem[srcIdx][initOffset + i]; float const val = this->srcMem[srcIdx][initOffset + i];
printf("[ERROR] SRC %02d value: %10.5f [%08X] Direct: %10.5f [%08X]\n", printf("[ERROR] SRC %02d value: %10.5f [%08X] Direct: %10.5f [%08X]\n",
srcIdx, srcCopy[i], *(unsigned int*)&srcCopy[i], val, *(unsigned int*)&val); srcIdx, srcCheckPtr[i], *(unsigned int*)&srcCheckPtr[i], val, *(unsigned int*)&val);
#else #else
printf("[ERROR] SRC %02d value: %10.5f [%08X]\n", srcIdx, srcCopy[i], *(unsigned int*)&srcCopy[i]); printf("[ERROR] SRC %02d value: %10.5f [%08X]\n", srcIdx, srcCheckPtr[i], *(unsigned int*)&srcCheckPtr[i]);
#endif #endif
printf("[ERROR] EXPECTED value: %10.5f [%08X]\n", reference[i], *(unsigned int*)&reference[i]); printf("[ERROR] EXPECTED value: %10.5f [%08X]\n", reference[i], *(unsigned int*)&reference[i]);
printf("[ERROR] Failed Transfer details: #%d: %s -> [%c%d:%d] -> %s\n", printf("[ERROR] Failed Transfer details: #%d: %s -> [%c%d:%d] -> %s\n",
...@@ -1498,7 +1534,10 @@ void Transfer::ValidateDst(EnvVars const& ev) ...@@ -1498,7 +1534,10 @@ void Transfer::ValidateDst(EnvVars const& ev)
} }
else else
{ {
int const deviceIdx = RemappedIndex(this->dstIndex[dstIdx], false);
HIP_CALL(hipSetDevice(deviceIdx));
HIP_CALL(hipMemcpy(hostBuffer.data(), this->dstMem[dstIdx] + initOffset, this->numBytesActual, hipMemcpyDefault)); HIP_CALL(hipMemcpy(hostBuffer.data(), this->dstMem[dstIdx] + initOffset, this->numBytesActual, hipMemcpyDefault));
HIP_CALL(hipDeviceSynchronize());
output = hostBuffer.data(); output = hostBuffer.data();
} }
......
...@@ -29,7 +29,7 @@ THE SOFTWARE. ...@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp" #include "Compatibility.hpp"
#include "Kernels.hpp" #include "Kernels.hpp"
#define TB_VERSION "1.18" #define TB_VERSION "1.19"
extern char const MemTypeStr[]; extern char const MemTypeStr[];
extern char const ExeTypeStr[]; extern char const ExeTypeStr[];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment