Unverified Commit b5439548 authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

Adding src array check, CONTINUE_ON_ERROR, gpu mem clear (#16)

parent 5087ad50
# Changelog for TransferBench
## v1.16
### Added
- Additional src array validation during preparation
- Adding new env var CONTINUE_ON_ERROR to continue running tests after a mismatch is detected
- Initializing GPU memory to 0 during allocation
## v1.15
### Fixed
- Fixed a bug that prevented single Transfers > 8GB
......
......@@ -262,7 +262,6 @@ void ExecuteTransfers(EnvVars const& ev,
transfer->subExecParam.data(),
transfer->subExecParam.size() * sizeof(SubExecParam),
hipMemcpyHostToDevice));
transferOffset += transfer->subExecParam.size();
}
}
......@@ -978,7 +977,9 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt
// Reset to default numa mem policy
numa_set_preferred(-1);
}
else if (memType == MEM_GPU)
if (IsGpuType(memType))
{
if (memType == MEM_GPU)
{
// Allocate GPU memory on appropriate device
HIP_CALL(hipSetDevice(devIndex));
......@@ -992,8 +993,11 @@ void AllocateMemory(MemType memType, int devIndex, size_t numBytes, void** memPt
#else
HIP_CALL(hipSetDevice(devIndex));
HIP_CALL(hipExtMallocWithFlags((void**)memPtr, numBytes, hipDeviceMallocFinegrained));
#endif
}
HIP_CALL(hipMemset(*memPtr, 0, numBytes));
}
else
{
printf("[ERROR] Unsupported memory type %d\n", memType);
......@@ -1417,9 +1421,30 @@ void Transfer::PrepareSrc(EnvVars const& ev)
std::vector<float> reference(N);
for (int srcIdx = 0; srcIdx < this->numSrcs; ++srcIdx)
{
//PrepareReference(ev, reference, srcIdx);
PrepareReference(ev, reference, srcIdx);
HIP_CALL(hipMemcpy(this->srcMem[srcIdx] + initOffset, reference.data(), this->numBytesActual, hipMemcpyDefault));
// Perform check just to make sure that data has been copied properly
std::vector<float> srcCopy(N);
HIP_CALL(hipMemcpy(srcCopy.data(), this->srcMem[srcIdx] + initOffset, this->numBytesActual, hipMemcpyDefault));
for (size_t i = 0; i < N; ++i)
{
if (reference[i] != srcCopy[i])
{
printf("\n[ERROR] Unexpected mismatch at index %lu of source array %d:\n", i, srcIdx);
printf("[ERROR] SRC %02d value: %10.5f [%08X]\n", srcIdx, srcCopy[i], *(unsigned int*)&srcCopy[i]);
printf("[ERROR] EXPECTED value: %10.5f [%08X]\n", reference[i], *(unsigned int*)&reference[i]);
printf("[ERROR] Failed Transfer details: #%d: %s -> [%c%d:%d] -> %s\n",
this->transferIndex,
this->SrcToStr().c_str(),
ExeTypeStr[this->exeType], this->exeIndex,
this->numSubExecs,
this->DstToStr().c_str());
if (!ev.continueOnError)
exit(1);
}
}
}
}
......@@ -1455,16 +1480,17 @@ void Transfer::ValidateDst(EnvVars const& ev)
{
float srcVal;
HIP_CALL(hipMemcpy(&srcVal, this->srcMem[srcIdx] + initOffset + i, sizeof(float), hipMemcpyDefault));
printf("[ERROR] SRC %02d value: %8.6f [%08X]\n", srcIdx, srcVal, *(unsigned int*)&srcVal);
printf("[ERROR] SRC %02d value: %10.5f [%08X]\n", srcIdx, srcVal, *(unsigned int*)&srcVal);
}
printf("[ERROR] EXPECTED value: %8.6f [%08X]\n", reference[i], *(unsigned int*)&reference[i]);
printf("[ERROR] DST %02d value: %8.6f [%08X]\n", dstIdx, output[i], *(unsigned int*)&output[i]);
printf("[ERROR] EXPECTED value: %10.5f [%08X]\n", reference[i], *(unsigned int*)&reference[i]);
printf("[ERROR] DST %02d value: %10.5f [%08X]\n", dstIdx, output[i], *(unsigned int*)&output[i]);
printf("[ERROR] Failed Transfer details: #%d: %s -> [%c%d:%d] -> %s\n",
this->transferIndex,
this->SrcToStr().c_str(),
ExeTypeStr[this->exeType], this->exeIndex,
this->numSubExecs,
this->DstToStr().c_str());
if (!ev.continueOnError)
exit(1);
}
}
......
......@@ -28,7 +28,7 @@ THE SOFTWARE.
#include <time.h>
#include "Kernels.hpp"
#define TB_VERSION "1.15"
#define TB_VERSION "1.16"
extern char const MemTypeStr[];
extern char const ExeTypeStr[];
......@@ -64,6 +64,7 @@ public:
// Environment variables
int blockBytes; // Each CU, except the last, gets a multiple of this many bytes to copy
int byteOffset; // Byte-offset for memory allocations
int continueOnError; // Continue tests even after mismatch detected
int numCpuDevices; // Number of CPU devices to use (defaults to # NUMA nodes detected)
int numGpuDevices; // Number of GPU devices to use (defaults to # HIP devices detected)
int numIterations; // Number of timed iterations to perform. If negative, run for -numIterations seconds instead
......@@ -135,6 +136,7 @@ public:
blockBytes = GetEnvVar("BLOCK_BYTES" , 256);
byteOffset = GetEnvVar("BYTE_OFFSET" , 0);
continueOnError = GetEnvVar("CONTINUE_ON_ERROR" , 0);
numCpuDevices = GetEnvVar("NUM_CPU_DEVICES" , numDetectedCpus);
numGpuDevices = GetEnvVar("NUM_GPU_DEVICES" , numDetectedGpus);
numIterations = GetEnvVar("NUM_ITERATIONS" , DEFAULT_NUM_ITERATIONS);
......@@ -352,6 +354,7 @@ public:
printf("======================\n");
printf(" BLOCK_BYTES=B - Each CU (except the last) receives a multiple of BLOCK_BYTES to copy\n");
printf(" BYTE_OFFSET - Initial byte-offset for memory allocations. Must be multiple of 4. Defaults to 0\n");
printf(" CONTINUE_ON_ERROR - Continue tests even after mismatch detected\n");
printf(" FILL_PATTERN=STR - Fill input buffer with pattern specified in hex digits (0-9,a-f,A-F). Must be even number of digits, (byte-level big-endian)\n");
printf(" NUM_CPU_DEVICES=X - Restrict number of CPUs to X. May not be greater than # detected NUMA nodes\n");
printf(" NUM_GPU_DEVICES=X - Restrict number of GPUs to X. May not be greater than # detected HIP devices\n");
......@@ -374,6 +377,7 @@ public:
printf("=====================================================\n");
printf("%-20s = %12d : Each CU gets a multiple of %d bytes to copy\n", "BLOCK_BYTES", blockBytes, blockBytes);
printf("%-20s = %12d : Using byte offset of %d\n", "BYTE_OFFSET", byteOffset, byteOffset);
printf("%-20s = %12d : Continue on error\n", "CONTINUE_ON_ERROR", continueOnError);
printf("%-20s = %12s : ", "FILL_PATTERN", getenv("FILL_PATTERN") ? "(specified)" : "(unset)");
if (fillPattern.size())
printf("Pattern: %s", getenv("FILL_PATTERN"));
......@@ -404,6 +408,7 @@ public:
printf("EnvVar,Value,Description,(TransferBench v%s)\n", TB_VERSION);
printf("BLOCK_BYTES,%d,Each CU gets a multiple of %d bytes to copy\n", blockBytes, blockBytes);
printf("BYTE_OFFSET,%d,Using byte offset of %d\n", byteOffset, byteOffset);
printf("CONTINUE_ON_ERROR,%d,Continue test on mismatch error\n", continueOnError);
printf("FILL_PATTERN,%s,", getenv("FILL_PATTERN") ? "(specified)" : "(unset)");
if (fillPattern.size())
printf("Pattern: %s", getenv("FILL_PATTERN"));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment