Unverified commit 989cdd9b, authored by gilbertlee-amd, committed by GitHub
Browse files

Fixing USE_PREP_KERNEL vs VALIDATE_DIRECT (#21)

parent 633e3b91
# Changelog for TransferBench # Changelog for TransferBench
## v1.20
### Fixed
- VALIDATE_DIRECT can now be used with USE_PREP_KERNEL
- Switch to local GPU for validating GPU memory
## v1.19 ## v1.19
### Added ### Added
- VALIDATE_DIRECT now also applies to source memory array checking - VALIDATE_DIRECT now also applies to source memory array checking
......
...@@ -214,6 +214,8 @@ void ExecuteTransfers(EnvVars const& ev, ...@@ -214,6 +214,8 @@ void ExecuteTransfers(EnvVars const& ev,
// Prepare additional requirement for GPU-based executors // Prepare additional requirement for GPU-based executors
if (IsGpuType(exeType)) if (IsGpuType(exeType))
{ {
HIP_CALL(hipSetDevice(exeIndex));
// Single-stream is only supported for GFX-based executors // Single-stream is only supported for GFX-based executors
int const numStreamsToUse = (exeType == EXE_GPU_DMA || !ev.useSingleStream) ? exeInfo.transfers.size() : 1; int const numStreamsToUse = (exeType == EXE_GPU_DMA || !ev.useSingleStream) ? exeInfo.transfers.size() : 1;
exeInfo.streams.resize(numStreamsToUse); exeInfo.streams.resize(numStreamsToUse);
...@@ -221,7 +223,6 @@ void ExecuteTransfers(EnvVars const& ev, ...@@ -221,7 +223,6 @@ void ExecuteTransfers(EnvVars const& ev,
exeInfo.stopEvents.resize(numStreamsToUse); exeInfo.stopEvents.resize(numStreamsToUse);
for (int i = 0; i < numStreamsToUse; ++i) for (int i = 0; i < numStreamsToUse; ++i)
{ {
HIP_CALL(hipSetDevice(exeIndex));
HIP_CALL(hipStreamCreate(&exeInfo.streams[i])); HIP_CALL(hipStreamCreate(&exeInfo.streams[i]));
HIP_CALL(hipEventCreate(&exeInfo.startEvents[i])); HIP_CALL(hipEventCreate(&exeInfo.startEvents[i]));
HIP_CALL(hipEventCreate(&exeInfo.stopEvents[i])); HIP_CALL(hipEventCreate(&exeInfo.stopEvents[i]));
...@@ -243,7 +244,11 @@ void ExecuteTransfers(EnvVars const& ev, ...@@ -243,7 +244,11 @@ void ExecuteTransfers(EnvVars const& ev,
bool isSrcCorrect = true; bool isSrcCorrect = true;
for (auto& exeInfoPair : transferMap) for (auto& exeInfoPair : transferMap)
{ {
ExecutorInfo& exeInfo = exeInfoPair.second; Executor const& executor = exeInfoPair.first;
ExecutorInfo& exeInfo = exeInfoPair.second;
ExeType const exeType = executor.first;
int const exeIndex = RemappedIndex(executor.second, IsCpuType(exeType));
exeInfo.totalBytes = 0; exeInfo.totalBytes = 0;
int transferOffset = 0; int transferOffset = 0;
...@@ -259,10 +264,13 @@ void ExecuteTransfers(EnvVars const& ev, ...@@ -259,10 +264,13 @@ void ExecuteTransfers(EnvVars const& ev,
if (transfer->exeType == EXE_GPU_GFX) if (transfer->exeType == EXE_GPU_GFX)
{ {
exeInfo.transfers[i]->subExecParamGpuPtr = exeInfo.subExecParamGpu + transferOffset; exeInfo.transfers[i]->subExecParamGpuPtr = exeInfo.subExecParamGpu + transferOffset;
HIP_CALL(hipSetDevice(exeIndex));
HIP_CALL(hipMemcpy(&exeInfo.subExecParamGpu[transferOffset], HIP_CALL(hipMemcpy(&exeInfo.subExecParamGpu[transferOffset],
transfer->subExecParam.data(), transfer->subExecParam.data(),
transfer->subExecParam.size() * sizeof(SubExecParam), transfer->subExecParam.size() * sizeof(SubExecParam),
hipMemcpyHostToDevice)); hipMemcpyHostToDevice));
HIP_CALL(hipDeviceSynchronize());
transferOffset += transfer->subExecParam.size(); transferOffset += transfer->subExecParam.size();
} }
} }
...@@ -1451,6 +1459,7 @@ bool Transfer::PrepareSrc(EnvVars const& ev) ...@@ -1451,6 +1459,7 @@ bool Transfer::PrepareSrc(EnvVars const& ev)
for (int srcIdx = 0; srcIdx < this->numSrcs; ++srcIdx) for (int srcIdx = 0; srcIdx < this->numSrcs; ++srcIdx)
{ {
float* srcPtr = this->srcMem[srcIdx] + initOffset; float* srcPtr = this->srcMem[srcIdx] + initOffset;
PrepareReference(ev, reference, srcIdx);
// Initialize source memory array with reference pattern // Initialize source memory array with reference pattern
if (IsGpuType(this->srcType[srcIdx])) if (IsGpuType(this->srcType[srcIdx]))
...@@ -1458,19 +1467,13 @@ bool Transfer::PrepareSrc(EnvVars const& ev) ...@@ -1458,19 +1467,13 @@ bool Transfer::PrepareSrc(EnvVars const& ev)
int const deviceIdx = RemappedIndex(this->srcIndex[srcIdx], false); int const deviceIdx = RemappedIndex(this->srcIndex[srcIdx], false);
HIP_CALL(hipSetDevice(deviceIdx)); HIP_CALL(hipSetDevice(deviceIdx));
if (ev.usePrepSrcKernel) if (ev.usePrepSrcKernel)
{
PrepSrcDataKernel<<<32, BLOCKSIZE>>>(srcPtr, N, srcIdx); PrepSrcDataKernel<<<32, BLOCKSIZE>>>(srcPtr, N, srcIdx);
}
else else
{
PrepareReference(ev, reference, srcIdx);
HIP_CALL(hipMemcpy(srcPtr, reference.data(), this->numBytesActual, hipMemcpyDefault)); HIP_CALL(hipMemcpy(srcPtr, reference.data(), this->numBytesActual, hipMemcpyDefault));
}
HIP_CALL(hipDeviceSynchronize()); HIP_CALL(hipDeviceSynchronize());
} }
else if (IsCpuType(this->srcType[srcIdx])) else if (IsCpuType(this->srcType[srcIdx]))
{ {
PrepareReference(ev, reference, srcIdx);
memcpy(srcPtr, reference.data(), this->numBytesActual); memcpy(srcPtr, reference.data(), this->numBytesActual);
} }
......
...@@ -29,7 +29,7 @@ THE SOFTWARE. ...@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp" #include "Compatibility.hpp"
#include "Kernels.hpp" #include "Kernels.hpp"
#define TB_VERSION "1.19" #define TB_VERSION "1.20"
extern char const MemTypeStr[]; extern char const MemTypeStr[];
extern char const ExeTypeStr[]; extern char const ExeTypeStr[];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment