Unverified commit bedd2a20, authored by gilbertlee-amd and committed by GitHub
Browse files

Fixing specific DMA engine transfers, enabling GFX_SINGLE_TEAM=1 by default (#166)

parent b311f022
...@@ -3,6 +3,13 @@ ...@@ -3,6 +3,13 @@
Documentation for TransferBench is available at Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench). [https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
## v1.60.00
### Modified
- Reverted GFX_SINGLE_TEAM default back to 1
### Fixed
- Fixed a bug where peer memory access was not enabled for DMA transfers, which broke transfers that target a specific DMA engine
## v1.59.01 ## v1.59.01
### Added ### Added
- The a2a preset A2A_MODE variable has been enhanced to allow for customizing the number of srcs/dsts to use - The a2a preset A2A_MODE variable has been enhanced to allow for customizing the number of srcs/dsts to use
......
...@@ -23,7 +23,7 @@ THE SOFTWARE. ...@@ -23,7 +23,7 @@ THE SOFTWARE.
#pragma once #pragma once
// TransferBench client version // TransferBench client version
#define CLIENT_VERSION "01" #define CLIENT_VERSION "00"
#include "TransferBench.hpp" #include "TransferBench.hpp"
#include "EnvVars.hpp" #include "EnvVars.hpp"
......
...@@ -136,7 +136,7 @@ public: ...@@ -136,7 +136,7 @@ public:
blockBytes = GetEnvVar("BLOCK_BYTES" , 256); blockBytes = GetEnvVar("BLOCK_BYTES" , 256);
byteOffset = GetEnvVar("BYTE_OFFSET" , 0); byteOffset = GetEnvVar("BYTE_OFFSET" , 0);
gfxBlockSize = GetEnvVar("GFX_BLOCK_SIZE" , 256); gfxBlockSize = GetEnvVar("GFX_BLOCK_SIZE" , 256);
gfxSingleTeam = GetEnvVar("GFX_SINGLE_TEAM" , 0); gfxSingleTeam = GetEnvVar("GFX_SINGLE_TEAM" , 1);
gfxUnroll = GetEnvVar("GFX_UNROLL" , defaultGfxUnroll); gfxUnroll = GetEnvVar("GFX_UNROLL" , defaultGfxUnroll);
gfxWaveOrder = GetEnvVar("GFX_WAVE_ORDER" , 0); gfxWaveOrder = GetEnvVar("GFX_WAVE_ORDER" , 0);
hideEnv = GetEnvVar("HIDE_ENV" , 0); hideEnv = GetEnvVar("HIDE_ENV" , 0);
......
...@@ -64,7 +64,7 @@ namespace TransferBench ...@@ -64,7 +64,7 @@ namespace TransferBench
using std::set; using std::set;
using std::vector; using std::vector;
constexpr char VERSION[] = "1.59"; constexpr char VERSION[] = "1.60";
/** /**
* Enumeration of supported Executor types * Enumeration of supported Executor types
...@@ -2297,7 +2297,7 @@ namespace { ...@@ -2297,7 +2297,7 @@ namespace {
MemDevice const& srcMemDevice = t.srcs[iSrc]; MemDevice const& srcMemDevice = t.srcs[iSrc];
// Ensure executing GPU can access source memory // Ensure executing GPU can access source memory
if (exeDevice.exeType == EXE_GPU_GFX && IsGpuMemType(srcMemDevice.memType) && if (IsGpuExeType(exeDevice.exeType) && IsGpuMemType(srcMemDevice.memType) &&
srcMemDevice.memIndex != exeDevice.exeIndex) { srcMemDevice.memIndex != exeDevice.exeIndex) {
ERR_CHECK(EnablePeerAccess(exeDevice.exeIndex, srcMemDevice.memIndex)); ERR_CHECK(EnablePeerAccess(exeDevice.exeIndex, srcMemDevice.memIndex));
} }
...@@ -2310,7 +2310,7 @@ namespace { ...@@ -2310,7 +2310,7 @@ namespace {
MemDevice const& dstMemDevice = t.dsts[iDst]; MemDevice const& dstMemDevice = t.dsts[iDst];
// Ensure executing GPU can access destination memory // Ensure executing GPU can access destination memory
if (exeDevice.exeType == EXE_GPU_GFX && IsGpuMemType(dstMemDevice.memType) && if (IsGpuExeType(exeDevice.exeType) && IsGpuMemType(dstMemDevice.memType) &&
dstMemDevice.memIndex != exeDevice.exeIndex) { dstMemDevice.memIndex != exeDevice.exeIndex) {
ERR_CHECK(EnablePeerAccess(exeDevice.exeIndex, dstMemDevice.memIndex)); ERR_CHECK(EnablePeerAccess(exeDevice.exeIndex, dstMemDevice.memIndex));
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment