Unverified Commit bedd2a20 authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

Fixing specific DMA engine transfers, enabling GFX_SINGLE_TEAM=1 by default (#166)

parent b311f022
......@@ -3,10 +3,17 @@
Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
## v1.60.00
### Modified
- Reverted GFX_SINGLE_TEAM default back to 1
### Fixed
- Fixed a bug where peer memory access was not enabled for DMA transfers, which broke transfers that use a specific DMA engine
## v1.59.01
### Added
- The a2a preset A2A_MODE variable has been enhanced to allow for customizing the number of srcs/dsts to use
This is specified by setting A2A_MODE to numSrcs:numDsts. Extra destinations past the first will be "local" writes (e.g., if one sets A2A_MODE=1:3, then transfers will follow this pattern: Fx Gx FyFxFx)
This is specified by setting A2A_MODE to numSrcs:numDsts. Extra destinations past the first will be "local" writes (e.g., if one sets A2A_MODE=1:3, then transfers will follow this pattern: Fx Gx FyFxFx)
to simulate similar conditions normally used during collective algorithms such as ring-based AllReduce
## v1.59.00
......
......@@ -23,7 +23,7 @@ THE SOFTWARE.
#pragma once
// TransferBench client version
#define CLIENT_VERSION "01"
#define CLIENT_VERSION "00"
#include "TransferBench.hpp"
#include "EnvVars.hpp"
......
......@@ -136,7 +136,7 @@ public:
blockBytes = GetEnvVar("BLOCK_BYTES" , 256);
byteOffset = GetEnvVar("BYTE_OFFSET" , 0);
gfxBlockSize = GetEnvVar("GFX_BLOCK_SIZE" , 256);
gfxSingleTeam = GetEnvVar("GFX_SINGLE_TEAM" , 0);
gfxSingleTeam = GetEnvVar("GFX_SINGLE_TEAM" , 1);
gfxUnroll = GetEnvVar("GFX_UNROLL" , defaultGfxUnroll);
gfxWaveOrder = GetEnvVar("GFX_WAVE_ORDER" , 0);
hideEnv = GetEnvVar("HIDE_ENV" , 0);
......
......@@ -64,7 +64,7 @@ namespace TransferBench
using std::set;
using std::vector;
constexpr char VERSION[] = "1.59";
constexpr char VERSION[] = "1.60";
/**
* Enumeration of supported Executor types
......@@ -2297,7 +2297,7 @@ namespace {
MemDevice const& srcMemDevice = t.srcs[iSrc];
// Ensure executing GPU can access source memory
if (exeDevice.exeType == EXE_GPU_GFX && IsGpuMemType(srcMemDevice.memType) &&
if (IsGpuExeType(exeDevice.exeType) && IsGpuMemType(srcMemDevice.memType) &&
srcMemDevice.memIndex != exeDevice.exeIndex) {
ERR_CHECK(EnablePeerAccess(exeDevice.exeIndex, srcMemDevice.memIndex));
}
......@@ -2310,7 +2310,7 @@ namespace {
MemDevice const& dstMemDevice = t.dsts[iDst];
// Ensure executing GPU can access destination memory
if (exeDevice.exeType == EXE_GPU_GFX && IsGpuMemType(dstMemDevice.memType) &&
if (IsGpuExeType(exeDevice.exeType) && IsGpuMemType(dstMemDevice.memType) &&
dstMemDevice.memIndex != exeDevice.exeIndex) {
ERR_CHECK(EnablePeerAccess(exeDevice.exeIndex, dstMemDevice.memIndex));
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment