Unverified Commit fb713d03 authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

v1.58.00 Fixing DMA copy-on-engine (#152)

parent 3ea2f226
...@@ -3,6 +3,10 @@ ...@@ -3,6 +3,10 @@
Documentation for TransferBench is available at Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench). [https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
## v1.58.00
### Fixed
- Fixed broken copies when a specific DMA engine is requested
## v1.57.01 ## v1.57.01
### Added ### Added
- Re-added "scaling" GPU GFX preset benchmark, which tests copies from GPU to other devices using varying - Re-added "scaling" GPU GFX preset benchmark, which tests copies from GPU to other devices using varying
......
...@@ -7,7 +7,7 @@ else() ...@@ -7,7 +7,7 @@ else()
endif() endif()
cmake_minimum_required(VERSION 3.5) cmake_minimum_required(VERSION 3.5)
project(TransferBench VERSION 1.57.0 LANGUAGES CXX) project(TransferBench VERSION 1.58.00 LANGUAGES CXX)
# Default GPU architectures to build # Default GPU architectures to build
#================================================================================================== #==================================================================================================
......
...@@ -23,7 +23,7 @@ THE SOFTWARE. ...@@ -23,7 +23,7 @@ THE SOFTWARE.
#pragma once #pragma once
// TransferBench client version // TransferBench client version
#define CLIENT_VERSION "01" #define CLIENT_VERSION "00"
#include "TransferBench.hpp" #include "TransferBench.hpp"
#include "EnvVars.hpp" #include "EnvVars.hpp"
......
...@@ -327,7 +327,7 @@ public: ...@@ -327,7 +327,7 @@ public:
int numGpuDevices = TransferBench::GetNumExecutors(EXE_GPU_GFX); int numGpuDevices = TransferBench::GetNumExecutors(EXE_GPU_GFX);
if (!outputToCsv) { if (!outputToCsv) {
printf("TransferBench Client v%s Backend v%s\n", CLIENT_VERSION, TransferBench::VERSION); printf("TransferBench v%s.%s\n", TransferBench::VERSION, CLIENT_VERSION);
printf("===============================================================\n"); printf("===============================================================\n");
if (!hideEnv) printf("[Common] (Suppress by setting HIDE_ENV=1)\n"); if (!hideEnv) printf("[Common] (Suppress by setting HIDE_ENV=1)\n");
} }
......
...@@ -49,7 +49,7 @@ namespace TransferBench ...@@ -49,7 +49,7 @@ namespace TransferBench
using std::set; using std::set;
using std::vector; using std::vector;
constexpr char VERSION[] = "1.57"; constexpr char VERSION[] = "1.58";
/** /**
* Enumeration of supported Executor types * Enumeration of supported Executor types
...@@ -1002,7 +1002,7 @@ namespace { ...@@ -1002,7 +1002,7 @@ namespace {
#endif #endif
} }
if (!IsGpuMemType(t.srcs[0].memType) || !IsGpuMemType(t.dsts[0].memType)) { if (!IsGpuMemType(t.srcs[0].memType) && !IsGpuMemType(t.dsts[0].memType)) {
errors.push_back({ERR_WARN, errors.push_back({ERR_WARN,
"Transfer %d: No GPU memory for source or destination. Copy might not execute on DMA %d", "Transfer %d: No GPU memory for source or destination. Copy might not execute on DMA %d",
i, t.exeDevice.exeIndex}); i, t.exeDevice.exeIndex});
...@@ -1406,6 +1406,9 @@ namespace { ...@@ -1406,6 +1406,9 @@ namespace {
// Create HSA completion signal // Create HSA completion signal
ERR_CHECK(hsa_signal_create(1, 0, NULL, &resources.signal)); ERR_CHECK(hsa_signal_create(1, 0, NULL, &resources.signal));
if (t.exeSubIndex != -1)
resources.sdmaEngineId = (hsa_amd_sdma_engine_id_t)(1U << t.exeSubIndex);
#endif #endif
} }
...@@ -2020,7 +2023,7 @@ namespace { ...@@ -2020,7 +2023,7 @@ namespace {
// Use HSA async copy // Use HSA async copy
do { do {
hsa_signal_store_screlease(resources.signal, 1); hsa_signal_store_screlease(resources.signal, 1);
if (cfg.dma.useHsaCopy) { if (!useSubIndices) {
ERR_CHECK(hsa_amd_memory_async_copy(resources.dstMem[0], resources.dstAgent, ERR_CHECK(hsa_amd_memory_async_copy(resources.dstMem[0], resources.dstAgent,
resources.srcMem[0], resources.srcAgent, resources.srcMem[0], resources.srcAgent,
resources.numBytes, 0, NULL, resources.numBytes, 0, NULL,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment