Unverified commit 97fbbbb3, authored by gilbertlee-amd and committed by GitHub
Browse files

Enumerating missing DMA engines in topology display (#96)

parent 568dc42d
...@@ -3,6 +3,11 @@ ...@@ -3,6 +3,11 @@
Documentation for TransferBench is available at Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench). [https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
## v1.49
### Fixes
* The topology display now enumerates DMA engines that are used only for CPU traffic, which were previously missed
## v1.48 ## v1.48
### Fixes ### Fixes
......
...@@ -1128,28 +1128,39 @@ void DisplayTopology(bool const outputToCsv) ...@@ -1128,28 +1128,39 @@ void DisplayTopology(bool const outputToCsv)
// Figure out DMA engines per GPU // Figure out DMA engines per GPU
std::vector<std::set<int>> dmaEngineIdsPerDevice(numGpuDevices); std::vector<std::set<int>> dmaEngineIdsPerDevice(numGpuDevices);
{ {
std::vector<hsa_agent_t> agentList; std::vector<hsa_agent_t> gpuAgentList;
std::vector<hsa_agent_t> allAgentList;
hsa_amd_pointer_info_t info; hsa_amd_pointer_info_t info;
info.size = sizeof(info); info.size = sizeof(info);
for (int deviceId = 0; deviceId < numGpuDevices; deviceId++) for (int deviceId = 0; deviceId < numGpuDevices; deviceId++)
{ {
HIP_CALL(hipSetDevice(deviceId)); HIP_CALL(hipSetDevice(deviceId));
int32_t* tempBuffer; int32_t* tempGpuBuffer;
HIP_CALL(hipMalloc((void**)&tempBuffer, 1024)); HIP_CALL(hipMalloc((void**)&tempGpuBuffer, 1024));
HSA_CHECK(hsa_amd_pointer_info(tempBuffer, &info, NULL, NULL, NULL)); HSA_CHECK(hsa_amd_pointer_info(tempGpuBuffer, &info, NULL, NULL, NULL));
agentList.push_back(info.agentOwner); gpuAgentList.push_back(info.agentOwner);
HIP_CALL(hipFree(tempBuffer)); allAgentList.push_back(info.agentOwner);
HIP_CALL(hipFree(tempGpuBuffer));
}
for (int deviceId = 0; deviceId < numCpuDevices; deviceId++)
{
int32_t* tempCpuBuffer;
AllocateMemory(MEM_CPU, deviceId, 1024, (void**)&tempCpuBuffer);
HSA_CHECK(hsa_amd_pointer_info(tempCpuBuffer, &info, NULL, NULL, NULL));
allAgentList.push_back(info.agentOwner);
DeallocateMemory(MEM_CPU, tempCpuBuffer, 1024);
} }
for (int srcDevice = 0; srcDevice < numGpuDevices; srcDevice++) for (int srcDevice = 0; srcDevice < numGpuDevices; srcDevice++)
{ {
dmaEngineIdsPerDevice[srcDevice].clear(); dmaEngineIdsPerDevice[srcDevice].clear();
for (int dstDevice = 0; dstDevice < numGpuDevices; dstDevice++) for (int dstDevice = 0; dstDevice < allAgentList.size(); dstDevice++)
{ {
if (srcDevice == dstDevice) continue; if (srcDevice == dstDevice) continue;
uint32_t engineIdMask = 0; uint32_t engineIdMask = 0;
if (hsa_amd_memory_copy_engine_status(agentList[dstDevice], if (hsa_amd_memory_copy_engine_status(allAgentList[dstDevice],
agentList[srcDevice], gpuAgentList[srcDevice],
&engineIdMask) != HSA_STATUS_SUCCESS) &engineIdMask) != HSA_STATUS_SUCCESS)
continue; continue;
for (int engineId = 0; engineId < 32; engineId++) for (int engineId = 0; engineId < 32; engineId++)
......
...@@ -29,7 +29,7 @@ THE SOFTWARE. ...@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp" #include "Compatibility.hpp"
#include "Kernels.hpp" #include "Kernels.hpp"
#define TB_VERSION "1.48" #define TB_VERSION "1.49"
extern char const MemTypeStr[]; extern char const MemTypeStr[];
extern char const ExeTypeStr[]; extern char const ExeTypeStr[];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment