"docs/vscode:/vscode.git/clone" did not exist on "b215776f2d4d31c160538dccdbfe7c827d1d3e88"
Unverified Commit 97fbbbb3 authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

Enumerating missing DMA engines in topology display (#96)

parent 568dc42d
......@@ -3,6 +3,11 @@
Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
## v1.49
### Fixes
* Topology display now enumerates DMA engines that were previously missed because they are used only for CPU traffic
## v1.48
### Fixes
......
......@@ -1128,28 +1128,39 @@ void DisplayTopology(bool const outputToCsv)
// Figure out DMA engines per GPU
std::vector<std::set<int>> dmaEngineIdsPerDevice(numGpuDevices);
{
std::vector<hsa_agent_t> agentList;
std::vector<hsa_agent_t> gpuAgentList;
std::vector<hsa_agent_t> allAgentList;
hsa_amd_pointer_info_t info;
info.size = sizeof(info);
for (int deviceId = 0; deviceId < numGpuDevices; deviceId++)
{
HIP_CALL(hipSetDevice(deviceId));
int32_t* tempBuffer;
HIP_CALL(hipMalloc((void**)&tempBuffer, 1024));
HSA_CHECK(hsa_amd_pointer_info(tempBuffer, &info, NULL, NULL, NULL));
agentList.push_back(info.agentOwner);
HIP_CALL(hipFree(tempBuffer));
int32_t* tempGpuBuffer;
HIP_CALL(hipMalloc((void**)&tempGpuBuffer, 1024));
HSA_CHECK(hsa_amd_pointer_info(tempGpuBuffer, &info, NULL, NULL, NULL));
gpuAgentList.push_back(info.agentOwner);
allAgentList.push_back(info.agentOwner);
HIP_CALL(hipFree(tempGpuBuffer));
}
for (int deviceId = 0; deviceId < numCpuDevices; deviceId++)
{
int32_t* tempCpuBuffer;
AllocateMemory(MEM_CPU, deviceId, 1024, (void**)&tempCpuBuffer);
HSA_CHECK(hsa_amd_pointer_info(tempCpuBuffer, &info, NULL, NULL, NULL));
allAgentList.push_back(info.agentOwner);
DeallocateMemory(MEM_CPU, tempCpuBuffer, 1024);
}
for (int srcDevice = 0; srcDevice < numGpuDevices; srcDevice++)
{
dmaEngineIdsPerDevice[srcDevice].clear();
for (int dstDevice = 0; dstDevice < numGpuDevices; dstDevice++)
for (int dstDevice = 0; dstDevice < allAgentList.size(); dstDevice++)
{
if (srcDevice == dstDevice) continue;
uint32_t engineIdMask = 0;
if (hsa_amd_memory_copy_engine_status(agentList[dstDevice],
agentList[srcDevice],
if (hsa_amd_memory_copy_engine_status(allAgentList[dstDevice],
gpuAgentList[srcDevice],
&engineIdMask) != HSA_STATUS_SUCCESS)
continue;
for (int engineId = 0; engineId < 32; engineId++)
......
......@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.48"
#define TB_VERSION "1.49"
extern char const MemTypeStr[];
extern char const ExeTypeStr[];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment