Commit 6bfa9825 authored by Ramesh Errabolu's avatar Ramesh Errabolu
Browse files

Adding source files and cmake build file

parent b6bbb3a3
cmake_minimum_required(VERSION 2.8.0)
#
# Setup build environment
#
# 1) Set the environment variables ROCR_INC_DIR and ROCR_LIB_DIR to point to
# the ROC Runtime headers and libraries, respectively
#
# export ROCR_INC_DIR="Path to ROC Runtime headers"
#
# export ROCR_LIB_DIR="Path to ROC Runtime libraries"
#
# 2) Make a new folder called build under the root folder
#
# mkdir build
#
# 3) Enter the build folder, run CMake to generate the makefile,
# and then build it
#
# cd build; cmake ..; make
#
#
# Windows is not a supported build platform: stop the configure step with a
# clear diagnostic. FATAL_ERROR aborts processing, so no explicit return()
# is required.
#
if(WIN32)
  message(FATAL_ERROR "Windows platform is not supported")
endif()
#
# Flag to enable / disable verbose output.
#
set(CMAKE_VERBOSE_MAKEFILE ON)
#
# Determine whether the ROC Runtime header files are accessible.
# The path is read from the ROCR_INC_DIR environment variable at configure
# time. A missing header is a fatal configuration error: continuing would
# generate a build tree without any targets.
#
if(NOT EXISTS "$ENV{ROCR_INC_DIR}/hsa/hsa.h")
  message(FATAL_ERROR
    "ROC Runtime headers can't be found under specified path "
    "(ROCR_INC_DIR='$ENV{ROCR_INC_DIR}', expected hsa/hsa.h)")
endif()
#
# Set core runtime module name
#
set(ROC_THUNK_NAME "hsakmt")
set(CORE_RUNTIME_NAME "hsa-runtime")
set(ROC_THUNK_LIBRARY "lib${ROC_THUNK_NAME}")
set(CORE_RUNTIME_TARGET "${CORE_RUNTIME_NAME}64")
set(CORE_RUNTIME_LIBRARY "lib${CORE_RUNTIME_TARGET}")
#
# Determine whether the ROC Runtime shared library is accessible.
# The path is read from the ROCR_LIB_DIR environment variable at configure
# time. A missing library is a fatal configuration error: continuing would
# generate a build tree without any targets.
#
if(NOT EXISTS "$ENV{ROCR_LIB_DIR}/${CORE_RUNTIME_LIBRARY}.so")
  message(FATAL_ERROR
    "ROC Runtime libraries can't be found under specified path "
    "(ROCR_LIB_DIR='$ENV{ROCR_LIB_DIR}', expected ${CORE_RUNTIME_LIBRARY}.so)")
endif()
#
# Declare the project; the test executable carries the same name.
#
set(PROJECT_NAME "rocm_bandwidth_test")
set(TEST_NAME "${PROJECT_NAME}")
project(${PROJECT_NAME})
#
# Detect a Debug build. The build type is compared case-insensitively,
# so "Debug", "debug" and "DEBUG" are all accepted.
#
string(TOLOWER "${CMAKE_BUILD_TYPE}" tmp)
if("${tmp}" STREQUAL "debug")
  set(ISDEBUG "1")
  add_definitions(-DDEBUG)
endif()
#
# Base compiler flags: optimized by default, unoptimized with gdb debug
# information for Debug builds.
#
if(NOT ISDEBUG)
  set(CMAKE_CXX_FLAGS "-std=c++11 -O2")
else()
  set(CMAKE_CXX_FLAGS "-std=c++11 -O0 -ggdb")
endif()
#
# Set the remaining compiler flags. Each flag is appended, in order, to
# the base flags chosen above.
#
foreach(rocm_bw_extra_cxx_flag
    -Wall
    -Werror
    -fno-rtti
    -fexceptions
    -fno-math-errno
    -fms-extensions
    -fmerge-all-constants
    -fno-threadsafe-statics)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${rocm_bw_extra_cxx_flag}")
endforeach()
# Header and library search paths are taken from the environment
include_directories($ENV{ROCR_INC_DIR})
link_directories($ENV{ROCR_LIB_DIR})
# Add sources that belong to the project: every source file found in the
# directory of this CMakeLists.txt
aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR} Src)
add_executable(${TEST_NAME} ${Src})
# Link against the ROC Runtime and the required system libraries
target_link_libraries(${TEST_NAME}
  ${CORE_RUNTIME_TARGET}
  c
  stdc++
  dl
  pthread
  rt)
Introduction:
#############
RocBandwidthTest is designed to capture the performance characteristics of buffer
copying and kernel read/write operations. The help screen of the benchmark
shows the various options one can use to initiate copy/read/write operations.
In addition, one can also query the topology of the system in terms of memory
pools and their agents.
Build Environment:
##################
To be able to build RocBandwidthTest, users must ensure that the build platform
satisfies the following conditions:
Build Procedure:
################
The following simply lists the steps to build RocBandwidthTest
--- Define following environment variable to specify location of header
and library files
// Contains header files exported by ROC Runtime
ROCR_INC_DIR="Path of ROC Runtime Header Files"
// Contains library files exported by ROC Runtime
ROCR_LIB_DIR="Path of ROC Runtime Library Files"
// Contains header files exported by ROC Thunk
ROCT_INC_DIR="Path of ROC Thunk Header Files"
// Contains library files exported by ROC Thunk
ROCT_LIB_DIR="Path of ROC Thunk Library Files"
--- Create a build directory in the project folder - roc_bandwidth_test.
e.g. mkdir ./build
--- Set working directory to be the new build directory
e.g. cd ./build
--- Invoke Cmake to interpret build rules and generate native build files
The argument for cmake should be the root folder of RocBandwidthTest
test suite
// Builds Release version (default)
// Assumes pwd is .../roc_bandwidth_test/build
e.g. cmake ..
// Builds Debug version
// Assumes pwd is .../roc_bandwidth_test/build
e.g. cmake -DCMAKE_BUILD_TYPE:STRING=Debug ..
--- Invoke the native build rules generated by cmake to build the various
object, library and executable files
e.g. make
--- Invoke the install command to copy build artifacts to pre-defined folders
of RocBandwidthTest suite. Upon completion artifacts will be copied to the
bin and lib directories of build directory
e.g. make install
@note: All executables will be found in <build_directory>/bin folder
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "base_test.hpp"
// Constructor.
//
// Stores the requested iteration count. When the caller omits the
// argument, the value comes from the default argument on the constructor's
// declaration.
//
// @param num Number of iterations the test should run
BaseTest::BaseTest(size_t num)
    : num_iteration_(num),
      // Start from a well-defined status so err_ is never read uninitialized
      err_(HSA_STATUS_SUCCESS) {}

// Destructor. The base class does not release anything itself.
BaseTest::~BaseTest() {}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef ROC_BANDWIDTH_TEST_BASE_H_
#define ROC_BANDWIDTH_TEST_BASE_H_
#include "hsa/hsa.h"
#include <iostream>
#include <string>
#include <vector>
using namespace std;
// @Brief: Abstract interface that every test implements. It fixes the
// life cycle of a test: SetUp(), Run(), Display() and Close().
class BaseTest {
 public:
  // @Brief: Builds a test that runs num iterations
  BaseTest(size_t num = 3);

  virtual ~BaseTest();

  // @Brief: Allows setup procedures to be completed
  // before running the benchmark test case
  virtual void SetUp() = 0;

  // @Brief: Launches the procedures of the test scenario
  virtual void Run() = 0;

  // @Brief: Allows clean up procedures to be invoked
  virtual void Close() = 0;

  // @Brief: Display the results
  virtual void Display() const = 0;

  // @Brief: Set number of iterations to run
  void set_num_iteration(size_t num) { num_iteration_ = num; }

  // @Brief: State shared with derived classes; a derived class
  // may declare more members if needed
 protected:
  // @Brief: Number of iterations to run
  uint64_t num_iteration_;

  // @Brief: Status code
  hsa_status_t err_;
};
#endif // ROC_BANDWIDTH_TEST_BASE_H_
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"

#include <algorithm>
// Checks the status returned by an HSA API call.
//
// HSA_STATUS_SUCCESS and HSA_STATUS_INFO_BREAK are treated as non-errors
// and the function simply returns. For any other status it prints the
// location and a description of the error to stdout and terminates the
// process with EXIT_FAILURE.
//
// @param hsa_error_code Status value to check
// @param line_num       Source line reported in the diagnostic
// @param str            Source file name reported in the diagnostic
void error_check(hsa_status_t hsa_error_code, int line_num, const char* str) {
  if (hsa_error_code == HSA_STATUS_SUCCESS ||
      hsa_error_code == HSA_STATUS_INFO_BREAK) {
    return;
  }

  printf("HSA Error Found! In file: %s; At line: %d\n", str, line_num);

  // The lookup itself can fail (e.g. for an unrecognized status value) and
  // leave the pointer null; never hand a null pointer to "%s".
  const char* status_text = nullptr;
  hsa_status_t lookup = hsa_status_string(hsa_error_code, &status_text);
  if (lookup != HSA_STATUS_SUCCESS || status_text == nullptr) {
    status_text = "Unknown HSA status code";
  }
  printf("Error: %s\n", status_text);

  exit(EXIT_FAILURE);
}
// Reports the first GPU agent it is handed.
//
// Queries the device type of agent. When it is a GPU, the agent handle is
// stored into the hsa_agent_t that data points to and HSA_STATUS_INFO_BREAK
// is returned (presumably to stop the caller's agent iteration - confirm
// against the call site). Non-GPU agents yield HSA_STATUS_SUCCESS.
//
// @param agent Agent to inspect
// @param data  Pointer to the hsa_agent_t that receives the GPU agent
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if data is NULL, the failing
//         status if the device type query fails, otherwise as described
hsa_status_t FindGpuDevice(hsa_agent_t agent, void* data) {
  if (NULL == data) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  hsa_device_type_t device_kind;
  const hsa_status_t query_status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_kind);
  if (HSA_STATUS_SUCCESS != query_status) return query_status;

  // Not a GPU: report success without touching the output
  if (HSA_DEVICE_TYPE_GPU != device_kind) return HSA_STATUS_SUCCESS;

  hsa_agent_t* found_agent = (hsa_agent_t*)data;
  *found_agent = agent;
  return HSA_STATUS_INFO_BREAK;
}
// Reports the first CPU agent it is handed.
//
// Queries the device type of agent. When it is a CPU, the agent handle is
// stored into the hsa_agent_t that data points to and HSA_STATUS_INFO_BREAK
// is returned (presumably to stop the caller's agent iteration - confirm
// against the call site). Non-CPU agents yield HSA_STATUS_SUCCESS.
//
// @param agent Agent to inspect
// @param data  Pointer to the hsa_agent_t that receives the CPU agent
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if data is NULL, the failing
//         status if the device type query fails, otherwise as described
hsa_status_t FindCpuDevice(hsa_agent_t agent, void* data) {
  if (NULL == data) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  hsa_device_type_t device_kind;
  const hsa_status_t query_status =
      hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_kind);
  if (HSA_STATUS_SUCCESS != query_status) return query_status;

  // Not a CPU: report success without touching the output
  if (HSA_DEVICE_TYPE_CPU != device_kind) return HSA_STATUS_SUCCESS;

  hsa_agent_t* found_agent = (hsa_agent_t*)data;
  *found_agent = agent;
  return HSA_STATUS_INFO_BREAK;
}
// Records a memory pool that is in the global segment and fine grained.
//
// Queries the segment and the global flags of region (both queries are
// verified through ErrorCheck). When the pool qualifies, it is stored into
// the hsa_amd_memory_pool_t that data points to. The function returns
// HSA_STATUS_SUCCESS whether or not the pool qualified, so if it is called
// for several pools the last qualifying one is the one left in data.
//
// @param region Memory pool to inspect
// @param data   Pointer to the hsa_amd_memory_pool_t that receives the pool
// @return HSA_STATUS_ERROR_INVALID_ARGUMENT if data is NULL,
//         HSA_STATUS_SUCCESS otherwise
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t region, void* data) {
  if (data == NULL) return HSA_STATUS_ERROR_INVALID_ARGUMENT;

  hsa_amd_segment_t pool_segment;
  hsa_status_t status = hsa_amd_memory_pool_get_info(
      region, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &pool_segment);
  ErrorCheck(status);

  uint32_t pool_flags;
  status = hsa_amd_memory_pool_get_info(
      region, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &pool_flags);
  ErrorCheck(status);

  const bool in_global_segment = (pool_segment == HSA_AMD_SEGMENT_GLOBAL);
  if (in_global_segment &&
      (pool_flags & HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED)) {
    hsa_amd_memory_pool_t* found_pool = (hsa_amd_memory_pool_t*)data;
    *found_pool = region;
  }
  return HSA_STATUS_SUCCESS;
}
// Calculates the median of scores.
//
// The vector is received by value, so it is sorted locally without
// affecting the caller; the input therefore does not need to be ordered.
//
// @param scores Samples to evaluate (any order)
// @return The middle sample for an odd count, the average of the two
//         middle samples for an even count, and 0.0 for an empty vector
double CalcMedian(vector<double> scores) {
  // An empty input has no median; avoid indexing out of bounds
  if (scores.empty()) {
    return 0.0;
  }

  std::sort(scores.begin(), scores.end());

  const size_t size = scores.size();
  const size_t mid = size / 2;
  if (size % 2 == 0) {
    return (scores[mid - 1] + scores[mid]) / 2;
  }
  return scores[mid];
}
// Calculates the arithmetic mean of scores.
//
// @param scores Samples to average
// @return Sum of all samples divided by the number of samples
double CalcMean(vector<double> scores) {
  double total = 0;
  for (vector<double>::const_iterator it = scores.begin();
       it != scores.end(); ++it) {
    total += *it;
  }
  return total / scores.size();
}
// Calculates the standard deviation of scores around score_mean.
//
// The squared distances to score_mean are averaged over the number of
// samples and the square root of that average is returned.
//
// NOTE(review): score_mean is declared as int, so a fractional mean
// passed by the caller is truncated before it is used - confirm whether
// that is intended.
//
// @param scores     Samples to evaluate
// @param score_mean Mean the deviation is measured against
// @return Square root of the mean squared distance to score_mean
double CalcStdDeviation(vector<double> scores, int score_mean) {
  const size_t count = scores.size();
  double squared_sum = 0.0;
  for (size_t idx = 0; idx < count; ++idx) {
    const double delta = scores[idx] - score_mean;
    squared_sum += delta * delta;
  }
  return sqrt(squared_sum / count);
}
// Counts the entries of scores that do not fall more than 10% below the
// time expected from ideal halving.
//
// The expected time of entry i is scores[0] / 2^i. Every expected time is
// printed to stdout. Entry i (i >= 1) is counted when
// (expected[i] - scores[i]) < 0.1 * expected[i].
//
// @param scores Measured times; scores[0] is the reference
// @return Number of counted entries (0 for an empty or single-entry input)
int CalcConcurrentQueues(vector<double> scores) {
  const size_t count = scores.size();

  // ldexp scales by an exact power of two. An integer shift (1 << i) would
  // be undefined once i reaches the bit width of int.
  vector<double> expected_exec_time;
  expected_exec_time.reserve(count);
  for (size_t i = 0; i < count; ++i) {
    expected_exec_time.push_back(std::ldexp(scores[0], -static_cast<int>(i)));
  }

  for (size_t i = 0; i < count; ++i) {
    cout << "expected exe time = " << expected_exec_time[i] << endl;
  }

  int num_of_concurrent_queues = 0;
  for (size_t i = 1; i < count; ++i) {
    const double shortfall = expected_exec_time[i] - scores[i];
    if (shortfall < 0.1 * expected_exec_time[i]) {
      ++num_of_concurrent_queues;
    }
  }
  return num_of_concurrent_queues;
}
/** hsa_status_t FindHostRegion(hsa_region_t region, void *data) {
if (data == NULL) {
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
bool is_host_region = false;
hsa_status_t hsa_error_code = hsa_region_get_info(
region, (hsa_region_info_t)HSA_EXT_REGION_INFO_HOST_ACCESS, &is_host_region
);
if (hsa_error_code != HSA_STATUS_SUCCESS) {
return hsa_error_code;
}
if (is_host_region) {
*((hsa_region_t*)data) = region;
}
return HSA_STATUS_SUCCESS;
} */
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef ROC_BANDWIDTH_TEST_COMMON_HPP
#define ROC_BANDWIDTH_TEST_COMMON_HPP
#include <cstdlib>
#include <iostream>
#include <vector>
#include <cmath>
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"
using namespace std;
// ALIGNED_(x): compiler-specific alignment attribute. It is defined for
// MSVC and for GCC-compatible compilers only; with any other compiler the
// macro is left undefined.
#if defined(_MSC_VER)
#define ALIGNED_(x) __declspec(align(x))
#else
#if defined(__GNUC__)
#define ALIGNED_(x) __attribute__((aligned(x)))
#endif // __GNUC__
#endif // _MSC_VER
// MULTILINE(...): turns its arguments into a single string literal
#define MULTILINE(...) #__VA_ARGS__
#define HSA_ARGUMENT_ALIGN_BYTES 16
// ErrorCheck(x): forwards status x to error_check() together with the
// line and file of the call site
#define ErrorCheck(x) error_check(x, __LINE__, __FILE__)
// @Brief: Check HSA API return value; line_num and str identify the
// source line and file of the call being checked
void error_check(hsa_status_t hsa_error_code, int line_num, const char* str);
// @Brief: Find the first available GPU device; data points to the
// hsa_agent_t that receives the agent
hsa_status_t FindGpuDevice(hsa_agent_t agent, void* data);
// @Brief: Find the first available CPU device; data points to the
// hsa_agent_t that receives the agent
hsa_status_t FindCpuDevice(hsa_agent_t agent, void* data);
// @Brief: Find the agent's global region / pool; data points to the
// hsa_amd_memory_pool_t that receives the pool
hsa_status_t FindGlobalPool(hsa_amd_memory_pool_t region, void* data);
// @Brief: Calculate the mean value of the vector
double CalcMean(vector<double> scores);
// @Brief: Calculate the median value of the vector
double CalcMedian(vector<double> scores);
// @Brief: Calculate the standard deviation of the vector
// NOTE(review): score_mean is an int, so a fractional mean is truncated
// when passed in - confirm this is intended
double CalcStdDeviation(vector<double> scores, int score_mean);
#endif // ROC_BANDWIDTH_TEST_COMMON_HPP
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "hsatimer.hpp"
#define NANOSECONDS_PER_SECOND 1000000000
// Constructor: measures the TSC frequency once and keeps the result for
// later tick conversions.
PerfTimer::PerfTimer() : freq_in_100mhz(MeasureTSCFreqHz()) {}
// Destructor: releases every Timer created through CreateTimer(),
// starting with the most recently created one.
PerfTimer::~PerfTimer() {
  for (size_t remaining = _timers.size(); remaining > 0; --remaining) {
    delete _timers[remaining - 1];
  }
  _timers.clear();
}
// Create a new timer instance and return its index
int PerfTimer::CreateTimer() {
Timer *newTimer = new Timer;
newTimer->_start = 0.0;
newTimer->_clocks = 0.0;
#ifdef __linux__
newTimer->_freq = NANOSECONDS_PER_SECOND;
#endif
// Save the timer object in timer list
_timers.push_back(newTimer);
return (int)(_timers.size() - 1);
}
int PerfTimer::StartTimer(int index) {
if (index >= (int)_timers.size()) {
Error("Cannot reset timer. Invalid handle.");
return HSA_FAILURE;
}
#ifdef __linux__
// General Linux timing method
#ifndef _AMD
struct timespec s;
clock_gettime(CLOCK_MONOTONIC, &s);
_timers[index]->_start =
(long long)s.tv_sec * NANOSECONDS_PER_SECOND + (long long)s.tv_nsec;
// AMD Linux timing method
#else
unsigned int unused;
_timers[index]->_start = __rdtscp(&unused);
#endif
#endif
return HSA_SUCCESS;
}
int PerfTimer::StopTimer(int index) {
long long n = 0;
if (index >= (int)_timers.size()) {
Error("Cannot reset timer. Invalid handle.");
return HSA_FAILURE;
}
#ifdef __linux__
// General Linux timing method
#ifndef _AMD
struct timespec s;
clock_gettime(CLOCK_MONOTONIC, &s);
n = (long long)s.tv_sec * NANOSECONDS_PER_SECOND + (long long)s.tv_nsec;
// AMD Linux timing
#else
unsigned int unused;
n = __rdtscp(&unused);
#endif
#endif
n -= _timers[index]->_start;
_timers[index]->_start = 0;
#ifndef _AMD
_timers[index]->_clocks += n;
#endif
#ifdef __linux__
//_timers[index]->_clocks += 10 * n /freq_in_100mhz; // unit is ns
_timers[index]->_clocks += 1.0E-6 * 10 * n / freq_in_100mhz; // convert to ms
// cout << "_AMD is enabled!!!" << endl;
#endif
return HSA_SUCCESS;
}
void PerfTimer::Error(string str) { cout << str << endl; }
// Reads the timer identified by index.
//
// @param index Handle returned by CreateTimer()
// @return The accumulated value of the timer divided by its frequency,
//         or HSA_FAILURE when index is not a valid handle
double PerfTimer::ReadTimer(int index) {
  // Reject negative handles as well as handles past the end of the list
  if (index < 0 || index >= (int)_timers.size()) {
    Error("Cannot read timer. Invalid handle.");
    return HSA_FAILURE;
  }

  const double accumulated = double(_timers[index]->_clocks);
  return accumulated / _timers[index]->_freq;
}
// Clears the accumulated value and the start point of the timer
// identified by index. An invalid handle is reported and terminates the
// process with exit code 1.
//
// @param index Handle returned by CreateTimer()
void PerfTimer::ResetTimer(int index) {
  // Reject negative handles as well as handles past the end of the list
  if (index < 0 || index >= (int)_timers.size()) {
    Error("Invalid index value\n");
    exit(1);
  }

  _timers[index]->_clocks = 0.0;
  _timers[index]->_start = 0.0;
}
// Returns the current CLOCK_MONOTONIC_RAW time in microseconds.
// Only a Linux implementation is provided.
uint64_t PerfTimer::CoarseTimestampUs() {
#ifdef __linux__
  struct timespec now;
  clock_gettime(CLOCK_MONOTONIC_RAW, &now);
  // Whole seconds plus the sub-second part, both scaled to microseconds
  const uint64_t whole_seconds_us = uint64_t(now.tv_sec) * 1000000;
  return whole_seconds_us + now.tv_nsec / 1000;
#endif
}
// Measures the TSC frequency.
//
// Spins until at least one billion TSC ticks have elapsed, taking a
// coarse wall-clock timestamp before and after, and divides the tick
// count by the wall-clock interval.
//
// @return TSC frequency in units of 100 MHz, rounded to the nearest unit
uint64_t PerfTimer::MeasureTSCFreqHz() {
  // Make a coarse interval measurement of TSC ticks for 1 gigacycles.
  unsigned int aux;
  const uint64_t wall_begin_us = CoarseTimestampUs();
  const uint64_t tsc_begin = __rdtscp(&aux);
  uint64_t tsc_end = __rdtscp(&aux);
  while (tsc_end - tsc_begin < 1000000000) {
    tsc_end = __rdtscp(&aux);
  }
  const uint64_t wall_end_us = CoarseTimestampUs();

  // Compute the TSC frequency and round to nearest 100MHz: ticks per
  // nanosecond is GHz, times ten is units of 100 MHz. Adding half of the
  // divisor first rounds the integer division to nearest.
  const uint64_t wall_interval_ns = (wall_end_us - wall_begin_us) * 1000;
  const uint64_t tsc_interval = tsc_end - tsc_begin;
  const uint64_t rounding = wall_interval_ns / 2;
  return (tsc_interval * 10 + rounding) / wall_interval_ns;
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef ROC_BANDWIDTH_TEST_MYTIME_H_
#define ROC_BANDWIDTH_TEST_MYTIME_H_
// Will use AMD timer and general Linux timer based on users'
// need --> compilation flag. Support for windows platform is
// not currently available
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <x86intrin.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
#include <sys/time.h>
#define HSA_FAILURE 1
#define HSA_SUCCESS 0
// A set of timers addressed by the integer handle that CreateTimer()
// returns.
class PerfTimer {
 public:
  PerfTimer();
  ~PerfTimer();

  // Timer life cycle: create, start, stop and reset
  int CreateTimer();
  int StartTimer(int index);
  int StopTimer(int index);
  void ResetTimer(int index);

  // Retrieve time
  double ReadTimer(int index);

  // Write into a file
  double WriteTimer(int index);

  // Report a diagnostic message
  void Error(string str);

 private:
  // State kept for one timer
  struct Timer {
    string name;       /* < name of the timer object */
    long long _freq;   /* < frequency */
    long long _clocks; /* < accumulated number of ticks */
    long long _start;  /* < start point in ticks */
  };

  // AMD timing method
  uint64_t CoarseTimestampUs();
  uint64_t MeasureTSCFreqHz();

  std::vector<Timer*> _timers; /* < list of Timer objects */
  double freq_in_100mhz;       /* < measured TSC frequency */
};
#endif // ROC_BANDWIDTH_TEST_MYTIME_H_
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include <unistd.h>
#include <iostream>
#include "hsatimer.hpp"
#include "rocm_bandwidth_test.hpp"
using namespace std;
// Entry point of the bandwidth test.
//
// With user arguments a single test is set up, run, displayed and closed.
// Without user arguments the default behavior is two runs: the first with
// the option "-a" and the second with "-A". Both runs are completed
// before either result is displayed.
int main(int argc, char** argv) {
  // Default behavior is implemented as two runs
  const bool default_run = (argc == 1);

  // Argument vector for the default runs. It is built locally rather than
  // by writing into argv[1], which is the terminating null slot of the
  // caller-provided vector when argc == 1.
  char first_run_opt[] = "-a";
  char second_run_opt[] = "-A";
  char* default_argv[] = {argv[0], first_run_opt, NULL};

  int run_argc = argc;
  char** run_argv = argv;
  if (default_run) {
    run_argc = 2;
    run_argv = default_argv;
    setenv("ROCM_BW_DEFAULT_RUN", "false", true);
  }

  // Create, initialize and run the first Bandwidth test object
  RocmBandwidthTest bw_test1(run_argc, run_argv);
  bw_test1.SetUp();
  bw_test1.Run();

  // The user passed arguments: display the time taken by the tests,
  // release the test object's resources and finish
  if (!default_run) {
    bw_test1.Display();
    bw_test1.Close();
    return 0;
  }

  // Run the second iteration of copy requests. optind is reset so that
  // the option vector is scanned from the beginning again.
  optind = 1;
  default_argv[1] = second_run_opt;

  // Create, initialize and run the second Bandwidth test object
  RocmBandwidthTest bw_test2(run_argc, run_argv);
  bw_test2.SetUp();
  bw_test2.Run();

  // Display the time taken by various tests
  // and then release associated resources
  bw_test1.Display();
  bw_test1.Close();

  // Display the time taken by various tests
  // and then release associated resources
  bw_test2.Display();
  bw_test2.Close();
  return 0;
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
// Code for Linux platform
#ifdef __linux__
#include "os.hpp"
#include <stdlib.h>
// Sets the environment variable env_var_name to env_var_value, replacing
// any value it already holds. If setenv() reports failure a message is
// printed and the process terminates with exit code 1.
void SetEnv(const char* env_var_name, const char* env_var_value) {
  // The third argument (1) asks setenv() to overwrite an existing value
  if (setenv(env_var_name, env_var_value, 1) == 0) {
    return;
  }

  printf("Set environment variable failed!\n");
  exit(1);
}
// Looks up env_var_name in the process environment and hands back
// whatever getenv() returns for it (a null pointer when it is not set).
char* GetEnv(const char* env_var_name) {
  char* env_var_value = getenv(env_var_name);
  return env_var_value;
}
#endif // End of Linux Code
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef ROC_BANDWIDTH_TEST_UTILS_OS_H_
#define ROC_BANDWIDTH_TEST_UTILS_OS_H_
#include <stdio.h>
// Set the environment variable env_var_name to env_var_value
void SetEnv(const char* env_var_name, const char* env_var_value);
// Get the value of the environment variable env_var_name
char* GetEnv(const char* env_var_name);
#endif // ROC_BANDWIDTH_TEST_UTILS_OS_H_
This diff is collapsed.
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef __ROC_BANDWIDTH_TEST_H__
#define __ROC_BANDWIDTH_TEST_H__
#include "hsa/hsa.h"
#include "base_test.hpp"
#include "hsatimer.hpp"
#include "common.hpp"
#include <vector>
using namespace std;
// Structure to encapsulate a RocR agent and its index in a list
typedef struct agent_info {
agent_info(hsa_agent_t agent,
uint32_t index, hsa_device_type_t device_type) {
agent_ = agent;
index_ = index;
device_type_ = device_type;
}
agent_info() {}
uint32_t index_;
hsa_agent_t agent_;
hsa_device_type_t device_type_;
char name_[64]; // Size specified in public header file
} agent_info_t;
typedef struct pool_info {
pool_info(hsa_agent_t agent, uint32_t agent_index,
hsa_amd_memory_pool_t pool, hsa_amd_segment_t segment,
size_t size, uint32_t index, bool is_fine_grained,
bool is_kernarg, bool access_to_all,
hsa_amd_memory_pool_access_t owner_access) {
pool_ = pool;
index_ = index;
segment_ = segment;
owner_agent_ = agent;
agent_index_ = agent_index;
allocable_size_ = size;
is_kernarg_ = is_kernarg;
owner_access_ = owner_access;
access_to_all_ = access_to_all;
is_fine_grained_ = is_fine_grained;
}
pool_info() {}
uint32_t index_;
bool is_kernarg_;
bool access_to_all_;
bool is_fine_grained_;
size_t allocable_size_;
uint32_t agent_index_;
hsa_agent_t owner_agent_;
hsa_amd_segment_t segment_;
hsa_amd_memory_pool_t pool_;
hsa_amd_memory_pool_access_t owner_access_;
} pool_info_t;
// Used to print out topology info
typedef struct agent_pool_info {
agent_pool_info() {}
agent_info agent;
vector<pool_info_t> pool_list;
} agent_pool_info_t;
// Describes one transaction to execute along with the timing and
// bandwidth numbers collected for it
typedef struct async_trans {

  // Kind of request this transaction represents (see Request_Type)
  uint32_t req_type_;

  // Request specific parameters. The "copy" view is printed for copy
  // requests while the "kernel" view is printed for read / write
  // requests. The constructor leaves this union uninitialized.
  union {
    struct {
      bool bidir_;
      bool uses_gpu_;
      uint32_t src_idx_;
      uint32_t dst_idx_;
      hsa_amd_memory_pool_t src_pool_;
      hsa_amd_memory_pool_t dst_pool_;
    } copy;
    struct {
      void* code_;
      uint32_t agent_idx_;
      hsa_agent_t agent_;
      uint32_t pool_idx_;
      hsa_amd_memory_pool_t pool_;
    } kernel;
  };

  // Average and Min copy time of BenchMark per Cpu
  vector<double> cpu_avg_time_;
  vector<double> cpu_min_time_;

  // Average and Min copy time of BenchMark per Gpu
  vector<double> gpu_avg_time_;
  vector<double> gpu_min_time_;

  // Average copy time and average bandwidth of BenchMark
  vector<double> avg_time_;
  vector<double> avg_bandwidth_;

  // Min copy time and peak bandwidth of BenchMark
  vector<double> min_time_;
  vector<double> peak_bandwidth_;

  // Records the request type, all result lists start out empty
  async_trans(uint32_t req_type) : req_type_(req_type) {}
} async_trans_t;
// Kinds of requests a user can make. The values are stored in the
// req_type_ field of a transaction.
typedef enum Request_Type {
  REQ_READ            = 1,  // Read of a buffer (option -r)
  REQ_WRITE           = 2,  // Write of a buffer (option -w)
  REQ_COPY_BIDIR      = 3,  // Bidirectional copy (option -b)
  REQ_COPY_UNIDIR     = 4,  // Unidirectional copy (options -s / -d)
  REQ_COPY_ALL_BIDIR  = 5,  // Bidirectional copy among all buffers (option -A)
  REQ_COPY_ALL_UNIDIR = 6,  // Unidirectional copy among all buffers (option -a)
  REQ_INVALID         = 7
} Request_Type;
// Test case that parses user options, discovers the agents and memory
// pools of the system and runs / reports copy benchmarks among them
class RocmBandwidthTest : public BaseTest {
public:
// @brief: Constructor for test case of RocmBandwidthTest
RocmBandwidthTest(int argc, char** argv);
// @brief: Destructor for test case of RocmBandwidthTest
virtual ~RocmBandwidthTest();
// @brief: Setup the environment for measurement
virtual void SetUp();
// @brief: Core measurement execution
virtual void Run();
// @brief: Clean up and retrieve the resources
virtual void Close();
// @brief: Display the results
virtual void Display() const;
private:
// @brief: Print Help Menu Screen
void PrintHelpScreen();
// @brief: Discover the topology of pools on Rocm Platform
void DiscoverTopology();
// @brief: Populate link weight for the set of agents
void DiscoverLinkWeight();
// @brief: Populates the access matrix
void PopulateAccessMatrix();
// @brief: Print topology info
void PrintTopology();
// @brief: Print link matrix
void PrintLinkMatrix() const;
// @brief: Print access matrix
void PrintAccessMatrix() const;
// @brief: Print info on agents in system
void PrintAgentsList();
// @brief: Print info on memory pools in system
void PrintPoolsList();
// @brief: Parse the arguments provided by user. Also initializes
// Roc Runtime and discovers the topology of the system
void ParseArguments();
// @brief: Print the list of transactions
void PrintTransList();
// @brief: Run read/write requests of users
void RunIOBenchmark(async_trans_t& trans);
// @brief: Run copy requests of users
void RunCopyBenchmark(async_trans_t& trans);
// @brief: Get iteration number
uint32_t GetIterationNum();
// @brief: Get the mean copy time
double GetMeanTime(std::vector<double>& vec);
// @brief: Get the min copy time
double GetMinTime(std::vector<double>& vec);
// @brief: Display Benchmark result
void DisplayDevInfo() const;
void DisplayIOTime(async_trans_t& trans) const;
void DisplayCopyTime(async_trans_t& trans) const;
void DisplayCopyTimeMatrix(bool peak) const;
private:
// @brief: Validate the arguments passed in by user
bool ValidateArguments();
bool ValidateReadReq();
bool ValidateWriteReq();
bool ValidateReadOrWriteReq(vector<uint32_t>& in_list);
bool ValidateBidirCopyReq();
bool ValidateUnidirCopyReq();
bool ValidateCopyReq(vector<uint32_t>& in_list);
// @brief: Print error message when a read / write or a copy
// request cannot be serviced
void PrintIOAccessError(uint32_t agent_idx, uint32_t pool_idx);
void PrintCopyAccessError(uint32_t src_pool_idx, uint32_t dst_pool_idx);
bool PoolIsPresent(vector<uint32_t>& in_list);
bool PoolIsDuplicated(vector<uint32_t>& in_list);
// @brief: Builds a list of transaction per user request
void ComputeCopyTime(async_trans_t& trans);
bool BuildTransList();
bool BuildReadTrans();
bool BuildWriteTrans();
bool BuildBidirCopyTrans();
bool BuildUnidirCopyTrans();
bool BuildAllPoolsBidirCopyTrans();
bool BuildAllPoolsUnidirCopyTrans();
bool BuildReadOrWriteTrans(uint32_t req_type,
vector<uint32_t>& in_list);
bool BuildCopyTrans(uint32_t req_type,
vector<uint32_t>& src_list,
vector<uint32_t>& dst_list);
void AllocateCopyBuffers(uint32_t size,
uint32_t src_dev_idx, uint32_t dst_dev_idx,
void*& src, hsa_amd_memory_pool_t src_pool,
void*& dst, hsa_amd_memory_pool_t dst_pool,
hsa_agent_t src_agent, hsa_agent_t dst_agent,
hsa_signal_t& signal);
void ReleaseBuffers(bool bidir,
void* src_fwd, void* src_rev,
void* dst_fwd, void* dst_rev,
hsa_signal_t signal_fwd, hsa_signal_t signal_rev);
double GetGpuCopyTime(bool bidir, hsa_signal_t signal_fwd, hsa_signal_t signal_rev);
void AllocateHostBuffers(uint32_t size,
uint32_t src_dev_idx,
uint32_t dst_dev_idx,
void*& src, void*& dst,
void* buf_src, void* buf_dst,
hsa_agent_t src_agent, hsa_agent_t dst_agent,
hsa_signal_t& signal);
void copy_buffer(void* dst, hsa_agent_t dst_agent,
void* src, hsa_agent_t src_agent,
size_t size, hsa_signal_t signal);
bool FilterCpuPool(uint32_t req_type,
hsa_device_type_t dev_type,
bool fine_grained);
// @brief: Check if agent can access memory pool, if so, set
// access to the agent, if not, exit
void AcquireAccess(hsa_agent_t agent, void* ptr);
void AcquirePoolAcceses(uint32_t src_dev_idx, hsa_agent_t src_agent, void* src,
uint32_t dst_dev_idx, hsa_agent_t dst_agent, void* dst);
// Functions to find agents and memory pools and update
// relevant data structures used to maintain system topology
friend hsa_status_t AgentInfo(hsa_agent_t agent, void* data);
friend hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data);
protected:
// More variables declared for testing
// Used to help count agent_info
uint32_t agent_index_;
// List used to store agent info, indexed by agent_index_
vector<agent_info_t> agent_list_;
// Used to help count pool_info_t
uint32_t pool_index_;
// List used to store pool_info_t, indexed by pool_index_
vector<pool_info_t> pool_list_;
// List used to store agent_pool_info_t
vector<agent_pool_info_t> agent_pool_list_;
// List of agents involved in a bidirectional copy operation
// Size of the list cannot exceed the number of agents
// reported by the system
vector<uint32_t> bidir_list_;
// List of source agents in a unidirectional copy operation
// Size of the list cannot exceed the number of agents
// reported by the system
vector<uint32_t> src_list_;
// List of destination agents in a unidirectional copy operation
// Size of the list cannot exceed the number of agents
// reported by the system
vector<uint32_t> dst_list_;
// List of agents involved in read operation. Has
// two agents, the first agent hosts the memory pool
// while the second agent executes the read operation
vector<uint32_t> read_list_;
// List of agents involved in write operation. Has
// two agents, the first agent hosts the memory pool
// while the second agent executes the write operation
vector<uint32_t> write_list_;
// List of sizes to use in copy and read/write transactions
// User specifies sizes in terms of Megabytes, ParseArguments()
// scales user provided values by 1024 * 1024
vector<uint32_t> size_list_;
// Type of service requested by user
uint32_t req_read_;
uint32_t req_write_;
uint32_t req_copy_bidir_;
uint32_t req_copy_unidir_;
uint32_t req_copy_all_bidir_;
uint32_t req_copy_all_unidir_;
// List used to store transactions per user request
vector<async_trans_t> trans_list_;
// List used to track agents involved in various transactions
uint32_t* active_agents_list_;
// Matrix used to track Access among agents, the entry for a
// pair of agents is at index (src * agent_index_) + dst
uint32_t* access_matrix_;
// Matrix of link weights among agents, indexed like the access
// matrix and printed under the title "Device Numa Distance"
uint32_t* link_matrix_;
// Env key to determine if Fine-grained or
// Coarse-grained pool should be filtered out
char* skip_fine_grain_;
// Env key to determine if the run should block
// or actively wait on completion signal
char* bw_blocking_run_;
// Env key to determine if the run is a default one
char* bw_default_run_;
// Variable to store argument number
uint32_t usr_argc_;
// Pointer to store address of argument text
char** usr_argv_;
// Flag to print Cpu time
bool print_cpu_time_;
// Determines if user has requested verification
bool verify_;
// CPU agent used for verification
int32_t cpu_index_;
hsa_agent_t cpu_agent_;
// System region
hsa_amd_memory_pool_t sys_pool_;
// Buffer sizes used when user does not provide any via option -m
// NOTE(review): presumably specified in bytes since these values
// are not scaled by ParseArguments() - confirm against definition
static const uint32_t SIZE_LIST[20];
};
#endif // __ROC_BANDWIDTH_TEST_H__
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include <stdlib.h>
#include <assert.h>
#include <algorithm>
#include <unistd.h>
#include <cctype>
#include <sstream>
// @brief: Read / Write requests are not supported. Reports this to
// the user and terminates the process with exit code 1. The
// transaction passed in is not used.
void RocmBandwidthTest::RunIOBenchmark(async_trans_t& trans) {
  static const char* const kUnsupportedMsg = "Unsupported Request - Read / Write";
  std::cout << kUnsupportedMsg << std::endl;
  exit(1);
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include <algorithm>
#include <sstream>
#include <unistd.h>
// Parse option value string. The string has one or more decimal
// values separated by comma - "3,6,9,12,15".
//
// @param value Null terminated option value string
// @param value_list Output list, every value that is parsed
// successfully is appended to it in the order it appears
//
// @return true if the whole string is a well formed list, false
// otherwise i.e. string is null or empty, a value is missing or is
// not a number ("1,,2", "1,2,", "abc") or a value is followed by
// something other than a comma
static bool ParseOptionValue(char* value, std::vector<uint32_t>&value_list) {

  // A null string cannot be streamed, treat it as malformed
  if (value == NULL) {
    return false;
  }

  // Capture the option value string
  std::stringstream stream;
  stream << value;

  uint32_t token = 0;
  while (true) {

    // Read the option value. Bail out without updating the output
    // list if a number could not be extracted
    if (!(stream >> token)) {
      return false;
    }

    // Update output list with the value
    value_list.push_back(token);

    // A value followed by end of string completes the list
    if (stream.peek() == std::stringstream::traits_type::eof()) {
      return true;
    }

    // Otherwise the value must be followed by the delimiter, which
    // in turn must be followed by another value
    if (stream.get() != ',') {
      return false;
    }
  }
}
// @brief: Parse the arguments provided by user and capture the
// requests they encode. The steps performed are:
//
//   1. Walk the command line with getopt and update the various
//      request flags and lists
//   2. Print help screen and exit if user asked for it or if the
//      command line is malformed
//   3. Initialize Roc Runtime and discover the topology of system
//   4. Print topology and exit if user asked for it
//   5. Populate the lists of buffers for "copy all" requests
//   6. Populate the list of buffer sizes, in sorted order
//
// All exit paths of this method use exit code zero.
void RocmBandwidthTest::ParseArguments() {

  bool print_help = false;
  bool copy_all_bi = false;
  bool copy_all_uni = false;
  bool print_topology = false;

  // This will suppress prints from getopt implementation
  // In case of error, it will return the character '?' as
  // return value.
  opterr = 0;

  int opt;
  bool status;

  while ((opt = getopt(usr_argc_, usr_argv_, "hvctaAb:s:d:r:w:m:")) != -1) {
    switch (opt) {

      // Print help screen
      case 'h':
        print_help = true;
        break;

      // Print Cpu time
      case 'c':
        print_cpu_time_ = true;
        break;

      // Print system topology
      case 't':
        print_topology = true;
        break;

      // Set verification flag to true
      case 'v':
        verify_ = true;
        break;

      // Collect list of agents involved in bidirectional copy operation
      case 'b':
        status = ParseOptionValue(optarg, bidir_list_);
        if (status) {
          req_copy_bidir_ = REQ_COPY_BIDIR;
          break;
        }
        print_help = true;
        break;

      // Collect list of source buffers involved in unidirectional copy operation
      case 's':
        status = ParseOptionValue(optarg, src_list_);
        if (status) {
          req_copy_unidir_ = REQ_COPY_UNIDIR;
          break;
        }
        print_help = true;
        break;

      // Collect list of destination buffers involved in unidirectional copy operation
      case 'd':
        status = ParseOptionValue(optarg, dst_list_);
        if (status) {
          req_copy_unidir_ = REQ_COPY_UNIDIR;
          break;
        }
        print_help = true;
        break;

      // Collect request to read a buffer
      case 'r':
        req_read_ = REQ_READ;
        status = ParseOptionValue(optarg, read_list_);
        if (status == false) {
          print_help = true;
        }
        break;

      // Collect request to write a buffer
      case 'w':
        req_write_ = REQ_WRITE;
        status = ParseOptionValue(optarg, write_list_);
        if (status == false) {
          print_help = true;
        }
        break;

      // Size of buffers to use in copy and read/write operations
      case 'm':
        status = ParseOptionValue(optarg, size_list_);
        if (status == false) {
          print_help = true;
        }
        break;

      // Enable Unidirectional copy among all valid buffers
      case 'a':
        copy_all_uni = true;
        req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
        break;

      // Enable Bidirectional copy among all valid buffers
      case 'A':
        copy_all_bi = true;
        req_copy_all_bidir_ = REQ_COPY_ALL_BIDIR;
        break;

      // getopt implementation returns the value of the unknown
      // option or an option with missing operand in the variable
      // optopt. Report that option, not the '?' returned by getopt
      case '?':
        std::cout << "Argument is illegal or needs value: "
                  << static_cast<char>(optopt) << std::endl;
        // Every option that is followed by ':' in the option string
        // passed to getopt requires an argument
        if ((optopt == 'b' || optopt == 's' || optopt == 'd' ||
             optopt == 'r' || optopt == 'w' || optopt == 'm')) {
          std::cout << "Error: Option -b -s -d -r -w and -m require argument" << std::endl;
        }
        print_help = true;
        break;

      default:
        print_help = true;
        break;
    }
  }

  // User specifies buffer sizes in Megabytes while they are stored
  // in bytes as 32-bit values. Reject sizes that would wrap around
  // when scaled, instead of silently benchmarking a bogus size
  const uint32_t bytes_per_mb = 1024 * 1024;
  const uint32_t max_size_in_mb = 0xFFFFFFFFu / bytes_per_mb;
  for (size_t idx = 0; idx < size_list_.size(); idx++) {
    if (size_list_[idx] > max_size_in_mb) {
      std::cout << "Error: Buffer size of " << size_list_[idx]
                << " Megabytes exceeds the supported maximum of "
                << max_size_in_mb << " Megabytes" << std::endl;
      print_help = true;
    }
  }

  // Print help screen if user option has "-h" or if
  // the command line could not be parsed
  if (print_help) {
    PrintHelpScreen();
    exit(0);
  }

  // Initialize Roc Runtime
  err_ = hsa_init();
  ErrorCheck(err_);

  // Discover the topology of RocR agent in system
  DiscoverTopology();

  // Print system topology if user option has "-t"
  if (print_topology) {
    PrintTopology();
    PrintAccessMatrix();
    PrintLinkMatrix();
    exit(0);
  }

  // Invalidate request if user has requested full
  // copying for both unidirectional and bidirectional
  if ((copy_all_bi) && (copy_all_uni)) {
    PrintHelpScreen();
    exit(0);
  }

  // Initialize buffer list if full copying in unidirectional mode is enabled
  if (copy_all_uni) {
    uint32_t size = pool_list_.size();
    for (uint32_t idx = 0; idx < size; idx++) {
      src_list_.push_back(idx);
      dst_list_.push_back(idx);
    }
  }

  // Initialize buffer list if full copying in bidirectional mode is enabled
  if (copy_all_bi) {
    uint32_t size = pool_list_.size();
    for (uint32_t idx = 0; idx < size; idx++) {
      bidir_list_.push_back(idx);
    }
  }

  // Initialize the list of buffer sizes to use in copy/read/write operations
  // For All Copy operations use only one buffer size
  if (size_list_.size() == 0) {
    uint32_t size_len = sizeof(SIZE_LIST)/sizeof(uint32_t);
    for (uint32_t idx = 0; idx < size_len; idx++) {
      if ((copy_all_bi) || (copy_all_uni)) {
        // The one size used by All Copy operations is entry 16
        if (idx == 16) {
          size_list_.push_back(SIZE_LIST[idx]);
        }
      } else {
        size_list_.push_back(SIZE_LIST[idx]);
      }
    }
  } else {
    // Convert user provided sizes from Megabytes to bytes. The
    // values were validated above so the scaling cannot overflow
    uint32_t size_len = size_list_.size();
    for (uint32_t idx = 0; idx < size_len; idx++) {
      size_list_[idx] = size_list_[idx] * bytes_per_mb;
    }
  }

  std::sort(size_list_.begin(), size_list_.end());
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
// @brief: Print Help Menu Screen i.e. the list of options
// supported, one option per line
void RocmBandwidthTest::PrintHelpScreen() {

  // One entry per supported option, printed in this order
  static const char* const kOptionHelp[] = {
    "\t -h Prints the help screen",
    "\t -c Time the operation using CPU Timers",
    "\t -v Verifies the output of transfer operations",
    "\t -t Prints system topology and allocatable memory info",
    "\t -m List of buffer sizes to use, specified in Megabytes",
    "\t -b List devices to use in bidirectional copy operations",
    "\t -s List of source devices to use in copy unidirectional operations",
    "\t -d List of destination devices to use in unidirectional copy operations",
    "\t -a Perform Unidirectional Copy involving all device combinations",
    "\t -A Perform Bidirectional Copy involving all device combinations",
  };
  const size_t option_count = sizeof(kOptionHelp) / sizeof(kOptionHelp[0]);

  // Header of the help screen
  std::cout << std::endl;
  std::cout << "Supported arguments:" << std::endl;
  std::cout << std::endl;

  for (size_t opt_idx = 0; opt_idx < option_count; opt_idx++) {
    std::cout << kOptionHelp[opt_idx] << std::endl;
  }

  // Footer of the help screen
  std::cout << std::endl << std::endl;
}
// @brief: Print the topology of Memory Pools and Devices present in system
void RocmBandwidthTest::PrintTopology() {
uint32_t format = 10;
size_t count = agent_pool_list_.size();
std::cout << std::endl;
for (uint32_t idx = 0; idx < count; idx++) {
agent_pool_info_t node = agent_pool_list_.at(idx);
std::cout.width(format);
std::cout << "";
std::cout.width(format);
// Print device info
std::cout << "Device Index: "
<< node.agent.index_ << std::endl;
std::cout.width(format);
std::cout << "";
std::cout.width(format);
if (HSA_DEVICE_TYPE_CPU == node.agent.device_type_)
std::cout << " Device Type: CPU" << std::endl;
else if (HSA_DEVICE_TYPE_GPU == node.agent.device_type_)
std::cout << " Device Type: GPU" << std::endl;
// Print pool info
size_t pool_count = node.pool_list.size();
for (uint32_t jdx = 0; jdx < pool_count; jdx++) {
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << " Allocatable Memory Size (KB): "
<< node.pool_list.at(jdx).allocable_size_ / 1024 << std::endl;
/*
std::cout << " is fine-grained: "
<< node.pool_list.at(jdx).is_fine_grained_ << std::endl;
*/
}
std::cout << std::endl;
}
std::cout << std::endl;
}
// @brief: Print access matrix as a table titled "Device Access". The
// table has one row and one column per device. A matrix entry of 2
// is printed as 1, all other entries are printed as they are.
// Switches std::cout to left adjusted output and leaves it that way.
void RocmBandwidthTest::PrintAccessMatrix() const {

  const uint32_t col_width = 10;
  std::cout.setf(ios::left);

  // Title of the table, preceded by an empty field as left margin
  std::cout.width(col_width);
  std::cout << "";
  std::cout.width(col_width);
  std::cout << "Device Access";
  std::cout << std::endl << std::endl;

  // Header row lists the index of each device
  std::cout.width(col_width);
  std::cout << "";
  std::cout.width(col_width);
  std::cout << "D/D";
  for (uint32_t col = 0; col < agent_index_; col++) {
    std::cout.width(col_width);
    std::cout << col;
  }
  std::cout << std::endl << std::endl;

  // One row per device, begins with index of the device
  for (uint32_t row = 0; row < agent_index_; row++) {
    std::cout.width(col_width);
    std::cout << "";
    std::cout.width(col_width);
    std::cout << row;

    for (uint32_t col = 0; col < agent_index_; col++) {
      const uint32_t entry = access_matrix_[(row * agent_index_) + col];
      std::cout.width(col_width);
      // Fold the value 2 into 1
      std::cout << ((entry == 2) ? 1u : entry);
    }
    std::cout << std::endl << std::endl;
  }
  std::cout << std::endl;
}
// @brief: Print link matrix as a table titled "Device Numa Distance".
// The table has one row and one column per device, each cell shows
// the entry of link matrix for that pair of devices.
// Switches std::cout to left adjusted output and leaves it that way.
void RocmBandwidthTest::PrintLinkMatrix() const {

  const uint32_t col_width = 10;
  std::cout.setf(ios::left);

  // Title of the table, preceded by an empty field as left margin
  std::cout.width(col_width);
  std::cout << "";
  std::cout.width(col_width);
  std::cout << "Device Numa Distance";
  std::cout << std::endl << std::endl;

  // Header row lists the index of each device
  std::cout.width(col_width);
  std::cout << "";
  std::cout.width(col_width);
  std::cout << "D/D";
  for (uint32_t col = 0; col < agent_index_; col++) {
    std::cout.width(col_width);
    std::cout << col;
  }
  std::cout << std::endl << std::endl;

  // One row per device, begins with index of the device
  for (uint32_t row = 0; row < agent_index_; row++) {
    std::cout.width(col_width);
    std::cout << "";
    std::cout.width(col_width);
    std::cout << row;

    for (uint32_t col = 0; col < agent_index_; col++) {
      std::cout.width(col_width);
      std::cout << link_matrix_[(row * agent_index_) + col];
    }
    std::cout << std::endl << std::endl;
  }
  std::cout << std::endl;
}
// @brief: Print info on Devices in system
void RocmBandwidthTest::PrintAgentsList() {
size_t count = agent_pool_list_.size();
for (uint32_t idx = 0; idx < count; idx++) {
std::cout << std::endl;
agent_pool_info_t node = agent_pool_list_.at(idx);
std::cout << "Device Index: "
<< node.agent.index_ << std::endl;
if (HSA_DEVICE_TYPE_CPU == node.agent.device_type_)
std::cout << " Device Type: CPU" << std::endl;
else if (HSA_DEVICE_TYPE_GPU == node.agent.device_type_)
std::cout << " Device Type: GPU" << std::endl;
}
std::cout << std::endl;
}
// @brief: Print info on memory pools in system
void RocmBandwidthTest::PrintPoolsList() {
size_t pool_count = pool_list_.size();
for (uint32_t jdx = 0; jdx < pool_count; jdx++) {
std::cout << std::endl;
std::cout << "Memory Pool Idx: "
<< pool_list_.at(jdx).index_ << std::endl;
std::cout << " max allocable size in KB: "
<< pool_list_.at(jdx).allocable_size_ / 1024 << std::endl;
std::cout << " segment id: "
<< pool_list_.at(jdx).segment_ << std::endl;
std::cout << " is kernarg: "
<< pool_list_.at(jdx).is_kernarg_ << std::endl;
std::cout << " is fine-grained: "
<< pool_list_.at(jdx).is_fine_grained_ << std::endl;
std::cout << " accessible to owner: "
<< pool_list_.at(jdx).owner_access_ << std::endl;
std::cout << " accessible to all by default: "
<< pool_list_.at(jdx).access_to_all_ << std::endl;
}
std::cout << std::endl;
}
// @brief: Print the list of transactions that will be executed
void RocmBandwidthTest::PrintTransList() {
size_t count = trans_list_.size();
for (uint32_t idx = 0; idx < count; idx++) {
async_trans_t trans = trans_list_.at(idx);
std::cout << std::endl;
std::cout << " Transaction Id: " << idx << std::endl;
std::cout << " Transaction Type: " << trans.req_type_ << std::endl;
if ((trans.req_type_ == REQ_READ) || (trans.req_type_ == REQ_WRITE)) {
std::cout << "Rocm Kernel used by Transaction: " << trans.kernel.code_ << std::endl;
std::cout << "Rocm Buffer index Used by Kernel: " << trans.kernel.pool_idx_ << std::endl;
std::cout << " Rocm Device used for Execution: " << trans.kernel.agent_idx_ << std::endl;
}
if ((trans.req_type_ == REQ_COPY_BIDIR) || (trans.req_type_ == REQ_COPY_UNIDIR)) {
std::cout << " Src Buffer used in Copy: " << trans.copy.src_idx_ << std::endl;
std::cout << " Dst Buffer used in Copy: " << trans.copy.dst_idx_ << std::endl;
}
if ((trans.req_type_ == REQ_COPY_ALL_BIDIR) || (trans.req_type_ == REQ_COPY_ALL_UNIDIR)) {
std::cout << " Src Memory Pool used in Copy: " << trans.copy.src_idx_ << std::endl;
std::cout << " Dst Memory Pool used in Copy: " << trans.copy.dst_idx_ << std::endl;
}
}
std::cout << std::endl;
}
// @brief: Prints error message when a request to copy between
// source buffer and destination buffer is not possible
void RocmBandwidthTest::PrintCopyAccessError(uint32_t src_idx, uint32_t dst_idx) {
// Retrieve Roc runtime handles for Src memory pool and devices
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
// Retrieve Roc runtime handles for Dst memory pool and devices
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
std::cout << std::endl;
std::cout << "Index of Src Memory: " << src_idx << std::endl;
std::cout << "Index of Dst Memory: " << dst_idx << std::endl;
std::cout << "Index of Src Device: " << src_dev_idx << std::endl;
std::cout << "Index of Dst Device: " << dst_dev_idx << std::endl;
std::cout << "Device Type of Src Device: " << src_dev_type << std::endl;
std::cout << "Device Type of Dst Device: " << dst_dev_type << std::endl;
std::cout << "Rocm Device hosting Src Memory cannot ACCESS Dst Memory" << std::endl;
std::cout << std::endl;
}
// @brief: Prints error message when a request to read / write from
// a buffer of a device is not possible
void RocmBandwidthTest::PrintIOAccessError(uint32_t exec_idx, uint32_t pool_idx) {
// Retrieve device type of executing device
hsa_device_type_t exec_dev_type = agent_list_[exec_idx].device_type_;
// Retrieve device type of memory pool's device
uint32_t pool_dev_idx = pool_list_[pool_idx].agent_index_;
hsa_device_type_t pool_dev_type = agent_list_[pool_dev_idx].device_type_;
std::cout << std::endl;
std::cout << "Index of Executing Device: " << exec_idx << std::endl;
std::cout << "Device Type of Executing Device: " << exec_dev_type << std::endl;
std::cout << "Index of Buffer: " << pool_idx << std::endl;
std::cout << "Index of Buffer's Device: " << pool_dev_idx << std::endl;
std::cout << "Device Type Hosting Buffer: " << pool_dev_type << std::endl;
std::cout << "Rocm Device executing Read / Write request cannot ACCESS Buffer" << std::endl;
std::cout << std::endl;
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include <iomanip>
#include <sstream>
#include <algorithm>
// Print one row of the benchmark table to stdout.
//
// @param size           Copy size in bytes; shown in whole KB below 1 MB,
//                       otherwise in whole MB (integer division)
// @param avg_time       Average time, multiplied by 1e6 for display
// @param bandwidth      Average bandwidth, printed as given
// @param min_time       Minimum time, multiplied by 1e6 for display
// @param peak_bandwidth Peak bandwidth, printed as given
//
// Each value occupies a 15 character column. The fixed-point notation and
// the precision of 6 set here remain in effect on std::cout afterwards.
static void printRecord(uint32_t size, double avg_time,
                        double bandwidth, double min_time,
                        double peak_bandwidth) {
  const uint32_t kOneKB = 1024;
  const uint32_t kOneMB = 1024 * 1024;
  const int kColumnWidth = 15;

  // Build the size label first so it can be padded as a single field
  std::ostringstream label;
  if (size >= kOneMB) {
    label << size / kOneMB << " MB";
  } else {
    label << size / kOneKB << " KB";
  }

  std::cout << std::setprecision(6) << std::fixed;
  std::cout << std::setw(kColumnWidth) << label.str()
            << std::setw(kColumnWidth) << (avg_time * 1e6)
            << std::setw(kColumnWidth) << bandwidth
            << std::setw(kColumnWidth) << (min_time * 1e6)
            << std::setw(kColumnWidth) << peak_bandwidth
            << std::endl;
}
// Print the banner and the column headings that precede the rows of a
// copy benchmark.
//
// @param src_pool_id    Value shown as "Src Device Id"
// @param src_agent_type Shown as "Cpu" when 0, as "Gpu" for any other value
// @param dst_pool_id    Value shown as "Dst Device Id"
// @param dst_agent_type Shown as "Cpu" when 0, as "Gpu" for any other value
//
// The left-adjust flag set for the column headings stays set on std::cout
// after this function returns.
static void printCopyBanner(uint32_t src_pool_id, uint32_t src_agent_type,
                            uint32_t dst_pool_id, uint32_t dst_agent_type) {
  const char* src_label = (src_agent_type == 0) ? "Cpu" : "Gpu";
  const char* dst_label = (dst_agent_type == 0) ? "Cpu" : "Gpu";

  // Title line
  std::cout << std::endl;
  std::cout << "================" << " Benchmark Result"
            << " ================" << std::endl;

  // Source line
  std::cout << "================"
            << " Src Device Id: " << src_pool_id
            << " Src Device Type: " << src_label
            << " ================" << std::endl;

  // Destination line
  std::cout << "================"
            << " Dst Device Id: " << dst_pool_id
            << " Dst Device Type: " << dst_label
            << " ================" << std::endl;
  std::cout << std::endl;

  // Column headings, each padded to the same width as the data rows
  const int kColumnWidth = 15;
  const char* const headings[] = {
    "Data Size", "Avg Time(us)", "Avg BW(GB/s)",
    "Min Time(us)", "Peak BW(GB/s)"
  };
  std::cout.setf(std::ios::left);
  for (unsigned col = 0; col < sizeof(headings) / sizeof(headings[0]); col++) {
    std::cout.width(kColumnWidth);
    std::cout << headings[col];
  }
  std::cout << std::endl;
}
// Return the smallest sample in vec.
//
// @param vec Timing samples; sorted in ascending order as a side effect
// @return    The first element after sorting
// @throws    std::out_of_range (from vector::at) when vec is empty
double RocmBandwidthTest::GetMinTime(std::vector<double>& vec) {
  // Ascending sort places the minimum at the front of the vector
  std::sort(vec.begin(), vec.end());
  const double smallest = vec.at(0);
  return smallest;
}
// Return a trimmed mean of the timing samples in vec.
//
// The samples are sorted in ascending order, then trimmed in three steps:
//   1. the single smallest sample is dropped,
//   2. the next (num_iteration_ * 0.1) smallest samples are dropped,
//   3. at most num_iteration_ samples are kept; larger ones are dropped.
// The mean of what remains is returned.
//
// @param vec Timing samples; sorted and trimmed in place as a side effect
// @return    Mean of the retained samples, or 0.0 when none remain
//
// Every trim range is clamped to the current size of the vector. Without
// the clamping, a vector shorter than expected would make the erase calls
// run past end() (undefined behaviour) and an empty result would divide
// zero by zero.
double RocmBandwidthTest::GetMeanTime(std::vector<double>& vec) {
  if (vec.empty()) {
    return 0.0;
  }

  std::sort(vec.begin(), vec.end());

  // Step 1: drop the single smallest sample
  vec.erase(vec.begin());

  // Step 2: drop the next tenth of an iteration count from the low end
  size_t low_trim = static_cast<size_t>(num_iteration_ * 0.1);
  if (low_trim > vec.size()) {
    low_trim = vec.size();
  }
  vec.erase(vec.begin(), vec.begin() + low_trim);

  // Step 3: keep at most num_iteration_ samples
  const size_t keep = static_cast<size_t>(num_iteration_);
  if (keep < vec.size()) {
    vec.erase(vec.begin() + keep, vec.end());
  }

  if (vec.empty()) {
    return 0.0;
  }

  double total = 0.0;
  for (size_t it = 0; it < vec.size(); it++) {
    total += vec[it];
  }
  return total / vec.size();
}
void RocmBandwidthTest::Display() const {
// Iterate through list of transactions and display its timing data
uint32_t trans_size = trans_list_.size();
if (trans_size == 0) {
std::cout << std::endl;
std::cout << " Invalid Request" << std::endl;
std::cout << std::endl;
return;
}
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
if (bw_default_run_ == NULL) {
DisplayDevInfo();
PrintAccessMatrix();
}
DisplayCopyTimeMatrix(true);
return;
}
if (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) {
DisplayDevInfo();
PrintAccessMatrix();
PrintLinkMatrix();
DisplayCopyTimeMatrix(true);
return;
}
for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx];
if ((trans.req_type_ == REQ_COPY_BIDIR) ||
(trans.req_type_ == REQ_COPY_UNIDIR)) {
DisplayCopyTime(trans);
}
if ((trans.req_type_ == REQ_READ) ||
(trans.req_type_ == REQ_WRITE)) {
DisplayIOTime(trans);
}
}
std::cout << std::endl;
}
// Report the timing data of a Read / Write transaction.
// The body is empty: nothing is printed and trans is not used.
void RocmBandwidthTest::DisplayIOTime(async_trans_t& trans) const {
}
// Print the benchmark banner for a copy transaction followed by one row
// per entry of size_list_.
//
// @param trans Copy transaction; its avg_time_, avg_bandwidth_, min_time_
//              and peak_bandwidth_ vectors are indexed in step with
//              size_list_, so each must hold an entry per size
void RocmBandwidthTest::DisplayCopyTime(async_trans_t& trans) const {
  // Identify the pools involved and the type of device owning each
  const uint32_t src_pool = trans.copy.src_idx_;
  const uint32_t dst_pool = trans.copy.dst_idx_;
  const uint32_t src_owner = pool_list_[src_pool].agent_index_;
  const uint32_t dst_owner = pool_list_[dst_pool].agent_index_;
  const hsa_device_type_t src_kind = agent_list_[src_owner].device_type_;
  const hsa_device_type_t dst_kind = agent_list_[dst_owner].device_type_;

  // Print Benchmark Header
  printCopyBanner(src_pool, src_kind, dst_pool, dst_kind);

  // One row per copy size
  const uint32_t num_sizes = size_list_.size();
  for (uint32_t row = 0; row < num_sizes; row++) {
    printRecord(size_list_[row],
                trans.avg_time_[row],
                trans.avg_bandwidth_[row],
                trans.min_time_[row],
                trans.peak_bandwidth_[row]);
  }
}
void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
double* perf_matrix = new double[agent_index_ * agent_index_]();
uint32_t trans_size = trans_list_.size();
for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx];
uint32_t src_idx = trans.copy.src_idx_;
uint32_t dst_idx = trans.copy.dst_idx_;
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
if (peak) {
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0];
} else {
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.avg_bandwidth_[0];
}
}
uint32_t format = 10;
std::cout.setf(ios::left);
std::cout.width(format);
std::cout << "";
std::cout.width(format);
if ((peak) && (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
std::cout << "Unidirectional peak bandwidth GB/s";
}
if ((peak == false) && (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
std::cout << "Unidirectional average bandwidth GB/s";
}
if ((peak) && (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
std::cout << "Bdirectional peak bandwidth GB/s";
}
if ((peak == false) && (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
std::cout << "Bidirectional average bandwidth GB/s";
}
std::cout << std::endl;
std::cout << std::endl;
std::cout.precision(6);
std::cout << std::fixed;
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "D/D";
format = 12;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
std::cout.width(format);
std::stringstream agent_id;
agent_id << idx0;
std::cout << agent_id.str();
}
std::cout << std::endl;
std::cout << std::endl;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
format = 10;
std::cout.width(format);
std::cout << "";
std::stringstream agent_id;
agent_id << idx0;
std::cout.width(format);
std::cout << agent_id.str();
for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
format = 12;
std::cout.width(format);
double value = perf_matrix[(idx0 * agent_index_) + idx1];
if (value == 0) {
std::cout << "N/A";
} else {
std::cout << perf_matrix[(idx0 * agent_index_) + idx1];
}
}
std::cout << std::endl;
std::cout << std::endl;
}
std::cout << std::endl;
}
// Print the index and name of every device flagged as active in
// active_agents_list_, one per line, indented by ten columns.
//
// active_agents_list_ is dereferenced without a NULL check, so it must
// have been allocated before this is called. The left-adjust flag set
// here stays set on std::cout afterwards.
void RocmBandwidthTest::DisplayDevInfo() const {
  const uint32_t kIndent = 10;

  std::cout.setf(ios::left);
  std::cout << std::endl;
  for (uint32_t dev = 0; dev < agent_index_; dev++) {
    // Only devices flagged with 1 are listed
    if (active_agents_list_[dev] != 1) {
      continue;
    }
    std::cout.width(kIndent);
    std::cout << "";
    std::cout << "Device: " << dev
              << ", " << agent_list_[dev].name_ << std::endl;
  }
  std::cout << std::endl;
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
// @brief: Helper method to iterate through the memory pools of
// an agent and discover its properties.
//
// @param pool Memory pool being visited
// @param data Pointer to the RocmBandwidthTest instance collecting the
//             topology (passed as void* and cast back here)
// @return     Always HSA_STATUS_SUCCESS; a pool that is filtered out is
//             skipped rather than reported as an error
//
// A pool is recorded only if it is in the global segment and allows
// runtime allocation. For a Cpu agent a further filter applies: when
// skip_fine_grain_ is non-NULL fine-grained pools are dropped, otherwise
// coarse-grained pools are dropped. The agent that owns the pool is taken
// to be the last entry of agent_list_.
hsa_status_t MemPoolInfo(hsa_amd_memory_pool_t pool, void* data) {
  RocmBandwidthTest* test = reinterpret_cast<RocmBandwidthTest*>(data);
  hsa_status_t status;

  // Query pools' segment, report only pools from global segment
  hsa_amd_segment_t segment;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SEGMENT, &segment);
  ErrorCheck(status);
  if (segment != HSA_AMD_SEGMENT_GLOBAL) {
    return HSA_STATUS_SUCCESS;
  }

  // Report only pools that allow an allocation by user
  bool alloc_allowed = false;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED, &alloc_allowed);
  ErrorCheck(status);
  if (!alloc_allowed) {
    return HSA_STATUS_SUCCESS;
  }

  // Query the max allocatable size
  size_t max_size = 0;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_SIZE, &max_size);
  ErrorCheck(status);

  // Determine if the pool is accessible to all agents
  bool access_to_all = false;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_ACCESSIBLE_BY_ALL, &access_to_all);
  ErrorCheck(status);

  // Determine type of access the owner agent has to the pool
  agent_info_t& owner_info = test->agent_list_.back();
  hsa_agent_t owner = owner_info.agent_;
  hsa_amd_memory_pool_access_t owner_access;
  status = hsa_amd_agent_memory_pool_get_info(
      owner, pool, HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &owner_access);
  ErrorCheck(status);

  // Decode the global flags into kernarg and fine-grained properties
  uint32_t global_flags = 0;
  status = hsa_amd_memory_pool_get_info(
      pool, HSA_AMD_MEMORY_POOL_INFO_GLOBAL_FLAGS, &global_flags);
  ErrorCheck(status);
  bool is_kernarg =
      (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_KERNARG_INIT & global_flags);
  bool is_fine_grained =
      (HSA_AMD_MEMORY_POOL_GLOBAL_FLAG_FINE_GRAINED & global_flags);

  // Update the pool handle for system memory if kernarg is true
  if (is_kernarg) {
    test->sys_pool_ = pool;
  }

  // For a Cpu agent keep only one kind of pool: coarse-grained when the
  // user asked to skip fine-grained pools, fine-grained otherwise
  if (owner_info.device_type_ == HSA_DEVICE_TYPE_CPU) {
    const bool drop_fine_grained = (test->skip_fine_grain_ != NULL);
    if (is_fine_grained == drop_fine_grained) {
      return HSA_STATUS_SUCCESS;
    }
  }

  // Record the pool in the flat list and in its agent's own list
  pool_info_t pool_info(owner, test->agent_index_, pool,
                        segment, max_size, test->pool_index_,
                        is_fine_grained, is_kernarg,
                        access_to_all, owner_access);
  test->pool_list_.push_back(pool_info);
  test->agent_pool_list_[test->agent_index_].pool_list.push_back(pool_info);
  test->pool_index_++;

  return HSA_STATUS_SUCCESS;
}
// @brief: Helper method to iterate through the agents of
// a system and discover its properties.
//
// @param agent Agent being visited
// @param data  Pointer to the RocmBandwidthTest instance collecting the
//              topology (passed as void* and cast back here)
// @return      Always HSA_STATUS_SUCCESS
//
// For each agent an agent_info_t entry and an agent_pool_info node are
// appended to the instance's lists, the agent's memory pools are visited
// with MemPoolInfo, and agent_index_ is advanced. The status values of the
// product name query and of the pool iteration are stored but not checked.
hsa_status_t AgentInfo(hsa_agent_t agent, void* data) {
  RocmBandwidthTest* test = reinterpret_cast<RocmBandwidthTest*>(data);
  hsa_status_t status;

  // Get the name of the agent (queried and checked; not used further here)
  char agent_name[64];
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_NAME, agent_name);
  ErrorCheck(status);

  // Get device type
  hsa_device_type_t device_type;
  status = hsa_agent_get_info(agent, HSA_AGENT_INFO_DEVICE, &device_type);
  ErrorCheck(status);

  // Capture the handle and index of the Cpu agent
  const bool is_cpu = (device_type == HSA_DEVICE_TYPE_CPU);
  if (is_cpu) {
    test->cpu_agent_ = agent;
    test->cpu_index_ = test->agent_index_;
  }

  // Build the agent record, fill in its product name and add it to the
  // list of agent_info_t objects
  agent_info_t agent_info(agent, test->agent_index_, device_type);
  status = hsa_agent_get_info(
      agent,
      (hsa_agent_info_t)HSA_AMD_AGENT_INFO_PRODUCT_NAME,
      (void *)&agent_info.name_[0]);
  test->agent_list_.push_back(agent_info);

  // Add an agent_pool_info node for this agent; MemPoolInfo appends the
  // agent's pools to it
  agent_pool_info node;
  node.agent = test->agent_list_.back();
  test->agent_pool_list_.push_back(node);

  // Visit the agent's pools before moving on to the next agent index
  status = hsa_amd_agent_iterate_memory_pools(agent, MemPoolInfo, test);
  test->agent_index_++;

  return HSA_STATUS_SUCCESS;
}
void RocmBandwidthTest::PopulateAccessMatrix() {
// Allocate memory to hold access lists
access_matrix_ = new uint32_t[agent_index_ * agent_index_]();
hsa_status_t status;
uint32_t size = pool_list_.size();
for (uint32_t src_idx = 0; src_idx < size; src_idx++) {
// Determine if the pool belongs to Cpu and is coarse-grained
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
if (src_dev_type == HSA_DEVICE_TYPE_CPU) {
bool src_fine_grained = pool_list_[src_idx].is_fine_grained_;
if (src_fine_grained == false) {
continue;
}
}
*/
hsa_agent_t src_agent = pool_list_[src_idx].owner_agent_;
hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_;
for (uint32_t dst_idx = 0; dst_idx < size; dst_idx++) {
// Determine if the pool belongs to Cpu and is coarse-grained
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
if (dst_dev_type == HSA_DEVICE_TYPE_CPU) {
bool dst_fine_grained = pool_list_[dst_idx].is_fine_grained_;
if (dst_fine_grained == false) {
continue;
}
}
*/
hsa_agent_t dst_agent = pool_list_[dst_idx].owner_agent_;
hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_;
// Determine if accessibility to dst pool for src agent is not denied
hsa_amd_memory_pool_access_t access1;
status = hsa_amd_agent_memory_pool_get_info(src_agent, dst_pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access1);
ErrorCheck(status);
// Determine if accessibility to src pool for dst agent is not denied
hsa_amd_memory_pool_access_t access2;
status = hsa_amd_agent_memory_pool_get_info(dst_agent, src_pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access2);
// Access between the two agents is Non-Existent
if ((access1 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) &&
(access2 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)) {
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 0;
}
// Access between the two agents is Unidirectional
if ((access1 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) ||
(access2 == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)) {
if ((src_dev_type == HSA_DEVICE_TYPE_GPU) &&
(dst_dev_type == HSA_DEVICE_TYPE_GPU)) {
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 0;
} else {
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 1;
}
}
// Access between the two agents is Bidirectional
if ((access1 != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) &&
(access2 != HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED)) {
access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx] = 2;
}
}
}
}
// Discover the agents and memory pools of the system, then derive the
// access matrix and the link weights from them.
//
// The status of the agent iteration is stored in err_ and is not checked
// here before the dependent steps run.
void RocmBandwidthTest::DiscoverTopology() {
  // Step 1: populate the lists of agents and pools via the AgentInfo
  // callback, which receives this object as its user data
  err_ = hsa_iterate_agents(AgentInfo, this);

  // Step 2: populate the access matrix from the discovered pools
  PopulateAccessMatrix();

  // Step 3: record the link weight between each pair of agents
  DiscoverLinkWeight();
}
void RocmBandwidthTest::DiscoverLinkWeight() {
// Allocate space if it is first time
if (link_matrix_ == NULL) {
link_matrix_ = new uint32_t[agent_index_ * agent_index_]();
}
agent_info_t agent_info;
hsa_agent_t agent1;
hsa_agent_t agent2;
hsa_amd_memory_pool_link_info_t link_info = {0};
for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
agent1 = agent_list_[idx1].agent_;
for (uint32_t idx2 = 0; idx2 < agent_index_; idx2++) {
if (idx1 == idx2) {
link_matrix_[(idx1 *agent_index_) + idx2] = 0;
continue;
}
if (agent_pool_list_[idx2].pool_list.size() != 0) {
hsa_amd_memory_pool_t& pool = agent_pool_list_[idx2].pool_list[0].pool_;
agent2 = agent_pool_list_[idx2].agent.agent_;
err_ = hsa_amd_agent_memory_pool_get_info(agent1, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, &link_info);
link_matrix_[(idx1 *agent_index_) + idx2] = link_info.numa_distance;
}
}
}
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
bool RocmBandwidthTest::BuildReadOrWriteTrans(uint32_t req_type,
vector<uint32_t>& in_list) {
// Validate the list of pool-agent tuples
hsa_status_t status;
hsa_amd_memory_pool_access_t access;
uint32_t list_size = in_list.size();
for (uint32_t idx = 0; idx < list_size; idx+=2) {
uint32_t pool_idx = in_list[idx];
uint32_t exec_idx = in_list[idx + 1];
// Retrieve Roc runtime handles for memory pool and agent
hsa_agent_t exec_agent = agent_list_[exec_idx].agent_;
hsa_amd_memory_pool_t pool = pool_list_[pool_idx].pool_;
// Determine agent can access the memory pool
status = hsa_amd_agent_memory_pool_get_info(exec_agent, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_ACCESS, &access);
ErrorCheck(status);
// Determine if accessibility to agent is not denied
if (access == HSA_AMD_MEMORY_POOL_ACCESS_NEVER_ALLOWED) {
PrintIOAccessError(exec_idx, pool_idx);
return false;
}
// Agent has access, build an instance of transaction
// and add it to the list of transactions
async_trans_t trans(req_type);
trans.kernel.code_ = nullptr;
trans.kernel.pool_ = pool;
trans.kernel.pool_idx_ = pool_idx;
trans.kernel.agent_ = exec_agent;
trans.kernel.agent_idx_ = exec_idx;
trans_list_.push_back(trans);
}
return true;
}
// Queue the Read transactions described by read_list_.
// Returns the result of BuildReadOrWriteTrans for request type REQ_READ.
bool RocmBandwidthTest::BuildReadTrans() {
  const bool built = BuildReadOrWriteTrans(REQ_READ, read_list_);
  return built;
}
// Queue the Write transactions described by write_list_.
// Returns the result of BuildReadOrWriteTrans for request type REQ_WRITE.
bool RocmBandwidthTest::BuildWriteTrans() {
  const bool built = BuildReadOrWriteTrans(REQ_WRITE, write_list_);
  return built;
}
bool RocmBandwidthTest::FilterCpuPool(uint32_t req_type,
hsa_device_type_t dev_type,
bool fine_grained) {
if ((req_type != REQ_COPY_ALL_BIDIR) &&
(req_type != REQ_COPY_ALL_UNIDIR)) {
return false;
}
// Determine if device is a Cpu - filter out only if
// it is a Cpu device
if (dev_type != HSA_DEVICE_TYPE_CPU) {
return false;
}
// If env to skip fine grain is NULL it means
// we should filter out coarse-grain pools
if (skip_fine_grain_ == NULL) {
return (fine_grained == false);
}
// If env to skip fine grain is NON-NULL it means
// we should filter out fine-grain pools
return (fine_grained == true);
}
bool RocmBandwidthTest::BuildCopyTrans(uint32_t req_type,
vector<uint32_t>& src_list,
vector<uint32_t>& dst_list) {
// bool filter_out;
uint32_t src_size = src_list.size();
uint32_t dst_size = dst_list.size();
// hsa_status_t status;
// hsa_amd_memory_pool_access_t access;
for (uint32_t idx = 0; idx < src_size; idx++) {
// Retrieve Roc runtime handles for Src memory pool and agents
uint32_t src_idx = src_list[idx];
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
// hsa_agent_t src_agent = pool_list_[src_idx].owner_agent_;
hsa_amd_memory_pool_t src_pool = pool_list_[src_idx].pool_;
// bool src_fine_grained = pool_list_[src_idx].is_fine_grained_;
hsa_device_type_t src_dev_type = agent_list_[src_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
filter_out = FilterCpuPool(req_type, src_dev_type, src_fine_grained);
if (filter_out) {
continue;
}
*/
for (uint32_t jdx = 0; jdx < dst_size; jdx++) {
// Retrieve Roc runtime handles for Dst memory pool and agents
uint32_t dst_idx = dst_list[jdx];
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
// hsa_agent_t dst_agent = pool_list_[dst_idx].owner_agent_;
hsa_amd_memory_pool_t dst_pool = pool_list_[dst_idx].pool_;
// bool dst_fine_grained = pool_list_[dst_idx].is_fine_grained_;
hsa_device_type_t dst_dev_type = agent_list_[dst_dev_idx].device_type_;
/*
* This block of code makes sense only if both Fine and Coarse
* grained memory pools are captured. This does not make sense
* if only of them is captured
filter_out = FilterCpuPool(req_type, dst_dev_type, dst_fine_grained);
if (filter_out) {
continue;
}
*/
// Filter out transactions that involve only Cpu agents/devices
// without regard to type of request, default run, partial or full
// unidirectional or bidirectional copies
if ((src_dev_type == HSA_DEVICE_TYPE_CPU) &&
(dst_dev_type == HSA_DEVICE_TYPE_CPU)) {
continue;
}
// Filter out transactions that involve only same GPU as both
// Src and Dst device if the request is bidirectional copy that
// is either partial or full
if ((req_type == REQ_COPY_BIDIR) ||
(req_type == REQ_COPY_ALL_BIDIR)) {
if (src_dev_idx == dst_dev_idx) {
continue;
}
}
// Determine if accessibility to src pool for dst agent is not denied
uint32_t path_exists = access_matrix_[(src_dev_idx * agent_index_) + dst_dev_idx];
if (path_exists == 0) {
if ((req_type == REQ_COPY_ALL_BIDIR) ||
(req_type == REQ_COPY_ALL_UNIDIR)) {
continue;
} else {
PrintCopyAccessError(src_idx, dst_idx);
return false;
}
}
// Update the list of agents active in any copy operation
if (active_agents_list_ == NULL) {
active_agents_list_ = new uint32_t[agent_index_]();
}
active_agents_list_[src_dev_idx] = 1;
active_agents_list_[dst_dev_idx] = 1;
// Agents have access, build an instance of transaction
// and add it to the list of transactions
async_trans_t trans(req_type);
trans.copy.src_idx_ = src_idx;
trans.copy.dst_idx_ = dst_idx;
trans.copy.src_pool_ = src_pool;
trans.copy.dst_pool_ = dst_pool;
trans.copy.bidir_ = ((req_type == REQ_COPY_BIDIR) ||
(req_type == REQ_COPY_ALL_BIDIR));
trans.copy.uses_gpu_ = ((src_dev_type == HSA_DEVICE_TYPE_GPU) ||
(dst_dev_type == HSA_DEVICE_TYPE_GPU));
trans_list_.push_back(trans);
}
}
return true;
}
// Queue partial bidirectional copy transactions; bidir_list_ supplies
// both the source and the destination pools.
bool RocmBandwidthTest::BuildBidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_BIDIR, bidir_list_, bidir_list_);
  return built;
}
// Queue partial unidirectional copy transactions from the pools in
// src_list_ to the pools in dst_list_.
bool RocmBandwidthTest::BuildUnidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_UNIDIR, src_list_, dst_list_);
  return built;
}
// Queue copy-all bidirectional transactions; bidir_list_ supplies both
// the source and the destination pools.
bool RocmBandwidthTest::BuildAllPoolsBidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_ALL_BIDIR, bidir_list_, bidir_list_);
  return built;
}
// Queue copy-all unidirectional transactions from the pools in src_list_
// to the pools in dst_list_.
bool RocmBandwidthTest::BuildAllPoolsUnidirCopyTrans() {
  const bool built = BuildCopyTrans(REQ_COPY_ALL_UNIDIR, src_list_, dst_list_);
  return built;
}
// @brief: Builds a list of transaction per user request.
//
// Each request kind that was asked for has its builder run, in the order
// Read, Write, bidirectional copy, unidirectional copy, copy-all
// bidirectional, copy-all unidirectional.
//
// @return false as soon as one builder fails (later builders are not
//         run), true when every requested builder succeeded
bool RocmBandwidthTest::BuildTransList() {
  // Read transactions
  if ((req_read_ == REQ_READ) && !BuildReadTrans()) {
    return false;
  }

  // Write transactions
  if ((req_write_ == REQ_WRITE) && !BuildWriteTrans()) {
    return false;
  }

  // Bidirectional Copy transactions
  if ((req_copy_bidir_ == REQ_COPY_BIDIR) && !BuildBidirCopyTrans()) {
    return false;
  }

  // Unidirectional Copy transactions
  if ((req_copy_unidir_ == REQ_COPY_UNIDIR) && !BuildUnidirCopyTrans()) {
    return false;
  }

  // All Bidir Copy transactions
  if ((req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) &&
      !BuildAllPoolsBidirCopyTrans()) {
    return false;
  }

  // All Unidir Copy transactions
  if ((req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) &&
      !BuildAllPoolsUnidirCopyTrans()) {
    return false;
  }

  // All of the transactions are built up
  return true;
}
// Convert the raw timing data of a copy transaction into times and
// bandwidths, appending one entry per copy size to the transaction's
// min_time_, avg_time_, avg_bandwidth_ and peak_bandwidth_ vectors.
//
// @param trans Copy transaction holding raw cpu / gpu timing per size
//
// The amount of data moved is the copy size, doubled for a bidirectional
// copy and doubled again when source and destination pool are the same.
// Gpu timestamps (divided by the system timestamp frequency) are used
// when the copy involves a Gpu and print_cpu_time_ is false; Cpu timings
// are used otherwise. Bandwidth is bytes / time / 1000^3.
//
// Changes from the earlier version: the byte count is kept in 64 bits so
// that the two doublings cannot wrap for large copy sizes, and the
// bandwidth computation, previously repeated in both branches, is shared.
void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
  // Get the frequency of Gpu Timestamping
  uint64_t sys_freq = 0;
  hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &sys_freq);

  // Source of timing data is the same for every size of this transaction
  const bool use_gpu_timing = (trans.copy.uses_gpu_ == true) &&
                              (print_cpu_time_ == false);

  const uint32_t size_len = size_list_.size();
  for (uint32_t idx = 0; idx < size_len; idx++) {
    // Adjust size of data involved in copy
    uint64_t data_size = size_list_[idx];
    if (trans.copy.bidir_ == true) {
      data_size += size_list_[idx];
    }

    // Double data size if copying the same device
    if (trans.copy.src_idx_ == trans.copy.dst_idx_) {
      data_size += data_size;
    }

    double avg_time = 0;
    double min_time = 0;
    if (use_gpu_timing) {
      // Convert timestamp ticks using the timestamp frequency
      avg_time = trans.gpu_avg_time_[idx] / sys_freq;
      min_time = trans.gpu_min_time_[idx] / sys_freq;
    } else {
      avg_time = trans.cpu_avg_time_[idx];
      min_time = trans.cpu_min_time_[idx];
    }

    const double avg_bandwidth = (double)data_size / avg_time / 1000 / 1000 / 1000;
    const double peak_bandwidth = (double)data_size / min_time / 1000 / 1000 / 1000;

    trans.min_time_.push_back(min_time);
    trans.avg_time_.push_back(avg_time);
    trans.avg_bandwidth_.push_back(avg_bandwidth);
    trans.peak_bandwidth_.push_back(peak_bandwidth);
  }
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include <assert.h>
#include <algorithm>
#include <unistd.h>
#include <cctype>
#include <sstream>
// Checks that every value of in_list matches the index_ field of some
// entry of pool_list_.
//
// @param in_list List of pool indices to look up.
// @return true if all values were found (also for an empty list),
//         false as soon as one value has no match.
bool RocmBandwidthTest::PoolIsPresent(vector<uint32_t>& in_list) {

  const uint32_t wanted_count = in_list.size();
  const uint32_t known_count = pool_list_.size();

  for (uint32_t req = 0; req < wanted_count; ++req) {

    // Scan the known pools until a match is found or the list ends
    bool found = false;
    for (uint32_t pool = 0; (pool < known_count) && !found; ++pool) {
      found = (in_list[req] == pool_list_[pool].index_);
    }

    // One unknown pool is enough to reject the whole list
    if (!found) {
      return false;
    }
  }

  return true;
}
// Checks a list of pool indices for repeated values.
//
// NOTE: despite its name the method reports the ABSENCE of duplicates.
//
// @param in_list List of pool indices to inspect.
// @return true if every value occurs exactly once (also for an empty
//         list), false if any value occurs more than once.
bool RocmBandwidthTest::PoolIsDuplicated(vector<uint32_t>& in_list) {

  const uint32_t total = in_list.size();

  // Comparing each element with the ones that follow it covers every
  // unordered pair of distinct positions
  for (uint32_t first = 0; first < total; ++first) {
    for (uint32_t second = first + 1; second < total; ++second) {
      if (in_list[first] == in_list[second]) {
        return false;
      }
    }
  }

  return true;
}
// Validates a read or write request.
//
// A request is a flat list of (memory pool, agent) tuples: the element
// at an even position identifies a memory pool, the element following
// it identifies an agent.
//
// @param in_list Flattened list of pool / agent tuples.
// @return true if the list has an even number of elements and every
//         pool value is below pool_index_ and every agent value is
//         below agent_index_; false otherwise.
bool RocmBandwidthTest::ValidateReadOrWriteReq(vector<uint32_t>& in_list) {

  // A list of tuples must have an even number of elements
  const uint32_t entries = in_list.size();
  if ((entries % 2) != 0) {
    return false;
  }

  // Walk the list one tuple at a time
  uint32_t pos = 0;
  while (pos < entries) {

    // Both members of the tuple must refer to an existing pool / agent
    const bool pool_ok = (in_list[pos] < pool_index_);
    const bool agent_ok = (in_list[pos + 1] < agent_index_);
    if (!(pool_ok && agent_ok)) {
      return false;
    }

    pos += 2;
  }

  return true;
}
// Validates the pool / agent tuples collected in read_list_.
//
// @return Result of ValidateReadOrWriteReq() applied to read_list_.
bool RocmBandwidthTest::ValidateReadReq() {
  const bool well_formed = ValidateReadOrWriteReq(read_list_);
  return well_formed;
}
// Validates the pool / agent tuples collected in write_list_.
//
// @return Result of ValidateReadOrWriteReq() applied to write_list_.
bool RocmBandwidthTest::ValidateWriteReq() {
  const bool well_formed = ValidateReadOrWriteReq(write_list_);
  return well_formed;
}
// Validates a list of pool indices given for a copy operation.
//
// The checks are applied in this order and stop at the first failure:
//   1. the list is not longer than pool_list_
//   2. PoolIsDuplicated() returns true for the list
//   3. PoolIsPresent() returns true for the list
//
// @param in_list List of pool indices to validate.
// @return true if all three checks pass, false otherwise.
bool RocmBandwidthTest::ValidateCopyReq(vector<uint32_t>& in_list) {

  const uint32_t requested = in_list.size();
  const uint32_t available = pool_list_.size();

  // Short-circuit evaluation keeps the order of the checks
  return (requested <= available) &&
         PoolIsDuplicated(in_list) &&
         PoolIsPresent(in_list);
}
// Validates the pool list of a bidirectional copy request.
//
// @return Result of ValidateCopyReq() applied to bidir_list_.
bool RocmBandwidthTest::ValidateBidirCopyReq() {
  const bool well_formed = ValidateCopyReq(bidir_list_);
  return well_formed;
}
// Validates the pool lists of a unidirectional copy request.
//
// @return true if ValidateCopyReq() accepts both src_list_ and
//         dst_list_; dst_list_ is examined only if src_list_ is valid.
bool RocmBandwidthTest::ValidateUnidirCopyReq() {
  if (!ValidateCopyReq(src_list_)) {
    return false;
  }
  return ValidateCopyReq(dst_list_);
}
// Validates the inputs of every operation requested by the user.
//
// Each kind of request (read, write, bidirectional copy, unidirectional
// copy) is validated only if it has been requested. Validation stops at
// the first request that is found to be malformed.
//
// @return false if a requested operation has invalid inputs, true if
//         all requested operations are well formed or if nothing has
//         been requested.
bool RocmBandwidthTest::ValidateArguments() {

  // READ request
  if ((req_read_ == REQ_READ) && !ValidateReadReq()) {
    return false;
  }

  // WRITE request
  if ((req_write_ == REQ_WRITE) && !ValidateWriteReq()) {
    return false;
  }

  // Bidirectional copy request. The same validation is applied
  // to an all-to-all bidirectional copy request
  const bool wants_bidir = (req_copy_bidir_ == REQ_COPY_BIDIR) ||
                           (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR);
  if (wants_bidir && !ValidateBidirCopyReq()) {
    return false;
  }

  // Unidirectional copy request. The same validation is applied
  // to an all-to-all unidirectional copy request
  const bool wants_unidir = (req_copy_unidir_ == REQ_COPY_UNIDIR) ||
                            (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR);
  if (wants_unidir && !ValidateUnidirCopyReq()) {
    return false;
  }

  // All of the requests are well formed
  return true;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment