"src/vscode:/vscode.git/clone" did not exist on "3ad49eeeddc5b3a82540bd37ac133650d02ad93d"
Commit 1e5e2e06 authored by Ramesh Errabolu's avatar Ramesh Errabolu
Browse files

Initialize buffers used to copy

parent fca6eaa8
......@@ -97,87 +97,93 @@ void RocmBandwidthTest::AcquirePoolAcceses(uint32_t src_dev_idx,
return;
}
void RocmBandwidthTest::AllocateHostBuffers(size_t size,
uint32_t src_dev_idx,
uint32_t dst_dev_idx,
void*& src, void*& dst,
void* buf_src, void* buf_dst,
hsa_agent_t src_agent, hsa_agent_t dst_agent,
hsa_signal_t& signal) {
void RocmBandwidthTest::InitializeSrcBuffer(size_t size, void* buf_cpy,
uint32_t cpy_dev_idx, hsa_agent_t cpy_agent) {
// Allocate host buffers and setup accessibility for copy operation
err_ = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, (void**)&src);
ErrorCheck(err_);
if (init_src_ == NULL) {
err_ = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, (void**)&init_src_);
ErrorCheck(err_);
memset(init_src_, init_val_, size);
err_ = hsa_signal_create(0, 0, NULL, &init_signal_);
ErrorCheck(err_);
}
// Gain access to the pools
AcquirePoolAcceses(cpu_index_, cpu_agent_, src,
src_dev_idx, src_agent, buf_src);
// If Copy device is a Gpu setup buffer access
hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_;
if (cpy_dev_type == HSA_DEVICE_TYPE_GPU) {
AcquireAccess(cpy_agent, init_src_);
hsa_signal_store_relaxed(init_signal_, 1);
copy_buffer(buf_cpy, cpy_agent,
init_src_, cpu_agent_,
size, init_signal_);
return;
}
err_ = hsa_amd_memory_pool_allocate(sys_pool_, size, 0, (void**)&dst);
ErrorCheck(err_);
// Copy initialization buffer into copy buffer
memcpy(buf_cpy, init_src_, size);
return;
}
bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, void* buf_cpy,
uint32_t cpy_dev_idx, hsa_agent_t cpy_agent) {
// Gain access to the pools
AcquirePoolAcceses(dst_dev_idx, dst_agent, buf_dst,
cpu_index_, cpu_agent_, dst);
// Allocate host buffers and setup accessibility for copy operation
if (validate_dst_ == NULL) {
err_ = hsa_amd_memory_pool_allocate(sys_pool_, max_size, 0, (void**)&validate_dst_);
ErrorCheck(err_);
}
// Initialize host buffers to a determinate value
memset(src, 0x23, size);
memset(dst, 0x00, size);
// Create a signal to wait on copy operation
// @TODO: replace it with a signal pool call
err_ = hsa_signal_create(1, 0, NULL, &signal);
ErrorCheck(err_);
// If Copy device is a Gpu setup buffer access
memset(validate_dst_, ~init_val_, curr_size);
hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_;
if (cpy_dev_type == HSA_DEVICE_TYPE_GPU) {
AcquireAccess(cpy_agent, validate_dst_);
hsa_signal_store_relaxed(init_signal_, 1);
copy_buffer(validate_dst_, cpu_agent_,
buf_cpy, cpy_agent,
curr_size, init_signal_);
} else {
// Copying device is a CPU, copy dst buffer
// into validation buffer
memcpy(validate_dst_, buf_cpy, curr_size);
}
return;
// Copy initialization buffer into copy buffer
err_ = (hsa_status_t)memcmp(init_src_, validate_dst_, curr_size);
if (err_ != HSA_STATUS_SUCCESS) {
exit_value_ = err_;
}
return (err_ == HSA_STATUS_SUCCESS);
}
void RocmBandwidthTest::AllocateCopyBuffers(size_t size,
uint32_t src_dev_idx, uint32_t dst_dev_idx,
void*& src, hsa_amd_memory_pool_t src_pool,
void*& dst, hsa_amd_memory_pool_t dst_pool,
hsa_agent_t src_agent, hsa_agent_t dst_agent,
hsa_signal_t& signal) {
void*& dst, hsa_amd_memory_pool_t dst_pool) {
// Allocate buffers in src and dst pools for forward copy
err_ = hsa_amd_memory_pool_allocate(src_pool, size, 0, &src);
ErrorCheck(err_);
err_ = hsa_amd_memory_pool_allocate(dst_pool, size, 0, &dst);
ErrorCheck(err_);
}
// Create a signal to wait on copy operation
// @TODO: replace it with a signal pool call
err_ = hsa_signal_create(1, 0, NULL, &signal);
ErrorCheck(err_);
void RocmBandwidthTest::ReleaseBuffers(std::vector<void*>& buffer_list) {
return AcquirePoolAcceses(src_dev_idx, src_agent, src,
dst_dev_idx, dst_agent, dst);
for(uint32_t idx = 0; idx < buffer_list.size(); idx++) {
void* buffer = buffer_list[idx];
err_ = hsa_amd_memory_pool_free(buffer);
ErrorCheck(err_);
}
}
void RocmBandwidthTest::ReleaseBuffers(bool bidir,
void* src_fwd, void* src_rev,
void* dst_fwd, void* dst_rev,
hsa_signal_t signal_fwd,
hsa_signal_t signal_rev) {
void RocmBandwidthTest::ReleaseSignals(std::vector<hsa_signal_t>& signal_list) {
// Free the src and dst buffers used in forward copy
// including the signal used to wait
err_ = hsa_amd_memory_pool_free(src_fwd);
ErrorCheck(err_);
err_ = hsa_amd_memory_pool_free(dst_fwd);
ErrorCheck(err_);
err_ = hsa_signal_destroy(signal_fwd);
ErrorCheck(err_);
// Free the src and dst buffers used in reverse copy
// including the signal used to wait
if (bidir) {
err_ = hsa_amd_memory_pool_free(src_rev);
ErrorCheck(err_);
err_ = hsa_amd_memory_pool_free(dst_rev);
ErrorCheck(err_);
err_ = hsa_signal_destroy(signal_rev);
for(uint32_t idx = 0; idx < signal_list.size(); idx++) {
hsa_signal_t signal = signal_list[idx];
err_ = hsa_signal_destroy(signal);
ErrorCheck(err_);
}
}
......@@ -217,6 +223,20 @@ double RocmBandwidthTest::GetGpuCopyTime(bool bidir,
return copy_time;
}
// @brief: Waits on every signal of the list, one after the other,
// until each of them satisfies the condition "value less than 1"
//
// @param signal_list  Signals to wait on, visited in list order
void RocmBandwidthTest::WaitForCopyCompletion(vector<hsa_signal_t>& signal_list) {

  // Use the active wait state unless bw_blocking_run_
  // is set, in which case use the blocked wait state
  hsa_wait_state_t policy = HSA_WAIT_STATE_BLOCKED;
  if (bw_blocking_run_ == NULL) {
    policy = HSA_WAIT_STATE_ACTIVE;
  }

  vector<hsa_signal_t>::iterator it;
  for (it = signal_list.begin(); it != signal_list.end(); ++it) {

    // Re-issue the wait for as long as the
    // call hands back a non-zero value
    while (hsa_signal_wait_acquire(*it, HSA_SIGNAL_CONDITION_LT,
                                   1, uint64_t(-1), policy)) {
    }
  }
}
void RocmBandwidthTest::copy_buffer(void* dst, hsa_agent_t dst_agent,
void* src, hsa_agent_t src_agent,
size_t size, hsa_signal_t signal) {
......@@ -247,11 +267,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
void* buf_dst_fwd;
void* buf_src_rev;
void* buf_dst_rev;
void* validation_dst;
void* validation_src;
hsa_signal_t signal_fwd;
hsa_signal_t signal_rev;
hsa_signal_t validation_signal;
hsa_signal_t signal_start_bidir;
uint32_t src_idx = trans.copy.src_idx_;
uint32_t dst_idx = trans.copy.dst_idx_;
......@@ -267,46 +284,64 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
hsa_agent_t dst_agent_fwd = pool_list_[dst_idx].owner_agent_;
hsa_agent_t src_agent_rev = dst_agent_fwd;
hsa_agent_t dst_agent_rev = src_agent_fwd;
std::vector<void*> buffer_list;
std::vector<hsa_signal_t> signal_list;
// Allocate buffers and signal objects
// Allocate buffers for forward path of unidirectional
// or bidirectional copy
AllocateCopyBuffers(max_size,
src_dev_idx_fwd,
dst_dev_idx_fwd,
buf_src_fwd, src_pool_fwd,
buf_dst_fwd, dst_pool_fwd,
src_agent_fwd, dst_agent_fwd,
signal_fwd);
buf_dst_fwd, dst_pool_fwd);
// Create a signal to wait on copy operation
// @TODO: replace it with a signal pool call
err_ = hsa_signal_create(1, 0, NULL, &signal_fwd);
ErrorCheck(err_);
// Collect resources to be released later
signal_list.push_back(signal_fwd);
buffer_list.push_back(buf_src_fwd);
buffer_list.push_back(buf_dst_fwd);
// Allocate buffers for reverse path of bidirectional copy
if (bidir) {
AllocateCopyBuffers(max_size,
src_dev_idx_rev,
dst_dev_idx_rev,
buf_src_rev, src_pool_rev,
buf_dst_rev, dst_pool_rev,
src_agent_rev, dst_agent_rev,
signal_rev);
buf_dst_rev, dst_pool_rev);
// Create a signal to begin bidir copy operations
// @TODO: replace it with a signal pool call
err_ = hsa_signal_create(1, 0, NULL, &signal_rev);
ErrorCheck(err_);
err_ = hsa_signal_create(1, 0, NULL, &signal_start_bidir);
ErrorCheck(err_);
signal_list.push_back(signal_rev);
signal_list.push_back(signal_start_bidir);
buffer_list.push_back(buf_src_rev);
buffer_list.push_back(buf_dst_rev);
}
if (validate_) {
AllocateHostBuffers(max_size,
src_dev_idx_fwd,
dst_dev_idx_fwd,
validation_src, validation_dst,
buf_src_fwd, buf_dst_fwd,
src_agent_fwd, dst_agent_fwd,
validation_signal);
// Initialize source buffer with values from verification buffer
copy_buffer(buf_src_fwd, src_agent_fwd,
validation_src, cpu_agent_,
max_size, validation_signal);
// Initialize source buffers with data that could be verified
if ((init_) || (validate_)) {
InitializeSrcBuffer(max_size, buf_src_fwd,
src_dev_idx_fwd, src_agent_fwd);
if (bidir) {
InitializeSrcBuffer(max_size, buf_src_rev,
src_dev_idx_rev, src_agent_rev);
}
}
// Setup access to destination buffers for
// both unidirectional and bidirectional copies
AcquirePoolAcceses(src_dev_idx_fwd, src_agent_fwd, buf_src_fwd,
dst_dev_idx_fwd, dst_agent_fwd, buf_dst_fwd);
if (bidir) {
AcquirePoolAcceses(src_dev_idx_rev, src_agent_rev, buf_src_rev,
dst_dev_idx_rev, dst_agent_rev, buf_dst_rev);
}
// Bind the number of iterations
uint32_t iterations = GetIterationNum();
......@@ -367,31 +402,7 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
hsa_signal_store_relaxed(signal_start_bidir, 0);
}
if (bw_blocking_run_ == NULL) {
// Wait for the forward copy operation to complete
while (hsa_signal_wait_acquire(signal_fwd, HSA_SIGNAL_CONDITION_LT, 1,
uint64_t(-1), HSA_WAIT_STATE_ACTIVE));
// Wait for the reverse copy operation to complete
if (bidir) {
while (hsa_signal_wait_acquire(signal_rev, HSA_SIGNAL_CONDITION_LT, 1,
uint64_t(-1), HSA_WAIT_STATE_ACTIVE));
}
} else {
// Wait for the forward copy operation to complete
hsa_signal_wait_acquire(signal_fwd, HSA_SIGNAL_CONDITION_LT, 1,
uint64_t(-1), HSA_WAIT_STATE_BLOCKED);
// Wait for the reverse copy operation to complete
if (bidir) {
hsa_signal_wait_acquire(signal_rev, HSA_SIGNAL_CONDITION_LT, 1,
uint64_t(-1), HSA_WAIT_STATE_BLOCKED);
}
}
WaitForCopyCompletion(signal_list);
// Stop the timer object
timer.StopTimer(index);
......@@ -408,19 +419,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
if (validate_) {
// Init dst buffer with values from outbuffer of copy operation
hsa_signal_store_relaxed(validation_signal, 1);
copy_buffer(validation_dst, cpu_agent_,
buf_dst_fwd, dst_agent_fwd,
curr_size, validation_signal);
// Compare output equals input
err_ = (hsa_status_t)memcmp(validation_src, validation_dst, curr_size);
if (err_ != HSA_STATUS_SUCCESS) {
verify = false;
exit_value_ = err_;
}
verify = ValidateDstBuffer(max_size, curr_size, buf_dst_fwd,
dst_dev_idx_fwd, dst_agent_fwd);
}
}
......@@ -446,20 +446,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
}
// Free up buffers and signal objects used in copy operation
ReleaseBuffers(bidir, buf_src_fwd, buf_src_rev,
buf_dst_fwd, buf_dst_rev, signal_fwd, signal_rev);
if (validate_) {
hsa_signal_t fake_signal = {0};
ReleaseBuffers(false, validation_src, NULL,
validation_dst, NULL, validation_signal, fake_signal);
}
// Free signal used to sync bidirectional copies
if (bidir) {
err_ = hsa_signal_destroy(signal_start_bidir);
ErrorCheck(err_);
}
ReleaseSignals(signal_list);
ReleaseBuffers(buffer_list);
}
void RocmBandwidthTest::Run() {
......@@ -553,9 +541,16 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
active_agents_list_ = NULL;
link_weight_matrix_ = NULL;
init_ = false;
latency_ = false;
validate_ = false;
print_cpu_time_ = false;
// Set initial value to 0x23 in case
// user does not have a preference
init_val_ = 0x23;
init_src_ = NULL;
validate_dst_ = NULL;
// Initialize version of the test
version_.major_id = 2;
......@@ -585,6 +580,13 @@ RocmBandwidthTest::~RocmBandwidthTest() {
delete link_type_matrix_;
delete link_weight_matrix_;
delete active_agents_list_;
if (init_) {
hsa_signal_destroy(init_signal_);
hsa_amd_memory_pool_free(init_src_);
if (validate_) {
hsa_amd_memory_pool_free(validate_dst_);
}
}
}
std::string RocmBandwidthTest::GetVersion() const {
......
......@@ -166,11 +166,13 @@ typedef enum Request_Type {
REQ_READ = 1,
REQ_WRITE = 2,
REQ_COPY_BIDIR = 3,
REQ_COPY_UNIDIR = 4,
REQ_COPY_ALL_BIDIR = 5,
REQ_COPY_ALL_UNIDIR = 6,
REQ_INVALID = 7,
REQ_VERSION = 3,
REQ_TOPOLOGY = 4,
REQ_COPY_BIDIR = 5,
REQ_COPY_UNIDIR = 6,
REQ_COPY_ALL_BIDIR = 7,
REQ_COPY_ALL_UNIDIR = 8,
REQ_INVALID = 9,
} Request_Type;
......@@ -255,6 +257,8 @@ class RocmBandwidthTest : public BaseTest {
double GetMinTime(std::vector<double>& vec);
// @brief: Display Benchmark result
void PopulatePerfMatrix(bool peak, double* perf_matrix) const;
void PrintPerfMatrix(bool validate, bool peak, double* perf_matrix) const;
void DisplayDevInfo() const;
void DisplayIOTime(async_trans_t& trans) const;
void DisplayCopyTime(async_trans_t& trans) const;
......@@ -268,6 +272,11 @@ class RocmBandwidthTest : public BaseTest {
bool ValidateReadReq();
bool ValidateWriteReq();
bool ValidateReadOrWriteReq(vector<size_t>& in_list);
void ValidateCopyBidirFlags(uint32_t copy_ctrl_mask);
void ValidateCopyAllBidirFlags(uint32_t copy_ctrl_mask);
void ValidateCopyAllUnidirFlags(uint32_t copy_ctrl_mask);
void ValidateCopyUnidirFlags(uint32_t copy_mask, uint32_t copy_ctrl_mask);
bool ValidateBidirCopyReq();
bool ValidateUnidirCopyReq();
......@@ -280,6 +289,8 @@ class RocmBandwidthTest : public BaseTest {
// @brief: Builds a list of transaction per user request
void ComputeCopyTime(async_trans_t& trans);
void BuildDeviceList();
void BuildBufferList();
bool BuildTransList();
bool BuildReadTrans();
bool BuildWriteTrans();
......@@ -293,24 +304,23 @@ class RocmBandwidthTest : public BaseTest {
vector<size_t>& src_list,
vector<size_t>& dst_list);
void WaitForCopyCompletion(vector<hsa_signal_t>& signal_list);
void AllocateCopyBuffers(size_t size,
uint32_t src_dev_idx, uint32_t dst_dev_idx,
void*& src, hsa_amd_memory_pool_t src_pool,
void*& dst, hsa_amd_memory_pool_t dst_pool,
hsa_agent_t src_agent, hsa_agent_t dst_agent,
hsa_signal_t& signal);
void ReleaseBuffers(bool bidir,
void* src_fwd, void* src_rev,
void* dst_fwd, void* dst_rev,
hsa_signal_t signal_fwd, hsa_signal_t signal_rev);
void*& dst, hsa_amd_memory_pool_t dst_pool);
void ReleaseBuffers(std::vector<void*>& buffer_list);
void ReleaseSignals(std::vector<hsa_signal_t>& signal_list);
double GetGpuCopyTime(bool bidir, hsa_signal_t signal_fwd, hsa_signal_t signal_rev);
void AllocateHostBuffers(size_t size,
uint32_t src_dev_idx,
uint32_t dst_dev_idx,
void*& src, void*& dst,
void* buf_src, void* buf_dst,
hsa_agent_t src_agent, hsa_agent_t dst_agent,
hsa_signal_t& signal);
void InitializeSrcBuffer(size_t size, void* buf_cpy,
uint32_t cpy_dev_idx, hsa_agent_t cpy_agent);
bool ValidateDstBuffer(size_t max_size, size_t curr_size,
void* buf_cpy, uint32_t cpy_dev_idx, hsa_agent_t cpy_agent);
void copy_buffer(void* dst, hsa_agent_t dst_agent,
void* src, hsa_agent_t src_agent,
size_t size, hsa_signal_t signal);
......@@ -418,6 +428,8 @@ class RocmBandwidthTest : public BaseTest {
// Type of service requested by user
uint32_t req_read_;
uint32_t req_write_;
uint32_t req_version_;
uint32_t req_topology_;
uint32_t req_copy_bidir_;
uint32_t req_copy_unidir_;
uint32_t req_copy_all_bidir_;
......@@ -427,9 +439,10 @@ class RocmBandwidthTest : public BaseTest {
static const uint32_t USR_DST_FLAG = 0x02;
static const uint32_t USR_BUFFER_SIZE = 0x01;
static const uint32_t USR_VISIBLE_TIME = 0x02;
static const uint32_t DEV_COPY_LATENCY = 0x04;
static const uint32_t VALIDATE_COPY_OP = 0x08;
static const uint32_t USR_BUFFER_INIT = 0x02;
static const uint32_t CPU_VISIBLE_TIME = 0x04;
static const uint32_t DEV_COPY_LATENCY = 0x08;
static const uint32_t VALIDATE_COPY_OP = 0x010;
static const uint32_t LINK_TYPE_SELF = 0x00;
static const uint32_t LINK_TYPE_PCIE = 0x01;
......@@ -482,9 +495,18 @@ class RocmBandwidthTest : public BaseTest {
// Flag to print Cpu time
bool print_cpu_time_;
// Determines if user has requested initialization
bool init_;
// Determines if user has requested validation
bool validate_;
uint8_t init_val_;
// Handles to buffer used to initialize and validate
void* init_src_;
void* validate_dst_;
hsa_signal_t init_signal_;
// Determines the latency overhead of copy operations
bool latency_;
......
......@@ -43,10 +43,20 @@
#include "common.hpp"
#include "rocm_bandwidth_test.hpp"
#include <assert.h>
#include <algorithm>
#include <sstream>
#include <unistd.h>
// Parse the value string of option "-i" into a byte pattern.
// The string holds exactly one integer written in decimal,
// hexadecimal (0x prefix) or octal (0 prefix) notation, as
// in example: -i 0x33
//
// @param value_str  Option value string to parse
// @param value      Output parameter, receives the parsed byte.
//                   It is written only when parsing succeeds
//
// @return true if the string is a number in the range [0 - 255],
// false if it is missing, malformed or out of range
static bool ParseInitValue(char* value_str, uint8_t&value) {

  // A missing value string can't be parsed
  if (value_str == NULL) {
    return false;
  }

  // Keep the full width returned by strtoul, so that the range
  // check below sees the number before it is narrowed to a byte
  char* end_ptr = NULL;
  unsigned long value_read = strtoul(value_str, &end_ptr, 0);

  // Reject strings that have no digits or have trailing characters
  if ((end_ptr == value_str) || (*end_ptr != '\0')) {
    return false;
  }

  // Reject numbers that do not fit into one byte. Checking after
  // narrowing would let values such as 256 pass as zero
  if (value_read > 255) {
    return false;
  }

  value = static_cast<uint8_t>(value_read);
  return true;
}
// Parse option value string. The string has one or more decimal
// values separated by commas - "3,6,9,12,15".
static bool ParseOptionValue(char* value, vector<size_t>&value_list) {
......@@ -80,80 +90,190 @@ static bool ParseOptionValue(char* value, vector<size_t>&value_list) {
return true;
}
void RocmBandwidthTest::ValidateInputFlags(uint32_t pf_cnt,
uint32_t copy_mask, uint32_t copy_ctrl_mask) {
void RocmBandwidthTest::ValidateCopyBidirFlags(uint32_t copy_ctrl_mask) {
// Input can't have more than two Primary flags
if (pf_cnt > 2) {
// It is illegal to specify following flags
// secondary flag that affects a copy operation
if ((copy_ctrl_mask & DEV_COPY_LATENCY) ||
(copy_ctrl_mask & CPU_VISIBLE_TIME) ||
(copy_ctrl_mask & VALIDATE_COPY_OP)) {
PrintHelpScreen();
exit(0);
}
return;
}
void RocmBandwidthTest::ValidateCopyUnidirFlags(uint32_t copy_mask,
uint32_t copy_ctrl_mask) {
if (copy_mask != (USR_SRC_FLAG | USR_DST_FLAG)) {
PrintHelpScreen();
exit(0);
}
// Input specifies unidirectional copy among subset of devices
if (pf_cnt == 2) {
if (copy_mask != (USR_SRC_FLAG | USR_DST_FLAG)) {
// It is illegal to specify Latency and another
// secondary flag that affects a copy operation
if ((copy_ctrl_mask & DEV_COPY_LATENCY) &&
((copy_ctrl_mask & USR_BUFFER_INIT) ||
(copy_ctrl_mask & CPU_VISIBLE_TIME) ||
(copy_ctrl_mask & VALIDATE_COPY_OP))) {
PrintHelpScreen();
exit(0);
}
}
// Rewrite input if user is requesting validation
if (pf_cnt == 0) {
if (copy_ctrl_mask & VALIDATE_COPY_OP) {
req_copy_all_unidir_ = REQ_COPY_ALL_UNIDIR;
}
// It is illegal to specify user buffer sizes and another
// secondary flag that affects a copy operation
if ((copy_ctrl_mask & USR_BUFFER_SIZE) &&
(copy_ctrl_mask & VALIDATE_COPY_OP)) {
PrintHelpScreen();
exit(0);
}
// User input for primary operation is valid.
// Determine secondary flags are legal
// Check of illegal flags is complete
return;
}
// Case 1: It is illegal to specify copy size for copy
// operations involving all devices
if (((req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) ||
(req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) &&
(copy_ctrl_mask & USR_BUFFER_SIZE)) {
void RocmBandwidthTest::ValidateCopyAllBidirFlags(uint32_t copy_ctrl_mask) {
// It is illegal to specify following flags
// secondary flag that affects a copy operation
if ((copy_ctrl_mask & DEV_COPY_LATENCY) ||
(copy_ctrl_mask & USR_BUFFER_SIZE) ||
(copy_ctrl_mask & CPU_VISIBLE_TIME) ||
(copy_ctrl_mask & VALIDATE_COPY_OP)) {
PrintHelpScreen();
exit(0);
}
//
// Case 2: It is illegal to specify Latency for bidirectional
// copy operations or all-unidirectional
if (((req_copy_bidir_ == REQ_COPY_BIDIR) ||
(req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) ||
(req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) &&
(copy_ctrl_mask & DEV_COPY_LATENCY)) {
// Check of illegal flags is complete
return;
}
void RocmBandwidthTest::ValidateCopyAllUnidirFlags(uint32_t copy_ctrl_mask) {
// It is illegal to specify following flags
// secondary flag that affects a copy operation
if ((copy_ctrl_mask & DEV_COPY_LATENCY) ||
(copy_ctrl_mask & USR_BUFFER_SIZE)) {
PrintHelpScreen();
exit(0);
}
//
// Case 3: It is illegal to specify Latency and another secondary
// flag that affects a copy operation
if ((copy_ctrl_mask & DEV_COPY_LATENCY) &&
((copy_ctrl_mask & USR_BUFFER_SIZE) ||
(copy_ctrl_mask & USR_VISIBLE_TIME) ||
(copy_ctrl_mask & VALIDATE_COPY_OP))) {
PrintHelpScreen();
exit(0);
// Check of illegal flags is complete
return;
}
void RocmBandwidthTest::ValidateInputFlags(uint32_t pf_cnt,
uint32_t copy_mask, uint32_t copy_ctrl_mask) {
// Input can't have more than two Primary flags
if ((pf_cnt == 0) || (pf_cnt > 2)) {
PrintHelpScreen();
exit(0);
}
//
// Case 4: It is illegal to request Cpu time along with validation
// of copy operation
if ((copy_ctrl_mask & VALIDATE_COPY_OP) &&
((copy_ctrl_mask & USR_BUFFER_SIZE) ||
(copy_ctrl_mask & USR_VISIBLE_TIME))) {
PrintHelpScreen();
exit(0);
// Input specifies unidirectional copy among subset of devices
// rocm_bandwidth_test -s Di,Dj,Dk -d Dp,Dq,Dr
if (pf_cnt == 2) {
return ValidateCopyUnidirFlags(copy_mask, copy_ctrl_mask);
}
// Input is requesting to print RBT version
// rocm_bandwidth_test -q
if (req_version_ == REQ_VERSION) {
PrintVersion();
exit(0);
}
// Input is requesting to print ROCm topology
// rocm_bandwidth_test -t
if (req_topology_ == REQ_TOPOLOGY) {
return;
}
// Input is for bidirectional bandwidth for some devices
// rocm_bandwidth_test -b
if (req_copy_bidir_ == REQ_COPY_BIDIR) {
return ValidateCopyBidirFlags(copy_ctrl_mask);
}
// Input is for bidirectional bandwidth for all devices
// rocm_bandwidth_test -A
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
return ValidateCopyAllBidirFlags(copy_ctrl_mask);
}
// Input is for unidirectional bandwidth for all devices
// rocm_bandwidth_test -a
if (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) {
return ValidateCopyAllUnidirFlags(copy_ctrl_mask);
}
std::cout << "ValidateInputFlags: This should not be happening" << std::endl;
assert(false);
return;
}
void RocmBandwidthTest::BuildDeviceList() {
// Initialize devices list if copying unidirectional
// all or bidirectional all mode is enabled
uint32_t size = pool_list_.size();
for (uint32_t idx = 0; idx < size; idx++) {
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
bidir_list_.push_back(idx);
} else {
src_list_.push_back(idx);
dst_list_.push_back(idx);
}
}
}
void RocmBandwidthTest::BuildBufferList() {
// User has specified buffer sizes to be used
if (size_list_.size() != 0) {
uint32_t size_len = size_list_.size();
for (uint32_t idx = 0; idx < size_len; idx++) {
size_list_[idx] = size_list_[idx] * 1024 * 1024;
}
return;
}
// User has NOT specified buffer sizes to be used
// For All Copy operations use only one buffer size
uint32_t size_len = sizeof(SIZE_LIST)/sizeof(size_t);
for (uint32_t idx = 0; idx < size_len; idx++) {
if ((req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) ||
(req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
}
if (req_copy_unidir_ == REQ_COPY_UNIDIR) {
if (latency_) {
size_list_.push_back(LATENCY_SIZE_LIST[idx]);
} else if (validate_) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
} else {
size_list_.push_back(SIZE_LIST[idx]);
}
}
if (req_copy_bidir_ == REQ_COPY_BIDIR) {
size_list_.push_back(SIZE_LIST[idx]);
}
}
}
void RocmBandwidthTest::ParseArguments() {
bool print_help = false;
bool print_version = false;
bool print_topology = false;
bool print_help = 0;
uint32_t copy_mask = 0;
uint32_t copy_ctrl_mask = 0;
uint32_t num_primary_flags = 0;
......@@ -165,25 +285,24 @@ void RocmBandwidthTest::ParseArguments() {
int opt;
bool status;
while ((opt = getopt(usr_argc_, usr_argv_, "hqtclvaAb:s:d:r:w:m:")) != -1) {
while ((opt = getopt(usr_argc_, usr_argv_, "hqtclvaAb:i:s:d:r:w:m:")) != -1) {
switch (opt) {
// Print help screen
case 'h':
print_help = true;
num_primary_flags++;
break;
// Print version of the test
case 'q':
print_version = true;
num_primary_flags++;
req_version_ = REQ_VERSION;
break;
// Print system topology
case 't':
print_topology = true;
num_primary_flags++;
req_topology_ = REQ_TOPOLOGY;
break;
// Enable Unidirectional copy among all valid buffers
......@@ -226,6 +345,7 @@ void RocmBandwidthTest::ParseArguments() {
case 'b':
status = ParseOptionValue(optarg, bidir_list_);
if (status) {
num_primary_flags++;
req_copy_bidir_ = REQ_COPY_BIDIR;
break;
}
......@@ -244,7 +364,7 @@ void RocmBandwidthTest::ParseArguments() {
// Print Cpu time
case 'c':
print_cpu_time_ = true;
copy_ctrl_mask |= USR_VISIBLE_TIME;
copy_ctrl_mask |= CPU_VISIBLE_TIME;
break;
// Set Latency mode flag to true
......@@ -259,6 +379,16 @@ void RocmBandwidthTest::ParseArguments() {
copy_ctrl_mask |= VALIDATE_COPY_OP;
break;
// Set initialization mode flag to true
case 'i':
init_ = true;
status = ParseInitValue(optarg, init_val_);
if (status == false) {
print_help = true;
}
copy_ctrl_mask |= USR_BUFFER_INIT;
break;
// Collect request to read a buffer
case 'r':
req_read_ = REQ_READ;
......@@ -282,8 +412,9 @@ void RocmBandwidthTest::ParseArguments() {
// optopt
case '?':
std::cout << "Argument is illegal or needs value: " << '?' << std::endl;
if ((optopt == 'b' || optopt == 's' || optopt == 'd' || optopt == 'm')) {
std::cout << "Error: Option -b -s -d and -m require argument" << std::endl;
if ((optopt == 'b') || (optopt == 's') ||
(optopt == 'd') || (optopt == 'm') || (optopt == 'i')) {
std::cout << "Error: Options -b -s -d and -m -i require argument" << std::endl;
}
print_help = true;
break;
......@@ -292,9 +423,6 @@ void RocmBandwidthTest::ParseArguments() {
break;
}
}
// Determine input of primary flags is valid
ValidateInputFlags(num_primary_flags, copy_mask, copy_ctrl_mask);
// Print help screen if user option has "-h"
if (print_help) {
......@@ -302,11 +430,8 @@ void RocmBandwidthTest::ParseArguments() {
exit(0);
}
// Print version of the test
if (print_version) {
PrintVersion();
exit(0);
}
// Determine input of primary flags is valid
ValidateInputFlags(num_primary_flags, copy_mask, copy_ctrl_mask);
// Initialize Roc Runtime
err_ = hsa_init();
......@@ -316,7 +441,7 @@ void RocmBandwidthTest::ParseArguments() {
DiscoverTopology();
// Print system topology if user option has "-t"
if (print_topology) {
if (req_topology_ == REQ_TOPOLOGY) {
PrintVersion();
PrintTopology();
PrintLinkPropsMatrix(LINK_PROP_ACCESS);
......@@ -325,72 +450,15 @@ void RocmBandwidthTest::ParseArguments() {
exit(0);
}
// Initialize buffer list if full copying in unidirectional
// or bidirectional mode is enabled
// Initialize devices list if copying unidirectional
// all or bidirectional all mode is enabled
if ((req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) ||
(req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
uint32_t size = pool_list_.size();
for (uint32_t idx = 0; idx < size; idx++) {
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
bidir_list_.push_back(idx);
} else {
src_list_.push_back(idx);
dst_list_.push_back(idx);
}
}
}
// Initialize the list of buffer sizes to use in copy/read/write operations
// For All Copy operations use only one buffer size
if (size_list_.size() == 0) {
uint32_t size_len = sizeof(SIZE_LIST)/sizeof(size_t);
for (uint32_t idx = 0; idx < size_len; idx++) {
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
}
if (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR) {
if (idx == 16) {
if (latency_ == false) {
size_list_.push_back(SIZE_LIST[idx]);
} else {
size_list_.push_back(LATENCY_SIZE_LIST[3]); // size of 8 bytes
}
}
}
if (req_copy_unidir_ == REQ_COPY_UNIDIR) {
if (latency_) {
size_list_.push_back(LATENCY_SIZE_LIST[idx]);
} else if (validate_) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
} else {
size_list_.push_back(SIZE_LIST[idx]);
}
}
if (req_copy_bidir_ == REQ_COPY_BIDIR) {
if (validate_) {
if (idx == 16) {
size_list_.push_back(SIZE_LIST[idx]);
}
} else {
size_list_.push_back(SIZE_LIST[idx]);
}
}
}
} else {
uint32_t size_len = size_list_.size();
for (uint32_t idx = 0; idx < size_len; idx++) {
size_list_[idx] = size_list_[idx] * 1024 * 1024;
}
BuildDeviceList();
}
// Initialize list of buffer sizes used in copy operations
BuildBufferList();
std::sort(size_list_.begin(), size_list_.end());
}
......@@ -59,6 +59,7 @@ void RocmBandwidthTest::PrintHelpScreen() {
std::cout << "\t -v Run the test in validation mode" << std::endl;
std::cout << "\t -l Run test to collect Latency data" << std::endl;
std::cout << "\t -c Time the operation using CPU Timers" << std::endl;
std::cout << "\t -i Initialize copy buffer with specified byte pattern" << std::endl;
std::cout << "\t -t Prints system topology and allocatable memory info" << std::endl;
std::cout << "\t -m List of buffer sizes to use, specified in Megabytes" << std::endl;
std::cout << "\t -b List devices to use in bidirectional copy operations" << std::endl;
......@@ -69,13 +70,13 @@ void RocmBandwidthTest::PrintHelpScreen() {
std::cout << std::endl;
std::cout << "\t NOTE: Mixing following options is illegal/unsupported" << std::endl;
std::cout << "\t\t Case 1: rocm_bandwidth_test -a or -A with -m" << std::endl;
std::cout << "\t\t Case 2: rocm_bandwidth_test -b or -A with -l" << std::endl;
std::cout << "\t\t Case 3: rocm_bandwidth_test -a or -s x -d with -l and -c" << std::endl;
std::cout << "\t\t Case 4: rocm_bandwidth_test -a or -s x -d with -l and -m" << std::endl;
std::cout << "\t\t Case 5: rocm_bandwidth_test -a or -s x -d with -l and -v" << std::endl;
std::cout << "\t\t Case 6: rocm_bandwidth_test -a or -A -b or -s x -d y with -v and -c" << std::endl;
std::cout << "\t\t Case 7: rocm_bandwidth_test -a or -A -b or -s x -d y with -v and -m" << std::endl;
std::cout << "\t\t Case 1: rocm_bandwidth_test -a or -A with -c" << std::endl;
std::cout << "\t\t Case 2: rocm_bandwidth_test -b or -A with -m" << std::endl;
std::cout << "\t\t Case 3: rocm_bandwidth_test -b or -A with -l" << std::endl;
std::cout << "\t\t Case 4: rocm_bandwidth_test -b or -A with -v" << std::endl;
std::cout << "\t\t Case 5: rocm_bandwidth_test -a or -s x -d y with -l and -c" << std::endl;
std::cout << "\t\t Case 6: rocm_bandwidth_test -a or -s x -d y with -l and -m" << std::endl;
std::cout << "\t\t Case 7: rocm_bandwidth_test -a or -s x -d y with -l and -v" << std::endl;
std::cout << std::endl;
......
......@@ -162,7 +162,6 @@ void RocmBandwidthTest::Display() const {
DisplayDevInfo();
PrintLinkPropsMatrix(LINK_PROP_ACCESS);
PrintLinkPropsMatrix(LINK_PROP_WEIGHT);
PrintLinkPropsMatrix(LINK_PROP_TYPE);
DisplayCopyTimeMatrix(true);
return;
}
......@@ -173,7 +172,6 @@ void RocmBandwidthTest::Display() const {
DisplayDevInfo();
PrintLinkPropsMatrix(LINK_PROP_ACCESS);
PrintLinkPropsMatrix(LINK_PROP_WEIGHT);
PrintLinkPropsMatrix(LINK_PROP_TYPE);
}
DisplayCopyTimeMatrix(true);
return;
......@@ -221,9 +219,8 @@ void RocmBandwidthTest::DisplayCopyTime(async_trans_t& trans) const {
}
}
void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
void RocmBandwidthTest::PopulatePerfMatrix(bool peak, double* perf_matrix) const {
double* perf_matrix = new double[agent_index_ * agent_index_]();
uint32_t trans_size = trans_list_.size();
for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx];
......@@ -240,6 +237,10 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
}
}
}
void RocmBandwidthTest::PrintPerfMatrix(bool validate, bool peak, double* perf_matrix) const {
uint32_t format = 10;
std::cout.setf(ios::left);
......@@ -247,20 +248,24 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
std::cout << "";
std::cout.width(format);
if ((peak) && (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
std::cout << "Unidirectional copy peak bandwidth GB/s";
}
if ((peak == false) && (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
std::cout << "Unidirectional copy average bandwidth GB/s";
}
if ((peak) && (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
std::cout << "Bdirectional copy peak bandwidth GB/s";
}
if ((peak == false) && (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
std::cout << "Bidirectional copy average bandwidth GB/s";
if (validate == false) {
if ((peak) && (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
std::cout << "Unidirectional copy peak bandwidth GB/s";
}
if ((peak == false) && (req_copy_all_unidir_ == REQ_COPY_ALL_UNIDIR)) {
std::cout << "Unidirectional copy average bandwidth GB/s";
}
if ((peak) && (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
std::cout << "Bdirectional copy peak bandwidth GB/s";
}
if ((peak == false) && (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR)) {
std::cout << "Bidirectional copy average bandwidth GB/s";
}
} else {
std::cout << "Data Path Validation";
}
std::cout << std::endl;
......@@ -293,10 +298,20 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
format = 12;
std::cout.width(format);
double value = perf_matrix[(idx0 * agent_index_) + idx1];
if (value == 0) {
std::cout << "N/A";
if (validate) {
if (value == 0) {
std::cout << "N/A";
} else if (value < 1) {
std::cout << "FAIL";
} else {
std::cout << "PASS";
}
} else {
std::cout << perf_matrix[(idx0 * agent_index_) + idx1];
if (value == 0) {
std::cout << "N/A";
} else {
std::cout << perf_matrix[(idx0 * agent_index_) + idx1];
}
}
}
std::cout << std::endl;
......@@ -305,73 +320,20 @@ void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
std::cout << std::endl;
}
void RocmBandwidthTest::DisplayValidationMatrix() const {
void RocmBandwidthTest::DisplayCopyTimeMatrix(bool peak) const {
double* perf_matrix = new double[agent_index_ * agent_index_]();
uint32_t trans_size = trans_list_.size();
for (uint32_t idx = 0; idx < trans_size; idx++) {
async_trans_t trans = trans_list_[idx];
uint32_t src_idx = trans.copy.src_idx_;
uint32_t dst_idx = trans.copy.dst_idx_;
uint32_t src_dev_idx = pool_list_[src_idx].agent_index_;
uint32_t dst_dev_idx = pool_list_[dst_idx].agent_index_;
perf_matrix[(src_dev_idx * agent_index_) + dst_dev_idx] = trans.peak_bandwidth_[0];
if (req_copy_all_bidir_ == REQ_COPY_ALL_BIDIR) {
perf_matrix[(dst_dev_idx * agent_index_) + src_dev_idx] = trans.peak_bandwidth_[0];
}
}
uint32_t format = 10;
std::cout.setf(ios::left);
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "Data Path Validation";
PopulatePerfMatrix(peak, perf_matrix);
PrintPerfMatrix(false, peak, perf_matrix);
free(perf_matrix);
}
std::cout << std::endl;
std::cout << std::endl;
std::cout.precision(6);
std::cout << std::fixed;
void RocmBandwidthTest::DisplayValidationMatrix() const {
std::cout.width(format);
std::cout << "";
std::cout.width(format);
std::cout << "D/D";
format = 12;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
std::cout.width(format);
std::stringstream agent_id;
agent_id << idx0;
std::cout << agent_id.str();
}
std::cout << std::endl;
std::cout << std::endl;
for (uint32_t idx0 = 0; idx0 < agent_index_; idx0++) {
format = 10;
std::cout.width(format);
std::cout << "";
std::stringstream agent_id;
agent_id << idx0;
std::cout.width(format);
std::cout << agent_id.str();
for (uint32_t idx1 = 0; idx1 < agent_index_; idx1++) {
format = 12;
std::cout.width(format);
double value = perf_matrix[(idx0 * agent_index_) + idx1];
if (value == 0) {
std::cout << "N/A";
} else if (value < 1) {
std::cout << "FAIL";
} else {
std::cout << "PASS";
}
}
std::cout << std::endl;
std::cout << std::endl;
}
std::cout << std::endl;
double* perf_matrix = new double[agent_index_ * agent_index_]();
PopulatePerfMatrix(true, perf_matrix);
PrintPerfMatrix(true, true, perf_matrix);
free(perf_matrix);
}
void RocmBandwidthTest::DisplayDevInfo() const {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment