Unverified Commit aefd848f authored by Ramesh Errabolu's avatar Ramesh Errabolu Committed by GitHub
Browse files

Merge pull request #63 from RadeonOpenCompute/rbtCpuTimerOpt

Construct and collect CPU timer only if requested by user
parents 2c6c202b ffbc28bb
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
#define NANOSECONDS_PER_SECOND 1000000000 #define NANOSECONDS_PER_SECOND 1000000000
PerfTimer::PerfTimer() { PerfTimer::PerfTimer() {
freq_in_100mhz = MeasureTSCFreqHz();
} }
PerfTimer::~PerfTimer() { PerfTimer::~PerfTimer() {
...@@ -56,6 +55,10 @@ PerfTimer::~PerfTimer() { ...@@ -56,6 +55,10 @@ PerfTimer::~PerfTimer() {
} }
} }
void PerfTimer::InitTimer() {
freq_in_100mhz = MeasureTSCFreqHz();
}
// Create a new timer instance and return its index // Create a new timer instance and return its index
int PerfTimer::CreateTimer() { int PerfTimer::CreateTimer() {
......
...@@ -82,6 +82,7 @@ class PerfTimer { ...@@ -82,6 +82,7 @@ class PerfTimer {
PerfTimer(); PerfTimer();
~PerfTimer(); ~PerfTimer();
void InitTimer();
private: private:
......
...@@ -51,6 +51,8 @@ ...@@ -51,6 +51,8 @@
#include <cmath> #include <cmath>
#include <sstream> #include <sstream>
#include <limits> #include <limits>
#include <chrono>
#include <thread>
// Initialize the variable used to capture validation failure // Initialize the variable used to capture validation failure
const double RocmBandwidthTest::VALIDATE_COPY_OP_FAILURE = std::numeric_limits<double>::max(); const double RocmBandwidthTest::VALIDATE_COPY_OP_FAILURE = std::numeric_limits<double>::max();
...@@ -587,12 +589,21 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -587,12 +589,21 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
hsa_signal_store_relaxed(signal_start_bidir, 1); hsa_signal_store_relaxed(signal_start_bidir, 1);
} }
// Create a timer object and reset signals // Temporary code for testing
if (sleep_time_ > 0) {
std::this_thread::sleep_for(sleep_usecs_);
}
// Create a timer object and start it
PerfTimer timer; PerfTimer timer;
uint32_t index = timer.CreateTimer(); uint32_t cpuTimerIdx = 0;
if (print_cpu_time_) {
timer.InitTimer();
cpuTimerIdx = timer.CreateTimer();
timer.StartTimer(cpuTimerIdx);
}
// Start the timer and launch forward copy operation // Launch the copy operation
timer.StartTimer(index);
if (bidir == false) { if (bidir == false) {
err_ = hsa_amd_memory_async_copy(buf_dst_fwd, dst_agent_fwd, err_ = hsa_amd_memory_async_copy(buf_dst_fwd, dst_agent_fwd,
buf_src_fwd, src_agent_fwd, buf_src_fwd, src_agent_fwd,
...@@ -621,11 +632,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -621,11 +632,11 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
WaitForCopyCompletion(signal_list); WaitForCopyCompletion(signal_list);
// Stop the timer object // Stop the timer object and extract time taken
timer.StopTimer(index); if (print_cpu_time_) {
timer.StopTimer(cpuTimerIdx);
// Push the time taken for copy into a vector of copy times cpu_time.push_back(timer.ReadTimer(cpuTimerIdx));
cpu_time.push_back(timer.ReadTimer(index)); }
// Collect time from the signal(s) // Collect time from the signal(s)
if (print_cpu_time_ == false) { if (print_cpu_time_ == false) {
...@@ -667,7 +678,9 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -667,7 +678,9 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
verify = true; verify = true;
// Clear the stack of cpu times // Clear the stack of cpu times
cpu_time.clear(); if (print_cpu_time_) {
cpu_time.clear();
}
gpu_time.clear(); gpu_time.clear();
} }
...@@ -803,10 +816,24 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() { ...@@ -803,10 +816,24 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test // Initialize version of the test
version_.major_id = 2; version_.major_id = 2;
version_.minor_id = 3; version_.minor_id = 4;
version_.step_id = 11; version_.step_id = 0;
version_.reserved = 0; version_.reserved = 0;
// Test impact of sleep, temp code
sleep_time_ = 0;
bw_sleep_time_ = getenv("ROCM_BW_SLEEP_TIME");
if (bw_sleep_time_ != NULL) {
sleep_time_ = atoi(bw_sleep_time_);
if ((sleep_time_ < 0) || (sleep_time_ > 60000)) {
std::cout << "Value of ROCM_BW_SLEEP_TIME must be between [1, 60000)" << sleep_time_ << std::endl;
exit(1);
}
sleep_time_ *= 100;
std::chrono::duration<uint32_t, std::micro> temp(sleep_time_);
sleep_usecs_ = temp;
}
bw_iter_cnt_ = getenv("ROCM_BW_ITER_CNT"); bw_iter_cnt_ = getenv("ROCM_BW_ITER_CNT");
bw_default_run_ = getenv("ROCM_BW_DEFAULT_RUN"); bw_default_run_ = getenv("ROCM_BW_DEFAULT_RUN");
bw_blocking_run_ = getenv("ROCR_BW_RUN_BLOCKING"); bw_blocking_run_ = getenv("ROCR_BW_RUN_BLOCKING");
...@@ -817,6 +844,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() { ...@@ -817,6 +844,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
int32_t num = atoi(bw_iter_cnt_); int32_t num = atoi(bw_iter_cnt_);
if (num < 0) { if (num < 0) {
std::cout << "Value of ROCM_BW_ITER_CNT can't be negative: " << num << std::endl; std::cout << "Value of ROCM_BW_ITER_CNT can't be negative: " << num << std::endl;
exit(1);
} }
set_num_iteration(num); set_num_iteration(num);
} }
......
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include "common.hpp" #include "common.hpp"
#include <vector> #include <vector>
#include <chrono>
using namespace std; using namespace std;
...@@ -505,6 +506,9 @@ class RocmBandwidthTest : public BaseTest { ...@@ -505,6 +506,9 @@ class RocmBandwidthTest : public BaseTest {
// Env key to specify iteration count // Env key to specify iteration count
char* bw_iter_cnt_; char* bw_iter_cnt_;
char* bw_sleep_time_;
uint32_t sleep_time_;
std::chrono::duration<uint32_t, std::micro> sleep_usecs_;
// Variable to store argument number // Variable to store argument number
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment