Commit 8258f92f authored by Ramesh Errabolu's avatar Ramesh Errabolu
Browse files

Use chrono library to get CPU times for transfers of data

parent aefd848f
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
File mode changed from 100644 to 100755
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#include "hsatimer.hpp"
#define NANOSECONDS_PER_SECOND 1000000000
// Construct an empty timer collection.
//
// The TSC frequency is given a defined value of zero here so that it is
// never read while indeterminate; InitTimer() replaces it with the
// measured value.
PerfTimer::PerfTimer() : freq_in_100mhz(0.0) {
}
// Release every Timer object created through CreateTimer().
// Timers are destroyed last-created-first, then the list is emptied.
PerfTimer::~PerfTimer() {
  for (std::vector<Timer*>::reverse_iterator it = _timers.rbegin();
       it != _timers.rend(); ++it) {
    delete *it;
  }
  _timers.clear();
}
// Measure the TSC frequency once and cache it for later tick-to-time
// conversion. Despite its name, MeasureTSCFreqHz() reports the value in
// units of 100 MHz (ticks * 10 per nanosecond).
void PerfTimer::InitTimer() {
  const uint64_t measured = MeasureTSCFreqHz();
  freq_in_100mhz = measured;
}
// Create a new timer instance and return its index
int PerfTimer::CreateTimer() {
Timer *newTimer = new Timer;
newTimer->_start = 0.0;
newTimer->_clocks = 0.0;
#ifdef __linux__
newTimer->_freq = NANOSECONDS_PER_SECOND;
#endif
// Save the timer object in timer list
_timers.push_back(newTimer);
return (int)(_timers.size() - 1);
}
int PerfTimer::StartTimer(int index) {
if (index >= (int)_timers.size()) {
Error("Cannot reset timer. Invalid handle.");
return HSA_FAILURE;
}
#ifdef __linux__
// General Linux timing method
#ifndef _AMD
struct timespec s;
clock_gettime(CLOCK_MONOTONIC, &s);
_timers[index]->_start =
(long long)s.tv_sec * NANOSECONDS_PER_SECOND + (long long)s.tv_nsec;
// AMD Linux timing method
#else
unsigned int unused;
_timers[index]->_start = __rdtscp(&unused);
#endif
#endif
return HSA_SUCCESS;
}
int PerfTimer::StopTimer(int index) {
long long n = 0;
if (index >= (int)_timers.size()) {
Error("Cannot reset timer. Invalid handle.");
return HSA_FAILURE;
}
#ifdef __linux__
// General Linux timing method
#ifndef _AMD
struct timespec s;
clock_gettime(CLOCK_MONOTONIC, &s);
n = (long long)s.tv_sec * NANOSECONDS_PER_SECOND + (long long)s.tv_nsec;
// AMD Linux timing
#else
unsigned int unused;
n = __rdtscp(&unused);
#endif
#endif
n -= _timers[index]->_start;
_timers[index]->_start = 0;
#ifndef _AMD
_timers[index]->_clocks += n;
#endif
#ifdef __linux__
//_timers[index]->_clocks += 10 * n /freq_in_100mhz; // unit is ns
_timers[index]->_clocks += 1.0E-6 * 10 * n / freq_in_100mhz; // convert to ms
// cout << "_AMD is enabled!!!" << endl;
#endif
return HSA_SUCCESS;
}
void PerfTimer::Error(string str) { cout << str << endl; }
double PerfTimer::ReadTimer(int index) {
if (index >= (int)_timers.size()) {
Error("Cannot read timer. Invalid handle.");
return HSA_FAILURE;
}
double reading = double(_timers[index]->_clocks);
reading = double(reading / _timers[index]->_freq);
return reading;
}
void PerfTimer::ResetTimer(int index) {
// Check if index value is over the timer's size
if (index >= (int)_timers.size()) {
Error("Invalid index value\n");
exit(1);
}
_timers[index]->_clocks = 0.0;
_timers[index]->_start = 0.0;
}
// Return a timestamp in microseconds read from CLOCK_MONOTONIC_RAW.
//
// Only Linux is implemented. On any other platform 0 is returned so that
// the function never flows off its end without a return value.
uint64_t PerfTimer::CoarseTimestampUs() {
#ifdef __linux__
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
  return uint64_t(ts.tv_sec) * 1000000 + ts.tv_nsec / 1000;
#else
  return 0;
#endif
}
// Estimate the TSC frequency by busy-waiting until one billion TSC ticks
// have elapsed and comparing that against the coarse microsecond clock.
//
// Despite the name, the result is expressed in units of 100 MHz: it is
// ticks * 10 divided by elapsed nanoseconds, rounded to nearest.
uint64_t PerfTimer::MeasureTSCFreqHz() {
  unsigned int aux;

  // Bracket the spin with coarse timestamps; the order of reads matters.
  const uint64_t startUs = CoarseTimestampUs();
  const uint64_t firstTick = __rdtscp(&aux);
  uint64_t lastTick = __rdtscp(&aux);
  while (lastTick - firstTick < 1000000000) {
    lastTick = __rdtscp(&aux);
  }
  const uint64_t stopUs = CoarseTimestampUs();

  // Convert the coarse interval to nanoseconds and divide with rounding
  // (adding half the divisor before the integer division).
  const uint64_t elapsedNs = (stopUs - startUs) * 1000;
  const uint64_t elapsedTicks = lastTick - firstTick;
  const uint64_t scaledTicks = elapsedTicks * 10;
  return (scaledTicks + (elapsedNs / 2)) / elapsedNs;
}
////////////////////////////////////////////////////////////////////////////////
//
// The University of Illinois/NCSA
// Open Source License (NCSA)
//
// Copyright (c) 2014-2015, Advanced Micro Devices, Inc. All rights reserved.
//
// Developed by:
//
// AMD Research and AMD HSA Software Development
//
// Advanced Micro Devices, Inc.
//
// www.amd.com
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal with the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimers.
// - Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimers in
// the documentation and/or other materials provided with the distribution.
// - Neither the names of Advanced Micro Devices, Inc,
// nor the names of its contributors may be used to endorse or promote
// products derived from this Software without specific prior written
// permission.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
// DEALINGS WITH THE SOFTWARE.
//
////////////////////////////////////////////////////////////////////////////////
#ifndef ROC_BANDWIDTH_TEST_MYTIME_H_
#define ROC_BANDWIDTH_TEST_MYTIME_H_
// Uses either the AMD (TSC-based) timer or the general Linux timer; the
// choice is made at compile time through the _AMD flag. The Windows
// platform is not currently supported.
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <x86intrin.h>
#include <string.h>
#include <iostream>
#include <vector>
#include <string>
using namespace std;
#include <sys/time.h>
#define HSA_FAILURE 1
#define HSA_SUCCESS 0
// A collection of independently started and stopped timers. Each timer is
// addressed by the integer index returned from CreateTimer().
//
// The class owns the Timer objects it allocates and deletes them in its
// destructor, so copying is disabled to prevent a double delete.
class PerfTimer {
 private:
  // Bookkeeping for a single timer instance
  struct Timer {
    std::string name;      /* < name of the timer object */
    long long _freq = 0;   /* < divisor applied to _clocks by ReadTimer() */
    long long _clocks = 0; /* < accumulated count over start/stop intervals */
    long long _start = 0;  /* < value captured by the last StartTimer() */
  };

  std::vector<Timer*> _timers; /* < owned Timer objects, indexed by handle */
  double freq_in_100mhz = 0.0; /* < TSC frequency set by InitTimer() */

 public:
  PerfTimer();
  ~PerfTimer();

  // Not copyable: the destructor deletes every pointer in _timers
  PerfTimer(const PerfTimer&) = delete;
  PerfTimer& operator=(const PerfTimer&) = delete;

  // Measure and cache the TSC frequency
  void InitTimer();

 private:
  // AMD timing method
  uint64_t CoarseTimestampUs();
  uint64_t MeasureTSCFreqHz();

 public:
  // Create a timer and return its index
  int CreateTimer();

  // Start, stop and clear the timer at the given index
  int StartTimer(int index);
  int StopTimer(int index);
  void ResetTimer(int index);

 public:
  // retrieve time
  double ReadTimer(int index);

  // write into a file
  // NOTE(review): no definition of WriteTimer is visible alongside the
  // other member definitions - confirm it exists before calling it.
  double WriteTimer(int index);

 public:
  // Print an error message to standard output
  void Error(std::string str);
};
#endif // ROC_BANDWIDTH_TEST_MYTIME_H_
...@@ -42,7 +42,6 @@ ...@@ -42,7 +42,6 @@
#include <unistd.h> #include <unistd.h>
#include <iostream> #include <iostream>
#include "hsatimer.hpp"
#include "rocm_bandwidth_test.hpp" #include "rocm_bandwidth_test.hpp"
using namespace std; using namespace std;
......
File mode changed from 100644 to 100755
...@@ -49,6 +49,7 @@ ...@@ -49,6 +49,7 @@
#include <unistd.h> #include <unistd.h>
#include <cctype> #include <cctype>
#include <cmath> #include <cmath>
#include <cstring>
#include <sstream> #include <sstream>
#include <limits> #include <limits>
#include <chrono> #include <chrono>
...@@ -125,7 +126,7 @@ void RocmBandwidthTest::InitializeSrcBuffer(size_t size, void* buf_cpy, ...@@ -125,7 +126,7 @@ void RocmBandwidthTest::InitializeSrcBuffer(size_t size, void* buf_cpy,
// If copying agent is a CPU, use memcpy to initialize copy buffer // If copying agent is a CPU, use memcpy to initialize copy buffer
hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_; hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_;
if (cpy_dev_type == HSA_DEVICE_TYPE_CPU) { if (cpy_dev_type == HSA_DEVICE_TYPE_CPU) {
memcpy(buf_cpy, init_src_, size); std::memcpy(buf_cpy, init_src_, size);
return; return;
} }
...@@ -149,7 +150,7 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi ...@@ -149,7 +150,7 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi
} }
// If Copy device is a Gpu setup buffer access // If Copy device is a Gpu setup buffer access
memset(validate_dst_, ~(0x23), curr_size); std::memset(validate_dst_, ~(0x23), curr_size);
hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_; hsa_device_type_t cpy_dev_type = agent_list_[cpy_dev_idx].device_type_;
if (cpy_dev_type == HSA_DEVICE_TYPE_GPU) { if (cpy_dev_type == HSA_DEVICE_TYPE_GPU) {
AcquireAccess(cpy_agent, validate_dst_); AcquireAccess(cpy_agent, validate_dst_);
...@@ -161,11 +162,11 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi ...@@ -161,11 +162,11 @@ bool RocmBandwidthTest::ValidateDstBuffer(size_t max_size, size_t curr_size, voi
// Copying device is a CPU, copy dst buffer // Copying device is a CPU, copy dst buffer
// into validation buffer // into validation buffer
memcpy(validate_dst_, buf_cpy, curr_size); std::memcpy(validate_dst_, buf_cpy, curr_size);
} }
// Compare initialization buffer with validation buffer // Compare initialization buffer with validation buffer
err_ = (hsa_status_t)memcmp(init_src_, validate_dst_, curr_size); err_ = (hsa_status_t)std::memcmp(init_src_, validate_dst_, curr_size);
if (err_ != HSA_STATUS_SUCCESS) { if (err_ != HSA_STATUS_SUCCESS) {
exit_value_ = err_; exit_value_ = err_;
} }
...@@ -595,12 +596,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -595,12 +596,8 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
} }
// Create a timer object and start it // Create a timer object and start it
PerfTimer timer;
uint32_t cpuTimerIdx = 0;
if (print_cpu_time_) { if (print_cpu_time_) {
timer.InitTimer(); cpu_start_ = std::chrono::steady_clock::now();
cpuTimerIdx = timer.CreateTimer();
timer.StartTimer(cpuTimerIdx);
} }
// Launch the copy operation // Launch the copy operation
...@@ -634,8 +631,10 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) { ...@@ -634,8 +631,10 @@ void RocmBandwidthTest::RunCopyBenchmark(async_trans_t& trans) {
// Stop the timer object and extract time taken // Stop the timer object and extract time taken
if (print_cpu_time_) { if (print_cpu_time_) {
timer.StopTimer(cpuTimerIdx); cpu_end_ = std::chrono::steady_clock::now();
cpu_time.push_back(timer.ReadTimer(cpuTimerIdx)); cpu_cp_time_ = cpu_end_ - cpu_start_;
uint64_t cpu_temp = cpu_cp_time_.count();
cpu_time.push_back(cpu_temp);
} }
// Collect time from the signal(s) // Collect time from the signal(s)
...@@ -816,7 +815,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() { ...@@ -816,7 +815,7 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
// Initialize version of the test // Initialize version of the test
version_.major_id = 2; version_.major_id = 2;
version_.minor_id = 4; version_.minor_id = 5;
version_.step_id = 0; version_.step_id = 0;
version_.reserved = 0; version_.reserved = 0;
...@@ -826,11 +825,13 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() { ...@@ -826,11 +825,13 @@ RocmBandwidthTest::RocmBandwidthTest(int argc, char** argv) : BaseTest() {
if (bw_sleep_time_ != NULL) { if (bw_sleep_time_ != NULL) {
sleep_time_ = atoi(bw_sleep_time_); sleep_time_ = atoi(bw_sleep_time_);
if ((sleep_time_ < 0) || (sleep_time_ > 60000)) { if ((sleep_time_ < 0) || (sleep_time_ > 60000)) {
std::cout << "Unit of sleep time is defined as 10 microseconds" << std::endl;
std::cout << "An input value of 10 implies sleep time of 100 microseconds" << std::endl;
std::cout << "Value of ROCM_BW_SLEEP_TIME must be between [1, 60000)" << sleep_time_ << std::endl; std::cout << "Value of ROCM_BW_SLEEP_TIME must be between [1, 60000)" << sleep_time_ << std::endl;
exit(1); exit(1);
} }
sleep_time_ *= 100; sleep_time_ *= 10;
std::chrono::duration<uint32_t, std::micro> temp(sleep_time_); std::chrono::microseconds temp(sleep_time_);
sleep_usecs_ = temp; sleep_usecs_ = temp;
} }
......
...@@ -45,7 +45,6 @@ ...@@ -45,7 +45,6 @@
#include "hsa.h" #include "hsa.h"
#include "base_test.hpp" #include "base_test.hpp"
#include "hsatimer.hpp"
#include "common.hpp" #include "common.hpp"
#include <vector> #include <vector>
...@@ -508,11 +507,10 @@ class RocmBandwidthTest : public BaseTest { ...@@ -508,11 +507,10 @@ class RocmBandwidthTest : public BaseTest {
char* bw_iter_cnt_; char* bw_iter_cnt_;
char* bw_sleep_time_; char* bw_sleep_time_;
uint32_t sleep_time_; uint32_t sleep_time_;
std::chrono::duration<uint32_t, std::micro> sleep_usecs_; std::chrono::nanoseconds cpu_cp_time_;
std::chrono::microseconds sleep_usecs_;
// Variable to store argument number std::chrono::time_point<std::chrono::steady_clock> cpu_end_;
std::chrono::time_point<std::chrono::steady_clock> cpu_start_;
// Variable to store argument number
// Variable to store argument number // Variable to store argument number
uint32_t usr_argc_; uint32_t usr_argc_;
......
File mode changed from 100644 to 100755
...@@ -97,7 +97,7 @@ static bool ParseOptionValue(char* value, vector<size_t>&value_list) { ...@@ -97,7 +97,7 @@ static bool ParseOptionValue(char* value, vector<size_t>&value_list) {
// Read the option value // Read the option value
stream >> token; stream >> token;
if (stream.fail()) { if (stream.fail()) {
exit(-1); return false;
} }
// Update output list with values // Update output list with values
...@@ -436,6 +436,7 @@ void RocmBandwidthTest::ParseArguments() { ...@@ -436,6 +436,7 @@ void RocmBandwidthTest::ParseArguments() {
status = ParseOptionValue(optarg, size_list_); status = ParseOptionValue(optarg, size_list_);
if (status == false) { if (status == false) {
print_help = true; print_help = true;
break;
} }
copy_ctrl_mask |= USR_BUFFER_SIZE; copy_ctrl_mask |= USR_BUFFER_SIZE;
break; break;
......
...@@ -61,7 +61,7 @@ static void printRecord(size_t size, double avg_time, ...@@ -61,7 +61,7 @@ static void printRecord(size_t size, double avg_time,
} }
uint32_t format = 15; uint32_t format = 15;
std::cout.precision(6); std::cout.precision(3);
std::cout << std::fixed; std::cout << std::fixed;
std::cout.width(format); std::cout.width(format);
std::cout << size_str.str(); std::cout << size_str.str();
...@@ -135,7 +135,6 @@ double RocmBandwidthTest::GetMeanTime(std::vector<double>& vec) { ...@@ -135,7 +135,6 @@ double RocmBandwidthTest::GetMeanTime(std::vector<double>& vec) {
} }
std::sort(vec.begin(), vec.end()); std::sort(vec.begin(), vec.end());
vec.erase(vec.begin());
vec.erase(vec.begin(), vec.begin() + num_iteration_ * 0.1); vec.erase(vec.begin(), vec.begin() + num_iteration_ * 0.1);
vec.erase(vec.begin() + num_iteration_, vec.end()); vec.erase(vec.begin() + num_iteration_, vec.end());
...@@ -287,7 +286,7 @@ void RocmBandwidthTest::PrintPerfMatrix(bool validate, bool peak, double* perf_m ...@@ -287,7 +286,7 @@ void RocmBandwidthTest::PrintPerfMatrix(bool validate, bool peak, double* perf_m
std::cout << std::endl; std::cout << std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout.precision(6); std::cout.precision(3);
std::cout << std::fixed; std::cout << std::fixed;
std::cout.width(format); std::cout.width(format);
......
...@@ -46,6 +46,7 @@ ...@@ -46,6 +46,7 @@
#include <iomanip> #include <iomanip>
#include <sstream> #include <sstream>
#include <string> #include <string>
#include <cstring>
// @brief: Helper method to iterate through the memory pools of // @brief: Helper method to iterate through the memory pools of
// an agent and discover its properties // an agent and discover its properties
...@@ -158,7 +159,7 @@ void PopulateBDF(uint32_t bdf_id, agent_info_t *agent_info) { ...@@ -158,7 +159,7 @@ void PopulateBDF(uint32_t bdf_id, agent_info_t *agent_info) {
std::stringstream stream; std::stringstream stream;
stream << std::setfill('0') << std::setw(sizeof(uint8_t) * 2); stream << std::setfill('0') << std::setw(sizeof(uint8_t) * 2);
stream << std::hex << +bus_id << ":" << +dev_id << "." << +func_id; stream << std::hex << +bus_id << ":" << +dev_id << "." << +func_id;
strcpy(agent_info->bdf_id_, (stream.str()).c_str()); std::strcpy(agent_info->bdf_id_, (stream.str()).c_str());
} }
// @brief: Helper method to iterate through the agents of // @brief: Helper method to iterate through the agents of
...@@ -334,7 +335,7 @@ void RocmBandwidthTest::BindLinkProps(uint32_t idx1, uint32_t idx2) { ...@@ -334,7 +335,7 @@ void RocmBandwidthTest::BindLinkProps(uint32_t idx1, uint32_t idx2) {
hsa_amd_memory_pool_link_info_t *link_info; hsa_amd_memory_pool_link_info_t *link_info;
uint32_t link_info_sz = hops * sizeof(hsa_amd_memory_pool_link_info_t); uint32_t link_info_sz = hops * sizeof(hsa_amd_memory_pool_link_info_t);
link_info = (hsa_amd_memory_pool_link_info_t *)malloc(link_info_sz); link_info = (hsa_amd_memory_pool_link_info_t *)malloc(link_info_sz);
memset(link_info, 0, (hops * sizeof(hsa_amd_memory_pool_link_info_t))); std::memset(link_info, 0, (hops * sizeof(hsa_amd_memory_pool_link_info_t)));
err_ = hsa_amd_agent_memory_pool_get_info(agent1, pool, err_ = hsa_amd_agent_memory_pool_get_info(agent1, pool,
HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info); HSA_AMD_AGENT_MEMORY_POOL_INFO_LINK_INFO, link_info);
......
...@@ -418,11 +418,14 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) { ...@@ -418,11 +418,14 @@ void RocmBandwidthTest::ComputeCopyTime(async_trans_t& trans) {
data_size += data_size; data_size += data_size;
} }
// Get time taken by copy operation // Get time taken by copy operation. Adjust time from nanoseconds
// to units of seconds
if ((print_cpu_time_) || if ((print_cpu_time_) ||
(trans.copy.uses_gpu_ != true)) { (trans.copy.uses_gpu_ != true)) {
avg_time = trans.cpu_avg_time_[idx]; avg_time = trans.cpu_avg_time_[idx];
min_time = trans.cpu_min_time_[idx]; min_time = trans.cpu_min_time_[idx];
avg_time = avg_time / 1000 / 1000 / 1000;
min_time = min_time / 1000 / 1000 / 1000;
} else { } else {
avg_time = trans.gpu_avg_time_[idx]; avg_time = trans.gpu_avg_time_[idx];
min_time = trans.gpu_min_time_[idx]; min_time = trans.gpu_min_time_[idx];
......
File mode changed from 100644 to 100755
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment