Commit d3267bb3 authored by Shucai Xiao's avatar Shucai Xiao
Browse files

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into...

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into test_runner_match_input_output
parents 2d9e620b a33d6fa2
...@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss) ...@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss)
auto each = [&](auto x) { auto each = [&](auto x) {
constexpr auto s = decltype(x.get_shape()){}; constexpr auto s = decltype(x.get_shape()){};
constexpr auto size = _c<s.element_space()>; constexpr auto size = _c<s.element_space()>;
if constexpr(not s.broadcasted()) if constexpr(not s.broadcasted() or (s.elements() - size) < 64)
return f(x, offset, false_type{});
else if constexpr((s.elements() - size) < 64)
return f(x, offset, false_type{}); return f(x, offset, false_type{});
else else
{ {
......
#ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP #ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#define MIGRAPHX_GUARD_KERNELS_PRINT_HPP #define MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/functional.hpp> #include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/algorithm.hpp> #include <migraphx/kernels/algorithm.hpp>
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/dfor.hpp> #include <migraphx/kernels/dfor.hpp>
#include <migraphx/kernels/basic_ops.hpp> #include <migraphx/kernels/basic_ops.hpp>
#include <args.hpp> #include <migraphx/kernels/array.hpp>
namespace migraphx { namespace migraphx {
...@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data, ...@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
return op.final(output_val, count); return op.final(output_val, count);
} }
template <class T, class U, class V, class W> template <class T1, class T2, class T3, class T4>
__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t) struct roalign_settings
{ {
const float roi_offset = ROIS_OFFSET; T1 roi_offset{};
const bool is_avg_pooling = IS_AVG_POOLING; T2 is_avg_pooling{};
const int64_t sampling_ratio = SAMPLING_RATIO; T3 sampling_ratio{};
const float spatial_scale = SPATIAL_SCALE; T4 spatial_scale{};
};
template <class... Ts>
constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
{
return {xs...};
}
template <class T, class U, class V, class W, class Settings>
__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t, Settings s)
{
auto index = make_index(); auto index = make_index();
const auto* x = x_t.data(); const auto* x = x_t.data();
const auto* rois = rois_t.data(); const auto* rois = rois_t.data();
...@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
const auto* offset_rois = rois + (n * roi_column_num); const auto* offset_rois = rois + (n * roi_column_num);
const int batch_ind = ind[n]; const int batch_ind = ind[n];
array<float, 2> roi_starts = {offset_rois[1] * spatial_scale, array<float, 2> roi_starts = {offset_rois[1] * s.spatial_scale,
offset_rois[0] * spatial_scale}; offset_rois[0] * s.spatial_scale};
array<float, 2> roi_ends = {offset_rois[3] * spatial_scale, offset_rois[2] * spatial_scale}; array<float, 2> roi_ends = {offset_rois[3] * s.spatial_scale,
offset_rois[2] * s.spatial_scale};
array<float, 2> roi_size{}; array<float, 2> roi_size{};
array<float, 2> bin_size{}; array<float, 2> bin_size{};
...@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] = bin_grid_size[ii] =
(sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]); (s.sampling_ratio > 0) ? s.sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
} }
const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
if constexpr(is_avg_pooling) if constexpr(s.is_avg_pooling)
{ {
out_ptr[i] = calc_pooling(offset_x, out_ptr[i] = calc_pooling(offset_x,
roi_starts, roi_starts,
...@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
roi_offset, s.roi_offset,
avg_pool{}); avg_pool{});
} }
else else
...@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
roi_offset, s.roi_offset,
max_pool{}); max_pool{});
} }
} }
......
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP #ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP #define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -13,7 +13,7 @@ constexpr auto vec_size(vec<T, N>) ...@@ -13,7 +13,7 @@ constexpr auto vec_size(vec<T, N>)
} }
template <class T> template <class T>
constexpr auto vec_size(T, ...) constexpr auto vec_size(T, ...) // NOLINT
{ {
return index_constant<0>{}; return index_constant<0>{};
} }
......
#include <migraphx/eliminate_contiguous.hpp> #include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/dead_code_elimination.hpp> #include <migraphx/dead_code_elimination.hpp>
#include <migraphx/pass_manager.hpp> #include <migraphx/pass_manager.hpp>
#include <migraphx/instruction.hpp>
#include <basic_ops.hpp> #include <basic_ops.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <pointwise.hpp>
#include <test.hpp> #include <test.hpp>
void run_pass(migraphx::module& m) void run_pass(migraphx::module& m)
...@@ -159,4 +161,25 @@ TEST_CASE(standard_flatten_op) ...@@ -159,4 +161,25 @@ TEST_CASE(standard_flatten_op)
EXPECT(std::distance(m.begin(), m.end()) == (count - 1)); EXPECT(std::distance(m.begin(), m.end()) == (count - 1));
} }
TEST_CASE(contiguous_pointwise)
{
migraphx::shape s{migraphx::shape::float_type, {2, 3, 8, 8}};
migraphx::program p;
auto* mm = p.get_main_module();
{
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", migraphx::shape{migraphx::shape::float_type, {3}});
auto yb = mm->add_instruction(
migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
auto yc = mm->add_instruction(migraphx::make_op("contiguous"), yb);
auto add = add_pointwise(p, "main:pointwise0", {x, yc}, single_pointwise("add"));
mm->add_instruction(pass_op{}, add);
}
auto count = std::distance(mm->begin(), mm->end());
run_pass(*mm);
EXPECT(std::distance(mm->begin(), mm->end()) == (count - 1));
EXPECT(std::none_of(
mm->begin(), mm->end(), [](auto&& ins) { return ins.name() == "contiguous"; }));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); } int main(int argc, const char* argv[]) { test::run(argc, argv); }
...@@ -7,38 +7,13 @@ ...@@ -7,38 +7,13 @@
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <test.hpp> #include <test.hpp>
#include <pointwise.hpp>
void run_pass(migraphx::program& p) void run_pass(migraphx::program& p)
{ {
migraphx::run_passes(p, {migraphx::fuse_pointwise{}, migraphx::dead_code_elimination{}}); migraphx::run_passes(p, {migraphx::fuse_pointwise{}, migraphx::dead_code_elimination{}});
} }
template <class F>
migraphx::instruction_ref add_pointwise(migraphx::program& p,
const std::string& name,
std::vector<migraphx::instruction_ref> inputs,
F f)
{
auto* pm = p.create_module(name);
auto* mm = p.get_main_module();
pm->set_bypass();
std::vector<migraphx::instruction_ref> params;
std::transform(inputs.begin(), inputs.end(), std::back_inserter(params), [&](auto input) {
return pm->add_parameter("x" + std::to_string(params.size()),
migraphx::shape{input->get_shape().type()});
});
auto r = f(pm, params);
pm->add_return({r});
return mm->add_instruction(migraphx::make_op("pointwise"), inputs, {pm});
}
auto single_pointwise(const std::string& name)
{
return [=](auto* pm, const auto& inputs) {
return pm->add_instruction(migraphx::make_op(name), inputs);
};
}
TEST_CASE(single) TEST_CASE(single)
{ {
migraphx::shape s{migraphx::shape::float_type, {2, 3}}; migraphx::shape s{migraphx::shape::float_type, {2, 3}};
......
File mode changed from 100644 to 100755
#ifndef MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
#define MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
#include <migraphx/program.hpp>
#include <migraphx/module.hpp>
#include <migraphx/make_op.hpp>
template <class F>
migraphx::instruction_ref add_pointwise(migraphx::program& p,
const std::string& name,
std::vector<migraphx::instruction_ref> inputs,
F f)
{
auto* pm = p.create_module(name);
auto* mm = p.get_main_module();
pm->set_bypass();
std::vector<migraphx::instruction_ref> params;
std::transform(inputs.begin(), inputs.end(), std::back_inserter(params), [&](auto input) {
return pm->add_parameter("x" + std::to_string(params.size()),
migraphx::shape{input->get_shape().type()});
});
auto r = f(pm, params);
pm->add_return({r});
return mm->add_instruction(migraphx::make_op("pointwise"), inputs, {pm});
}
inline auto single_pointwise(const std::string& name)
{
return [=](auto* pm, const auto& inputs) {
return pm->add_instruction(migraphx::make_op(name), inputs);
};
}
#endif // MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
...@@ -103,7 +103,14 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name()) ...@@ -103,7 +103,14 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_operators } // namespace operation_operators
template <class T> template <class T>
auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs) auto compute_shape_op(rank<3>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs))
{
return x.compute_shape(inputs);
}
template <class T>
auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs)) -> decltype(x.normalize_compute_shape(inputs))
{ {
dependent_type<operation, T> y = x; dependent_type<operation, T> y = x;
...@@ -112,27 +119,27 @@ auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& i ...@@ -112,27 +119,27 @@ auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& i
} }
template <class T> template <class T>
auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs) auto compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs, {})) -> decltype(x.compute_shape(inputs, {}))
{ {
return x.compute_shape(inputs, {}); return x.compute_shape(inputs, {});
} }
template <class T> template <class T>
shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&) shape compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
{ {
std::string name = x.name(); std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name); MIGRAPHX_THROW("Shape not computable: " + name);
} }
template <class T> template <class T>
shape normalize_compute_shape_op(const T& x, const std::vector<shape>& inputs) shape compute_shape_op(const T& x, const std::vector<shape>& inputs)
{ {
return normalize_compute_shape_op(rank<2>{}, x, inputs); return compute_shape_op(rank<3>{}, x, inputs);
} }
template <class T> template <class T>
auto compute_shape_op(rank<1>, auto mod_compute_shape_op(rank<1>,
const T& x, const T& x,
const std::vector<shape>& inputs, const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
...@@ -142,47 +149,23 @@ auto compute_shape_op(rank<1>, ...@@ -142,47 +149,23 @@ auto compute_shape_op(rank<1>,
} }
template <class T> template <class T>
shape shape mod_compute_shape_op(rank<0>,
compute_shape_op(rank<0>, const T& x, const std::vector<shape>&, const std::vector<module_ref>&)
{
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
shape compute_shape_op(const T& x,
const std::vector<shape>& inputs,
const std::vector<module_ref>& mod_args)
{
return compute_shape_op(rank<1>{}, x, inputs, mod_args);
}
template <class T>
auto normalize_compute_shape_op(rank<1>,
const T& x, const T& x,
const std::vector<shape>& inputs, const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
-> decltype(x.normalize_compute_shape(inputs, mod_args))
{
return x.normalize_compute_shape(inputs, mod_args);
}
template <class T>
shape normalize_compute_shape_op(rank<0>,
const T& x,
const std::vector<shape>&,
const std::vector<module_ref>&)
{ {
if(mod_args.empty())
return compute_shape_op(x, inputs);
std::string name = x.name(); std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name); MIGRAPHX_THROW("Shape not computable: " + name);
} }
template <class T> template <class T>
shape normalize_compute_shape_op(const T& x, shape mod_compute_shape_op(const T& x,
const std::vector<shape>& inputs, const std::vector<shape>& inputs,
std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
{ {
return normalize_compute_shape_op(rank<1>{}, x, inputs, mod_args); return mod_compute_shape_op(rank<1>{}, x, inputs, mod_args);
} }
template <class T> template <class T>
...@@ -495,13 +478,13 @@ lifetime get_lifetime_op(const T&) ...@@ -495,13 +478,13 @@ lifetime get_lifetime_op(const T&)
returns = 'shape', returns = 'shape',
input = 'const std::vector<shape>&', input = 'const std::vector<shape>&',
const = True, const = True,
default = 'detail::normalize_compute_shape_op'), default = 'detail::compute_shape_op'),
virtual('compute_shape', virtual('compute_shape',
returns = 'shape', returns = 'shape',
inputs = 'const std::vector<shape>&', inputs = 'const std::vector<shape>&',
mod_args = 'const std::vector<module_ref>&', mod_args = 'const std::vector<module_ref>&',
const = True, const = True,
default = 'detail::compute_shape_op'), default = 'detail::mod_compute_shape_op'),
virtual('compute', virtual('compute',
returns = 'argument', returns = 'argument',
ctx = 'context&', ctx = 'context&',
...@@ -589,7 +572,7 @@ template <class T> ...@@ -589,7 +572,7 @@ template <class T>
inline auto compute_shape(const T& op, const std::vector<shape>& inputs) inline auto compute_shape(const T& op, const std::vector<shape>& inputs)
-> decltype(op.normalize_compute_shape(inputs)) -> decltype(op.normalize_compute_shape(inputs))
{ {
return detail::normalize_compute_shape_op(op, inputs); return detail::compute_shape_op(op, inputs);
} }
inline shape compute_shape(const operation& op, inline shape compute_shape(const operation& op,
...@@ -614,7 +597,7 @@ inline auto compute_shape(const T& op, ...@@ -614,7 +597,7 @@ inline auto compute_shape(const T& op,
const std::vector<module_ref>& mod_args) const std::vector<module_ref>& mod_args)
-> decltype(op.normalize_compute_shape(inputs, mod_args)) -> decltype(op.normalize_compute_shape(inputs, mod_args))
{ {
return detail::normalize_compute_shape_op(op, inputs, mod_args); return detail::compute_shape_op(op, inputs, mod_args);
} }
inline bool is_context_free(const operation& op) { return op.is_context_free(); } inline bool is_context_free(const operation& op) { return op.is_context_free(); }
......
#!/usr/bin/env python3
import json
import argparse
import os
from sys import argv as sysargs
from sys import version_info as python_version
from sys import exit as sys_exit
import pandas as pd
from datetime import datetime
import venv
import shutil
if (python_version[0] < 3) or (python_version[0] < 3
and python_version[1] < 6):
raise Exception("Please utilize Python version 3.6 and above. Exiting...")
def parse_args():
parser = argparse.ArgumentParser(
description="Parser for MIGraphX ROCTX Markers")
parser.add_argument('--json-path',
type=str,
metavar='json_path',
help='Path to json file')
parser.add_argument('--out',
type=str,
metavar='out',
help='Output directory for run.')
parser.add_argument(
'--study-name',
type=str,
metavar='study-name',
help='Study-name is used for naming the output CSV file.')
parser.add_argument('--repeat',
type=int,
metavar='repeat',
help='Defines number of runs.',
default=2)
parser.add_argument('--parse',
default=False,
action='store_true',
help='Parses given JSON file.')
parser.add_argument('--clean',
default=False,
action='store_true',
help='Removes temporary paths')
parser.add_argument('--run',
type=str,
metavar='run',
help='Enables run and fetches run configs.')
parser.add_argument('--debug', default=False, action='store_true')
args = parser.parse_args()
return args
args = parse_args()
if not len(sysargs) > 1:
raise Exception("No arg is passed. Exiting...")
def parse(file):
with open(file, "r") as read_file:
data = json.load(read_file)
#Get marker names and first marker's time
list_names = []
first_marker = True
first_marker_time = 0
for i in data:
if (i):
if ("Marker start:" in i['name']) and (
i['name'] not in list_names):
list_names.append(i['name'])
if first_marker:
first_marker_time = i['ts']
first_marker = False
if (args.debug):
print(f"FIRST MARKER TIME DETERMINED: {first_marker_time}")
if (first_marker_time == 0):
raise ("FIRST MARKER TIME IS ZERO. EXITING...")
kernel_launch_info = [] #kernel description
kernel_launch_list = [] #kernel launch details
kernel_launch_time = [] #kernel execution time
for i in data:
if (i and i.get('args')):
try:
if (("KernelExecution" in i['args']['desc'])
and (i['ts'] >= first_marker_time)):
kernel_launch_info.append(i['args']['desc'])
kernel_launch_list.append(i)
kernel_launch_time.append(int(i['dur']))
except:
continue
max_index = kernel_launch_time.index(max(kernel_launch_time))
max_kernel_info = kernel_launch_list[max_index]
if (args.debug):
with open('rocTX_kernel_launch_list.txt', 'w') as f:
for i in kernel_launch_list:
f.write(f'{i}')
# Get timing information for each marker name
list_times_per_names = []
for name in list_names:
temp_list = []
for entry in data:
if (entry) and (
name == entry['name']
): # name can match on gpu or cpu side, for gpu, we need data from gpu markers.
if (("gpu::" in name)
and ("UserMarker frame:" in entry['args']['desc'])
): #gpu side information
temp_list.append(int(entry.get('dur')))
elif (("gpu::" not in name)
and ("Marker start:" in entry['args']['desc'])
): #cpu side information
temp_list.append(int(entry.get('dur')))
list_times_per_names.append(temp_list)
if (args.debug):
print(list_times_per_names)
sum_per_name = [] #TODO: refactor stat collection
for list in list_times_per_names:
sum_per_name.append(sum(list))
count_per_name = []
for list in list_times_per_names:
try:
count_per_name.append(len(list))
except:
count_per_name.append(0)
max_per_name = []
for list in list_times_per_names:
try:
max_per_name.append(max(list))
except:
max_per_name.append(0)
min_per_name = []
for list in list_times_per_names:
try:
min_per_name.append(min(list))
except:
min_per_name.append(0)
max_index_per_name = []
for list in list_times_per_names:
try:
max_index_per_name.append(list.index(max(list)))
except:
max_index_per_name.append(0)
max_occur_per_name = []
for list in list_times_per_names:
try:
max_occur_per_name.append(list.count(max(list)))
except:
max_occur_per_name.append(0)
total_time = sum(sum_per_name)
d = {
'SUM': sum_per_name,
'MIN': min_per_name,
'MAX': max_per_name,
'COUNT': count_per_name,
'MAX_INDEX': max_index_per_name,
'MAX_OCCUR': max_occur_per_name
}
df2 = pd.DataFrame(d)
df2.index = list_names
df2.sort_values(by=['SUM'], inplace=True, ascending=False)
if (args.debug):
print(df2)
print(f"\nTOTAL TIME: {total_time} us")
return df2, total_time, max_kernel_info
def run():
repeat_count = args.repeat
if (repeat_count == 0 or repeat_count == float('inf') or not repeat_count):
raise Exception("REPEAT COUNT CANNOT BE ZERO/INFINITY/NULL")
run_args = args.run
#configurations
configs = '--hip-trace --roctx-trace --flush-rate 10ms --timestamp on'
output_dir = f"-d {args.out}"
executable = f"/opt/rocm/bin/migraphx-driver roctx {run_args}"
process_args = configs + ' ' + output_dir + ' ' + executable
for i in range(repeat_count):
os.system('rocprof ' + process_args)
print("RUN COMPLETE.")
def clean():
shutil.rmtree('/tmp/rocm-profile-data/', ignore_errors=False)
def main():
if (args.clean):
clean()
sys_exit()
print("Initiating virtual environment...")
builder = venv.EnvBuilder(clear=True, with_pip=True)
builder.create('/tmp/rocm-profile-data/py/')
python_bin = '/tmp/rocm-profile-data/py' + '/bin/python'
file = args.json_path
if (args.study_name):
filename = args.study_name + ".csv"
else:
filename = "output" + datetime.now().strftime(
"%Y_%m_%d-%I:%M:%S_%p") + ".csv"
with open(filename, 'a') as f:
f.write(f"{args.run}\n")
if (args.run):
curr = os.path.abspath(os.getcwd())
rpd_path = '/tmp/rocm-profile-data/rocmProfileData/'
if not os.path.exists(rpd_path):
print("rocmProfileData DOES NOT EXIST. CLONING...")
os.system(
f"git clone https://github.com/ROCmSoftwarePlatform/rocmProfileData.git {rpd_path}"
)
os.chdir(rpd_path + "rocpd_python/")
os.system(python_bin + ' -m pip install --upgrade pip')
os.system(python_bin + ' setup.py install')
os.chdir(curr)
run()
os.chdir(curr + f"/{args.out}/")
out_path = os.popen(f"ls -td $PWD/*/*/ | head -{args.repeat}").read()
print(f"\nFOLLOWING PATHS WILL BE PARSED:\n{out_path}")
out_path = out_path.splitlines()
df_tot = pd.DataFrame()
tot_time = []
max_kernel_info_list = []
for path in out_path:
path = path.strip('\n')
print("\nPARSING OUTPUT PATH: " + path)
os.chdir(path)
os.system(
f"{python_bin} -m rocpd.rocprofiler_import --ops_input_file hcc_ops_trace.txt --api_input_file hip_api_trace.txt --roctx_input_file roctx_trace.txt trace.rpd"
)
os.system(
f"{python_bin} {rpd_path}/rpd2tracing.py trace.rpd trace.json")
os.chdir(curr)
df, total_time, path_max_kernel_info = parse(path + "trace.json")
max_kernel_info_list.append(path_max_kernel_info)
tot_time.append(total_time)
df_tot = pd.merge(df_tot,
df,
how='outer',
left_index=True,
right_index=True)
if (args.debug):
print("JSON FILE PATH: " + path + "trace.json")
df_tot.to_csv("rocTX_runs_dataframe.csv")
if (args.debug):
print(df_tot)
tmp_sum = df_tot.loc[:, df_tot.columns.str.contains('SUM')].astype(int)
tmp_min = df_tot.loc[:, df_tot.columns.str.contains('MIN')].astype(int)
tmp_max = df_tot.loc[:, df_tot.columns.str.match("^MAX_.$")].astype(
int)
tmp_count = df_tot.loc[:, df_tot.columns.str.match("COUNT")].astype(
int)
tmp_sum['SUM_avg'] = tmp_sum.mean(axis=1).astype(int)
tmp_min['MIN_avg'] = tmp_min.mean(axis=1).astype(int)
tmp_max['MAX_avg'] = tmp_max.mean(axis=1).astype(int)
df2 = tmp_sum['SUM_avg'].copy()
df2 = pd.merge(df2,
tmp_min['MIN_avg'],
how='outer',
left_index=True,
right_index=True)
df2 = pd.merge(df2,
tmp_max['MAX_avg'],
how='outer',
left_index=True,
right_index=True)
df2 = pd.merge(df2,
tmp_count['COUNT_x'],
how='outer',
left_index=True,
right_index=True)
df2.rename(columns={'COUNT_x': 'COUNT'}, inplace=True)
df2 = df2.loc[:, ~df2.columns.duplicated(
)] #there will be many COUNT_x in df2
df2.sort_values(by=['SUM_avg'], inplace=True, ascending=False)
if (args.debug):
pd.set_option('display.max_columns', None)
print(df_tot) #all data from all runs
print("\n*** RESULTS ***")
print(df2)
out_time = sum(tot_time) / len(tot_time)
print(f"\nAVG TOTAL TIME: {out_time} us\n")
df2.to_csv(filename, mode='a')
with open(filename, 'a') as f:
f.write(f"AVG TOTAL TIME: {out_time} us\n")
print(f"OUTPUT CSV FILE:\t{filename}")
if (args.debug):
#kernels that took the longest time printed
for item in max_kernel_info_list:
print(f"KERNEL NAME: {item['name']}\t\t{item['dur']}")
with open('rocTX_kernel_timing_details.txt', 'w') as f:
f.write(
"MOST TIME CONSUMING KERNELS IN EACH ITERATION (EXPECTED TO BE SAME KERNEL):\n"
)
for i in max_kernel_info_list:
f.write(f"KERNEL NAME: {i['name']}\t\t{i['dur']}\n")
print("KERNEL TIMING DETAILS:\trocTX_kernel_timing_details.txt")
print("ALL DATA FROM ALL RUNS:\trocTX_runs_dataframe.csv")
elif (args.parse):
if not (file):
raise Exception("JSON PATH IS NOT PROVIDED FOR PARSING.")
parse(file)
else:
raise Exception("PLEASE PROVIDE A COMMAND: RUN, PARSE, CLEAN")
if __name__ == "__main__":
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment