Commit 6c97c8ea authored by Paul

Merge branch 'develop' into mlir-c

parents ee382ad9 2d4dcc47
@@ -168,6 +168,7 @@ constexpr auto transform_args(F f, Fs... fs)
return [=](auto... xs) { return transform_args(f)(xs...)(transform_args(fs...)); };
}
+ // NOLINTNEXTLINE
#define MIGRAPHX_LIFT(...) \
([](auto&&... xs) { return (__VA_ARGS__)(static_cast<decltype(xs)>(xs)...); })
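For context: MIGRAPHX_LIFT exists because an overload set or function template cannot be passed around as a value; the macro wraps the callee in a generic lambda that perfectly forwards its arguments. A minimal usage sketch (abs_all and the std::fabs call are illustrative, not part of this diff):

#include <algorithm>
#include <cmath>
#include <vector>

inline void abs_all(std::vector<float>& v)
{
    // std::fabs names an overload set, so it cannot be passed directly;
    // the lifted lambda forwards each element to the matching overload.
    std::transform(v.begin(), v.end(), v.begin(), MIGRAPHX_LIFT(std::fabs));
}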
#ifndef MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
#define MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
namespace migraphx {
template <class F>
struct generic_constant
{
static constexpr auto value = F{}();
using value_type = decltype(value);
using type = generic_constant;
constexpr operator value_type() const noexcept { return value; }
constexpr value_type operator()() const noexcept { return value; }
};
template <class F>
constexpr generic_constant<F> make_generic_constant(F)
{
return {};
}
// NOLINTNEXTLINE
#define MIGRAPHX_MAKE_CONSTANT(x) \
make_generic_constant([] { \
struct fun \
{ \
constexpr auto operator()() const { return x; } \
}; \
return fun{}; \
}())
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
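For context: generic_constant encodes a value in a stateless functor type, and MIGRAPHX_MAKE_CONSTANT captures an arbitrary constexpr expression that way, so the value travels in the type system rather than as a runtime argument. A minimal sketch (the expression is illustrative):

constexpr auto seven = MIGRAPHX_MAKE_CONSTANT(3 + 4);
static_assert(seven == 7);   // via the implicit conversion to value_type
static_assert(seven() == 7); // the call operator yields the same constant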
#ifndef MIGRAPHX_GUARD_KERNELS_HIP_HPP
#define MIGRAPHX_GUARD_KERNELS_HIP_HPP
// Work around a macro redefinition issue with clang-tidy
#if defined(__HIP_PLATFORM_HCC__) && defined(MIGRAPHX_USE_CLANG_TIDY)
#undef __HIP_PLATFORM_HCC__ // NOLINT
#endif
#include <hip/hip_runtime.h>
#endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_INDEX_HPP
#define MIGRAPHX_GUARD_KERNELS_INDEX_HPP
#include <hip/hip_runtime.h>
#include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/types.hpp>
namespace migraphx {
@@ -17,7 +17,7 @@ struct index
#ifdef MIGRAPHX_NGLOBAL
return MIGRAPHX_NGLOBAL;
#else
- return blockDim.x * gridDim.x;
+ return blockDim.x * gridDim.x; // NOLINT
#endif
}
@@ -26,7 +26,7 @@ struct index
#ifdef MIGRAPHX_NLOCAL
return MIGRAPHX_NLOCAL;
#else
- return blockDim.x;
+ return blockDim.x; // NOLINT
#endif
}
@@ -53,7 +53,7 @@ struct index
inline __device__ index make_index()
{
- return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
+ return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
}
} // namespace migraphx
@@ -5,28 +5,30 @@
namespace migraphx {
- template <class T, T v>
+ template <class T, T V>
struct integral_constant
{
- static constexpr T value = v;
+ static constexpr T value = V;
using value_type = T;
using type = integral_constant;
constexpr operator value_type() const noexcept { return value; }
constexpr value_type operator()() const noexcept { return value; }
};
+ // NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \
- template <class T, T v, class U, U w> \
- constexpr inline integral_constant<decltype(v op w), (v op w)> operator op( \
- integral_constant<T, v>, integral_constant<U, w>) noexcept \
+ template <class T, T V, class U, U w> \
+ constexpr inline integral_constant<decltype(V op w), (V op w)> operator op( \
+ integral_constant<T, V>, integral_constant<U, w>) noexcept \
{ \
return {}; \
}
+ // NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \
- template <class T, T v> \
- constexpr inline integral_constant<decltype(op v), (op v)> operator op( \
- integral_constant<T, v>) noexcept \
+ template <class T, T V> \
+ constexpr inline integral_constant<decltype(op V), (op V)> operator op( \
+ integral_constant<T, V>) noexcept \
{ \
return {}; \
}
@@ -64,8 +66,8 @@ using false_type = bool_constant<false>;
template <index_int N>
using index_constant = integral_constant<index_int, N>;
- template <auto v>
- static constexpr auto _c = integral_constant<decltype(v), v>{};
+ template <auto V>
+ static constexpr auto _c = integral_constant<decltype(V), V>{}; // NOLINT
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP
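For context: the renamed template parameters above feed the operator macros (expanded elsewhere in this header for the usual arithmetic and comparison operators), which keep arithmetic on integral_constant values inside the type system. A small sketch, assuming the binary-op macro has been expanded for '+':

constexpr auto two = _c<2>;
constexpr auto three = _c<3>;
constexpr auto five = two + three; // integral_constant<int, 5>, not a runtime int
static_assert(decltype(five)::value == 5);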
@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss)
auto each = [&](auto x) {
constexpr auto s = decltype(x.get_shape()){};
constexpr auto size = _c<s.element_space()>;
- if constexpr(not s.broadcasted())
- return f(x, offset, false_type{});
- else if constexpr((s.elements() - size) < 64)
+ if constexpr(not s.broadcasted() or (s.elements() - size) < 64)
return f(x, offset, false_type{});
else
{
#ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#define MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#include <hip/hip_runtime.h>
#include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/algorithm.hpp>
@@ -4,7 +4,7 @@
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/dfor.hpp>
#include <migraphx/kernels/basic_ops.hpp>
- #include <args.hpp>
+ #include <migraphx/kernels/array.hpp>
namespace migraphx {
@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
return op.final(output_val, count);
}
- template <class T, class U, class V, class W>
- __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t)
+ template <class T1, class T2, class T3, class T4>
+ struct roalign_settings
{
- const float roi_offset = ROIS_OFFSET;
- const bool is_avg_pooling = IS_AVG_POOLING;
- const int64_t sampling_ratio = SAMPLING_RATIO;
- const float spatial_scale = SPATIAL_SCALE;
+ T1 roi_offset{};
+ T2 is_avg_pooling{};
+ T3 sampling_ratio{};
+ T4 spatial_scale{};
+ };
+ template <class... Ts>
+ constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
+ {
+ return {xs...};
+ }
+ template <class T, class U, class V, class W, class Settings>
+ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t, Settings s)
+ {
auto index = make_index();
const auto* x = x_t.data();
const auto* rois = rois_t.data();
@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
const auto* offset_rois = rois + (n * roi_column_num);
const int batch_ind = ind[n];
- array<float, 2> roi_starts = {offset_rois[1] * spatial_scale,
- offset_rois[0] * spatial_scale};
- array<float, 2> roi_ends = {offset_rois[3] * spatial_scale, offset_rois[2] * spatial_scale};
+ array<float, 2> roi_starts = {offset_rois[1] * s.spatial_scale,
+ offset_rois[0] * s.spatial_scale};
+ array<float, 2> roi_ends = {offset_rois[3] * s.spatial_scale,
+ offset_rois[2] * s.spatial_scale};
array<float, 2> roi_size{};
array<float, 2> bin_size{};
@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] =
- (sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
+ (s.sampling_ratio > 0) ? s.sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
}
const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
- if constexpr(is_avg_pooling)
+ if constexpr(s.is_avg_pooling)
{
out_ptr[i] = calc_pooling(offset_x,
roi_starts,
@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw},
bin_grid_size,
in_dims,
- roi_offset,
+ s.roi_offset,
avg_pool{});
}
else
@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw},
bin_grid_size,
in_dims,
- roi_offset,
+ s.roi_offset,
max_pool{});
}
}
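The net effect of this hunk: the ROIS_OFFSET, IS_AVG_POOLING, SAMPLING_RATIO, and SPATIAL_SCALE compile-time macros become members of a roalign_settings object passed into the kernel, so the kernel source no longer depends on -D defines. A hypothetical call site (values and the surrounding wrapper are illustrative; integral/generic constants keep if constexpr(s.is_avg_pooling) a compile-time branch):

// Hypothetical: the real kernel wrapper generates these constants.
auto s = make_roalign_settings(MIGRAPHX_MAKE_CONSTANT(0.5f),  // roi_offset
                               _c<true>,                      // is_avg_pooling
                               _c<2>,                         // sampling_ratio
                               MIGRAPHX_MAKE_CONSTANT(1.0f)); // spatial_scale
roialign(x_t, rois_t, ind_t, y_t, s);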
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#include <hip/hip_runtime.h>
#include <migraphx/kernels/hip.hpp>
namespace migraphx {
@@ -13,7 +13,7 @@ constexpr auto vec_size(vec<T, N>)
}
template <class T>
- constexpr auto vec_size(T, ...)
+ constexpr auto vec_size(T, ...) // NOLINT
{
return index_constant<0>{};
}
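For context: the `(T, ...)` overload uses a C-style ellipsis so it is the worst possible match during overload resolution; it is only chosen when the vec<T, N> overload above does not apply. A sketch, assuming the vec overload returns index_constant<N>:

static_assert(vec_size(vec<float, 4>{}) == 4); // exact match wins
// vec_size(3.0f) -> index_constant<0>{}: scalars fall through to the ellipsis overload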
@@ -101,4 +101,38 @@ TEST_CASE(after_param_broadcast)
EXPECT(not m.get_output_shapes().back().broadcasted());
}
TEST_CASE(two_transpose_gather)
{
migraphx::module m1;
{
auto data = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
auto ind = m1.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
auto td = m1.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), data);
auto sd = m1.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), td);
auto bd =
m1.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), sd);
auto r = m1.add_instruction(migraphx::make_op("gather", {{"axis", 2}}), bd, ind);
m1.add_return({r});
}
run_pass(m1);
migraphx::module m2;
{
auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
auto ind = m2.add_parameter("ind", {migraphx::shape::float_type, {2, 3}});
auto td = m2.add_instruction(
migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}), data);
auto ctd = m2.add_instruction(migraphx::make_op("contiguous"), td);
auto sd = m2.add_instruction(migraphx::make_op("softmax", {{"axis", 2}}), ctd);
auto bd =
m2.add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}), sd);
auto cbd = m2.add_instruction(migraphx::make_op("contiguous"), bd);
auto r = m2.add_instruction(migraphx::make_op("gather", {{"axis", 2}}), cbd, ind);
m2.add_return({r});
}
EXPECT(m1 == m2);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/instruction.hpp>
#include <basic_ops.hpp>
#include <migraphx/make_op.hpp>
#include <pointwise.hpp>
#include <test.hpp>
void run_pass(migraphx::module& m)
@@ -159,4 +161,25 @@ TEST_CASE(standard_flatten_op)
EXPECT(std::distance(m.begin(), m.end()) == (count - 1));
}
TEST_CASE(contiguous_pointwise)
{
migraphx::shape s{migraphx::shape::float_type, {2, 3, 8, 8}};
migraphx::program p;
auto* mm = p.get_main_module();
{
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", migraphx::shape{migraphx::shape::float_type, {3}});
auto yb = mm->add_instruction(
migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
auto yc = mm->add_instruction(migraphx::make_op("contiguous"), yb);
auto add = add_pointwise(p, "main:pointwise0", {x, yc}, single_pointwise("add"));
mm->add_instruction(pass_op{}, add);
}
auto count = std::distance(mm->begin(), mm->end());
run_pass(*mm);
EXPECT(std::distance(mm->begin(), mm->end()) == (count - 1));
EXPECT(std::none_of(
mm->begin(), mm->end(), [](auto&& ins) { return ins.name() == "contiguous"; }));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
@@ -7,38 +7,13 @@
#include <migraphx/make_op.hpp>
#include <test.hpp>
#include <pointwise.hpp>
void run_pass(migraphx::program& p)
{
migraphx::run_passes(p, {migraphx::fuse_pointwise{}, migraphx::dead_code_elimination{}});
}
- template <class F>
- migraphx::instruction_ref add_pointwise(migraphx::program& p,
- const std::string& name,
- std::vector<migraphx::instruction_ref> inputs,
- F f)
- {
- auto* pm = p.create_module(name);
- auto* mm = p.get_main_module();
- pm->set_bypass();
- std::vector<migraphx::instruction_ref> params;
- std::transform(inputs.begin(), inputs.end(), std::back_inserter(params), [&](auto input) {
- return pm->add_parameter("x" + std::to_string(params.size()),
- migraphx::shape{input->get_shape().type()});
- });
- auto r = f(pm, params);
- pm->add_return({r});
- return mm->add_instruction(migraphx::make_op("pointwise"), inputs, {pm});
- }
- auto single_pointwise(const std::string& name)
- {
- return [=](auto* pm, const auto& inputs) {
- return pm->add_instruction(migraphx::make_op(name), inputs);
- };
- }
TEST_CASE(single)
{
migraphx::shape s{migraphx::shape::float_type, {2, 3}};
File mode changed from 100644 to 100755
#ifndef MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
#define MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
#include <migraphx/program.hpp>
#include <migraphx/module.hpp>
#include <migraphx/make_op.hpp>
template <class F>
migraphx::instruction_ref add_pointwise(migraphx::program& p,
const std::string& name,
std::vector<migraphx::instruction_ref> inputs,
F f)
{
auto* pm = p.create_module(name);
auto* mm = p.get_main_module();
pm->set_bypass();
std::vector<migraphx::instruction_ref> params;
std::transform(inputs.begin(), inputs.end(), std::back_inserter(params), [&](auto input) {
return pm->add_parameter("x" + std::to_string(params.size()),
migraphx::shape{input->get_shape().type()});
});
auto r = f(pm, params);
pm->add_return({r});
return mm->add_instruction(migraphx::make_op("pointwise"), inputs, {pm});
}
inline auto single_pointwise(const std::string& name)
{
return [=](auto* pm, const auto& inputs) {
return pm->add_instruction(migraphx::make_op(name), inputs);
};
}
#endif // MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
@@ -1473,6 +1473,32 @@ TEST_CASE(fp32_fp16_test)
test_case({"add"});
}
TEST_CASE(gather_non_std_test)
{
{
migraphx::program p;
auto* mm = p.get_main_module();
std::vector<float> data = {0.5f, 3.5f, 6.5f, 1.5f, 4.5f, 7.5f, 2.5f, 2.5f, 8.5f};
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
auto d = mm->add_literal(migraphx::literal{s, data});
migraphx::shape s_indices{migraphx::shape::int32_type, {2, 2}};
std::vector<int> indices{-3, -3, -1, -1};
auto ind = mm->add_literal(migraphx::literal{s_indices, indices});
auto td = mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), d);
auto tind =
mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), ind);
mm->add_instruction(migraphx::make_op("gather", {{"axis", 0}}), td, tind);
auto result = p.eval({}).back();
std::vector<float> golden = {
0.5f, 1.5f, 2.5f, 6.5f, 7.5f, 8.5f, 0.5f, 1.5f, 2.5f, 6.5f, 7.5f, 8.5f};
std::vector<float> res_data;
result.visit([&](auto output) { res_data.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(res_data, golden));
}
}
TEST_CASE(gather_test)
{
{
@@ -2784,7 +2810,6 @@ TEST_CASE(nms_not_center_test)
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
std::cout << "output = " << output << std::endl;
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
EXPECT(migraphx::verify_range(result, gold));
}
@@ -2818,7 +2843,6 @@ TEST_CASE(nms_test)
auto output = p.eval({}).back();
std::vector<int64_t> result;
output.visit([&](auto out) { result.assign(out.begin(), out.end()); });
std::cout << "output = " << output << std::endl;
std::vector<int64_t> gold = {0, 0, 3, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0};
EXPECT(migraphx::verify_range(result, gold));
}
#include "verify_program.hpp"
#include <migraphx/program.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/make_op.hpp>
struct test_nonstd_gather : verify_program<test_nonstd_gather>
{
migraphx::program create_program() const
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape s{migraphx::shape::float_type, {3, 3}};
migraphx::shape s_indices{migraphx::shape::int32_type, {2, 2}};
std::vector<int> indices{1, 1, 0, 2};
auto d = mm->add_parameter("data", s);
auto td = mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), d);
auto ind = mm->add_literal(migraphx::literal{s_indices, indices});
auto tind =
mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {1, 0}}}), ind);
auto r = mm->add_instruction(migraphx::make_op("gather", {{"axis", 1}}), td, tind);
mm->add_return({r});
return p;
}
};
@@ -103,7 +103,14 @@ auto operator==(const T& x, const U& y) -> decltype(x.name() == y.name())
} // namespace operation_operators
template <class T>
- auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
+ auto compute_shape_op(rank<3>, const T& x, const std::vector<shape>& inputs)
+ -> decltype(x.compute_shape(inputs))
+ {
+ return x.compute_shape(inputs);
+ }
+ template <class T>
+ auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.normalize_compute_shape(inputs))
{
dependent_type<operation, T> y = x;
@@ -112,77 +119,53 @@ auto normalize_compute_shape_op(rank<2>, const T& x, const std::vector<shape>& i
}
template <class T>
- auto normalize_compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
+ auto compute_shape_op(rank<1>, const T& x, const std::vector<shape>& inputs)
-> decltype(x.compute_shape(inputs, {}))
{
return x.compute_shape(inputs, {});
}
template <class T>
- shape normalize_compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
+ shape compute_shape_op(rank<0>, const T& x, const std::vector<shape>&)
{
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
- shape normalize_compute_shape_op(const T& x, const std::vector<shape>& inputs)
+ shape compute_shape_op(const T& x, const std::vector<shape>& inputs)
{
- return normalize_compute_shape_op(rank<2>{}, x, inputs);
+ return compute_shape_op(rank<3>{}, x, inputs);
}
template <class T>
- auto compute_shape_op(rank<1>,
- const T& x,
- const std::vector<shape>& inputs,
- const std::vector<module_ref>& mod_args)
+ auto mod_compute_shape_op(rank<1>,
+ const T& x,
+ const std::vector<shape>& inputs,
+ const std::vector<module_ref>& mod_args)
-> decltype(x.compute_shape(inputs, mod_args))
{
return x.compute_shape(inputs, mod_args);
}
template <class T>
- shape
- compute_shape_op(rank<0>, const T& x, const std::vector<shape>&, const std::vector<module_ref>&)
- {
- std::string name = x.name();
- MIGRAPHX_THROW("Shape not computable: " + name);
- }
- template <class T>
- shape compute_shape_op(const T& x,
- const std::vector<shape>& inputs,
- const std::vector<module_ref>& mod_args)
- {
- return compute_shape_op(rank<1>{}, x, inputs, mod_args);
- }
- template <class T>
- auto normalize_compute_shape_op(rank<1>,
- const T& x,
- const std::vector<shape>& inputs,
- std::vector<module_ref>& mod_args)
- -> decltype(x.normalize_compute_shape(inputs, mod_args))
- {
- return x.normalize_compute_shape(inputs, mod_args);
- }
template <class T>
- shape normalize_compute_shape_op(rank<0>,
- const T& x,
- const std::vector<shape>&,
- const std::vector<module_ref>&)
+ shape mod_compute_shape_op(rank<0>,
+ const T& x,
+ const std::vector<shape>& inputs,
+ const std::vector<module_ref>& mod_args)
{
+ if(mod_args.empty())
+ return compute_shape_op(x, inputs);
std::string name = x.name();
MIGRAPHX_THROW("Shape not computable: " + name);
}
template <class T>
- shape normalize_compute_shape_op(const T& x,
- const std::vector<shape>& inputs,
- std::vector<module_ref>& mod_args)
+ shape mod_compute_shape_op(const T& x,
+ const std::vector<shape>& inputs,
+ const std::vector<module_ref>& mod_args)
{
- return normalize_compute_shape_op(rank<1>{}, x, inputs, mod_args);
+ return mod_compute_shape_op(rank<1>{}, x, inputs, mod_args);
}
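For background, the renamed overloads above all use the rank<N> tag-dispatch idiom: rank<N> derives from rank<N-1>, so the highest-ranked viable overload wins, and the SFINAE trailing return types drop overloads the operation type does not support. A self-contained sketch of the idiom (names and types are illustrative, not from this diff):

#include <cstddef>

template <std::size_t N>
struct rank : rank<N - 1> {};
template <>
struct rank<0> {};

struct square { constexpr double area() const { return 4.0; } };
struct point {};

// Preferred overload: participates only when T has a member area().
template <class T>
constexpr auto measure(rank<1>, const T& x) -> decltype(x.area()) { return x.area(); }

// Fallback: chosen when the rank<1> candidate is removed by SFINAE.
template <class T>
constexpr double measure(rank<0>, const T&) { return 0.0; }

template <class T>
constexpr double measure(const T& x) { return measure(rank<1>{}, x); }

static_assert(measure(square{}) == 4.0);
static_assert(measure(point{}) == 0.0);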
template <class T>
@@ -495,13 +478,13 @@ lifetime get_lifetime_op(const T&)
returns = 'shape',
input = 'const std::vector<shape>&',
const = True,
- default = 'detail::normalize_compute_shape_op'),
+ default = 'detail::compute_shape_op'),
virtual('compute_shape',
returns = 'shape',
inputs = 'const std::vector<shape>&',
mod_args = 'const std::vector<module_ref>&',
const = True,
- default = 'detail::compute_shape_op'),
+ default = 'detail::mod_compute_shape_op'),
virtual('compute',
returns = 'argument',
ctx = 'context&',
@@ -589,7 +572,7 @@ template <class T>
inline auto compute_shape(const T& op, const std::vector<shape>& inputs)
-> decltype(op.normalize_compute_shape(inputs))
{
- return detail::normalize_compute_shape_op(op, inputs);
+ return detail::compute_shape_op(op, inputs);
}
inline shape compute_shape(const operation& op,
@@ -614,7 +597,7 @@ inline auto compute_shape(const T& op,
const std::vector<module_ref>& mod_args)
-> decltype(op.normalize_compute_shape(inputs, mod_args))
{
- return detail::normalize_compute_shape_op(op, inputs, mod_args);
+ return detail::compute_shape_op(op, inputs, mod_args);
}
inline bool is_context_free(const operation& op) { return op.is_context_free(); }
#!/usr/bin/env python3
import json
import argparse
import os
from sys import argv as sysargs
from sys import version_info as python_version
from sys import exit as sys_exit
import pandas as pd
from datetime import datetime
import venv
import shutil
if python_version[0] < 3 or (python_version[0] == 3 and python_version[1] < 6):
raise Exception("Please utilize Python version 3.6 and above. Exiting...")
def parse_args():
parser = argparse.ArgumentParser(
description="Parser for MIGraphX ROCTX Markers")
parser.add_argument('--json-path',
type=str,
metavar='json_path',
help='Path to json file')
parser.add_argument('--out',
type=str,
metavar='out',
help='Output directory for run.')
parser.add_argument(
'--study-name',
type=str,
metavar='study-name',
help='Study-name is used for naming the output CSV file.')
parser.add_argument('--repeat',
type=int,
metavar='repeat',
help='Defines number of runs.',
default=2)
parser.add_argument('--parse',
default=False,
action='store_true',
help='Parses given JSON file.')
parser.add_argument('--clean',
default=False,
action='store_true',
help='Removes temporary paths')
parser.add_argument('--run',
type=str,
metavar='run',
help='Enables run and fetches run configs.')
parser.add_argument('--debug', default=False, action='store_true')
args = parser.parse_args()
return args
args = parse_args()
if not len(sysargs) > 1:
raise Exception("No arg is passed. Exiting...")
def parse(file):
with open(file, "r") as read_file:
data = json.load(read_file)
#Get marker names and first marker's time
list_names = []
first_marker = True
first_marker_time = 0
for i in data:
if (i):
if ("Marker start:" in i['name']) and (
i['name'] not in list_names):
list_names.append(i['name'])
if first_marker:
first_marker_time = i['ts']
first_marker = False
if (args.debug):
print(f"FIRST MARKER TIME DETERMINED: {first_marker_time}")
if (first_marker_time == 0):
raise ("FIRST MARKER TIME IS ZERO. EXITING...")
kernel_launch_info = [] #kernel description
kernel_launch_list = [] #kernel launch details
kernel_launch_time = [] #kernel execution time
for i in data:
if (i and i.get('args')):
try:
if (("KernelExecution" in i['args']['desc'])
and (i['ts'] >= first_marker_time)):
kernel_launch_info.append(i['args']['desc'])
kernel_launch_list.append(i)
kernel_launch_time.append(int(i['dur']))
except (KeyError, TypeError):
continue
max_index = kernel_launch_time.index(max(kernel_launch_time))
max_kernel_info = kernel_launch_list[max_index]
if (args.debug):
with open('rocTX_kernel_launch_list.txt', 'w') as f:
for i in kernel_launch_list:
f.write(f'{i}')
# Get timing information for each marker name
list_times_per_names = []
for name in list_names:
temp_list = []
for entry in data:
if (entry) and (
name == entry['name']
): # name can match on gpu or cpu side, for gpu, we need data from gpu markers.
if (("gpu::" in name)
and ("UserMarker frame:" in entry['args']['desc'])
): #gpu side information
temp_list.append(int(entry.get('dur')))
elif (("gpu::" not in name)
and ("Marker start:" in entry['args']['desc'])
): #cpu side information
temp_list.append(int(entry.get('dur')))
list_times_per_names.append(temp_list)
if (args.debug):
print(list_times_per_names)
#TODO: refactor stat collection
sum_per_name = []
count_per_name = []
min_per_name = []
max_per_name = []
max_index_per_name = []
max_occur_per_name = []
for times in list_times_per_names:  # 'times' avoids shadowing the builtin 'list'
    sum_per_name.append(sum(times))
    count_per_name.append(len(times))
    min_per_name.append(min(times) if times else 0)
    max_per_name.append(max(times) if times else 0)
    max_index_per_name.append(times.index(max(times)) if times else 0)
    max_occur_per_name.append(times.count(max(times)) if times else 0)
total_time = sum(sum_per_name)
d = {
'SUM': sum_per_name,
'MIN': min_per_name,
'MAX': max_per_name,
'COUNT': count_per_name,
'MAX_INDEX': max_index_per_name,
'MAX_OCCUR': max_occur_per_name
}
df2 = pd.DataFrame(d)
df2.index = list_names
df2.sort_values(by=['SUM'], inplace=True, ascending=False)
if (args.debug):
print(df2)
print(f"\nTOTAL TIME: {total_time} us")
return df2, total_time, max_kernel_info
def run():
repeat_count = args.repeat
if not repeat_count or repeat_count < 1:
raise Exception("REPEAT COUNT CANNOT BE ZERO/INFINITY/NULL")
run_args = args.run
#configurations
configs = '--hip-trace --roctx-trace --flush-rate 10ms --timestamp on'
output_dir = f"-d {args.out}"
executable = f"/opt/rocm/bin/migraphx-driver roctx {run_args}"
process_args = configs + ' ' + output_dir + ' ' + executable
for i in range(repeat_count):
os.system('rocprof ' + process_args)
print("RUN COMPLETE.")
def clean():
shutil.rmtree('/tmp/rocm-profile-data/', ignore_errors=False)
def main():
if (args.clean):
clean()
sys_exit()
print("Initiating virtual environment...")
builder = venv.EnvBuilder(clear=True, with_pip=True)
builder.create('/tmp/rocm-profile-data/py/')
python_bin = '/tmp/rocm-profile-data/py' + '/bin/python'
file = args.json_path
if (args.study_name):
filename = args.study_name + ".csv"
else:
filename = "output" + datetime.now().strftime(
"%Y_%m_%d-%I:%M:%S_%p") + ".csv"
with open(filename, 'a') as f:
f.write(f"{args.run}\n")
if (args.run):
curr = os.path.abspath(os.getcwd())
rpd_path = '/tmp/rocm-profile-data/rocmProfileData/'
if not os.path.exists(rpd_path):
print("rocmProfileData DOES NOT EXIST. CLONING...")
os.system(
f"git clone https://github.com/ROCmSoftwarePlatform/rocmProfileData.git {rpd_path}"
)
os.chdir(rpd_path + "rocpd_python/")
os.system(python_bin + ' -m pip install --upgrade pip')
os.system(python_bin + ' setup.py install')
os.chdir(curr)
run()
os.chdir(curr + f"/{args.out}/")
out_path = os.popen(f"ls -td $PWD/*/*/ | head -{args.repeat}").read()
print(f"\nFOLLOWING PATHS WILL BE PARSED:\n{out_path}")
out_path = out_path.splitlines()
df_tot = pd.DataFrame()
tot_time = []
max_kernel_info_list = []
for path in out_path:
path = path.strip('\n')
print("\nPARSING OUTPUT PATH: " + path)
os.chdir(path)
os.system(
f"{python_bin} -m rocpd.rocprofiler_import --ops_input_file hcc_ops_trace.txt --api_input_file hip_api_trace.txt --roctx_input_file roctx_trace.txt trace.rpd"
)
os.system(
f"{python_bin} {rpd_path}/rpd2tracing.py trace.rpd trace.json")
os.chdir(curr)
df, total_time, path_max_kernel_info = parse(path + "trace.json")
max_kernel_info_list.append(path_max_kernel_info)
tot_time.append(total_time)
df_tot = pd.merge(df_tot,
df,
how='outer',
left_index=True,
right_index=True)
if (args.debug):
print("JSON FILE PATH: " + path + "trace.json")
df_tot.to_csv("rocTX_runs_dataframe.csv")
if (args.debug):
print(df_tot)
tmp_sum = df_tot.loc[:, df_tot.columns.str.contains('SUM')].astype(int)
tmp_min = df_tot.loc[:, df_tot.columns.str.contains('MIN')].astype(int)
tmp_max = df_tot.loc[:, df_tot.columns.str.match("^MAX_.$")].astype(
int)
tmp_count = df_tot.loc[:, df_tot.columns.str.match("COUNT")].astype(
int)
tmp_sum['SUM_avg'] = tmp_sum.mean(axis=1).astype(int)
tmp_min['MIN_avg'] = tmp_min.mean(axis=1).astype(int)
tmp_max['MAX_avg'] = tmp_max.mean(axis=1).astype(int)
df2 = tmp_sum['SUM_avg'].copy()
df2 = pd.merge(df2,
tmp_min['MIN_avg'],
how='outer',
left_index=True,
right_index=True)
df2 = pd.merge(df2,
tmp_max['MAX_avg'],
how='outer',
left_index=True,
right_index=True)
df2 = pd.merge(df2,
tmp_count['COUNT_x'],
how='outer',
left_index=True,
right_index=True)
df2.rename(columns={'COUNT_x': 'COUNT'}, inplace=True)
df2 = df2.loc[:, ~df2.columns.duplicated()]  #there will be many COUNT_x in df2
df2.sort_values(by=['SUM_avg'], inplace=True, ascending=False)
if (args.debug):
pd.set_option('display.max_columns', None)
print(df_tot) #all data from all runs
print("\n*** RESULTS ***")
print(df2)
out_time = sum(tot_time) / len(tot_time)
print(f"\nAVG TOTAL TIME: {out_time} us\n")
df2.to_csv(filename, mode='a')
with open(filename, 'a') as f:
f.write(f"AVG TOTAL TIME: {out_time} us\n")
print(f"OUTPUT CSV FILE:\t{filename}")
if (args.debug):
#kernels that took the longest time printed
for item in max_kernel_info_list:
print(f"KERNEL NAME: {item['name']}\t\t{item['dur']}")
with open('rocTX_kernel_timing_details.txt', 'w') as f:
f.write(
"MOST TIME CONSUMING KERNELS IN EACH ITERATION (EXPECTED TO BE SAME KERNEL):\n"
)
for i in max_kernel_info_list:
f.write(f"KERNEL NAME: {i['name']}\t\t{i['dur']}\n")
print("KERNEL TIMING DETAILS:\trocTX_kernel_timing_details.txt")
print("ALL DATA FROM ALL RUNS:\trocTX_runs_dataframe.csv")
elif (args.parse):
if not (file):
raise Exception("JSON PATH IS NOT PROVIDED FOR PARSING.")
parse(file)
else:
raise Exception("PLEASE PROVIDE A COMMAND: RUN, PARSE, CLEAN")
if __name__ == "__main__":
main()