Commit 7f915595 authored by Shucai Xiao
Browse files

Additional refinement of the printout of memory throughput and TFLOPS

parent 6529e7c9
...@@ -37,7 +37,7 @@ struct squeeze ...@@ -37,7 +37,7 @@ struct squeeze
std::string name() const { return "squeeze"; } std::string name() const { return "squeeze"; }
shape normalize_compute_shape(std::vector<shape> inputs) const shape normalize_compute_shape(std::vector<shape> inputs) const
{ {
check_shapes{inputs, *this}.has(1); check_shapes{inputs, *this}.has(1).standard();
auto input_shape = inputs[0]; auto input_shape = inputs[0];
auto type = input_shape.type(); auto type = input_shape.type();
auto old_lens = input_shape.lens(); auto old_lens = input_shape.lens();
......
...@@ -705,6 +705,7 @@ static void print_ins_perf(std::ostream& os, ...@@ -705,6 +705,7 @@ static void print_ins_perf(std::ostream& os,
auto& time_per = titles.at(2); auto& time_per = titles.at(2);
auto& size_str = titles.at(3); auto& size_str = titles.at(3);
auto& flops_str = titles.at(4); auto& flops_str = titles.at(4);
auto& thrpt_str = titles.at(5);
auto& flops_funcs = get_flops_funcs(); auto& flops_funcs = get_flops_funcs();
std::string tms = std::to_string(t); std::string tms = std::to_string(t);
...@@ -726,10 +727,10 @@ static void print_ins_perf(std::ostream& os, ...@@ -726,10 +727,10 @@ static void print_ins_perf(std::ostream& os,
std::string op_name = ins->name(); std::string op_name = ins->name();
auto nloc = op_name.find("::"); auto nloc = op_name.find("::");
op_name.erase(op_name.begin(), op_name.begin() + nloc + 2); op_name.erase(op_name.begin(), op_name.begin() + nloc + 2);
auto inss = to_shapes(ins->inputs());
if(contains(flops_funcs, op_name)) if(contains(flops_funcs, op_name))
{ {
// print size // print size
auto inss = to_shapes(ins->inputs());
auto alens = inss.front().lens(); auto alens = inss.front().lens();
auto blens = inss.at(1).lens(); auto blens = inss.at(1).lens();
auto mb = auto mb =
...@@ -747,7 +748,7 @@ static void print_ins_perf(std::ostream& os, ...@@ -747,7 +748,7 @@ static void print_ins_perf(std::ostream& os,
szs.append(1, ','); szs.append(1, ',');
szs.append(std::to_string(mn)); szs.append(std::to_string(mn));
szs.append("}"); szs.append("}");
szs.append(1, '\t'); szs.append(size_str.length() - szs.length(), ' ');
auto op_flop_func = flops_funcs.at(op_name); auto op_flop_func = flops_funcs.at(op_name);
double flops = op_flop_func(inss); double flops = op_flop_func(inss);
...@@ -763,7 +764,29 @@ static void print_ins_perf(std::ostream& os, ...@@ -763,7 +764,29 @@ static void print_ins_perf(std::ostream& os,
} }
szs.append(size_str.length() - szs.length(), ' '); szs.append(size_str.length() - szs.length(), ' ');
flps.append(flops_str.length() - flps.length(), ' '); flps.append(flops_str.length() - flps.length(), ' ');
os << tms << pers << szs << flps << std::endl;
// print throughput for pointwise instruction
auto alias_num = ins->get_operator().output_alias({});
std::string thrpt;
if (alias_num != 0)
{
auto size = std::accumulate(inss.begin(), inss.end(), std::size_t{0}, [&](auto init, auto s) {
return init + s.bytes();
});
double throughput = size / t;
// convert to GB/s
throughput /= 1.0e9;
thrpt = std::to_string(throughput);
auto floc = flps.find('.');
if(floc != std::string::npos)
{
thrpt.erase(thrpt.begin() + floc + 4, thrpt.end());
}
}
thrpt.append(thrpt_str.length() - thrpt.length(), ' ');
os << tms << pers << szs << flps << thrpt << std::endl;
} }
void program::perf_report(std::ostream& os, void program::perf_report(std::ostream& os,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment