Unverified commit 7271ddbc, authored by Paul Fultz II, committed by GitHub
Browse files

Group code objects by kernel name in perf report summary (#1234)

Break up the gpu::code_object print to show the actual kernels, for example:

gpu::code_object::add_kernel: 0.646121ms, 5%
gpu::code_object::mul_kernel: 0.623822ms, 5%
gpu::code_object::add_mul_erf_add_mul_mul_kernel: 0.498902ms, 4%
gpu::code_object::mul_add_kernel: 0.478352ms, 4%
parent bcac9858
...@@ -34,6 +34,10 @@ struct code_object_op ...@@ -34,6 +34,10 @@ struct code_object_op
f(self.output, "output")); f(self.output, "output"));
} }
value attributes() const { return {{"group", group()}}; }
std::string group() const { return "gpu::code_object::" + symbol_name; }
std::string name() const { return "gpu::code_object"; } std::string name() const { return "gpu::code_object"; }
shape compute_shape(std::vector<shape> inputs) const; shape compute_shape(std::vector<shape> inputs) const;
argument argument
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment