"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "a054f7d604d3bfee9e4ad410df15397bc354ae3d"
Commit 5394ac66 authored by Bartlomiej Kocot
Browse files

Several fixes of image to column profiler

parent 5c704e2d
...@@ -106,10 +106,9 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv ...@@ -106,10 +106,9 @@ bool RunImageToColumn(const ExecutionConfig& config, const ck::utils::conv::Conv
return false; return false;
} }
float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel}); float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
std::size_t num_btype = std::size_t num_btype = 2 * NDoHoWo * CZYX * sizeof(OutDataType);
NDoHoWo * CZYX * sizeof(OutDataType) + conv_params.GetInputByte<InputDataType>(); float gb_per_sec = num_btype / 1.E6 / ave_time;
float gb_per_sec = num_btype / 1.E6 / ave_time;
std::cout << "Perf: " << ave_time << " ms, " << gb_per_sec << " GB/s" << std::endl; std::cout << "Perf: " << ave_time << " ms, " << gb_per_sec << " GB/s" << std::endl;
if(config.do_verification) if(config.do_verification)
......
...@@ -7,15 +7,11 @@ ...@@ -7,15 +7,11 @@
#include <iostream> #include <iostream>
#include <typeinfo> #include <typeinfo>
#include <limits> #include <limits>
#include "ck/ck.hpp" #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_image_to_column.hpp" #include "ck/tensor_operation/gpu/device/device_image_to_column.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp" #include "ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp"
#include "ck/library/tensor_operation_instance/gpu/image_to_column.hpp" #include "ck/library/tensor_operation_instance/gpu/image_to_column.hpp"
#include "ck/library/utility/check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp" #include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp" #include "ck/library/utility/host_tensor.hpp"
...@@ -126,9 +122,7 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -126,9 +122,7 @@ bool profile_image_to_column_impl(int do_verification,
std::cout << "found " << op_ptrs.size() << " instances" << std::endl; std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
std::string best_op_name; std::string best_op_name;
float best_avg_time = std::numeric_limits<float>::max(); float best_avg_time = std::numeric_limits<float>::max();
;
float best_tflops = 0;
float best_gb_per_sec = 0; float best_gb_per_sec = 0;
// profile device op instances // profile device op instances
...@@ -157,19 +151,12 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -157,19 +151,12 @@ bool profile_image_to_column_impl(int do_verification,
is_supporting_instance = true; is_supporting_instance = true;
// re-init output to zero before profiling next kernel // re-init output to zero before profiling next kernel
out_device_buf.SetZero(); out_device_buf.SetZero();
std::string op_name = op_ptr->GetTypeString(); std::string op_name = op_ptr->GetTypeString();
auto invoker_ptr = op_ptr->MakeInvokerPointer();
auto invoker_ptr = op_ptr->MakeInvokerPointer();
float avg_time = float avg_time =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel}); invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
std::size_t num_btype = 2 * NDoHoWo * CZYX * sizeof(OutputDataType);
std::size_t num_btype = float gb_per_sec = num_btype / 1.E6 / avg_time;
conv_param.GetInputByte<InputDataType>() + NDoHoWo * CZYX * sizeof(OutputDataType);
float gb_per_sec = num_btype / 1.E6 / avg_time;
std::cout << "Perf: " << std::setw(10) << avg_time << " ms, " << gb_per_sec << " GB/s, " std::cout << "Perf: " << std::setw(10) << avg_time << " ms, " << gb_per_sec << " GB/s, "
<< op_name << std::endl; << op_name << std::endl;
...@@ -183,7 +170,6 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -183,7 +170,6 @@ bool profile_image_to_column_impl(int do_verification,
if(do_verification) if(do_verification)
{ {
out_device_buf.FromDevice(device_output.mData.data()); out_device_buf.FromDevice(device_output.mData.data());
pass = pass & ck::utils::check_err(device_output, host_output); pass = pass & ck::utils::check_err(device_output, host_output);
if(do_log) if(do_log)
...@@ -204,7 +190,7 @@ bool profile_image_to_column_impl(int do_verification, ...@@ -204,7 +190,7 @@ bool profile_image_to_column_impl(int do_verification,
std::cout << "Best configuration parameters:" std::cout << "Best configuration parameters:"
<< "\nname: " << best_op_name << "\navg_time: " << best_avg_time << "\nname: " << best_op_name << "\navg_time: " << best_avg_time
<< "\ntflops: " << best_tflops << "\nGB/s: " << best_gb_per_sec << std::endl; << "\nGB/s: " << best_gb_per_sec << std::endl;
return is_supporting_instance && pass; return is_supporting_instance && pass;
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment