Commit 6bfdd98a authored by Astha Rai
Browse files

updating profiler

parent ddefb951
...@@ -43,19 +43,15 @@ bool profile_transpose_impl(int do_verification, ...@@ -43,19 +43,15 @@ bool profile_transpose_impl(int do_verification,
int init_method, int init_method,
bool do_log, bool do_log,
bool time_kernel, bool time_kernel,
index_t N, std::vector<index_t> lengths)
index_t C,
index_t D,
index_t H,
index_t W)
{ {
bool pass = true; bool pass = true;
/**index_t N = lengths[0]; index_t N = lengths[0];
index_t C = lengths[1]; index_t C = lengths[1];
index_t D = lengths[2]; index_t D = lengths[2];
index_t H = lengths[3]; index_t H = lengths[3];
index_t W = lengths[4];**/ index_t W = lengths[4];
std::vector<ck::index_t> ncdhw = {N, C, D, H, W}; std::vector<ck::index_t> ncdhw = {N, C, D, H, W};
std::vector<ck::index_t> ndhwc = {N, D, H, W, C}; std::vector<ck::index_t> ndhwc = {N, D, H, W, C};
...@@ -128,7 +124,6 @@ bool profile_transpose_impl(int do_verification, ...@@ -128,7 +124,6 @@ bool profile_transpose_impl(int do_verification,
{ {
b_device_buf.FromDevice(b.mData.data()); b_device_buf.FromDevice(b.mData.data());
// pass = pass & ck::utils::check_err(b_device_result, b_host_result);
pass &= ck::utils::check_err( pass &= ck::utils::check_err(
b.mData, host_b.mData, "Error: Incorrect results b", 1e-3, 1e-3); b.mData, host_b.mData, "Error: Incorrect results b", 1e-3, 1e-3);
......
...@@ -30,7 +30,7 @@ int profile_transpose(int argc, char* argv[]) ...@@ -30,7 +30,7 @@ int profile_transpose(int argc, char* argv[])
{ {
printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n"); printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
printf("arg2: data type (0: fp32; 1: fp16)\n"); printf("arg2: data type (0: fp32; 1: fp16)\n");
printf("arg3: matrix layout (NCDHW -> NDCHW);\n"); // printf("arg3: matrix layout (NCDHW -> NDCHW);\n");
printf("arg4: verification (0: no; 1: yes)\n"); printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n"); printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n"); printf("arg6: print tensor value (0: no; 1: yes)\n");
...@@ -39,33 +39,29 @@ int profile_transpose(int argc, char* argv[]) ...@@ -39,33 +39,29 @@ int profile_transpose(int argc, char* argv[])
exit(1); exit(1);
} }
const auto data_type = static_cast<DataType>(std::stoi(argv[2])); const auto data_type = static_cast<DataType>(std::stoi(argv[2]));
const auto layout = static_cast<MatrixLayout>(std::stoi(argv[3])); // const auto layout = static_cast<MatrixLayout>(std::stoi(argv[3]));
const bool do_verification = std::stoi(argv[4]); const bool do_verification = std::stoi(argv[3]);
const int init_method = std::stoi(argv[5]); const int init_method = std::stoi(argv[4]);
const bool do_log = std::stoi(argv[6]); const bool do_log = std::stoi(argv[5]);
const bool time_kernel = std::stoi(argv[7]); const bool time_kernel = std::stoi(argv[6]);
std::vector<index_t> lengths = std::stoi(argv[7]);
const int N = std::stoi(argv[8]); /**const int N = std::stoi(argv[7]);
const int C = std::stoi(argv[9]); const int C = std::stoi(argv[8]);
const int D = std::stoi(argv[10]); const int D = std::stoi(argv[9]);
const int H = std::stoi(argv[11]); const int H = std::stoi(argv[10]);
const int W = std::stoi(argv[12]); const int W = std::stoi(argv[11]);**/
using F32 = float; using F32 = float;
using F16 = ck::half_t; using F16 = ck::half_t;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
auto profile = [&](auto a_type, auto b_type) { auto profile = [&](auto a_type, auto b_type) {
using ADataType = decltype(a_type); using ADataType = decltype(a_type);
using BDataType = decltype(b_type); using BDataType = decltype(b_type);
// using ALayout = decltype(a_layout);
// using BLayout = decltype(b_layout);
bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType>( bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType>(
do_verification, init_method, do_log, time_kernel, N, C, D, H, W); do_verification, init_method, do_log, time_kernel, lengths);
return pass ? 0 : 1; return pass ? 0 : 1;
}; };
...@@ -86,4 +82,4 @@ int profile_transpose(int argc, char* argv[]) ...@@ -86,4 +82,4 @@ int profile_transpose(int argc, char* argv[])
} }
} }
REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_splitk); REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_gemm_transpose);
\ No newline at end of file
...@@ -4,25 +4,27 @@ TYPED_TEST(TestTranspose, Test1) ...@@ -4,25 +4,27 @@ TYPED_TEST(TestTranspose, Test1)
{ {
// for 16, 8, 16, 32, 8 // for 16, 8, 16, 32, 8
std::vector<int> Ms{1, 2, 3, 4, 5, 6}; std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 16; std::vector<index_t> lengths{16, 8, 16, 32, 8};
/**constexpr int N = 16;
constexpr int C = 8; constexpr int C = 8;
constexpr int D = 16; constexpr int D = 16;
constexpr int H = 32; constexpr int H = 32;
constexpr int W = 8; constexpr int W = 8;**/
this->Run(N, C, D, H, W); this->Run();
} }
TYPED_TEST(TestTranpose, Test2) TYPED_TEST(TestTranpose, Test2)
{ {
std::vector<int> Ms{127, 255, 312, 799, 1573}; std::vector<int> Ms{127, 255, 312, 799, 1573};
constexpr int N = 16; std::vector<index_t> lengths{16, 8, 16, 32, 16};
/**constexpr int N = 16;
constexpr int C = 8; constexpr int C = 8;
constexpr int D = 16; constexpr int D = 16;
constexpr int H = 32; constexpr int H = 32;
constexpr int W = 8; constexpr int W = 8;**/
this->Run(N, C, D, H, W); this->Run();
} }
...@@ -28,20 +28,24 @@ class TestTranspose : public testing::Test ...@@ -28,20 +28,24 @@ class TestTranspose : public testing::Test
using BDataType = std::tuple_element_t<1, Tuple>; using BDataType = std::tuple_element_t<1, Tuple>;
public: public:
static constexpr bool verify_ = true; static constexpr bool verify_ = true;
static constexpr int init_method_ = 1; // decimal value initialization static constexpr int init_method_ = 1; // decimal value initialization
static constexpr bool log_ = false; static constexpr bool log_ = false;
static constexpr bool bench_ = false; // measure kernel performance static constexpr bool bench_ = false; // measure kernel performance
std::vector<std::vector<index_t>> lengths_ = {{16, 32, 16, 32, 16}, {16, 8, 16, 32, 8}};
void Run(const int N, const int C, const int D, const int H, const int W) void Run()
{ {
RunSingle(N, H, C, D, W); for(auto length : this->lengths_)
{
this->RunSingle(length);
}
} }
void RunSingle(const int N, const int C, const int D, const int H, const int W) void RunSingle()
{ {
bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>( bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>(
verify_, init_method_, log_, bench_, N, C, D, H, W); verify_, init_method_, log_, bench_, lengths_);
EXPECT_TRUE(pass); EXPECT_TRUE(pass);
} }
}; };
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment