Commit 6bfdd98a authored by Astha Rai
Browse files

updating profiler

parent ddefb951
......@@ -43,19 +43,15 @@ bool profile_transpose_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
index_t N,
index_t C,
index_t D,
index_t H,
index_t W)
std::vector<index_t> lengths)
{
bool pass = true;
/**index_t N = lengths[0];
index_t N = lengths[0];
index_t C = lengths[1];
index_t D = lengths[2];
index_t H = lengths[3];
index_t W = lengths[4];**/
index_t W = lengths[4];
std::vector<ck::index_t> ncdhw = {N, C, D, H, W};
std::vector<ck::index_t> ndhwc = {N, D, H, W, C};
......@@ -128,7 +124,6 @@ bool profile_transpose_impl(int do_verification,
{
b_device_buf.FromDevice(b.mData.data());
// pass = pass & ck::utils::check_err(b_device_result, b_host_result);
pass &= ck::utils::check_err(
b.mData, host_b.mData, "Error: Incorrect results b", 1e-3, 1e-3);
......
......@@ -30,7 +30,7 @@ int profile_transpose(int argc, char* argv[])
{
printf("arg1: tensor operation (" OP_NAME ": " OP_DESC ")\n");
printf("arg2: data type (0: fp32; 1: fp16)\n");
printf("arg3: matrix layout (NCDHW -> NDCHW);\n");
// printf("arg3: matrix layout (NCDHW -> NDCHW);\n");
printf("arg4: verification (0: no; 1: yes)\n");
printf("arg5: initialization (0: no init; 1: integer value; 2: decimal value)\n");
printf("arg6: print tensor value (0: no; 1: yes)\n");
......@@ -39,33 +39,29 @@ int profile_transpose(int argc, char* argv[])
exit(1);
}
const auto data_type = static_cast<DataType>(std::stoi(argv[2]));
const auto layout = static_cast<MatrixLayout>(std::stoi(argv[3]));
const bool do_verification = std::stoi(argv[4]);
const int init_method = std::stoi(argv[5]);
const bool do_log = std::stoi(argv[6]);
const bool time_kernel = std::stoi(argv[7]);
const auto data_type = static_cast<DataType>(std::stoi(argv[2]));
// const auto layout = static_cast<MatrixLayout>(std::stoi(argv[3]));
const bool do_verification = std::stoi(argv[3]);
const int init_method = std::stoi(argv[4]);
const bool do_log = std::stoi(argv[5]);
const bool time_kernel = std::stoi(argv[6]);
std::vector<index_t> lengths = std::stoi(argv[7]);
const int N = std::stoi(argv[8]);
const int C = std::stoi(argv[9]);
const int D = std::stoi(argv[10]);
const int H = std::stoi(argv[11]);
const int W = std::stoi(argv[12]);
/**const int N = std::stoi(argv[7]);
const int C = std::stoi(argv[8]);
const int D = std::stoi(argv[9]);
const int H = std::stoi(argv[10]);
const int W = std::stoi(argv[11]);**/
using F32 = float;
using F16 = ck::half_t;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
// Runs the transpose profiler for the element types carried by the two tag
// arguments (only their types are used, not their values).
// Returns 0 when the profiler reports success and 1 otherwise.
auto profile = [&](auto a_type, auto b_type) {
    using ADataType = decltype(a_type);
    using BDataType = decltype(b_type);

    // using ALayout = decltype(a_layout);
    // using BLayout = decltype(b_layout);

    // The tensor shape is forwarded as one vector of extents.
    const bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType>(
        do_verification, init_method, do_log, time_kernel, lengths);

    return pass ? 0 : 1;
};
......@@ -86,4 +82,4 @@ int profile_transpose(int argc, char* argv[])
}
}
// Register this file's entry point. The third argument must name the function
// defined here (profile_transpose), not a GEMM profiler entry point.
REGISTER_PROFILER_OPERATION(OP_NAME, OP_DESC, profile_transpose);
......@@ -4,25 +4,27 @@ TYPED_TEST(TestTranspose, Test1)
{
// for 16, 8, 16, 32, 8
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
constexpr int N = 16;
std::vector<index_t> lengths{16, 8, 16, 32, 8};
/**constexpr int N = 16;
constexpr int C = 8;
constexpr int D = 16;
constexpr int H = 32;
constexpr int W = 8;
constexpr int W = 8;**/
this->Run(N, C, D, H, W);
this->Run();
}
// Second typed test case for the TestTranspose fixture.
TYPED_TEST(TestTranspose, Test2)
{
    // NOTE(review): Ms and lengths are not consumed below — Run() takes no
    // arguments and iterates the fixture's own shape list. Confirm whether
    // this shape should be added there instead.
    std::vector<int> Ms{127, 255, 312, 799, 1573};
    std::vector<index_t> lengths{16, 8, 16, 32, 16};
    /**constexpr int N = 16;
    constexpr int C = 8;
    constexpr int D = 16;
    constexpr int H = 32;
    constexpr int W = 8;**/

    this->Run();
}
......@@ -28,20 +28,24 @@ class TestTranspose : public testing::Test
using BDataType = std::tuple_element_t<1, Tuple>;
public:
static constexpr bool verify_ = true;
static constexpr int init_method_ = 1; // decimal value initialization
static constexpr bool log_ = false;
static constexpr bool bench_ = false; // measure kernel performance
static constexpr bool verify_ = true;
static constexpr int init_method_ = 1; // decimal value initialization
static constexpr bool log_ = false;
static constexpr bool bench_ = false; // measure kernel performance
std::vector<std::vector<index_t>> lengths_ = {{16, 32, 16, 32, 16}, {16, 8, 16, 32, 8}};
void Run(const int N, const int C, const int D, const int H, const int W)
void Run()
{
RunSingle(N, H, C, D, W);
for(auto length : this->lengths_)
{
this->RunSingle(length);
}
}
void RunSingle(const int N, const int C, const int D, const int H, const int W)
void RunSingle()
{
bool pass = ck::profiler::profile_transpose_impl<ADataType, BDataType, 5>(
verify_, init_method_, log_, bench_, N, C, D, H, W);
verify_, init_method_, log_, bench_, lengths_);
EXPECT_TRUE(pass);
}
};
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment