Commit 63ecd2e7 authored by Jianfeng yan
Browse files

add debug code for reduction

parent 7887c0ac
...@@ -79,6 +79,12 @@ struct ThreadwiseTensorSliceTransfer_v1r3 ...@@ -79,6 +79,12 @@ struct ThreadwiseTensorSliceTransfer_v1r3
{ {
static_assert(SrcDesc::IsKnownAtCompileTime(), static_assert(SrcDesc::IsKnownAtCompileTime(),
"wrong! SrcDesc need to known at compile-time"); "wrong! SrcDesc need to known at compile-time");
constexpr index_t slice_size = reduce_on_sequence(SliceLengths{}, math::multiplies{}, Number<1>{});
if constexpr (0 != slice_size % DstScalarPerVector)
{
printf("%c\n", SliceLengths{});
}
static_assert( 0 == slice_size % DstScalarPerVector);
} }
__device__ void SetDstSliceOrigin(const DstDesc& dst_desc, const Index& dst_slice_origin_idx) __device__ void SetDstSliceOrigin(const DstDesc& dst_desc, const Index& dst_slice_origin_idx)
......
...@@ -22,30 +22,30 @@ include_directories(BEFORE ...@@ -22,30 +22,30 @@ include_directories(BEFORE
# ck_profiler # ck_profiler
set(PROFILER_SOURCE set(PROFILER_SOURCE
src/profiler.cpp src/profiler.cpp
src/profile_gemm.cpp # src/profile_gemm.cpp
src/profile_gemm_bias_2d.cpp # src/profile_gemm_bias_2d.cpp
src/profile_gemm_bias_relu.cpp # src/profile_gemm_bias_relu.cpp
src/profile_gemm_bias_relu_add.cpp # src/profile_gemm_bias_relu_add.cpp
src/profile_batched_gemm.cpp # src/profile_batched_gemm.cpp
src/profile_conv_fwd.cpp # src/profile_conv_fwd.cpp
src/profile_conv_fwd_bias_relu.cpp # src/profile_conv_fwd_bias_relu.cpp
src/profile_conv_fwd_bias_relu_add.cpp # src/profile_conv_fwd_bias_relu_add.cpp
src/profile_conv_fwd_bias_relu_atomic_add.cpp # src/profile_conv_fwd_bias_relu_atomic_add.cpp
src/profile_conv_bwd_data.cpp # src/profile_conv_bwd_data.cpp
src/profile_reduce.cpp src/profile_reduce.cpp
) )
add_executable(ckProfiler ${PROFILER_SOURCE}) add_executable(ckProfiler ${PROFILER_SOURCE})
target_link_libraries(ckProfiler PRIVATE host_tensor) target_link_libraries(ckProfiler PRIVATE host_tensor)
target_link_libraries(ckProfiler PRIVATE device_gemm_instance) # target_link_libraries(ckProfiler PRIVATE device_gemm_instance)
target_link_libraries(ckProfiler PRIVATE device_gemm_bias2d_instance) # target_link_libraries(ckProfiler PRIVATE device_gemm_bias2d_instance)
target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_instance) # target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_instance)
target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_add_instance) # target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_add_instance)
target_link_libraries(ckProfiler PRIVATE device_batched_gemm_instance) # target_link_libraries(ckProfiler PRIVATE device_batched_gemm_instance)
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_instance) # target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_instance)
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance) # target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance)
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance) # target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance)
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance) # target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance)
target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_data_instance) # target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_data_instance)
target_link_libraries(ckProfiler PRIVATE device_reduce_instance) target_link_libraries(ckProfiler PRIVATE device_reduce_instance)
...@@ -4,61 +4,65 @@ ...@@ -4,61 +4,65 @@
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
int profile_gemm(int, char*[]); // int profile_gemm(int, char*[]);
int profile_batched_gemm(int, char*[]); // int profile_batched_gemm(int, char*[]);
int profile_gemm_bias_2d(int, char*[]); // int profile_gemm_bias_2d(int, char*[]);
int profile_gemm_bias_relu(int, char*[]); // int profile_gemm_bias_relu(int, char*[]);
int profile_gemm_bias_relu_add(int, char*[]); // int profile_gemm_bias_relu_add(int, char*[]);
int profile_conv_fwd(int, char*[]); // int profile_conv_fwd(int, char*[]);
int profile_conv_fwd_bias_relu(int, char*[]); // int profile_conv_fwd_bias_relu(int, char*[]);
int profile_conv_fwd_bias_relu_add(int, char*[]); // int profile_conv_fwd_bias_relu_add(int, char*[]);
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]); // int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
int profile_conv_bwd_data(int, char*[]); // int profile_conv_bwd_data(int, char*[]);
int profile_reduce(int, char*[]); int profile_reduce(int, char*[]);
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
if(strcmp(argv[1], "gemm") == 0) // if(strcmp(argv[1], "gemm") == 0)
{ // {
return profile_gemm(argc, argv); // return profile_gemm(argc, argv);
} // }
else if(strcmp(argv[1], "gemm_bias_2d") == 0) // else if(strcmp(argv[1], "gemm_bias_2d") == 0)
{ // {
return profile_gemm_bias_2d(argc, argv); // return profile_gemm_bias_2d(argc, argv);
} // }
else if(strcmp(argv[1], "gemm_bias_relu") == 0) // else if(strcmp(argv[1], "gemm_bias_relu") == 0)
{ // {
return profile_gemm_bias_relu(argc, argv); // return profile_gemm_bias_relu(argc, argv);
} // }
else if(strcmp(argv[1], "gemm_bias_relu_add") == 0) // else if(strcmp(argv[1], "gemm_bias_relu_add") == 0)
{ // {
return profile_gemm_bias_relu_add(argc, argv); // return profile_gemm_bias_relu_add(argc, argv);
} // }
else if(strcmp(argv[1], "batched_gemm") == 0) // else if(strcmp(argv[1], "batched_gemm") == 0)
{ // {
return profile_batched_gemm(argc, argv); // return profile_batched_gemm(argc, argv);
} // }
else if(strcmp(argv[1], "conv_fwd") == 0) // else if(strcmp(argv[1], "conv_fwd") == 0)
{ // {
return profile_conv_fwd(argc, argv); // return profile_conv_fwd(argc, argv);
} // }
else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0) // else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0)
{ // {
return profile_conv_fwd_bias_relu(argc, argv); // return profile_conv_fwd_bias_relu(argc, argv);
} // }
else if(strcmp(argv[1], "conv_fwd_bias_relu_add") == 0) // else if(strcmp(argv[1], "conv_fwd_bias_relu_add") == 0)
{ // {
return profile_conv_fwd_bias_relu_add(argc, argv); // return profile_conv_fwd_bias_relu_add(argc, argv);
} // }
else if(strcmp(argv[1], "conv_fwd_bias_relu_atomic_add") == 0) // else if(strcmp(argv[1], "conv_fwd_bias_relu_atomic_add") == 0)
{ // {
return profile_conv_fwd_bias_relu_atomic_add(argc, argv); // return profile_conv_fwd_bias_relu_atomic_add(argc, argv);
} // }
else if(strcmp(argv[1], "conv_bwd") == 0) // else if(strcmp(argv[1], "conv_bwd") == 0)
{ // {
return profile_conv_bwd_data(argc, argv); // return profile_conv_bwd_data(argc, argv);
} // }
else if(strcmp(argv[1], "reduce") == 0) // else if(strcmp(argv[1], "reduce") == 0)
// {
// return profile_reduce(argc, argv);
// }
if(strcmp(argv[1], "reduce") == 0)
{ {
return profile_reduce(argc, argv); return profile_reduce(argc, argv);
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment