"docker/vscode:/vscode.git/clone" did not exist on "48d0123f0f4415b1bb78f5a538df8b0b9975c6d4"
Commit 43a20997 authored by aska-0096's avatar aska-0096
Browse files

debugging

parent 0cd587d9
...@@ -32,10 +32,8 @@ bool run_gemm(const ProblemSize& problem_size, const ExecutionConfig& config) ...@@ -32,10 +32,8 @@ bool run_gemm(const ProblemSize& problem_size, const ExecutionConfig& config)
{ {
case 0: break; case 0: break;
case 1: case 1:
ck::utils::FillUniformDistributionIntegerValue<ADataType>{-5.f, 5.f}(a_m_k.begin(), ck::utils::FillUniformDistributionIntegerValue<ADataType>{1.f, 1.f}(a_m_k.begin(), a_m_k.end());
a_m_k.end()); ck::utils::FillUniformDistributionIntegerValue<BDataType>{1.f, 1.f}(b_k_n.begin(), b_k_n.end());
ck::utils::FillUniformDistributionIntegerValue<BDataType>{-5.f, 5.f}(b_k_n.begin(),
b_k_n.end());
break; break;
default: default:
ck::utils::FillUniformDistribution<ADataType>{-1.f, 1.f}(a_m_k.begin(), a_m_k.end()); ck::utils::FillUniformDistribution<ADataType>{-1.f, 1.f}(a_m_k.begin(), a_m_k.end());
...@@ -102,7 +100,6 @@ bool run_gemm(const ProblemSize& problem_size, const ExecutionConfig& config) ...@@ -102,7 +100,6 @@ bool run_gemm(const ProblemSize& problem_size, const ExecutionConfig& config)
return true; return true;
} }
float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel}); float ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
std::size_t flop = 2_uz * M * N * K; std::size_t flop = 2_uz * M * N * K;
......
...@@ -226,6 +226,7 @@ struct BlockwiseGemmWMMA_k0mk1_k0nk1_m0m1m2n0n1n2m3 ...@@ -226,6 +226,7 @@ struct BlockwiseGemmWMMA_k0mk1_k0nk1_m0m1m2n0n1n2m3
constexpr index_t c_offset = constexpr index_t c_offset =
c_thread_desc_.CalculateOffset(make_tuple(iCut, iN, 0)); c_thread_desc_.CalculateOffset(make_tuple(iCut, iN, 0));
// debug_hexprinter(0x3c003c00, a_thread_vec.template AsType<FloatAB>()(Number<0>{}));
wmma_gemm.template Run( wmma_gemm.template Run(
a_thread_vec.template AsType<wmma_input_type>()(Number<0>{}), a_thread_vec.template AsType<wmma_input_type>()(Number<0>{}),
b_thread_vec.template AsType<wmma_input_type>()(Number<0>{}), b_thread_vec.template AsType<wmma_input_type>()(Number<0>{}),
......
...@@ -359,8 +359,6 @@ struct DeviceGemmWmma : public DeviceGemm<ALayout, ...@@ -359,8 +359,6 @@ struct DeviceGemmWmma : public DeviceGemm<ALayout,
remove_reference_t<typename GridwiseGemm::DefaultBlock2CTileMap>, remove_reference_t<typename GridwiseGemm::DefaultBlock2CTileMap>,
true>; // Last Option is W/O true>; // Last Option is W/O
std::cout<<"Host kernel type is "<< type_name<decltype(kernel)>()<<std::endl;
printf("---------------------Crush before kernel launch-------------------\n");
ave_time = launch_and_time_kernel(stream_config, ave_time = launch_and_time_kernel(stream_config,
kernel, kernel,
dim3(grid_size), dim3(grid_size),
......
...@@ -356,6 +356,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma ...@@ -356,6 +356,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
/*******************************************************************************/ /*******************************************************************************/
// BlockLevel, A/B Matrix ThreadMapping in LDS, As Destinaion of BlockWise_Copy // BlockLevel, A/B Matrix ThreadMapping in LDS, As Destinaion of BlockWise_Copy
const auto K0 = a_grid_desc_k0_m_k1.GetLength(I0); const auto K0 = a_grid_desc_k0_m_k1.GetLength(I0);
printf("A_GRID_DESC: %s \n", std::string(type_name<decltype(a_grid_desc_k0_m_k1)>()).c_str());
constexpr auto max_lds_align = K1; constexpr auto max_lds_align = K1;
constexpr auto a_block_desc_k0perblock_mperblock_k1 = GetABlockDescriptor_K0PerBlock_MPerBlock_K1(); constexpr auto a_block_desc_k0perblock_mperblock_k1 = GetABlockDescriptor_K0PerBlock_MPerBlock_K1();
constexpr auto b_block_desc_k0perblock_nperblock_k1 = GetBBlockDescriptor_K0PerBlock_NPerBlock_K1(); constexpr auto b_block_desc_k0perblock_nperblock_k1 = GetBBlockDescriptor_K0PerBlock_NPerBlock_K1();
...@@ -457,7 +458,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma ...@@ -457,7 +458,6 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
// gridwise GEMM pipeline // gridwise GEMM pipeline
const index_t K0BlockMainLoop = __builtin_amdgcn_readfirstlane(K0 / K0PerBlock); const index_t K0BlockMainLoop = __builtin_amdgcn_readfirstlane(K0 / K0PerBlock);
GridwiseGemmPipe::template Run<HasMainKBlockLoop>(a_grid_desc_k0_m_k1, GridwiseGemmPipe::template Run<HasMainKBlockLoop>(a_grid_desc_k0_m_k1,
a_block_desc_k0perblock_mperblock_k1, a_block_desc_k0perblock_mperblock_k1,
a_blockwise_copy, a_blockwise_copy,
......
...@@ -208,6 +208,7 @@ struct ThreadwiseTensorSliceTransfer_v3r1 ...@@ -208,6 +208,7 @@ struct ThreadwiseTensorSliceTransfer_v3r1
src_buf.template Get<src_vector_t>(src_coord_.GetOffset(), is_src_valid)}; src_buf.template Get<src_vector_t>(src_coord_.GetOffset(), is_src_valid)};
// apply SrcElementwiseOperation on src_vector_container // apply SrcElementwiseOperation on src_vector_container
debug_hexprinter(0xffffffff, src_coord_.GetOffset());
static_for<0, SrcScalarPerVector, 1>{}([&](auto i) { static_for<0, SrcScalarPerVector, 1>{}([&](auto i) {
SrcData src_v; SrcData src_v;
......
...@@ -72,3 +72,11 @@ constexpr auto type_name() { ...@@ -72,3 +72,11 @@ constexpr auto type_name() {
name.remove_suffix(suffix.size()); name.remove_suffix(suffix.size());
return name; return name;
} }
template <typename T>
__device__
void debug_hexprinter(const uint32_t v_target, T v_val){
const uint32_t v_dbg = *(reinterpret_cast<uint32_t*>(&v_val));
if(v_dbg != v_target)
printf("@Thread: %d, Val: %08x != Target: %08x\n", ck::get_thread_local_1d_id(), v_dbg, v_target);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment