Commit 7c63aa5f authored by Harisankar Sadasivan
Browse files

changes for debug

parent fe15fcc0
......@@ -267,7 +267,7 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
auto ref_argument = ref_gemm.MakeArgument(
a_m_k, b_k_n, c_m_n_host_result, PassThrough{}, PassThrough{}, PassThrough{});
printf("inside do verification\n");
ref_invoker.Run(ref_argument);
ave_time = invoker.Run(argument, StreamConfig{nullptr, false, 1});
......@@ -281,19 +281,19 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
return ck::utils::check_err(c_m_n_device_result_converted, c_m_n_host_result);
#else
printf("device copy initiated\n"); // HS
if((workspace_size != 0) && (Streamk_sel > 0))
{
printf("entered if\n");
workspace.FromDevice(c_m_n_device_result.mData.data());
}
else
c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());
printf("device copy finished\n"); // HS
pass &= ck::utils::check_err(c_m_n_device_result,
c_m_n_host_result,
"Error: Incorrect results!",
get_rtol<CDataType>(),
get_atol<CDataType>());
// if((workspace_size != 0) && (Streamk_sel > 0))
// {
// printf("entered if\n");
// workspace.FromDevice(c_m_n_device_result.mData.data());
// }
// else
// c_m_n_device_buf.FromDevice(c_m_n_device_result.mData.data());
// printf("device copy finished\n"); // HS
// pass &= ck::utils::check_err(c_m_n_device_result,
// c_m_n_host_result,
// "Error: Incorrect results!",
// get_rtol<CDataType>(),
// get_atol<CDataType>());
#endif
}
......
......@@ -109,7 +109,7 @@ float launch_and_time_kernel_with_preprocess(const StreamConfig& stream_config,
printf("Warm up %d times\n", stream_config.cold_niters_);
}
// warm up
preprocess();
//HS preprocess();
for(int i = 0; i < stream_config.cold_niters_; ++i)
{
kernel<<<grid_dim, block_dim, lds_byte, stream_config.stream_id_>>>(args...);
......
......@@ -11,8 +11,8 @@ struct StreamConfig
hipStream_t stream_id_ = nullptr;
bool time_kernel_ = false;
int log_level_ = 0;
int cold_niters_ = 5;
int nrepeat_ = 50;
int cold_niters_ = 0;//HS
int nrepeat_ = 1;//HS
bool flush_cache = false;
int rotating_count = 1;
......
......@@ -1186,7 +1186,6 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
void* p_workspace,
Block2CTileMap_streamk block_2_ctile_map_streamk)
{
const AElementwiseOperation a_element_op{};
const BElementwiseOperation b_element_op{};
const CElementwiseOperation c_element_op{};
......@@ -1899,6 +1898,7 @@ struct GridwiseGemm_xdl_cshuffle_streamk_v3
}
}
} // for loop
if(threadIdx.x==0)printf("kernel ends");
}
template <bool HasMainKBlockLoop,
......
......@@ -8,7 +8,7 @@ MY_PROJECT_SOURCE=$1
if [ $# -ge 2 ] ; then
GPU_TARGETS=$2
else
GPU_TARGETS="gfx908;gfx90a;gfx940"
GPU_TARGETS="gfx90a"
fi
cmake \
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment