Commit 2a0e5439 authored by aska-0096's avatar aska-0096
Browse files

Clean up code that was only used for debugging

parent b741109f
...@@ -119,29 +119,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3 ...@@ -119,29 +119,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3
using SpaceFillingCurve = SpaceFillingCurve<SliceLengths, using SpaceFillingCurve = SpaceFillingCurve<SliceLengths,
DimAccessOrder, DimAccessOrder,
remove_cv_t<decltype(dst_scalar_per_access)>>; remove_cv_t<decltype(dst_scalar_per_access)>>;
// printf("SpaceFillingCurve access_lengths = (%d, %d, %d, %d, %d, %d, %d)\n", SpaceFillingCurve::access_lengths[Number<0>{}].value,
// SpaceFillingCurve::access_lengths[Number<1>{}].value,
// SpaceFillingCurve::access_lengths[Number<2>{}].value,
// SpaceFillingCurve::access_lengths[Number<3>{}].value,
// SpaceFillingCurve::access_lengths[Number<4>{}].value,
// SpaceFillingCurve::access_lengths[Number<5>{}].value,
// SpaceFillingCurve::access_lengths[Number<6>{}].value);
//
// // printf("SpaceFillingCurve dim_access_order = (%d, %d, %d, %d, %d, %d, %d)\n", SpaceFillingCurve::dim_access_order[Number<0>{}].value,
// SpaceFillingCurve::dim_access_order[Number<1>{}].value,
// SpaceFillingCurve::dim_access_order[Number<2>{}].value,
// SpaceFillingCurve::dim_access_order[Number<3>{}].value,
// SpaceFillingCurve::dim_access_order[Number<4>{}].value,
// SpaceFillingCurve::dim_access_order[Number<5>{}].value,
// SpaceFillingCurve::dim_access_order[Number<6>{}].value);
//
// // // printf("SpaceFillingCurve ordered_access_lengths = (%d, %d, %d, %d, %d, %d, %d)\n", SpaceFillingCurve::ordered_access_lengths[Number<0>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<1>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<2>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<3>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<4>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<5>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<6>{}].value);
// TODO: Use SpaceFillingCurve::ScalarsPerAccess instead of DstScalarPerVector? // TODO: Use SpaceFillingCurve::ScalarsPerAccess instead of DstScalarPerVector?
static_assert(DstScalarPerVector == SpaceFillingCurve::ScalarPerVector, static_assert(DstScalarPerVector == SpaceFillingCurve::ScalarPerVector,
"wrong!DstScalarPerVector != SpaceFillingCurve::ScalarPerVector"); "wrong!DstScalarPerVector != SpaceFillingCurve::ScalarPerVector");
...@@ -158,7 +136,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3 ...@@ -158,7 +136,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3
static_for<0, DstScalarPerVector, 1>{}([&](auto i) { static_for<0, DstScalarPerVector, 1>{}([&](auto i) {
constexpr index_t src_offset = src_desc.CalculateOffset( constexpr index_t src_offset = src_desc.CalculateOffset(
src_slice_origin_idx + idx_md + i * dst_scalar_step_in_vector); src_slice_origin_idx + idx_md + i * dst_scalar_step_in_vector);
// debug_hexprinter(0xffffffff, src_offset, "src_coord_iteration");
SrcData v; SrcData v;
// apply element-wise operation // apply element-wise operation
...@@ -176,11 +154,10 @@ struct ThreadwiseTensorSliceTransfer_v1r3 ...@@ -176,11 +154,10 @@ struct ThreadwiseTensorSliceTransfer_v1r3
dst_coord_.GetOffset(), dst_coord_.GetOffset(),
is_dst_valid, is_dst_valid,
dst_vector.template AsType<dst_vector_t>()[Number<0>{}]); dst_vector.template AsType<dst_vector_t>()[Number<0>{}]);
// debug_hexprinter(0xffffffff, dst_coord_.GetOffset(), "dst_coord_iteration");
if constexpr(idx_1d.value != num_access - 1) if constexpr(idx_1d.value != num_access - 1)
{ {
constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d); constexpr auto forward_step = SpaceFillingCurve::GetForwardStep(idx_1d);
// printf("move forward = (%d, %d, %d, %d, %d, %d, %d)\n", forward_step[Number<0>{}], forward_step[Number<1>{}], forward_step[Number<2>{}], forward_step[Number<3>{}], forward_step[Number<4>{}], forward_step[Number<5>{}], forward_step[Number<6>{}]);
move_tensor_coordinate( move_tensor_coordinate(
dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step)); dst_desc, dst_coord_, make_tensor_coordinate_step(dst_desc, forward_step));
} }
......
...@@ -96,7 +96,6 @@ struct ThreadwiseTensorSliceTransfer_v3r1 ...@@ -96,7 +96,6 @@ struct ThreadwiseTensorSliceTransfer_v3r1
src_element_op_(src_element_op), src_element_op_(src_element_op),
dst_element_op_(dst_element_op) dst_element_op_(dst_element_op)
{ {
// printf("global desc: %s\n", __PRETTY_FUNCTION__);
} }
__device__ void SetSrcSliceOrigin(const SrcDesc& src_desc, const Index& src_slice_origin_idx) __device__ void SetSrcSliceOrigin(const SrcDesc& src_desc, const Index& src_slice_origin_idx)
...@@ -128,6 +127,7 @@ struct ThreadwiseTensorSliceTransfer_v3r1 ...@@ -128,6 +127,7 @@ struct ThreadwiseTensorSliceTransfer_v3r1
detail::lambda_scalar_per_access<SrcVectorDim, SrcScalarPerVector>{}, Number<nDim>{}); detail::lambda_scalar_per_access<SrcVectorDim, SrcScalarPerVector>{}, Number<nDim>{});
constexpr auto src_access_lengths = SliceLengths{} / src_scalar_per_access; constexpr auto src_access_lengths = SliceLengths{} / src_scalar_per_access;
constexpr auto src_dim_access_order = SrcDimAccessOrder{}; constexpr auto src_dim_access_order = SrcDimAccessOrder{};
constexpr auto ordered_src_access_lengths = constexpr auto ordered_src_access_lengths =
......
...@@ -49,50 +49,3 @@ ...@@ -49,50 +49,3 @@
#ifdef CK_USE_AMD_MFMA #ifdef CK_USE_AMD_MFMA
#include "ck/utility/amd_xdlops.hpp" #include "ck/utility/amd_xdlops.hpp"
#endif #endif
#include <string_view>
// Returns a std::string_view spelling out the template argument T.
//
// The text is taken from the compiler's decorated signature of this very
// function (__PRETTY_FUNCTION__ on clang/GCC, __FUNCSIG__ on MSVC) after
// trimming the fixed, compiler-specific text that surrounds the type name.
// On any other compiler an empty view is returned.
template <typename T>
constexpr auto type_name() {
#ifdef __clang__
    // clang is tested first because it also defines __GNUC__.
    const std::string_view signature = __PRETTY_FUNCTION__;
    const std::string_view leading   = "auto type_name() [T = ";
    const std::string_view trailing  = "]";
#elif defined(__GNUC__)
    const std::string_view signature = __PRETTY_FUNCTION__;
    const std::string_view leading   = "constexpr auto type_name() [with T = ";
    const std::string_view trailing  = "]";
#elif defined(_MSC_VER)
    const std::string_view signature = __FUNCSIG__;
    const std::string_view leading   = "auto __cdecl type_name<";
    const std::string_view trailing  = ">(void)";
#else
    // Unknown compiler: nothing to inspect, nothing to trim.
    const std::string_view signature{};
    const std::string_view leading{};
    const std::string_view trailing{};
#endif
    // Only the lengths of the surrounding text are used; its content is not compared.
    std::string_view spelled = signature;
    spelled.remove_prefix(leading.size());
    spelled.remove_suffix(trailing.size());
    return spelled;
}
// Accepts int, float, _Float16, and Number<> (any type exposing a `.value` member) as input.
//
// Debug helper: takes the raw bit pattern of v_val, compares it with v_target and,
// on a mismatch, prints one diagnostic line containing `info`, the result of
// ck::get_thread_local_1d_id(), and both values in hexadecimal.
//   v_target - expected bit pattern
//   v_val    - value whose bits are inspected
//   info     - label printed at the start of the diagnostic line
//
// The bits are copied out with __builtin_memcpy rather than by dereferencing a
// reinterpret_cast'ed pointer: reading a float/_Float16 object through a
// uint32_t*/uint16_t* violates the strict-aliasing rule.
template <typename T>
__host__ __device__
void debug_hexprinter(const uint32_t v_target, const T v_val, const char* info){
    if constexpr(std::is_same_v<T, int> || std::is_same_v<T, float> )
    {
        uint32_t v_dbg = 0;
        __builtin_memcpy(&v_dbg, &v_val, sizeof(v_dbg));
        if(v_dbg != v_target)
            printf("%s@Thread: %d, Val: %08x != Target: %08x\n", info, ck::get_thread_local_1d_id(), v_dbg, v_target);
    }
    else if constexpr(std::is_same_v<T, _Float16>)
    {
        // Only 16 bits are available here, so the comparison can succeed only
        // when v_target itself fits in the low 16 bits.
        uint16_t v_dbg = 0;
        __builtin_memcpy(&v_dbg, &v_val, sizeof(v_dbg));
        if(v_dbg != v_target)
            printf("%s@Thread: %d, Val: %04x != Target: %08x\n", info, ck::get_thread_local_1d_id(), v_dbg, v_target);
    }
    else
    {
        // Any other T is expected to expose `.value`; its first four bytes are inspected.
        uint32_t v_dbg = 0;
        __builtin_memcpy(&v_dbg, &(v_val.value), sizeof(v_dbg));
        if(v_dbg != v_target)
            printf("%s@Thread: %d, Val: %08x != Target: %08x\n", info, ck::get_thread_local_1d_id(), v_dbg, v_target);
    }
}
...@@ -942,11 +942,6 @@ using int8x16_t = typename vector_type<int8_t, 16>::type; ...@@ -942,11 +942,6 @@ using int8x16_t = typename vector_type<int8_t, 16>::type;
using int8x32_t = typename vector_type<int8_t, 32>::type; using int8x32_t = typename vector_type<int8_t, 32>::type;
using int8x64_t = typename vector_type<int8_t, 64>::type; using int8x64_t = typename vector_type<int8_t, 64>::type;
// The alias below exists only when CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 is defined.
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
// i4
// int4x16_t: the `type` member of vector_type<int4_t, 16>, following the same
// naming pattern as the int8xN_t aliases next to it.
using int4x16_t = typename vector_type<int4_t, 16>::type;
#endif
// Convert X to Y // Convert X to Y
template <typename Y, typename X> template <typename Y, typename X>
__host__ __device__ constexpr Y type_convert(X x) __host__ __device__ constexpr Y type_convert(X x)
......
...@@ -55,7 +55,7 @@ check_err(const Range& out, ...@@ -55,7 +55,7 @@ check_err(const Range& out,
{ {
max_err = err > max_err ? err : max_err; max_err = err > max_err ? err : max_err;
err_count++; err_count++;
if(err_count < 16384) if(err_count < 5)
{ {
std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
<< "] != ref[" << i << "]: " << o << " != " << r << std::endl; << "] != ref[" << i << "]: " << o << " != " << r << std::endl;
...@@ -103,7 +103,7 @@ check_err(const Range& out, ...@@ -103,7 +103,7 @@ check_err(const Range& out,
{ {
max_err = err > max_err ? err : max_err; max_err = err > max_err ? err : max_err;
err_count++; err_count++;
if(err_count < 16384) if(err_count < 5)
{ {
std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
<< "] != ref[" << i << "]: " << o << " != " << r << std::endl; << "] != ref[" << i << "]: " << o << " != " << r << std::endl;
...@@ -150,7 +150,7 @@ check_err(const Range& out, ...@@ -150,7 +150,7 @@ check_err(const Range& out,
{ {
max_err = err > max_err ? err : max_err; max_err = err > max_err ? err : max_err;
err_count++; err_count++;
if(err_count < 16384) if(err_count < 5)
{ {
std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
<< "] != ref[" << i << "]: " << o << " != " << r << std::endl; << "] != ref[" << i << "]: " << o << " != " << r << std::endl;
...@@ -202,7 +202,7 @@ check_err(const Range& out, ...@@ -202,7 +202,7 @@ check_err(const Range& out,
{ {
max_err = err > max_err ? err : max_err; max_err = err > max_err ? err : max_err;
err_count++; err_count++;
if(err_count < 16384) if(err_count < 5)
{ {
std::cerr << msg << " out[" << i << "] != ref[" << i << "]: " << o << " != " << r std::cerr << msg << " out[" << i << "] != ref[" << i << "]: " << o << " != " << r
<< std::endl; << std::endl;
......
...@@ -114,23 +114,5 @@ struct FillConstant ...@@ -114,23 +114,5 @@ struct FillConstant
} }
}; };
// Fills a range with a repeating staircase pattern.
//
// The element at position p (counting from 0) receives
//     ((p / k_num_) % mn_num_) * step_
// i.e. the value stays constant for k_num_ consecutive elements, then grows by
// step_, and wraps back to zero after mn_num_ distinct levels.
template <typename T>
struct FillMNID
{
    T step_{0.1};     // increment between two consecutive levels
    int k_num_{32};   // number of consecutive elements sharing one level
    int mn_num_{128}; // number of distinct levels before wrapping around

    // Writes the pattern into [first, last).
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        // The generator owns copies of the parameters plus a running position counter.
        auto next_value =
            [step = step_, run_length = k_num_, level_count = mn_num_, position = 0]() mutable {
                const auto value = ((position / run_length) % level_count) * step;
                ++position;
                return value;
            };
        std::generate(first, last, next_value);
    }
};
} // namespace utils } // namespace utils
} // namespace ck } // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment