Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
2a0e5439
"driver/src/host_tensor.cpp" did not exist on "9657baec325227d0d64424bffb394afbd6d37a60"
Commit
2a0e5439
authored
Dec 15, 2022
by
aska-0096
Browse files
clean some debug purpose code
parent
b741109f
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
9 additions
and
102 deletions
+9
-102
example/01_gemm/run_gemm_example.inc
example/01_gemm/run_gemm_example.inc
+1
-1
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
...operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
+3
-26
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
...tion/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
+1
-1
include/ck/utility/common_header.hpp
include/ck/utility/common_header.hpp
+0
-47
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+0
-5
library/include/ck/library/utility/check_err.hpp
library/include/ck/library/utility/check_err.hpp
+4
-4
library/include/ck/library/utility/fill.hpp
library/include/ck/library/utility/fill.hpp
+0
-18
No files found.
example/01_gemm/run_gemm_example.inc
View file @
2a0e5439
...
...
@@ -101,7 +101,7 @@ bool run_gemm(const ProblemSize& problem_size, const ExecutionConfig& config)
return
true
;
}
float
ave_time
=
invoker
.
Run
(
argument
,
StreamConfig
{
nullptr
,
config
.
time_kernel
});
std
::
size_t
flop
=
2_
uz
*
M
*
N
*
K
;
std
::
size_t
num_btype
=
sizeof
(
ADataType
)
*
M
*
K
+
sizeof
(
BDataType
)
*
K
*
N
+
sizeof
(
CDataType
)
*
M
*
N
;
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp
View file @
2a0e5439
...
...
@@ -119,29 +119,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3
using
SpaceFillingCurve
=
SpaceFillingCurve
<
SliceLengths
,
DimAccessOrder
,
remove_cv_t
<
decltype
(
dst_scalar_per_access
)
>>
;
// printf("SpaceFillingCurve access_lengths = (%d, %d, %d, %d, %d, %d, %d)\n", SpaceFillingCurve::access_lengths[Number<0>{}].value,
// SpaceFillingCurve::access_lengths[Number<1>{}].value,
// SpaceFillingCurve::access_lengths[Number<2>{}].value,
// SpaceFillingCurve::access_lengths[Number<3>{}].value,
// SpaceFillingCurve::access_lengths[Number<4>{}].value,
// SpaceFillingCurve::access_lengths[Number<5>{}].value,
// SpaceFillingCurve::access_lengths[Number<6>{}].value);
//
// // printf("SpaceFillingCurve dim_access_order = (%d, %d, %d, %d, %d, %d, %d)\n", SpaceFillingCurve::dim_access_order[Number<0>{}].value,
// SpaceFillingCurve::dim_access_order[Number<1>{}].value,
// SpaceFillingCurve::dim_access_order[Number<2>{}].value,
// SpaceFillingCurve::dim_access_order[Number<3>{}].value,
// SpaceFillingCurve::dim_access_order[Number<4>{}].value,
// SpaceFillingCurve::dim_access_order[Number<5>{}].value,
// SpaceFillingCurve::dim_access_order[Number<6>{}].value);
//
// // // printf("SpaceFillingCurve ordered_access_lengths = (%d, %d, %d, %d, %d, %d, %d)\n", SpaceFillingCurve::ordered_access_lengths[Number<0>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<1>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<2>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<3>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<4>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<5>{}].value,
// SpaceFillingCurve::ordered_access_lengths[Number<6>{}].value);
// TODO: Use SpaceFillingCurve::ScalarsPerAccess instead of DstScalarPerVector?
static_assert
(
DstScalarPerVector
==
SpaceFillingCurve
::
ScalarPerVector
,
"wrong!DstScalarPerVector != SpaceFillingCurve::ScalarPerVector"
);
...
...
@@ -158,7 +136,7 @@ struct ThreadwiseTensorSliceTransfer_v1r3
static_for
<
0
,
DstScalarPerVector
,
1
>
{}([
&
](
auto
i
)
{
constexpr
index_t
src_offset
=
src_desc
.
CalculateOffset
(
src_slice_origin_idx
+
idx_md
+
i
*
dst_scalar_step_in_vector
);
// debug_hexprinter(0xffffffff, src_offset, "src_coord_iteration");
SrcData
v
;
// apply element-wise operation
...
...
@@ -176,11 +154,10 @@ struct ThreadwiseTensorSliceTransfer_v1r3
dst_coord_
.
GetOffset
(),
is_dst_valid
,
dst_vector
.
template
AsType
<
dst_vector_t
>()[
Number
<
0
>
{}]);
// debug_hexprinter(0xffffffff, dst_coord_.GetOffset(), "dst_coord_iteration");
if
constexpr
(
idx_1d
.
value
!=
num_access
-
1
)
{
constexpr
auto
forward_step
=
SpaceFillingCurve
::
GetForwardStep
(
idx_1d
);
// printf("move forward = (%d, %d, %d, %d, %d, %d, %d)\n", forward_step[Number<0>{}], forward_step[Number<1>{}], forward_step[Number<2>{}], forward_step[Number<3>{}], forward_step[Number<4>{}], forward_step[Number<5>{}], forward_step[Number<6>{}]);
move_tensor_coordinate
(
dst_desc
,
dst_coord_
,
make_tensor_coordinate_step
(
dst_desc
,
forward_step
));
}
...
...
include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp
View file @
2a0e5439
...
...
@@ -96,7 +96,6 @@ struct ThreadwiseTensorSliceTransfer_v3r1
src_element_op_
(
src_element_op
),
dst_element_op_
(
dst_element_op
)
{
// printf("global desc: %s\n", __PRETTY_FUNCTION__);
}
__device__
void
SetSrcSliceOrigin
(
const
SrcDesc
&
src_desc
,
const
Index
&
src_slice_origin_idx
)
...
...
@@ -128,6 +127,7 @@ struct ThreadwiseTensorSliceTransfer_v3r1
detail
::
lambda_scalar_per_access
<
SrcVectorDim
,
SrcScalarPerVector
>
{},
Number
<
nDim
>
{});
constexpr
auto
src_access_lengths
=
SliceLengths
{}
/
src_scalar_per_access
;
constexpr
auto
src_dim_access_order
=
SrcDimAccessOrder
{};
constexpr
auto
ordered_src_access_lengths
=
...
...
include/ck/utility/common_header.hpp
View file @
2a0e5439
...
...
@@ -49,50 +49,3 @@
#ifdef CK_USE_AMD_MFMA
#include "ck/utility/amd_xdlops.hpp"
#endif
#include <string_view>
/// Returns the compiler-spelled name of `T` as a `std::string_view`.
///
/// The name is carved out of the compiler's "pretty" function signature
/// (`__PRETTY_FUNCTION__` on clang/GCC, `__FUNCSIG__` on MSVC) by dropping the
/// fixed text that surrounds the template argument. On a compiler that matches
/// none of the branches below, the result is an empty view.
template <typename T>
constexpr auto type_name()
{
#if defined(__clang__)
    // clang spells the signature as: auto type_name() [T = <name>]
    std::string_view signature          = __PRETTY_FUNCTION__;
    constexpr std::string_view leading  = "auto type_name() [T = ";
    constexpr std::string_view trailing = "]";
#elif defined(__GNUC__)
    // GCC spells the signature as: constexpr auto type_name() [with T = <name>]
    std::string_view signature          = __PRETTY_FUNCTION__;
    constexpr std::string_view leading  = "constexpr auto type_name() [with T = ";
    constexpr std::string_view trailing = "]";
#elif defined(_MSC_VER)
    // MSVC spells the signature as: auto __cdecl type_name<<name>>(void)
    std::string_view signature          = __FUNCSIG__;
    constexpr std::string_view leading  = "auto __cdecl type_name<";
    constexpr std::string_view trailing = ">(void)";
#else
    // Unknown compiler: nothing to parse, nothing to strip.
    std::string_view signature{};
    constexpr std::string_view leading{};
    constexpr std::string_view trailing{};
#endif

    // Strip the decoration on both sides, leaving only the type's spelling.
    signature.remove_prefix(leading.size());
    signature.remove_suffix(trailing.size());
    return signature;
}
// Debug helper: reinterprets the raw bits of `v_val` as an unsigned integer and,
// when they differ from `v_target`, prints `info`, the calling thread's local 1-D
// id, the observed bits and the target in hexadecimal.
//
// Accepts int, float, and _Float16 directly; any other type (e.g. Number<>) is
// read through its `.value` member.
template <typename T>
__host__ __device__ void debug_hexprinter(const uint32_t v_target, const T v_val, const char* info)
{
    if constexpr(std::is_same<T, int>::value || std::is_same<T, float>::value)
    {
        // 32-bit payload: compare the full word.
        const uint32_t* word_ptr = reinterpret_cast<const uint32_t*>(&v_val);
        const uint32_t raw_bits  = *word_ptr;
        if(raw_bits != v_target)
        {
            printf("%s@Thread: %d, Val: %08x != Target: %08x\n",
                   info,
                   ck::get_thread_local_1d_id(),
                   raw_bits,
                   v_target);
        }
    }
    else if constexpr(std::is_same<T, _Float16>::value)
    {
        // 16-bit payload: the value is printed with 4 hex digits, the target with 8.
        const uint16_t* half_ptr = reinterpret_cast<const uint16_t*>(&v_val);
        const uint16_t raw_bits  = *half_ptr;
        if(raw_bits != v_target)
        {
            printf("%s@Thread: %d, Val: %04x != Target: %08x\n",
                   info,
                   ck::get_thread_local_1d_id(),
                   raw_bits,
                   v_target);
        }
    }
    else
    {
        // Fallback: read the wrapped `.value` member as a 32-bit word.
        const uint32_t* word_ptr = reinterpret_cast<const uint32_t*>(&(v_val.value));
        const uint32_t raw_bits  = *word_ptr;
        if(raw_bits != v_target)
        {
            printf("%s@Thread: %d, Val: %08x != Target: %08x\n",
                   info,
                   ck::get_thread_local_1d_id(),
                   raw_bits,
                   v_target);
        }
    }
}
include/ck/utility/data_type.hpp
View file @
2a0e5439
...
...
@@ -942,11 +942,6 @@ using int8x16_t = typename vector_type<int8_t, 16>::type;
// Alias for the `type` member of vector_type<int8_t, 32>.
using
int8x32_t
=
typename
vector_type
<
int8_t
,
32
>::
type
;
// Alias for the `type` member of vector_type<int8_t, 64>.
using
int8x64_t
=
typename
vector_type
<
int8_t
,
64
>::
type
;
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
// i4
// Alias for the `type` member of vector_type<int4_t, 16>; only declared when
// CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 is defined.
using
int4x16_t
=
typename
vector_type
<
int4_t
,
16
>::
type
;
#endif
// Convert X to Y
template
<
typename
Y
,
typename
X
>
__host__
__device__
constexpr
Y
type_convert
(
X
x
)
...
...
library/include/ck/library/utility/check_err.hpp
View file @
2a0e5439
...
...
@@ -55,7 +55,7 @@ check_err(const Range& out,
{
max_err
=
err
>
max_err
?
err
:
max_err
;
err_count
++
;
if
(
err_count
<
16384
)
if
(
err_count
<
5
)
{
std
::
cerr
<<
msg
<<
std
::
setw
(
12
)
<<
std
::
setprecision
(
7
)
<<
" out["
<<
i
<<
"] != ref["
<<
i
<<
"]: "
<<
o
<<
" != "
<<
r
<<
std
::
endl
;
...
...
@@ -103,7 +103,7 @@ check_err(const Range& out,
{
max_err
=
err
>
max_err
?
err
:
max_err
;
err_count
++
;
if
(
err_count
<
16384
)
if
(
err_count
<
5
)
{
std
::
cerr
<<
msg
<<
std
::
setw
(
12
)
<<
std
::
setprecision
(
7
)
<<
" out["
<<
i
<<
"] != ref["
<<
i
<<
"]: "
<<
o
<<
" != "
<<
r
<<
std
::
endl
;
...
...
@@ -150,7 +150,7 @@ check_err(const Range& out,
{
max_err
=
err
>
max_err
?
err
:
max_err
;
err_count
++
;
if
(
err_count
<
16384
)
if
(
err_count
<
5
)
{
std
::
cerr
<<
msg
<<
std
::
setw
(
12
)
<<
std
::
setprecision
(
7
)
<<
" out["
<<
i
<<
"] != ref["
<<
i
<<
"]: "
<<
o
<<
" != "
<<
r
<<
std
::
endl
;
...
...
@@ -202,7 +202,7 @@ check_err(const Range& out,
{
max_err
=
err
>
max_err
?
err
:
max_err
;
err_count
++
;
if
(
err_count
<
16384
)
if
(
err_count
<
5
)
{
std
::
cerr
<<
msg
<<
" out["
<<
i
<<
"] != ref["
<<
i
<<
"]: "
<<
o
<<
" != "
<<
r
<<
std
::
endl
;
...
...
library/include/ck/library/utility/fill.hpp
View file @
2a0e5439
...
...
@@ -114,23 +114,5 @@ struct FillConstant
}
};
/// Fill functor that writes a repeating staircase pattern into a range.
///
/// The element at position `i` (counted from `first`, starting at 0) receives
/// `((i / k_num_) % mn_num_) * step_`: the value stays constant for `k_num_`
/// consecutive elements, then grows by `step_`, and wraps back to zero after
/// `mn_num_` distinct levels.
template <typename T>
struct FillMNID
{
    T step_{0.1};     // increment between two consecutive levels
    int k_num_{32};   // how many consecutive elements share one level
    int mn_num_{128}; // number of levels before the pattern restarts at zero

    /// Fills `[first, last)` with the staircase pattern described above.
    template <typename ForwardIter>
    void operator()(ForwardIter first, ForwardIter last) const
    {
        int position = 0;
        for(ForwardIter cursor = first; cursor != last; ++cursor)
        {
            // Integer level index for this position; scaled by step_ on assignment.
            const int level = (position / k_num_) % mn_num_;
            *cursor         = level * step_;
            ++position;
        }
    }
};
}
// namespace utils
}
// namespace ck
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment