Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
54fba515
"...composable_kernel.git" did not exist on "6a1bc5939cf05580b9766b15f2d4f1e660814f83"
Commit
54fba515
authored
Aug 09, 2021
by
Chao Liu
Browse files
tidy
parent
e62bae7a
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
0 additions
and
27 deletions
+0
-27
composable_kernel/include/problem_transform/transform_forward_convolution_into_gemm_v4r4_nchw_kcyx_nkhw.hpp
...orm_forward_convolution_into_gemm_v4r4_nchw_kcyx_nkhw.hpp
+0
-6
composable_kernel/include/problem_transform/transform_forward_convolution_into_gemm_v4r4_nhwc_kyxc_nhwk.hpp
...orm_forward_convolution_into_gemm_v4r4_nhwc_kyxc_nhwk.hpp
+0
-3
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
...rnel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
+0
-1
composable_kernel/include/tensor_operation/blockwise_gemm_xdlops.hpp
...kernel/include/tensor_operation/blockwise_gemm_xdlops.hpp
+0
-3
composable_kernel/include/utility/sequence.hpp
composable_kernel/include/utility/sequence.hpp
+0
-2
composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk.cpp
...tion_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk.cpp
+0
-1
host/driver_offline/include/device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nhwk.hpp
...on_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nhwk.hpp
+0
-8
host/driver_offline/include/driver_dynamic_gemm_xdlops_v2r3.hpp
...river_offline/include/driver_dynamic_gemm_xdlops_v2r3.hpp
+0
-3
No files found.
composable_kernel/include/problem_transform/transform_forward_convolution_into_gemm_v4r4_nchw_kcyx_nkhw.hpp
View file @
54fba515
...
...
@@ -126,9 +126,6 @@ transform_forward_convolution_into_gemm_v4r4_nchw_kcyx_nkhw_no_pad(
const
auto
C
=
in_n_c_hi_wi_global_desc
.
GetLength
(
I1
);
const
auto
K
=
out_n_k_ho_wo_global_desc
.
GetLength
(
I1
);
const
auto
Hi
=
in_n_c_hi_wi_global_desc
.
GetLength
(
I2
);
const
auto
Wi
=
in_n_c_hi_wi_global_desc
.
GetLength
(
I3
);
const
auto
Ho
=
out_n_k_ho_wo_global_desc
.
GetLength
(
I2
);
const
auto
Wo
=
out_n_k_ho_wo_global_desc
.
GetLength
(
I3
);
...
...
@@ -209,9 +206,6 @@ __host__ __device__ constexpr auto transform_forward_convolution_into_gemm_v4r4_
const
auto
C
=
in_n_c_hi_wi_global_desc
.
GetLength
(
I1
);
const
auto
K
=
out_n_k_ho_wo_global_desc
.
GetLength
(
I1
);
const
auto
Hi
=
in_n_c_hi_wi_global_desc
.
GetLength
(
I2
);
const
auto
Wi
=
in_n_c_hi_wi_global_desc
.
GetLength
(
I3
);
const
auto
Ho
=
out_n_k_ho_wo_global_desc
.
GetLength
(
I2
);
const
auto
Wo
=
out_n_k_ho_wo_global_desc
.
GetLength
(
I3
);
...
...
composable_kernel/include/problem_transform/transform_forward_convolution_into_gemm_v4r4_nhwc_kyxc_nhwk.hpp
View file @
54fba515
...
...
@@ -125,9 +125,6 @@ __host__ __device__ constexpr auto transform_forward_convolution_into_gemm_v4r4_
const
auto
C
=
in_n_hi_wi_c_grid_desc
.
GetLength
(
I3
);
const
auto
K
=
out_n_ho_wo_k_grid_desc
.
GetLength
(
I3
);
const
auto
Hi
=
in_n_hi_wi_c_grid_desc
.
GetLength
(
I1
);
const
auto
Wi
=
in_n_hi_wi_c_grid_desc
.
GetLength
(
I2
);
const
auto
Ho
=
out_n_ho_wo_k_grid_desc
.
GetLength
(
I1
);
const
auto
Wo
=
out_n_ho_wo_k_grid_desc
.
GetLength
(
I2
);
...
...
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
View file @
54fba515
...
...
@@ -69,7 +69,6 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v3
"wrong! K dimension not consistent
\n
"
);
constexpr
index_t
K
=
BlockMatrixA
{}.
GetLength
(
I1
);
// A is transposed
constexpr
index_t
N
=
BlockMatrixB
{}.
GetLength
(
I1
);
constexpr
index_t
H
=
BlockMatrixB
{}.
GetLength
(
I2
);
constexpr
index_t
W
=
BlockMatrixB
{}.
GetLength
(
I3
);
...
...
composable_kernel/include/tensor_operation/blockwise_gemm_xdlops.hpp
View file @
54fba515
...
...
@@ -52,7 +52,6 @@ struct BlockwiseGemmXdlops_km_kn_m0m1m2n_v1
const
index_t
waveId
=
thread_id
/
WaveSize
;
const
index_t
laneId
=
thread_id
%
WaveSize
;
const
index_t
waveId_m
=
waveId
/
NWaves
;
const
index_t
waveId_n
=
waveId
%
NWaves
;
if
constexpr
(
xdlops_gemm
.
IsKReduction
)
{
...
...
@@ -73,7 +72,6 @@ struct BlockwiseGemmXdlops_km_kn_m0m1m2n_v1
const
index_t
thread_id
=
get_thread_local_1d_id
();
const
index_t
waveId
=
thread_id
/
WaveSize
;
const
index_t
laneId
=
thread_id
%
WaveSize
;
const
index_t
waveId_m
=
waveId
/
NWaves
;
const
index_t
waveId_n
=
waveId
%
NWaves
;
if
constexpr
(
xdlops_gemm
.
IsKReduction
)
...
...
@@ -293,7 +291,6 @@ struct BlockwiseGemmXdlops_km_kn_m0m1m2n_v1_2x2pipeline
const
index_t
thread_id
=
get_thread_local_1d_id
();
const
index_t
waveId
=
thread_id
/
WaveSize
;
const
index_t
laneId
=
thread_id
%
WaveSize
;
const
index_t
waveId_m
=
waveId
/
NWaves
;
const
index_t
waveId_n
=
waveId
%
NWaves
;
if
constexpr
(
xdlops_gemm
.
IsKReduction
)
...
...
composable_kernel/include/utility/sequence.hpp
View file @
54fba515
...
...
@@ -685,8 +685,6 @@ __host__ __device__ constexpr auto operator+(Number<Y>, Sequence<Xs...>)
template
<
index_t
Y
,
index_t
...
Xs
>
__host__
__device__
constexpr
auto
operator
-
(
Number
<
Y
>
,
Sequence
<
Xs
...
>
)
{
constexpr
auto
seq_x
=
Sequence
<
Xs
...
>
{};
return
Sequence
<
(
Y
-
Xs
)...
>
{};
}
...
...
composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r4_xdlops_nhwc_kyxc_nhwk.cpp
View file @
54fba515
...
...
@@ -225,7 +225,6 @@ extern "C" __global__ void
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
in_n_hi_wi_c_desc
=
make_dynamic_naive_tensor_descriptor_packed_v2
(
make_tuple
(
256
,
28
,
28
,
256
));
...
...
host/driver_offline/include/device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nhwk.hpp
View file @
54fba515
...
...
@@ -35,11 +35,6 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nh
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
I4
=
Number
<
4
>
{};
constexpr
auto
I5
=
Number
<
5
>
{};
constexpr
auto
I6
=
Number
<
6
>
{};
constexpr
auto
I7
=
Number
<
7
>
{};
constexpr
auto
I8
=
Number
<
8
>
{};
DeviceMem
in_n_hi_wi_c_device_buf
(
sizeof
(
TInWei
)
*
in_n_hi_wi_c
.
mDesc
.
GetElementSpace
());
DeviceMem
wei_k_y_x_c_device_buf
(
sizeof
(
TInWei
)
*
wei_k_y_x_c
.
mDesc
.
GetElementSpace
());
...
...
@@ -218,9 +213,6 @@ void device_dynamic_convolution_forward_implicit_gemm_v4r4r2_xdlops_nhwc_kyxc_nh
const
auto
K
=
out_n_ho_wo_k_lengths
[
I3
];
const
auto
C
=
wei_k_y_x_c_lengths
[
I3
];
const
auto
Hi
=
in_n_hi_wi_c_lengths
[
I1
];
const
auto
Wi
=
in_n_hi_wi_c_lengths
[
I2
];
const
auto
Ho
=
out_n_ho_wo_k_lengths
[
I1
];
const
auto
Wo
=
out_n_ho_wo_k_lengths
[
I2
];
...
...
host/driver_offline/include/driver_dynamic_gemm_xdlops_v2r3.hpp
View file @
54fba515
...
...
@@ -66,9 +66,6 @@ __host__ float driver_dynamic_gemm_xdlops_v2r3(const FloatAB* p_a_grid,
constexpr
auto
I0
=
Number
<
0
>
{};
constexpr
auto
I1
=
Number
<
1
>
{};
constexpr
auto
I2
=
Number
<
2
>
{};
constexpr
auto
I3
=
Number
<
3
>
{};
constexpr
auto
I4
=
Number
<
4
>
{};
constexpr
auto
I5
=
Number
<
5
>
{};
using
GridwiseGemm
=
GridwiseDynamicGemm_k0mk1_k0nk1_mn_xdlops_v2r3
<
BlockSize
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment