Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
dbfe0051
Commit
dbfe0051
authored
Nov 07, 2023
by
Bartlomiej Kocot
Browse files
Minor fixes
Minor fixes Minor fixes
parent
31257062
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
28 deletions
+23
-28
example/62_conv_fwd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
..._fwd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
+13
-8
include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp
.../impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp
+4
-4
include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp
...r_operation/gpu/device/impl/device_grouped_conv_utils.hpp
+6
-16
No files found.
example/62_conv_fwd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp
View file @
dbfe0051
...
@@ -226,14 +226,17 @@ bool run_grouped_conv_fwd(bool do_verification,
...
@@ -226,14 +226,17 @@ bool run_grouped_conv_fwd(bool do_verification,
if
(
do_verification
)
if
(
do_verification
)
{
{
auto
ref_conv
=
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
auto
ref_conv
=
InDataType
,
ck
::
tensor_operation
::
host
::
ReferenceConvFwd
<
NDimSpatial
,
WeiDataType
,
InDataType
,
OutDataType
,
WeiDataType
,
InElementOp
,
OutDataType
,
WeiElementOp
,
InElementOp
,
OutElementOp
,
WeiElementOp
,
NumDs
>
();
OutElementOp
,
0
,
/*Num A Elementwise Tensors*/
0
,
/*Num B Elementwise Tensors*/
NumDs
>
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_invoker
=
ref_conv
.
MakeInvoker
();
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in
,
auto
ref_argument
=
ref_conv
.
MakeArgument
(
in
,
...
@@ -246,6 +249,8 @@ bool run_grouped_conv_fwd(bool do_verification,
...
@@ -246,6 +249,8 @@ bool run_grouped_conv_fwd(bool do_verification,
in_element_op
,
in_element_op
,
wei_element_op
,
wei_element_op
,
out_element_op
,
out_element_op
,
{},
{},
d_tensors
);
d_tensors
);
ref_invoker
.
Run
(
ref_argument
);
ref_invoker
.
Run
(
ref_argument
);
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp
View file @
dbfe0051
...
@@ -104,7 +104,7 @@ __global__ void
...
@@ -104,7 +104,7 @@ __global__ void
const
long_index_t
e_batch_offset
=
__builtin_amdgcn_readfirstlane
(
const
long_index_t
e_batch_offset
=
__builtin_amdgcn_readfirstlane
(
static_cast
<
long_index_t
>
(
compute_ptr_offset_of_batch
.
GetEPtrOffset
(
g_idx
)));
static_cast
<
long_index_t
>
(
compute_ptr_offset_of_batch
.
GetEPtrOffset
(
g_idx
)));
const
auto
ds_batch_offset
=
compute_ptr_offset_of_batch
.
GetDsPtrOffset
(
g_idx
);
const
auto
&
ds_batch_offset
=
compute_ptr_offset_of_batch
.
GetDsPtrOffset
(
g_idx
);
__shared__
char
p_shared
[
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()];
__shared__
char
p_shared
[
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()];
...
@@ -121,13 +121,13 @@ __global__ void
...
@@ -121,13 +121,13 @@ __global__ void
AsPointer
p_as_grid_grp
;
AsPointer
p_as_grid_grp
;
BsPointer
p_bs_grid_grp
;
BsPointer
p_bs_grid_grp
;
const
auto
as_batch_offset
=
compute_ptr_offset_of_batch
.
GetAsPtrOffset
(
g_idx
);
const
auto
&
as_batch_offset
=
compute_ptr_offset_of_batch
.
GetAsPtrOffset
(
g_idx
);
static
constexpr
index_t
NumATensor
=
AGridDesc_AK0_M_AK1
::
Size
();
static
constexpr
index_t
NumATensor
=
AGridDesc_AK0_M_AK1
::
Size
();
static_for
<
0
,
NumATensor
,
1
>
{}(
static_for
<
0
,
NumATensor
,
1
>
{}(
[
&
](
auto
i
)
{
p_as_grid_grp
(
i
)
=
p_as_grid
[
i
]
+
as_batch_offset
[
i
];
});
[
&
](
auto
i
)
{
p_as_grid_grp
(
i
)
=
p_as_grid
[
i
]
+
as_batch_offset
[
i
];
});
const
auto
bs_batch_offset
=
compute_ptr_offset_of_batch
.
GetBsPtrOffset
(
g_idx
);
const
auto
&
bs_batch_offset
=
compute_ptr_offset_of_batch
.
GetBsPtrOffset
(
g_idx
);
static
constexpr
index_t
NumBTensor
=
BGridDesc_BK0_N_BK1
::
Size
();
static
constexpr
index_t
NumBTensor
=
BGridDesc_BK0_N_BK1
::
Size
();
static_for
<
0
,
NumBTensor
,
1
>
{}(
static_for
<
0
,
NumBTensor
,
1
>
{}(
...
@@ -988,7 +988,7 @@ struct DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
...
@@ -988,7 +988,7 @@ struct DeviceGroupedConvFwdMultipleD_Xdl_CShuffle
static
auto
MakeArgument
(
static
auto
MakeArgument
(
APointers
p_as
,
APointers
p_as
,
BPointers
p_bs
,
BPointers
p_bs
,
std
::
array
<
const
void
*
,
NumDTensor
>&
p_ds
,
const
std
::
array
<
const
void
*
,
NumDTensor
>&
p_ds
,
void
*
p_e
,
void
*
p_e
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_lengths
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_strides
,
const
std
::
array
<
index_t
,
NDimSpatial
+
3
>&
a_g_n_c_wis_strides
,
...
...
include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp
View file @
dbfe0051
...
@@ -20,17 +20,12 @@ struct ComputePtrOffsetOfStridedBatch
...
@@ -20,17 +20,12 @@ struct ComputePtrOffsetOfStridedBatch
index_t
BatchStrideB
,
index_t
BatchStrideB
,
Array
<
ck
::
index_t
,
NumDTensor
>
BatchStrideDs
,
Array
<
ck
::
index_t
,
NumDTensor
>
BatchStrideDs
,
index_t
BatchStrideE
)
index_t
BatchStrideE
)
:
BatchStrideA_
(),
:
BatchStrideA_
(
BatchStrideA
),
BatchStrideB_
(),
BatchStrideB_
(
BatchStrideB
),
BatchStrideDs_
(
BatchStrideDs
),
BatchStrideDs_
(
BatchStrideDs
),
BatchStrideE_
(
BatchStrideE
)
BatchStrideE_
(
BatchStrideE
)
{
{
if
constexpr
(
!
isMultiAB
)
if
constexpr
(
isMultiAB
)
{
BatchStrideA_
=
BatchStrideA
;
BatchStrideB_
=
BatchStrideB
;
}
else
{
{
static_assert
(
"Invalid constructor for multiple A or B"
);
static_assert
(
"Invalid constructor for multiple A or B"
);
}
}
...
@@ -40,17 +35,12 @@ struct ComputePtrOffsetOfStridedBatch
...
@@ -40,17 +35,12 @@ struct ComputePtrOffsetOfStridedBatch
Array
<
ck
::
index_t
,
NumBTensor
>
BatchStrideBs
,
Array
<
ck
::
index_t
,
NumBTensor
>
BatchStrideBs
,
Array
<
ck
::
index_t
,
NumDTensor
>
BatchStrideDs
,
Array
<
ck
::
index_t
,
NumDTensor
>
BatchStrideDs
,
index_t
BatchStrideE
)
index_t
BatchStrideE
)
:
BatchStrideA_
(),
:
BatchStrideA_
(
BatchStrideAs
),
BatchStrideB_
(),
BatchStrideB_
(
BatchStrideBs
),
BatchStrideDs_
(
BatchStrideDs
),
BatchStrideDs_
(
BatchStrideDs
),
BatchStrideE_
(
BatchStrideE
)
BatchStrideE_
(
BatchStrideE
)
{
{
if
constexpr
(
isMultiAB
)
if
constexpr
(
!
isMultiAB
)
{
BatchStrideA_
=
BatchStrideAs
;
BatchStrideB_
=
BatchStrideBs
;
}
else
{
{
static_assert
(
"Invalid constructor for single A and B"
);
static_assert
(
"Invalid constructor for single A and B"
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment