gaoqiong / composable_kernel · Commits · e28aa8dd

Commit e28aa8dd, authored Nov 08, 2023 by Bartlomiej Kocot
Parent: dbfe0051

Comment fixes
Showing 9 changed files with 80 additions and 98 deletions (+80 -98):
+1  -1   client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc
+6  -6   client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc
+0  -2   client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_bf16.cpp
+0  -2   client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp16.cpp
+0  -2   client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp32.cpp
+0  -2   client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_int8.cpp
+71 -79  include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp
+1  -0   library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+1  -4   library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp
client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc

@@ -63,7 +63,7 @@ int execute_conv_fwd_scaleadd_scaleadd_relu()
         K * Z * Y * X * C, Z * Y * X * C, 1, Y * X * C, X * C, C};
     std::array<ck::index_t, 6> out_lengths{G, N, K, Do, Ho, Wo};
     std::array<ck::index_t, 6> out_strides{
-        C, Do * Ho * Wo * G * C, 1, Ho * Wo * G * C, Wo * G * C, G * C};
+        K, Do * Ho * Wo * G * K, 1, Ho * Wo * G * K, Wo * G * K, G * K};
     std::array<ck::index_t, NumDimSpatial> filter_strides{1, 1, 1};
     std::array<ck::index_t, NumDimSpatial> filter_dilations{1, 1, 1};
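The corrected line builds the output strides from K, the per-group output channel count, where the old code had reused the input-side pattern based on C. Here is a standalone sketch (not part of the commit) showing how each value arises for an output tensor with logical dimensions (G, N, K, Do, Ho, Wo) stored packed in N, Do, Ho, Wo, G, K memory order; the concrete sizes are made up:

// Standalone sketch (not from the commit): derive the per-dimension strides of
// an output tensor with logical dimensions (G, N, K, Do, Ho, Wo) stored packed
// in N, Do, Ho, Wo, G, K memory order. A dimension's stride is the product of
// the extents of every dimension to its right in memory.
#include <array>
#include <cstdint>
#include <iostream>

int main()
{
    const std::int64_t G = 2, K = 8, Do = 4, Ho = 4, Wo = 4; // made-up sizes

    const std::array<std::int64_t, 6> out_strides{
        K,                    // G : step over one group = K channels
        Do * Ho * Wo * G * K, // N : step over one whole image
        1,                    // K : channels are innermost, hence contiguous
        Ho * Wo * G * K,      // Do: step over one depth slice
        Wo * G * K,           // Ho: step over one row
        G * K};               // Wo: step over one spatial position

    for(const auto s : out_strides)
        std::cout << s << ' '; // prints: 8 1024 1 256 64 16
    std::cout << '\n';
}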
client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc

@@ -68,7 +68,7 @@ int execute_conv_fwd_scaleadd_ab()
         K * Z * Y * X * C, Z * Y * X * C, 1, Y * X * C, X * C, C};
     std::array<ck::index_t, 6> out_lengths{G, N, K, Do, Ho, Wo};
     std::array<ck::index_t, 6> out_strides{
-        C, Do * Ho * Wo * G * C, 1, Ho * Wo * G * C, Wo * G * C, G * C};
+        K, Do * Ho * Wo * G * K, 1, Ho * Wo * G * K, Wo * G * K, G * K};
     std::array<ck::index_t, NumDimSpatial> filter_strides{1, 1, 1};
     std::array<ck::index_t, NumDimSpatial> filter_dilations{1, 1, 1};
...
@@ -147,11 +147,11 @@ int execute_conv_fwd_scaleadd_ab()
     {
         float avg_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true});

-        std::size_t flop = std::size_t(2) * G * N * K * C * Ho * Wo * Y * X +
-                           N * Hi * Wi * G * C + G * K * Y * X * C;
-        std::size_t num_bytes = 2 * sizeof(InDataType) * N * Hi * Wi * G * C +
-                                2 * sizeof(WeiDataType) * G * K * Y * X * C +
-                                sizeof(OutDataType) * N * Ho * Wo * G * K;
+        std::size_t flop = std::size_t(2) * G * N * K * C * Do * Ho * Wo * Z * Y * X +
+                           N * Di * Hi * Wi * G * C + G * K * Z * Y * X * C;
+        std::size_t num_bytes = 2 * sizeof(InDataType) * N * Di * Hi * Wi * G * C +
+                                2 * sizeof(WeiDataType) * G * K * Z * Y * X * C +
+                                sizeof(OutDataType) * N * Do * Ho * Wo * G * K;

         float tflops = static_cast<float>(flop) / 1.E9 / avg_time;
         float gb_per_sec = num_bytes / 1.E6 / avg_time;
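The second hunk upgrades the performance model from 2D to 3D: the multiply-accumulate term gains the missing Do and Z factors, and the elementwise terms over the input- and weight-shaped tensors gain Di and Z. The sketch below restates the corrected accounting as a standalone program; every size and the timing value are hypothetical, and the factors of 2 on the read terms correspond to the pair of input- and pair of weight-shaped tensors the ScaleAdd A/B operation consumes:

// Standalone sketch (not from the commit): the corrected performance model for
// a 3D grouped convolution. Each output element costs C * Z * Y * X
// multiply-adds (2 flops each); the additive terms count one elementwise pass
// over an input-shaped and a weight-shaped tensor for the scale-add epilogue.
#include <cstddef>
#include <iostream>

int main()
{
    // Hypothetical sizes: G groups, N batch, K/C out/in channels per group,
    // Z/Y/X filter depth/height/width, Di/Hi/Wi input and Do/Ho/Wo output extents.
    const std::size_t G = 2, N = 1, K = 8, C = 8, Z = 3, Y = 3, X = 3;
    const std::size_t Di = 8, Hi = 8, Wi = 8, Do = 6, Ho = 6, Wo = 6;
    const std::size_t elem_size = 2; // e.g. fp16

    const std::size_t flop = std::size_t(2) * G * N * K * C * Do * Ho * Wo * Z * Y * X +
                             N * Di * Hi * Wi * G * C + G * K * Z * Y * X * C;
    const std::size_t num_bytes = 2 * elem_size * N * Di * Hi * Wi * G * C +
                                  2 * elem_size * G * K * Z * Y * X * C +
                                  elem_size * N * Do * Ho * Wo * G * K;

    const float avg_time_ms = 0.05f; // a measured kernel time would go here

    // flop / 1e9 is GFLOP; dividing by milliseconds yields TFLOP/s.
    // num_bytes / 1e6 is MB; dividing by milliseconds yields GB/s.
    std::cout << "TFLOPS: " << static_cast<float>(flop) / 1.E9 / avg_time_ms << '\n'
              << "GB/s:   " << num_bytes / 1.E6 / avg_time_ms << '\n';
}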
client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_bf16.cpp

 // SPDX-License-Identifier: MIT
 // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

-#include <tuple>
-
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/tuple.hpp"
client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp16.cpp

 // SPDX-License-Identifier: MIT
 // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

-#include <tuple>
-
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/tuple.hpp"
client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp32.cpp

 // SPDX-License-Identifier: MIT
 // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

-#include <tuple>
-
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/tuple.hpp"
client_example/24_grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_int8.cpp

 // SPDX-License-Identifier: MIT
 // Copyright (c) 2023, Advanced Micro Devices, Inc. All rights reserved.

-#include <tuple>
-
 #include "ck/utility/data_type.hpp"
 #include "ck/utility/tuple.hpp"
include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp

@@ -9,101 +9,99 @@ namespace ck {
 namespace tensor_operation {
 namespace device {

-template <index_t NumATensor = 1, index_t NumBTensor = 1, index_t NumDTensor = 0>
+template <index_t NumATensor = 1, index_t NumBTensor = 1, index_t NumDTensor = 0, typename = void>
 struct ComputePtrOffsetOfStridedBatch
 {
-    static constexpr bool isMultiAB = NumATensor > 1 || NumBTensor > 1;
-
-    ComputePtrOffsetOfStridedBatch() = default;
+};

-    ComputePtrOffsetOfStridedBatch(index_t BatchStrideA,
-                                   index_t BatchStrideB,
-                                   Array<ck::index_t, NumDTensor> BatchStrideDs,
-                                   index_t BatchStrideE)
-        : BatchStrideA_(BatchStrideA),
-          BatchStrideB_(BatchStrideB),
-          BatchStrideDs_(BatchStrideDs),
-          BatchStrideE_(BatchStrideE)
-    {
-        if constexpr(isMultiAB)
-        {
-            static_assert("Invalid constructor for multiple A or B");
-        }
-    }
+template <index_t NumATensor, index_t NumBTensor, index_t NumDTensor>
+struct ComputePtrOffsetOfStridedBatch<NumATensor,
+                                      NumBTensor,
+                                      NumDTensor,
+                                      ck::enable_if_t<(NumATensor > 1 || NumBTensor > 1)>>
+{
+    ComputePtrOffsetOfStridedBatch() = default;

-    ComputePtrOffsetOfStridedBatch(Array<ck::index_t, NumATensor> BatchStrideAs,
-                                   Array<ck::index_t, NumBTensor> BatchStrideBs,
-                                   Array<ck::index_t, NumDTensor> BatchStrideDs,
+    ComputePtrOffsetOfStridedBatch(Array<ck::index_t, NumATensor>& BatchStrideAs,
+                                   Array<ck::index_t, NumBTensor>& BatchStrideBs,
+                                   Array<ck::index_t, NumDTensor>& BatchStrideDs,
                                    index_t BatchStrideE)
         : BatchStrideA_(BatchStrideAs),
           BatchStrideB_(BatchStrideBs),
           BatchStrideDs_(BatchStrideDs),
           BatchStrideE_(BatchStrideE)
     {
-        if constexpr(!isMultiAB)
-        {
-            static_assert("Invalid constructor for single A and B");
-        }
-    }
-
-    __host__ __device__ constexpr long_index_t GetAPtrOffset(index_t g_idx) const
-    {
-        if constexpr(!isMultiAB)
-        {
-            return g_idx * static_cast<long_index_t>(BatchStrideA_);
-        }
-        else
-        {
-            static_assert("Invalid function for multiple A or B");
-            return 0;
-        }
-    }
-
-    __host__ __device__ constexpr long_index_t GetBPtrOffset(index_t g_idx) const
-    {
-        if constexpr(!isMultiAB)
-        {
-            return g_idx * static_cast<long_index_t>(BatchStrideB_);
-        }
-        else
-        {
-            static_assert("Invalid function for multiple A or B");
-            return 0;
-        }
     }

     __host__ __device__ constexpr auto GetAsPtrOffset(index_t g_idx) const
     {
-        if constexpr(isMultiAB)
-        {
-            Array<long_index_t, NumATensor> as_offset;
-            static_for<0, NumATensor, 1>{}(
-                [&](auto i) { as_offset(i) = g_idx * static_cast<long_index_t>(BatchStrideA_[i]); });
-            return as_offset;
-        }
-        else
-        {
-            static_assert("Invalid function for single A and B");
-            return BatchStrideA_;
-        }
+        Array<long_index_t, NumATensor> as_offset;
+        static_for<0, NumATensor, 1>{}(
+            [&](auto i) { as_offset(i) = g_idx * static_cast<long_index_t>(BatchStrideA_[i]); });
+        return as_offset;
     }

     __host__ __device__ constexpr auto GetBsPtrOffset(index_t g_idx) const
     {
-        if constexpr(isMultiAB)
-        {
-            Array<long_index_t, NumBTensor> bs_offset;
-            static_for<0, NumBTensor, 1>{}(
-                [&](auto i) { bs_offset(i) = g_idx * static_cast<long_index_t>(BatchStrideB_[i]); });
-            return bs_offset;
-        }
-        else
-        {
-            static_assert("Invalid function for single A and B");
-            return BatchStrideB_;
-        }
+        Array<long_index_t, NumBTensor> bs_offset;
+        static_for<0, NumBTensor, 1>{}(
+            [&](auto i) { bs_offset(i) = g_idx * static_cast<long_index_t>(BatchStrideB_[i]); });
+        return bs_offset;
+    }
+
+    __host__ __device__ constexpr auto GetDsPtrOffset(index_t g_idx) const
+    {
+        Array<long_index_t, NumDTensor> ds_offset;
+        static_for<0, NumDTensor, 1>{}(
+            [&](auto i) { ds_offset(i) = g_idx * static_cast<long_index_t>(BatchStrideDs_[i]); });
+        return ds_offset;
+    }
+
+    [[maybe_unused]] __host__ __device__ constexpr long_index_t GetEPtrOffset(index_t g_idx) const
+    {
+        return g_idx * static_cast<long_index_t>(BatchStrideE_);
+    }
+
+    // alias for kernels without multiple D
+    [[maybe_unused]] __host__ __device__ constexpr long_index_t GetCPtrOffset(index_t g_idx) const
+    {
+        return g_idx * static_cast<long_index_t>(BatchStrideE_);
+    }
+
+    Array<ck::index_t, NumATensor> BatchStrideA_;
+    Array<ck::index_t, NumBTensor> BatchStrideB_;
+    Array<ck::index_t, NumDTensor> BatchStrideDs_;
+    index_t BatchStrideE_;
+    index_t& BatchStrideC_ = BatchStrideE_; // alias for kernels without multiple D
+};
+
+template <index_t NumATensor, index_t NumBTensor, index_t NumDTensor>
+struct ComputePtrOffsetOfStridedBatch<NumATensor,
+                                      NumBTensor,
+                                      NumDTensor,
+                                      ck::enable_if_t<(NumATensor == 1 || NumBTensor == 1)>>
+{
+    ComputePtrOffsetOfStridedBatch() = default;
+
+    ComputePtrOffsetOfStridedBatch(index_t BatchStrideA,
+                                   index_t BatchStrideB,
+                                   Array<ck::index_t, NumDTensor> BatchStrideDs,
+                                   index_t BatchStrideE)
+        : BatchStrideA_(BatchStrideA),
+          BatchStrideB_(BatchStrideB),
+          BatchStrideDs_(BatchStrideDs),
+          BatchStrideE_(BatchStrideE)
+    {
+    }
+
+    __host__ __device__ constexpr long_index_t GetAPtrOffset(index_t g_idx) const
+    {
+        return g_idx * static_cast<long_index_t>(BatchStrideA_);
+    }
+
+    __host__ __device__ constexpr long_index_t GetBPtrOffset(index_t g_idx) const
+    {
+        return g_idx * static_cast<long_index_t>(BatchStrideB_);
+    }

     __host__ __device__ constexpr auto GetDsPtrOffset(index_t g_idx) const
...
@@ -125,14 +123,8 @@ struct ComputePtrOffsetOfStridedBatch
         return g_idx * static_cast<long_index_t>(BatchStrideE_);
     }

-    // If multiAB use Array
-    using BatchStrideAType =
-        std::conditional_t<isMultiAB, Array<ck::index_t, NumATensor>, ck::index_t>;
-    using BatchStrideBType =
-        std::conditional_t<isMultiAB, Array<ck::index_t, NumBTensor>, ck::index_t>;
-    BatchStrideAType BatchStrideA_;
-    BatchStrideBType BatchStrideB_;
+    ck::index_t BatchStrideA_;
+    ck::index_t BatchStrideB_;
     Array<ck::index_t, NumDTensor> BatchStrideDs_;
     index_t BatchStrideE_;
     index_t& BatchStrideC_ = BatchStrideE_; // alias for kernels without multiple D
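The restructuring replaces one ComputePtrOffsetOfStridedBatch, whose members guarded themselves with if constexpr branches ending in static_assert("..."), with two partial specializations selected by ck::enable_if_t. This is also a correctness fix: static_assert on a bare string literal always passes, because the literal decays to a non-null pointer and converts to true, so the old guards could never fire. With separate specializations, the scalar interface simply does not exist on the multi-AB type (and vice versa), so misuse fails to compile. A minimal standalone illustration of the pattern follows, using simplified std types rather than CK's own:

// Minimal standalone illustration (simplified, not CK's actual code) of the
// pattern used above: a primary template plus enable_if-selected partial
// specializations, so each case only exposes the interface valid for it.
#include <array>
#include <cstdint>
#include <iostream>
#include <type_traits>

using index_t = std::int32_t;

template <index_t NumA = 1, typename = void>
struct PtrOffset; // primary template: never instantiated for valid uses

// Selected when there are multiple A tensors: stores an array of strides.
template <index_t NumA>
struct PtrOffset<NumA, std::enable_if_t<(NumA > 1)>>
{
    std::array<index_t, NumA> stride_a_;

    auto GetAsOffset(index_t g) const
    {
        std::array<std::int64_t, NumA> off{};
        for(index_t i = 0; i < NumA; ++i)
            off[i] = g * static_cast<std::int64_t>(stride_a_[i]);
        return off;
    }
};

// Selected for the single-A case: stores one stride, exposes a scalar getter.
template <index_t NumA>
struct PtrOffset<NumA, std::enable_if_t<(NumA == 1)>>
{
    index_t stride_a_;

    std::int64_t GetAOffset(index_t g) const
    {
        return g * static_cast<std::int64_t>(stride_a_);
    }
};

int main()
{
    PtrOffset<1> single{16};
    PtrOffset<2> multi{{16, 32}};
    std::cout << single.GetAOffset(3) << '\n';    // 48
    std::cout << multi.GetAsOffset(3)[1] << '\n'; // 96
    // single.GetAsOffset(3); // would not compile: that interface doesn't exist here
}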
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp

@@ -12,6 +12,7 @@
 #include "ck/ck.hpp"
 #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
 #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
+#include "ck/tensor_operation/gpu/device/device_base.hpp"
 #include "ck/library/utility/algorithm.hpp"
 #include "ck/library/utility/check_err.hpp"
library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp

@@ -27,11 +27,8 @@ using ScaleAdd = ck::tensor_operation::element_wise::ScaleAdd;
 static constexpr auto ConvFwdDefault =
     ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
 static constexpr auto ConvFwd1x1P0 = ConvolutionForwardSpecialization::Filter1x1Pad0;
 static constexpr auto ConvFwd1x1S1P0 = ConvolutionForwardSpecialization::Filter1x1Stride1Pad0;
 static constexpr auto ConvFwdOddC =
     ck::tensor_operation::device::ConvolutionForwardSpecialization::OddC;
...