Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
dc7b6568
Commit
dc7b6568
authored
Mar 09, 2023
by
rocking
Browse files
Refine the quantization instance library
parent
c0be8480
Changes
6
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
244 additions
and
97 deletions
+244
-97
library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt
...tensor_operation_instance/gpu/quantization/CMakeLists.txt
+4
-4
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_bias_perchannel_quantization_int8_instance.cpp
...ice_conv2d_bias_perchannel_quantization_int8_instance.cpp
+31
-25
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_bias_perlayer_quantization_int8_instance.cpp
...evice_conv2d_bias_perlayer_quantization_int8_instance.cpp
+35
-21
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_int8_instance.hpp
...u/quantization/conv2d_fwd/device_conv2d_int8_instance.hpp
+20
-47
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_perchannel_quantization_int8_instance.cpp
...d/device_conv2d_perchannel_quantization_int8_instance.cpp
+80
-0
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_perlayer_quantization_int8_instance.cpp
...fwd/device_conv2d_perlayer_quantization_int8_instance.cpp
+74
-0
No files found.
library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt
View file @
dc7b6568
add_instance_library
(
device_quantization_instance
add_instance_library
(
device_quantization_instance
conv2d_fwd/device_conv2d_
xdl_
bias_perchannel_quantization_int8_instance.cpp
conv2d_fwd/device_conv2d_bias_perchannel_quantization_int8_instance.cpp
conv2d_fwd/device_conv2d_
xdl_
bias_perlayer_quantization_int8_instance.cpp
conv2d_fwd/device_conv2d_bias_perlayer_quantization_int8_instance.cpp
conv2d_fwd/device_conv2d_
xdl_
perchannel_quantization_int8_instance.cpp
conv2d_fwd/device_conv2d_perchannel_quantization_int8_instance.cpp
conv2d_fwd/device_conv2d_
xdl_
perlayer_quantization_int8_instance.cpp
conv2d_fwd/device_conv2d_perlayer_quantization_int8_instance.cpp
)
)
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_
xdl_
bias_perchannel_quantization_int8_instance.cpp
→
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_bias_perchannel_quantization_int8_instance.cpp
View file @
dc7b6568
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "device_conv2d_
xdl_
int8_instance.hpp"
#include "device_conv2d_int8_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -22,20 +22,23 @@ void add_device_conv2d_bias_perchannel_quantization_int8_instances(
...
@@ -22,20 +22,23 @@ void add_device_conv2d_bias_perchannel_quantization_int8_instances(
Add_Mul2_Clamp
>>>&
instances
)
Add_Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwdDefault
>
{});
ConvFwdDefault
,
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwd1x1P0
>
{});
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Mul2_Clamp
,
Add_Mul2_Clamp
,
ConvFwd1x1S1P0
>
{});
ConvFwd1x1S1P0
,
8
>
{});
}
}
void
add_device_conv2d_bias_relu_perchannel_quantization_int8_instances
(
void
add_device_conv2d_bias_relu_perchannel_quantization_int8_instances
(
...
@@ -53,20 +56,23 @@ void add_device_conv2d_bias_relu_perchannel_quantization_int8_instances(
...
@@ -53,20 +56,23 @@ void add_device_conv2d_bias_relu_perchannel_quantization_int8_instances(
Add_Relu_Mul2_Clamp
>>>&
instances
)
Add_Relu_Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwdDefault
>
{});
ConvFwdDefault
,
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwd1x1P0
>
{});
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_GK_Tuple
,
I32_F32_Tuple
,
I32_F32_Tuple
,
Add_Relu_Mul2_Clamp
,
Add_Relu_Mul2_Clamp
,
ConvFwd1x1S1P0
>
{});
ConvFwd1x1S1P0
,
8
>
{});
}
}
}
// namespace instance
}
// namespace instance
}
// namespace device
}
// namespace device
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_
xdl_
bias_perlayer_quantization_int8_instance.cpp
→
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_bias_perlayer_quantization_int8_instance.cpp
View file @
dc7b6568
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "device_conv2d_
xdl_
int8_instance.hpp"
#include "device_conv2d_int8_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -21,15 +21,24 @@ void add_device_conv2d_bias_perlayer_quantization_int8_instances(
...
@@ -21,15 +21,24 @@ void add_device_conv2d_bias_perlayer_quantization_int8_instances(
PassThrough
,
PassThrough
,
Add_Mul_Clamp
>>>&
instances
)
Add_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
ConvFwdDefault
>
{});
I32_Tuple
,
add_device_operation_instances
(
Add_Mul_Clamp
,
instances
,
ConvFwdDefault
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
ConvFwd1x1P0
>
{});
8
>
{});
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
ConvFwd1x1S1P0
>
{});
I32_Tuple
,
Add_Mul_Clamp
,
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Mul_Clamp
,
ConvFwd1x1S1P0
,
8
>
{});
}
}
void
add_device_conv2d_bias_relu_perlayer_quantization_int8_instances
(
void
add_device_conv2d_bias_relu_perlayer_quantization_int8_instances
(
...
@@ -47,20 +56,25 @@ void add_device_conv2d_bias_relu_perlayer_quantization_int8_instances(
...
@@ -47,20 +56,25 @@ void add_device_conv2d_bias_relu_perlayer_quantization_int8_instances(
Add_Relu_Mul_Clamp
>>>&
instances
)
Add_Relu_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwdDefault
>
{});
ConvFwdDefault
,
8
>
{});
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
ConvFwd1x1P0
>
{});
I32_Tuple
,
Add_Relu_Mul_Clamp
,
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
instances
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
I32_Tuple
,
I32_Tuple
,
Add_Relu_Mul_Clamp
,
Add_Relu_Mul_Clamp
,
ConvFwd1x1S1P0
>
{});
ConvFwd1x1S1P0
,
8
>
{});
}
}
}
// namespace instance
}
// namespace instance
}
// namespace device
}
// namespace device
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_
xdl_
int8_instance.hpp
→
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_int8_instance.hpp
View file @
dc7b6568
This diff is collapsed.
Click to expand it.
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_
xdl_
perchannel_quantization_int8_instance.cpp
→
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_perchannel_quantization_int8_instance.cpp
View file @
dc7b6568
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "device_conv2d_
xdl_
int8_instance.hpp"
#include "device_conv2d_int8_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -21,15 +21,24 @@ void add_device_conv2d_perchannel_quantization_int8_instances(
...
@@ -21,15 +21,24 @@ void add_device_conv2d_perchannel_quantization_int8_instances(
PassThrough
,
PassThrough
,
Mul2_Clamp
>>>&
instances
)
Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
ConvFwdDefault
>
{});
F32_Tuple
,
add_device_operation_instances
(
Mul2_Clamp
,
instances
,
ConvFwdDefault
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
ConvFwd1x1P0
>
{});
8
>
{});
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
ConvFwd1x1S1P0
>
{});
F32_Tuple
,
Mul2_Clamp
,
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
F32_Tuple
,
Mul2_Clamp
,
ConvFwd1x1S1P0
,
8
>
{});
}
}
void
add_device_conv2d_relu_perchannel_quantization_int8_instances
(
void
add_device_conv2d_relu_perchannel_quantization_int8_instances
(
...
@@ -46,15 +55,24 @@ void add_device_conv2d_relu_perchannel_quantization_int8_instances(
...
@@ -46,15 +55,24 @@ void add_device_conv2d_relu_perchannel_quantization_int8_instances(
PassThrough
,
PassThrough
,
Relu_Mul2_Clamp
>>>&
instances
)
Relu_Mul2_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
ConvFwdDefault
>
{});
F32_Tuple
,
add_device_operation_instances
(
Relu_Mul2_Clamp
,
instances
,
ConvFwdDefault
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
ConvFwd1x1P0
>
{});
8
>
{});
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
device_conv2d_int8_32Ds_instances
<
GK_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
ConvFwd1x1S1P0
>
{});
F32_Tuple
,
Relu_Mul2_Clamp
,
ConvFwd1x1P0
,
8
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
GK_Tuple
,
F32_Tuple
,
Relu_Mul2_Clamp
,
ConvFwd1x1S1P0
,
8
>
{});
}
}
}
// namespace instance
}
// namespace instance
}
// namespace device
}
// namespace device
...
...
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_
xdl_
perlayer_quantization_int8_instance.cpp
→
library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_perlayer_quantization_int8_instance.cpp
View file @
dc7b6568
// SPDX-License-Identifier: MIT
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "device_conv2d_
xdl_
int8_instance.hpp"
#include "device_conv2d_int8_instance.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -21,15 +21,21 @@ void add_device_conv2d_perlayer_quantization_int8_instances(
...
@@ -21,15 +21,21 @@ void add_device_conv2d_perlayer_quantization_int8_instances(
PassThrough
,
PassThrough
,
Mul_Clamp
>>>&
instances
)
Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_conv2d_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
ConvFwdDefault
>
{});
Empty_Tuple
,
add_device_operation_instances
(
Mul_Clamp
,
instances
,
ConvFwdDefault
>
{});
device_conv2d_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
instances
,
Empty_Tuple
,
device_conv2d_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
ConvFwd1x1S1P0
>
{});
Mul_Clamp
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Mul_Clamp
,
ConvFwd1x1S1P0
>
{});
}
}
void
add_device_conv2d_relu_perlayer_quantization_int8_instances
(
void
add_device_conv2d_relu_perlayer_quantization_int8_instances
(
...
@@ -46,15 +52,21 @@ void add_device_conv2d_relu_perlayer_quantization_int8_instances(
...
@@ -46,15 +52,21 @@ void add_device_conv2d_relu_perlayer_quantization_int8_instances(
PassThrough
,
PassThrough
,
Relu_Mul_Clamp
>>>&
instances
)
Relu_Mul_Clamp
>>>&
instances
)
{
{
add_device_operation_instances
(
add_device_operation_instances
(
instances
,
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
device_conv2d_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
ConvFwdDefault
>
{});
Empty_Tuple
,
add_device_operation_instances
(
Relu_Mul_Clamp
,
instances
,
ConvFwdDefault
>
{});
device_conv2d_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
add_device_operation_instances
(
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
instances
,
Empty_Tuple
,
device_conv2d_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
ConvFwd1x1S1P0
>
{});
Relu_Mul_Clamp
,
ConvFwd1x1P0
>
{});
add_device_operation_instances
(
instances
,
device_grouped_conv2d_xdl_int8_instances
<
Empty_Tuple
,
Empty_Tuple
,
Relu_Mul_Clamp
,
ConvFwd1x1S1P0
>
{});
}
}
}
// namespace instance
}
// namespace instance
}
// namespace device
}
// namespace device
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment