gaoqiong / composable_kernel · Commit 867f613e

Commit 867f613e, authored Mar 16, 2023 by rocking

Add comment for quantization

parent cdb30d7c
Showing 1 changed file with 106 additions and 6 deletions.

include/ck/tensor_operation/gpu/element/quantization_operation.hpp (+106 -6)
@@ -7,10 +7,30 @@ namespace ck {
namespace tensor_operation {
namespace element_wise {

// Quantization scheme:
// Y = Sy * Qy
// W = Sw * Qw
// X = Sx * Qx
// B = Sb * Qb = Sw * Sx * Qb
// where X, W, Y are float32 and Qx, Qw, Qy are int8.
// Sx, Sw, Sy are the scales of x, w, y (float32), calculated from the quantization range.
// Qb is int32; the scale of B is chosen as Sw * Sx for convenience.
// Y = W @ X, where @ is convolution or matrix multiplication
// Sy * Qy = (Sw * Qw) @ (Sx * Qx)
// Qy = [(Sw * Sx) / Sy] * (Qw @ Qx)

// For Activation functions which are piecewise linear, such as relu, leaky relu, etc.:
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Activation_Mul_Clamp
{
    // Convolution + Activation (piecewise linear function)
    // If the activation is piecewise linear, then Activation(Sy * Qy) = Sy * Activation(Qy).
    // Z = Activation(Y) = Activation(W @ X)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = (Sy / Sz) * Activation(Qy) = (Sw * Sx / Sz) * Activation(Qy)
    // requantScale_ = Sw * Sx / Sz
    Activation_Mul_Clamp(float requantScale, Activation activationOp)
        : requantScale_(requantScale), activationOp_(activationOp)
    {
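To make the requantization above concrete, here is a scalar reference of the same math, illustrative only and not part of the diff; the helper name is hypothetical, and relu is written inline as the example piecewise linear activation standing in for the templated Activation functor.

#include <algorithm>
#include <cstdint>

// Scalar reference for Activation_Mul_Clamp's math (hypothetical helper):
// Qz = clamp(requantScale * Activation(Qacc)), with Qacc = Qw @ Qx.
inline int8_t reference_activation_mul_clamp(int32_t acc, float Sw, float Sx, float Sz)
{
    const float requantScale = Sw * Sx / Sz;                    // requantScale_ above
    float y = static_cast<float>(std::max<int32_t>(acc, 0));    // relu on the int32 accumulator
    y       = requantScale * y;                                 // rescale into Qz's grid
    y       = std::min(std::max(y, -128.f), 127.f);             // clamp to the int8 range
    return static_cast<int8_t>(y);
}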
@@ -45,8 +65,39 @@ struct Activation_Mul_Clamp
    Activation activationOp_;
};

// For Activation functions which are not piecewise linear, such as TanH, Sigmoid, etc.:
// if an activation is not piecewise linear,
// then Activation(Sy * Qy) != Sy * Activation(Qy).
template <typename Activation>
struct Mul_Activation_Mul_Clamp
{
    // Convolution + Activation (non piecewise linear function)
    // Z = Activation(Y) = Activation(W @ X)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = S1 * Activation[Sacc * (Qw @ Qx)]
    // where S1 = 1 / Sz and Sacc = Sw * Sx
    Mul_Activation_Mul_Clamp(float scale_z_inv, float scaleAcc, Activation activationOp)
        : scale_z_inv_(scale_z_inv), scaleAcc_(scaleAcc), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void operator()(int8_t& y, const int32_t& x) const
    {
        float y_fp32 = ck::type_convert<float>(x);
        y_fp32       = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int8_t>(y_fp32);
    }

    float scale_z_inv_;
    float scaleAcc_;
    Activation activationOp_;
};
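For the non piecewise linear path the order of operations matters: the int32 accumulator must be dequantized before the activation is applied, and only the result is requantized. A scalar reference, illustrative only, with std::tanh standing in for the Activation functor and a hypothetical helper name:

#include <cmath>
#include <cstdint>

// Scalar reference for Mul_Activation_Mul_Clamp:
// Qz = clamp(S1 * Activation(Sacc * Qacc)), with S1 = 1/Sz and Sacc = Sw * Sx.
inline int8_t reference_mul_activation_mul_clamp(int32_t acc, float Sw, float Sx, float Sz)
{
    const float scaleAcc    = Sw * Sx;                         // dequantize the accumulator
    const float scale_z_inv = 1.0f / Sz;                       // requantize after activation
    float y = std::tanh(scaleAcc * static_cast<float>(acc));   // activation in real space
    y       = std::fmin(std::fmax(scale_z_inv * y, -128.f), 127.f);
    return static_cast<int8_t>(y);
}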
// Conv per-channel quantization + Activation functions which are piecewise linear,
// such as relu, leaky relu, etc.:
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Activation_Mul2_Clamp
{
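In the per-channel ("Mul2") variants the requantization scale differs per output channel, so it is supplied at call time along with the data (as in the Add_Mul2 operator near the end of this file, which takes scaleAcc as an argument) rather than stored once in the constructor. A scalar reference of the per-channel piecewise linear path, illustrative only, with relu again as the example:

#include <algorithm>
#include <cstdint>

// Hypothetical helper; requantScale is looked up per output channel by the caller.
inline int8_t reference_activation_mul2_clamp(int32_t acc, float requantScale)
{
    float y = requantScale * static_cast<float>(std::max<int32_t>(acc, 0)); // relu, then rescale
    y       = std::min(std::max(y, -128.f), 127.f);                         // clamp to int8 range
    return static_cast<int8_t>(y);
}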
@@ -76,9 +127,16 @@ struct Activation_Mul2_Clamp
};

// For Activation functions which are piecewise linear, such as relu, leaky relu, etc.:
// Activation(Sy * Qy) = Sy * Activation(Qy)
template <typename Activation>
struct Add_Activation_Mul_Clamp
{
    // Convolution + bias
    // Let Bias = B = Sw * Sx * Qb, where Qb is int32.
    // Y = W @ X + B
    // Sy * Qy = (Sw * Qw) @ (Sx * Qx) + Sw * Sx * Qb
    // Qy = [(Sw * Sx) / Sy] * (Qw @ Qx + Qb)
    Add_Activation_Mul_Clamp(float requantScale, Activation activationOp)
        : requantScale_(requantScale), activationOp_(activationOp)
    {
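The bias folding relies on Qb being stored with scale Sw * Sx, so it can be added directly to the int32 accumulator before the single requantization step. A sketch of how an fp32 bias would be pre-quantized to match, illustrative only (the helper name is hypothetical):

#include <cmath>
#include <cstdint>

// Qb = round(B / (Sw * Sx)), so that Sw * Sx * Qb reproduces B.
inline int32_t quantize_bias(float B, float Sw, float Sx)
{
    return static_cast<int32_t>(std::lround(B / (Sw * Sx)));
}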
@@ -139,11 +197,18 @@ struct Add_Activation_Mul2_Clamp
};

// For Activation functions which are not piecewise linear, such as TanH, Sigmoid, etc.:
// if an activation is not piecewise linear,
// then Activation(Sy * Qy) != Sy * Activation(Qy).
template <typename Activation>
struct Add_Mul_Activation_Mul_Clamp
{
    // Convolution + bias + Activation (non piecewise linear function)
    // Z = Activation(Y) = Activation(W @ X + B)
    // Sz * Qz = Activation(Sy * Qy)
    // Qz = S1 * Activation[Sacc * (Qw @ Qx + Qb)]
    // where S1 = 1 / Sz and Sacc = Sw * Sx
    Add_Mul_Activation_Mul_Clamp(float scale_z_inv, float scaleAcc, Activation activationOp)
        : scale_z_inv_(scale_z_inv), scaleAcc_(scaleAcc), activationOp_(activationOp)
    {
    }
@@ -151,14 +216,49 @@ struct Add_Mul_Activation_Mul_Clamp
    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const int32_t& bias) const
    {
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32       = scaleAcc_ * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int8_t>(y_fp32);
    }

    float scale_z_inv_;
    float scaleAcc_;
    Activation activationOp_;
};
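A host-side usage sketch for the functor above, illustrative only; SigmoidRef is a hypothetical stand-in with the (out, in) call shape that activationOp_ expects, and the scale values are made up:

#include <cmath>
#include <cstdint>

struct SigmoidRef // stand-in Activation functor
{
    void operator()(float& y, const float& x) const { y = 1.0f / (1.0f + std::exp(-x)); }
};

// float Sw = 0.02f, Sx = 0.05f, Sz = 1.0f / 127.0f;
// auto op = Add_Mul_Activation_Mul_Clamp<SigmoidRef>{1.0f / Sz, Sw * Sx, SigmoidRef{}};
// int8_t qz;
// op(qz, acc, qb); // qz = clamp((1/Sz) * sigmoid(Sw*Sx*(acc + qb)), -128, 127)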
// Conv per-channel quantization + Activation functions which are not piecewise linear,
// such as TanH, Sigmoid, etc.:
// if an activation is not piecewise linear,
// then Activation(Sy * Qy) != Sy * Activation(Qy).
template <typename Activation>
struct Add_Mul2_Activation_Mul_Clamp
{
    // scale_z_inv is still a per-tensor constant; the per-channel scaleAcc
    // arrives through operator() instead.
    Add_Mul2_Activation_Mul_Clamp(float scale_z_inv, Activation activationOp)
        : scale_z_inv_(scale_z_inv), activationOp_(activationOp)
    {
    }

    __host__ __device__ constexpr void
    operator()(int8_t& y, const int32_t& x, const int32_t& bias, const float& scaleAcc) const
    {
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32       = scaleAcc * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int8_t>(y_fp32);
    }

    __host__ __device__ constexpr void
    operator()(int32_t& y, const int32_t& x, const int32_t& bias, const float& scaleAcc) const
    {
        // CAUTION - we might type_convert to int8 in the threadwise copy,
        // e.g. GridwiseGemmDlMultipleD_km_kn_mn
        float y_fp32 = ck::type_convert<float>(x + bias);
        y_fp32       = scaleAcc * y_fp32;
        activationOp_(y_fp32, y_fp32);
        y_fp32 = math::clamp(scale_z_inv_ * y_fp32, -128.f, 127.f);
        y      = ck::type_convert<int32_t>(y_fp32);
    }

    float scale_z_inv_;
    Activation activationOp_;
};