Commit cc30b7c1
Authored Apr 07, 2023 by Alan Turner

    Add all layouts and make qdq use fp16 instead of float

Parent: 734c2e74
Showing 4 changed files, with 72 additions and 75 deletions:

    src/quantization.cpp             +17 -17
    src/rewrite_quantization.cpp      +4  -4
    src/targets/gpu/fuse_ck.cpp       +3  -3
    src/targets/gpu/jit/ck_gemm.cpp  +48 -51
src/quantization.cpp

@@ -117,23 +117,23 @@ void quantize_int8(program& prog,
     // use all calibration data to run the program to calculate the
     // quantization scale and shift
-    for(auto&& arg : calibration)
-    {
-        parameter_map m;
-        for(auto&& x : capture_prog.get_parameter_shapes())
-        {
-            if(arg.count(x.first) > 0)
-            {
-                assert(x.second == arg.at(x.first).get_shape());
-                m[x.first] = t.copy_to(arg.at(x.first));
-            }
-            else
-            {
-                m[x.first] = t.allocate(x.second);
-            }
-        }
-        capture_prog.eval(m);
-    }
+    // for(auto&& arg : calibration)
+    // {
+    //     parameter_map m;
+    //     for(auto&& x : capture_prog.get_parameter_shapes())
+    //     {
+    //         if(arg.count(x.first) > 0)
+    //         {
+    //             assert(x.second == arg.at(x.first).get_shape());
+    //             m[x.first] = t.copy_to(arg.at(x.first));
+    //         }
+    //         else
+    //         {
+    //             m[x.first] = t.allocate(x.second);
+    //         }
+    //     }
+    //     capture_prog.eval(m);
+    // }

     // print the quantization parameters in only the main module
     if(enabled(MIGRAPHX_INT8_QUANTIZATION_PARAMS{}))
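Note: the block removed above (kept only as a comment) fed each calibration sample through the captured program: parameters present in a sample are shape-checked and copied to the target, while missing parameters get freshly allocated buffers. A minimal sketch of that copy-or-allocate pattern, with std containers standing in (as hypotheticals) for MIGraphX's parameter_map, shapes, and target:

#include <cassert>
#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Hypothetical stand-ins: a "shape" is reduced to an element count and a
// parameter map is name -> buffer.
using buffer        = std::vector<float>;
using parameter_map = std::map<std::string, buffer>;

parameter_map bind_sample(const std::map<std::string, std::size_t>& param_shapes,
                          const parameter_map& sample)
{
    parameter_map m;
    for(const auto& [name, elems] : param_shapes)
    {
        auto it = sample.find(name);
        if(it != sample.end())
        {
            // Provided by the calibration sample: verify the shape, then
            // use it (stands in for t.copy_to(arg.at(x.first))).
            assert(it->second.size() == elems);
            m[name] = it->second;
        }
        else
        {
            // Not provided: allocate a buffer of the declared shape
            // (stands in for t.allocate(x.second)).
            m[name] = buffer(elems);
        }
    }
    return m;
}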
src/rewrite_quantization.cpp

@@ -40,7 +40,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
     if(x->get_shape().type() != y_scale->get_shape().type())
     {
-        x = m.insert_instruction(ins, make_op("convert", {{"target_type", shape::float_type}}), x);
+        x = m.insert_instruction(ins, make_op("convert", {{"target_type", shape::half_type}}), x);
     }
     auto div            = m.insert_instruction(ins, make_op("div"), x, y_scale);
     auto add_zero_point = m.insert_instruction(ins, make_op("round"), div);

@@ -48,7 +48,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
     if(ins->inputs().size() == 3)
     {
         auto zero_point = m.insert_instruction(
-            ins, make_op("convert", {{"target_type", shape::float_type}}), ins->inputs()[2]);
+            ins, make_op("convert", {{"target_type", shape::half_type}}), ins->inputs()[2]);
         add_zero_point = m.insert_instruction(ins, make_op("add"), add_zero_point, zero_point);
     }

@@ -73,13 +73,13 @@ void apply_dequantizelinear(module& m, instruction_ref ins)
 {
     assert(ins->name() == "dequantizelinear");
     auto x = m.insert_instruction(
-        ins, make_op("convert", {{"target_type", shape::float_type}}), ins->inputs()[0]);
+        ins, make_op("convert", {{"target_type", shape::half_type}}), ins->inputs()[0]);
     auto x_scale = ins->inputs()[1];
     if(ins->inputs().size() == 3)
     {
         auto x_zero_point = m.insert_instruction(
-            ins, make_op("convert", {{"target_type", shape::float_type}}), ins->inputs()[2]);
+            ins, make_op("convert", {{"target_type", shape::half_type}}), ins->inputs()[2]);
         x = m.insert_instruction(ins, make_op("sub"), x, x_zero_point);
     }
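Note: this is the "qdq use fp16" half of the commit. Only the target_type of the intermediate convert instructions changes, from shape::float_type to shape::half_type; the surrounding div/round/add/sub structure stays as it was. In scalar form, the arithmetic these instructions decompose quantizelinear/dequantizelinear into is roughly the following (the int8 saturation bounds are an assumption here, since the clipping step sits outside these hunks):

#include <algorithm>
#include <cmath>
#include <cstdint>

// Scalar model of the quantizelinear decomposition: div, round, add the
// zero point, then saturate to the output type.
int8_t quantize(float x, float scale, int8_t zero_point)
{
    float q = std::round(x / scale) + static_cast<float>(zero_point);
    // Saturate to the int8 range (assumed bounds).
    return static_cast<int8_t>(std::min(127.0f, std::max(-128.0f, q)));
}

// Scalar model of the dequantizelinear decomposition: subtract the zero
// point, then scale.
float dequantize(int8_t q, float scale, int8_t zero_point)
{
    return (static_cast<float>(q) - static_cast<float>(zero_point)) * scale;
}

Running these intermediates in half rather than float is workable for int8 q/dq because fp16 represents every integer of magnitude up to 2048 exactly, which covers the int8 range with ample margin.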
src/targets/gpu/fuse_ck.cpp

@@ -60,8 +60,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
         return false;
     auto a = ins->inputs().front()->get_shape();
     auto b = ins->inputs().back()->get_shape();
-    if(a.lens().back() > 2048)
-        return false;
+    // if(a.lens().back() > 2048)
+    //     return false;
     return true;
 }

@@ -87,7 +87,7 @@ struct find_ck_gemm_pointwise
         auto gemm_it  = std::find(inputs.begin(), inputs.end(), x_ins);
         auto gemm_idx = gemm_it - inputs.begin();
         assert(gemm_it != inputs.end());
-        if(ins->get_shape().type() != shape::int8_type and ins->get_shape().type())
+        if(ins->get_shape().type() != shape::int8_type)
             return;
         if(gemm_idx != 0)
         {
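Note: two loosenings here. The matcher no longer rejects GEMMs whose inner dimension exceeds 2048 (the guard survives as a comment), and the fusion guard shrinks to a plain int8 test. The dropped trailing clause "and ins->get_shape().type()" evaluated the type enum in boolean context, which tests nothing about the actual type; a small illustration (the enumerator values are assumptions, not MIGraphX's real shape::type_t layout):

// Illustrative unscoped enum standing in for shape::type_t.
enum type_t { bool_type, half_type, float_type, int8_type };

// Old guard: "type != int8 and type". The bare "type" converts the enum
// to bool, i.e. "type is not the zero enumerator" -- not a real type check.
bool old_skip_fusion(type_t t) { return t != int8_type and t; }

// New guard states the intent directly: skip unless the output is int8.
bool new_skip_fusion(type_t t) { return t != int8_type; }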
src/targets/gpu/jit/ck_gemm.cpp

@@ -71,47 +71,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;
 using Empty_Tuple = ck::Tuple<>;
-using GEMM = ck::tensor_operation::device::DeviceGemmMultipleD_Dl<
-    Row,
-    Row,
-    Empty_Tuple,
-    Row,
-    int8_t,
-    int8_t,
-    int32_t,
-    Empty_Tuple,
-    int8_t, // EDataType
-    PassThrough,
-    PassThrough,
-    PassThrough,
-    ck::tensor_operation::device::GemmSpecialization::MNKPadding,
-    256,
-    128,
-    128,
-    16,
-    4,
-    4,
-    4,
-    1,
-    S<8, 2>,
-    S<8, 2>,
-    S<8, 1, 1, 4>,
-    S<2, 1, 128, 1>,
-    S<1, 2, 0, 3>,
-    S<1, 2, 0, 3>,
-    S<4, 1, 1, 4>,
-    S<1, 2, 0, 3>,
-    S<1, 1, 1, 4>,
-    S<2, 1, 4, 4>,
-    S<8, 1, 32, 1>,
-    S<0, 3, 1, 2>,
-    S<0, 3, 1, 2>,
-    S<1, 1, 4, 1>,
-    S<0, 3, 1, 2>,
-    S<1, 1, 4, 4>,
-    S<0, 1, 2, 3, 4, 5>,
-    5,
-    4>;
+using GEMM = ck::tensor_operation::device::${instance1}${padding}${instance2};

 namespace migraphx {
@@ -335,6 +295,34 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
         auto a_shape = inputs[0];
         auto b_shape = inputs[1];
         auto c_shape = inputs.back();
+        auto transa  = transposed_matrix(a_shape);
+        auto transb  = transposed_matrix(b_shape);
+        std::string instance_str1;
+        std::string instance_str2;
+        if(transa and not transb)
+        {
+            instance_str1 = "DeviceGemmMultipleD_Dl< Col, Row, Empty_Tuple, Row, int8_t, int8_t, int32_t, Empty_Tuple, int8_t, PassThrough, PassThrough, PassThrough, ";
+            instance_str2 = ", 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<2, 1, 4, 4>, S<8, 1, 32, 1>, S<0, 3, 1, 2>, S<0, 3, 1, 2>, S<1, 1, 4, 1>, S<0, 3, 1, 2>, S<1, 1, 4, 4>, S<2, 1, 4, 4>, S<8, 1, 32, 1>, S<0, 3, 1, 2>, S<0, 3, 1, 2>, S<1, 1, 4, 1>, S<0, 3, 1, 2>, S<1, 1, 4, 4>, S<0, 1, 2, 3, 4, 5>, 5, 4>";
+        }
+        else if(transa and transb)
+        {
+            instance_str1 = "DeviceGemmMultipleD_Dl< Col, Col, Empty_Tuple, Row, int8_t, int8_t, int32_t, Empty_Tuple, int8_t, PassThrough, PassThrough, PassThrough, ";
+            instance_str2 = ", 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<2, 1, 4, 4>, S<8, 1, 32, 1>, S<0, 3, 1, 2>, S<0, 3, 1, 2>, S<1, 1, 4, 1>, S<0, 3, 1, 2>, S<1, 1, 4, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, 4>";
+        }
+        else if(not transa and not transb)
+        {
+            instance_str1 = "DeviceGemmMultipleD_Dl< Row, Row, Empty_Tuple, Row, int8_t, int8_t, int32_t, Empty_Tuple, int8_t, PassThrough, PassThrough, PassThrough, ";
+            instance_str2 = ", 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<2, 1, 4, 4>, S<8, 1, 32, 1>, S<0, 3, 1, 2>, S<0, 3, 1, 2>, S<1, 1, 4, 1>, S<0, 3, 1, 2>, S<1, 1, 4, 4>, S<0, 1, 2, 3, 4, 5>, 5, 4>";
+        }
+        else
+        {
+            instance_str1 = "DeviceGemmMultipleD_Dl< Row, Col, Empty_Tuple, Row, int8_t, int8_t, int32_t, Empty_Tuple, int8_t, PassThrough, PassThrough, PassThrough, ";
+            instance_str2 = ", 256, 128, 128, 16, 4, 4, 4, 1, S<8, 2>, S<8, 2>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<8, 1, 1, 4>, S<2, 1, 128, 1>, S<1, 2, 0, 3>, S<1, 2, 0, 3>, S<4, 1, 1, 4>, S<1, 2, 0, 3>, S<1, 1, 1, 4>, S<0, 1, 2, 3, 4, 5>, 5, 4>";
+        }
         auto rank      = a_shape.lens().size();
         auto b_strides = b_shape.strides();
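Note: this is the "all layouts" half of the commit. The Row/Col pair at the head of each instance string encodes whether A and B arrive transposed, and the four branches differ only in that pair and in which tuning-parameter tail they select; the GemmSpecialization in the middle is deferred to the ${padding} placeholder. transposed_matrix is not defined in this diff, so the following stride-based sketch of such a check is an assumption about its behavior:

#include <cstddef>
#include <vector>

// Assumed model: given a shape's strides with the innermost dimension
// last, a non-transposed row-major matrix has stride 1 on its last
// dimension; anything else is treated as transposed, i.e. Col layout in CK.
bool transposed_matrix(const std::vector<std::size_t>& strides)
{
    return strides.back() != 1;
}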
@@ -361,19 +349,26 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
             ip.set_ds_op(v.at("post").to<std::string>());
         }
-        auto padding = ip.get_pad(config);
+        auto m_per_block = 128;
+        auto n_per_block = 128;
+        auto k_per_block = 16;
         std::string gemm_type;
-        for(auto i : range(padding.size()))
-        {
-            if(padding[i] != 0)
-                gemm_type += keys[i];
-        }
-        if(gemm_type.empty())
-            gemm_type = "Default";
-        else
-            gemm_type += "Padding";
-        ip.set_gemm("ck::tensor_operation::device::GemmSpecialization::" + gemm_type);
+        // if (int_div_ceil(m, m_per_block) * m_per_block - m != 0)
+        //     gemm_type += "M";
+        // if (int_div_ceil(n, n_per_block) * n_per_block - n != 0)
+        //     gemm_type += "N";
+        // if (int_div_ceil(k, k_per_block) * k_per_block - k != 0)
+        //     gemm_type += "K";
+        if((int_div_ceil(m, m_per_block) * m_per_block - m != 0) or
+           (int_div_ceil(n, n_per_block) * n_per_block - n != 0))
+            gemm_type = "MNPadding";
+        else
+            gemm_type = "Default";
+        std::string padding_str = "ck::tensor_operation::device::GemmSpecialization::" + gemm_type;
+        std::cout << padding_str << std::endl;
+        // std::exit(0);
         auto blocks_per_batch = int_div_ceil(m, 128) * int_div_ceil(n, 128);
         // ip.get_grid_size(config);
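Note: the padding choice drops the old per-dimension keys loop (which built names like the MNKPadding used by the removed GEMM instantiation) in favor of an explicit tile-divisibility test on M and N, and the chosen GemmSpecialization now travels through padding_str to the ${padding} placeholder instead of ip.set_gemm. Under the usual definition of integer ceiling division (assumed below, since int_div_ceil is defined outside this diff), the test is exactly "not a multiple of the tile":

#include <cassert>

// Assumed definition of int_div_ceil: round-up integer division.
constexpr int int_div_ceil(int x, int y) { return (x + y - 1) / y; }

// Rounding m up to a whole number of tiles leaves a remainder exactly
// when m is not already a multiple of the tile size.
constexpr bool needs_padding(int m, int m_per_block)
{
    // Equivalent to (m % m_per_block != 0).
    return int_div_ceil(m, m_per_block) * m_per_block - m != 0;
}

static_assert(needs_padding(130, 128));
static_assert(not needs_padding(256, 128));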
@@ -402,7 +397,9 @@ struct ck_gemm_compiler : compiler<ck_gemm_compiler>
         options.params += " -DMIGRAPHX_CK_CHECK=1";
         auto src = interpolate_string(ck_gemm_kernel,
-                                      {{"instance", ip.str()},
+                                      {{"instance1", instance_str1},
+                                       {"instance2", instance_str2},
+                                       {"padding", padding_str},
                                        {"params", enum_params(inputs.size(), "void * private_p")},
                                        {"args", enum_params(inputs.size(), "private_p")},
                                        {"blocks_per_batch", to_string(blocks_per_batch)},
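Note: the interpolation map gains the instance1, instance2, and padding entries that assemble the "using GEMM = ..." alias from the first hunk. interpolate_string's implementation is not part of this diff; a minimal sketch of a ${key} substitution helper with the behavior relied on here (an assumption, not MIGraphX's code):

#include <cstddef>
#include <map>
#include <string>

// Replace every ${key} occurrence in the template text with its value.
std::string interpolate(std::string text, const std::map<std::string, std::string>& vars)
{
    for(const auto& [key, value] : vars)
    {
        const std::string token = "${" + key + "}";
        for(std::size_t pos = text.find(token); pos != std::string::npos;
            pos = text.find(token, pos + value.size()))
        {
            text.replace(pos, token.size(), value);
        }
    }
    return text;
}

Applied to "using GEMM = ck::tensor_operation::device::${instance1}${padding}${instance2};" with the strings built above, this yields the concrete DeviceGemmMultipleD_Dl instantiation for the matched layout and padding.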