Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
2f2757ac
Commit
2f2757ac
authored
May 19, 2023
by
Alan Turner
Browse files
Merge remote-tracking branch 'origin/fp16-int8-quantize' into ck-int8-fusion
parents
80bf741a
08cf2242
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
38 additions
and
17 deletions
+38
-17
src/driver/main.cpp
src/driver/main.cpp
+21
-9
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+12
-8
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+5
-0
No files found.
src/driver/main.cpp
View file @
2f2757ac
...
...
@@ -415,7 +415,8 @@ struct compiler
program_params
parameters
;
compiler_target
ct
;
compile_options
co
;
precision
quantize
=
precision
::
fp32
;
bool
to_fp16
=
false
;
bool
to_int8
=
false
;
std
::
vector
<
std
::
string
>
fill0
;
std
::
vector
<
std
::
string
>
fill1
;
...
...
@@ -436,8 +437,8 @@ struct compiler
{
"--exhaustive-tune"
},
ap
.
help
(
"Exhastively search for best tuning parameters for kernels"
),
ap
.
set_value
(
true
));
ap
(
quantize
,
{
"--fp16"
},
ap
.
help
(
"Quantize for fp16"
),
ap
.
set_value
(
precision
::
fp16
));
ap
(
quantize
,
{
"--int8"
},
ap
.
help
(
"Quantize for int8"
),
ap
.
set_value
(
precision
::
int8
));
ap
(
to_fp16
,
{
"--fp16"
},
ap
.
help
(
"Quantize for fp16"
),
ap
.
set_value
(
true
));
ap
(
to_int8
,
{
"--int8"
},
ap
.
help
(
"Quantize for int8"
),
ap
.
set_value
(
true
));
}
auto
params
(
const
program
&
p
)
...
...
@@ -445,6 +446,11 @@ struct compiler
return
parameters
.
generate
(
p
,
ct
.
get_target
(),
co
.
offload_copy
,
l
.
batch
);
}
auto
host_params
(
const
program
&
p
)
{
return
parameters
.
generate
(
p
,
ct
.
get_target
(),
true
,
l
.
batch
);
}
program
compile
()
{
auto
p
=
l
.
load
();
...
...
@@ -452,13 +458,13 @@ struct compiler
if
(
p
.
is_compiled
())
return
p
;
auto
t
=
ct
.
get_target
();
if
(
quantize
==
precision
::
fp16
)
if
(
to_
fp16
)
{
quantize_fp16
(
p
);
}
else
if
(
quantize
==
precision
::
int8
)
if
(
to_
int8
)
{
quantize_int8
(
p
,
t
,
{
params
(
p
)});
quantize_int8
(
p
,
t
,
{
host_
params
(
p
)});
}
p
.
compile
(
t
,
co
);
l
.
save
(
p
);
...
...
@@ -517,17 +523,23 @@ struct verify : command<verify>
auto
t
=
c
.
ct
.
get_target
();
auto
m
=
c
.
parameters
.
generate
(
p
,
t
,
true
,
c
.
l
.
batch
);
auto
quantize
=
precision
::
fp32
;
if
(
c
.
to_fp16
)
quantize
=
precision
::
fp16
;
if
(
c
.
to_int8
)
quantize
=
precision
::
int8
;
if
(
per_instruction
)
{
verify_instructions
(
p
,
t
,
c
.
co
,
c
.
quantize
,
tolerance
);
verify_instructions
(
p
,
t
,
c
.
co
,
quantize
,
tolerance
);
}
else
if
(
reduce
)
{
verify_reduced_program
(
p
,
t
,
c
.
co
,
c
.
quantize
,
m
,
tolerance
);
verify_reduced_program
(
p
,
t
,
c
.
co
,
quantize
,
m
,
tolerance
);
}
else
{
verify_program
(
c
.
l
.
file
,
p
,
t
,
c
.
co
,
c
.
quantize
,
m
,
tolerance
);
verify_program
(
c
.
l
.
file
,
p
,
t
,
c
.
co
,
quantize
,
m
,
tolerance
);
}
}
};
...
...
src/rewrite_quantization.cpp
View file @
2f2757ac
...
...
@@ -41,15 +41,18 @@ void apply_quantizelinear(module& m, instruction_ref ins)
if
(
x
->
get_shape
().
type
()
!=
y_scale
->
get_shape
().
type
())
{
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
scale_type
}}),
x
);
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()}}),
x
);
}
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"round"
),
div
);
if
(
ins
->
inputs
().
size
()
==
3
)
{
auto
zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
scale_type
}}),
ins
->
inputs
()[
2
]);
auto
zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()}}),
ins
->
inputs
()[
2
]);
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"add"
),
add_zero_point
,
zero_point
);
}
...
...
@@ -74,14 +77,15 @@ void apply_dequantizelinear(module& m, instruction_ref ins)
{
assert
(
ins
->
name
()
==
"dequantizelinear"
);
auto
x_scale
=
ins
->
inputs
()[
1
];
auto
scale_type
=
x_scale
->
get_shape
().
type
();
auto
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
scale
_
type
}}),
ins
->
inputs
()[
0
]);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
x_
scale
->
get_shape
().
type
()
}}),
ins
->
inputs
()[
0
]);
if
(
ins
->
inputs
().
size
()
==
3
)
{
auto
x_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
scale_type
}}),
ins
->
inputs
()[
2
]);
auto
x_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
x_scale
->
get_shape
().
type
()}}),
ins
->
inputs
()[
2
]);
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"sub"
),
x
,
x_zero_point
);
}
...
...
src/simplify_algebra.cpp
View file @
2f2757ac
...
...
@@ -501,6 +501,11 @@ struct find_inner_broadcast
auto
broadcasts
=
ins
->
inputs
();
if
(
broadcasts
.
empty
())
return
;
// Skip if different data types are used
if
(
any_of
(
broadcasts
,
[
&
](
auto
i
)
{
return
i
->
get_shape
().
type
()
!=
broadcasts
.
front
()
->
get_shape
().
type
();
}))
return
;
bool
mixed_broadcasts
=
any_of
(
broadcasts
,
non_scalar_op
(
"broadcast"
))
and
any_of
(
broadcasts
,
non_scalar_op
(
"multibroadcast"
));
// If the broadcast is not a single dimension, then don't perform inner_broadcast
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment