Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
1ccbd8ce
Commit
1ccbd8ce
authored
May 17, 2023
by
Paul
Browse files
Allow quantizing for both int8 and fp16
parent
5e35957b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
31 additions
and
15 deletions
+31
-15
src/driver/main.cpp
src/driver/main.cpp
+21
-9
src/module.cpp
src/module.cpp
+1
-1
src/rewrite_quantization.cpp
src/rewrite_quantization.cpp
+5
-5
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+4
-0
No files found.
src/driver/main.cpp
View file @
1ccbd8ce
...
@@ -415,7 +415,8 @@ struct compiler
...
@@ -415,7 +415,8 @@ struct compiler
program_params
parameters
;
program_params
parameters
;
compiler_target
ct
;
compiler_target
ct
;
compile_options
co
;
compile_options
co
;
precision
quantize
=
precision
::
fp32
;
bool
to_fp16
=
false
;
bool
to_int8
=
false
;
std
::
vector
<
std
::
string
>
fill0
;
std
::
vector
<
std
::
string
>
fill0
;
std
::
vector
<
std
::
string
>
fill1
;
std
::
vector
<
std
::
string
>
fill1
;
...
@@ -436,8 +437,8 @@ struct compiler
...
@@ -436,8 +437,8 @@ struct compiler
{
"--exhaustive-tune"
},
{
"--exhaustive-tune"
},
ap
.
help
(
"Exhastively search for best tuning parameters for kernels"
),
ap
.
help
(
"Exhastively search for best tuning parameters for kernels"
),
ap
.
set_value
(
true
));
ap
.
set_value
(
true
));
ap
(
quantize
,
{
"--fp16"
},
ap
.
help
(
"Quantize for fp16"
),
ap
.
set_value
(
precision
::
fp16
));
ap
(
to_fp16
,
{
"--fp16"
},
ap
.
help
(
"Quantize for fp16"
),
ap
.
set_value
(
true
));
ap
(
quantize
,
{
"--int8"
},
ap
.
help
(
"Quantize for int8"
),
ap
.
set_value
(
precision
::
int8
));
ap
(
to_int8
,
{
"--int8"
},
ap
.
help
(
"Quantize for int8"
),
ap
.
set_value
(
true
));
}
}
auto
params
(
const
program
&
p
)
auto
params
(
const
program
&
p
)
...
@@ -445,6 +446,11 @@ struct compiler
...
@@ -445,6 +446,11 @@ struct compiler
return
parameters
.
generate
(
p
,
ct
.
get_target
(),
co
.
offload_copy
,
l
.
batch
);
return
parameters
.
generate
(
p
,
ct
.
get_target
(),
co
.
offload_copy
,
l
.
batch
);
}
}
auto
host_params
(
const
program
&
p
)
{
return
parameters
.
generate
(
p
,
ct
.
get_target
(),
true
,
l
.
batch
);
}
program
compile
()
program
compile
()
{
{
auto
p
=
l
.
load
();
auto
p
=
l
.
load
();
...
@@ -452,13 +458,13 @@ struct compiler
...
@@ -452,13 +458,13 @@ struct compiler
if
(
p
.
is_compiled
())
if
(
p
.
is_compiled
())
return
p
;
return
p
;
auto
t
=
ct
.
get_target
();
auto
t
=
ct
.
get_target
();
if
(
quantize
==
precision
::
fp16
)
if
(
to_
fp16
)
{
{
quantize_fp16
(
p
);
quantize_fp16
(
p
);
}
}
else
if
(
quantize
==
precision
::
int8
)
if
(
to_
int8
)
{
{
quantize_int8
(
p
,
t
,
{
params
(
p
)});
quantize_int8
(
p
,
t
,
{
host_
params
(
p
)});
}
}
p
.
compile
(
t
,
co
);
p
.
compile
(
t
,
co
);
l
.
save
(
p
);
l
.
save
(
p
);
...
@@ -517,17 +523,23 @@ struct verify : command<verify>
...
@@ -517,17 +523,23 @@ struct verify : command<verify>
auto
t
=
c
.
ct
.
get_target
();
auto
t
=
c
.
ct
.
get_target
();
auto
m
=
c
.
parameters
.
generate
(
p
,
t
,
true
,
c
.
l
.
batch
);
auto
m
=
c
.
parameters
.
generate
(
p
,
t
,
true
,
c
.
l
.
batch
);
auto
quantize
=
precision
::
fp32
;
if
(
c
.
to_fp16
)
quantize
=
precision
::
fp16
;
if
(
c
.
to_int8
)
quantize
=
precision
::
int8
;
if
(
per_instruction
)
if
(
per_instruction
)
{
{
verify_instructions
(
p
,
t
,
c
.
co
,
c
.
quantize
,
tolerance
);
verify_instructions
(
p
,
t
,
c
.
co
,
quantize
,
tolerance
);
}
}
else
if
(
reduce
)
else
if
(
reduce
)
{
{
verify_reduced_program
(
p
,
t
,
c
.
co
,
c
.
quantize
,
m
,
tolerance
);
verify_reduced_program
(
p
,
t
,
c
.
co
,
quantize
,
m
,
tolerance
);
}
}
else
else
{
{
verify_program
(
c
.
l
.
file
,
p
,
t
,
c
.
co
,
c
.
quantize
,
m
,
tolerance
);
verify_program
(
c
.
l
.
file
,
p
,
t
,
c
.
co
,
quantize
,
m
,
tolerance
);
}
}
}
}
};
};
...
...
src/module.cpp
View file @
1ccbd8ce
...
@@ -346,7 +346,7 @@ instruction_ref module::replace_instruction(instruction_ref ins, instruction_ref
...
@@ -346,7 +346,7 @@ instruction_ref module::replace_instruction(instruction_ref ins, instruction_ref
assert
(
out
->
valid
(
begin
()));
assert
(
out
->
valid
(
begin
()));
}
}
// Replacement should not be dead code unless its the last instruction
// Replacement should not be dead code unless its the last instruction
assert
(
not
rep
->
outputs
().
empty
()
or
rep
==
std
::
prev
(
end
()));
//
assert(not rep->outputs().empty() or rep == std::prev(end()));
// Output of the original instruction should only be the replacement or empty
// Output of the original instruction should only be the replacement or empty
assert
(
ins
->
outputs
().
empty
()
or
std
::
all_of
(
ins
->
outputs
().
begin
(),
assert
(
ins
->
outputs
().
empty
()
or
std
::
all_of
(
ins
->
outputs
().
begin
(),
ins
->
outputs
().
end
(),
ins
->
outputs
().
end
(),
...
...
src/rewrite_quantization.cpp
View file @
1ccbd8ce
...
@@ -40,7 +40,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -40,7 +40,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
if
(
x
->
get_shape
().
type
()
!=
y_scale
->
get_shape
().
type
())
if
(
x
->
get_shape
().
type
()
!=
y_scale
->
get_shape
().
type
())
{
{
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
shape
::
float_
type
}}),
x
);
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()
}}),
x
);
}
}
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
div
=
m
.
insert_instruction
(
ins
,
make_op
(
"div"
),
x
,
y_scale
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"round"
),
div
);
auto
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"round"
),
div
);
...
@@ -48,7 +48,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -48,7 +48,7 @@ void apply_quantizelinear(module& m, instruction_ref ins)
if
(
ins
->
inputs
().
size
()
==
3
)
if
(
ins
->
inputs
().
size
()
==
3
)
{
{
auto
zero_point
=
m
.
insert_instruction
(
auto
zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
shape
::
float_
type
}}),
ins
->
inputs
()[
2
]);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
y_scale
->
get_shape
().
type
()
}}),
ins
->
inputs
()[
2
]);
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"add"
),
add_zero_point
,
zero_point
);
add_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"add"
),
add_zero_point
,
zero_point
);
}
}
...
@@ -72,14 +72,14 @@ void apply_quantizelinear(module& m, instruction_ref ins)
...
@@ -72,14 +72,14 @@ void apply_quantizelinear(module& m, instruction_ref ins)
void
apply_dequantizelinear
(
module
&
m
,
instruction_ref
ins
)
void
apply_dequantizelinear
(
module
&
m
,
instruction_ref
ins
)
{
{
assert
(
ins
->
name
()
==
"dequantizelinear"
);
assert
(
ins
->
name
()
==
"dequantizelinear"
);
auto
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
shape
::
float_type
}}),
ins
->
inputs
()[
0
]);
auto
x_scale
=
ins
->
inputs
()[
1
];
auto
x_scale
=
ins
->
inputs
()[
1
];
auto
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
x_scale
->
get_shape
().
type
()}}),
ins
->
inputs
()[
0
]);
if
(
ins
->
inputs
().
size
()
==
3
)
if
(
ins
->
inputs
().
size
()
==
3
)
{
{
auto
x_zero_point
=
m
.
insert_instruction
(
auto
x_zero_point
=
m
.
insert_instruction
(
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
shape
::
float_
type
}}),
ins
->
inputs
()[
2
]);
ins
,
make_op
(
"convert"
,
{{
"target_type"
,
x_scale
->
get_shape
().
type
()
}}),
ins
->
inputs
()[
2
]);
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"sub"
),
x
,
x_zero_point
);
x
=
m
.
insert_instruction
(
ins
,
make_op
(
"sub"
),
x
,
x_zero_point
);
}
}
...
...
src/simplify_algebra.cpp
View file @
1ccbd8ce
...
@@ -484,6 +484,7 @@ struct find_double_add_lit_broadcast
...
@@ -484,6 +484,7 @@ struct find_double_add_lit_broadcast
struct
find_inner_broadcast
struct
find_inner_broadcast
{
{
// (match::none_of(match::name("quantizelinear", "dequantizelinear")))
auto
matcher
()
const
{
return
pointwise
(
match
::
all_of
[
match
::
inputs
()](
match
::
broadcast
()));
}
auto
matcher
()
const
{
return
pointwise
(
match
::
all_of
[
match
::
inputs
()](
match
::
broadcast
()));
}
static
auto
non_scalar_op
(
const
std
::
string
&
name
)
static
auto
non_scalar_op
(
const
std
::
string
&
name
)
...
@@ -501,6 +502,9 @@ struct find_inner_broadcast
...
@@ -501,6 +502,9 @@ struct find_inner_broadcast
auto
broadcasts
=
ins
->
inputs
();
auto
broadcasts
=
ins
->
inputs
();
if
(
broadcasts
.
empty
())
if
(
broadcasts
.
empty
())
return
;
return
;
// Skip if different data types are used
if
(
any_of
(
broadcasts
,
[
&
](
auto
i
)
{
return
i
->
get_shape
().
type
()
!=
broadcasts
.
front
()
->
get_shape
().
type
();
}))
return
;
bool
mixed_broadcasts
=
any_of
(
broadcasts
,
non_scalar_op
(
"broadcast"
))
and
bool
mixed_broadcasts
=
any_of
(
broadcasts
,
non_scalar_op
(
"broadcast"
))
and
any_of
(
broadcasts
,
non_scalar_op
(
"multibroadcast"
));
any_of
(
broadcasts
,
non_scalar_op
(
"multibroadcast"
));
// If the broadcast is not a single dimension, then dont perform inner_broadcast
// If the broadcast is not a single dimension, then dont perform inner_broadcast
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment