Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
e12ee1f8
Commit
e12ee1f8
authored
Aug 29, 2019
by
Shucai Xiao
Browse files
fix cppcheck error
parent
a3affafc
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
158 additions
and
165 deletions
+158
-165
src/include/migraphx/quantization.hpp
src/include/migraphx/quantization.hpp
+2
-2
src/quantization.cpp
src/quantization.cpp
+156
-163
No files found.
src/include/migraphx/quantization.hpp
View file @
e12ee1f8
...
@@ -24,11 +24,11 @@ std::size_t capture_arguments(program& prog,
...
@@ -24,11 +24,11 @@ std::size_t capture_arguments(program& prog,
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
);
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
);
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments_impl
(
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments_impl
(
program
&
prog
,
const
target
&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
});
program
&
prog
,
const
target
&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
,
"convolution"
});
template
<
class
T
>
template
<
class
T
>
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments
(
program
&
prog
,
T
&&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
})
capture_arguments
(
program
&
prog
,
T
&&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
,
"convolution"
})
{
{
static_assert
(
std
::
is_same
<
std
::
remove_cv_t
<
std
::
remove_reference_t
<
T
>>
,
target
>
{}
&&
static_assert
(
std
::
is_same
<
std
::
remove_cv_t
<
std
::
remove_reference_t
<
T
>>
,
target
>
{}
&&
std
::
is_lvalue_reference
<
T
>
{},
std
::
is_lvalue_reference
<
T
>
{},
...
...
src/quantization.cpp
View file @
e12ee1f8
...
@@ -162,115 +162,12 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -162,115 +162,12 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
// int8 quantization is different from fp16 since int8 can only handle value
static
void
quantize_ins
(
program
&
prog
,
instruction_ref
ins
,
// -128 ~ 127. To convert the float or double to int8, we need a scale and
std
::
vector
<
instruction_ref
>&
converted_inputs
,
// a shift, then the convert can be done as v_int8 = fp * scale + shift.
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
ins_quant_params
)
// To simplify the changes, we consider shift as 0.0f for now.
void
quantize_int8
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
quant_params
)
{
{
for
(
size_t
i
=
0
;
i
<
quant_params
.
size
();
i
++
)
auto
orig_type
=
ins
->
get_shape
().
type
();
{
auto
param
=
quant_params
.
at
(
i
);
std
::
cout
<<
"index = "
<<
i
<<
", scale = "
<<
param
.
first
<<
"
\t
"
<<
param
.
second
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
// For now, we only support the int8 quantization of gemm and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
(
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
());
}))
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: only support DOT and CONVOLUTION operation"
);
}
std
::
size_t
quant_param_index
=
0
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_quant_ins
;
std
::
unordered_map
<
instruction_ref
,
std
::
size_t
>
map_index
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
shape
::
type_t
orig_type
=
ins
->
get_shape
().
type
();
// for the dot operator, there could be 2 or 3 input arguments
// if the 3rd argument is available, convert it to an int32.
std
::
vector
<
instruction_ref
>
converted_inputs
;
// process all inputs, if input is a fp32 or fp64, convert it
// to a int8 type by adding a convert operator and replace
// the operator with the corresponding int8 version
auto
inputs
=
ins
->
inputs
();
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
std
::
pair
<
float
,
float
>>
ins_quant_params
;
for
(
auto
input
:
inputs
)
{
// calculate the index of each instruction to be quantized
if
(
map_index
.
count
(
input
)
==
0
)
{
map_index
[
input
]
=
quant_param_index
++
;
}
auto
param
=
quant_params
[
map_index
[
input
]];
ins_quant_params
.
push_back
(
param
);
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape
::
type_t
quant_type
=
shape
::
int8_type
;
if
(
ins
->
name
()
==
"dot"
and
inputs
.
size
()
==
3
and
input
==
inputs
.
back
())
{
quant_type
=
shape
::
int32_type
;
}
auto
s
=
input
->
get_shape
();
if
((
s
.
type
()
==
shape
::
float_type
or
s
.
type
()
==
shape
::
double_type
or
s
.
type
()
==
shape
::
int32_type
)
and
s
.
type
()
!=
quant_type
)
{
// if the input is a convert operator, uses its input
// as its current input
instruction_ref
quant_input
{};
if
(
input
->
name
()
==
"convert"
)
{
auto
tmp_ins
=
input
->
inputs
().
front
();
if
(
tmp_ins
->
get_shape
().
type
()
==
quant_type
)
{
quant_input
=
input
->
inputs
().
front
();
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
converted_inputs
.
push_back
(
quant_input
);
}
else
{
converted_inputs
.
push_back
(
input
);
}
}
// no change for the input, go to the next instruction
if
(
inputs
==
converted_inputs
)
{
continue
;
}
// When converting from other types to int8_type, there are parameters
// used as scale and shift(.0f), which will generate results different from
// the original results. To adjust the output to be "correct(approximately
// equal)", we need additional calculation for the adjustment
if
(
ins
->
name
()
==
"dot"
)
if
(
ins
->
name
()
==
"dot"
)
{
{
auto
dot_op
=
any_cast
<
op
::
dot
>
(
ins
->
get_operator
());
auto
dot_op
=
any_cast
<
op
::
dot
>
(
ins
->
get_operator
());
...
@@ -402,6 +299,102 @@ void quantize_int8(program& prog,
...
@@ -402,6 +299,102 @@ void quantize_int8(program& prog,
{
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: does not support operator"
+
ins
->
name
());
MIGRAPHX_THROW
(
"QUANTIZE_INT8: does not support operator"
+
ins
->
name
());
}
}
}
// int8 quantization is different from fp16 since int8 can only handle value
// -128 ~ 127. To convert the float or double to int8, we need a scale and
// a shift, then the convert can be done as v_int8 = fp * scale + shift.
// To simplify the changes, we consider shift as 0.0f for now.
void
quantize_int8
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
quant_params
)
{
// for(size_t i = 0; i < quant_params.size(); i++)
// {
// auto param = quant_params.at(i);
// std::cout << "index = " << i << ", scale = " << param.first << "\t" << param.second
// << std::endl;
// }
// std::cout << std::endl;
// For now, we only support the int8 quantization of gemm and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
(
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
());
}))
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: only support DOT and CONVOLUTION operation"
);
}
std
::
size_t
quant_param_index
=
0
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_quant_ins
;
std
::
unordered_map
<
instruction_ref
,
std
::
size_t
>
map_ins_index
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
// for the dot operator, there could be 2 or 3 input arguments
// if the 3rd argument is available, convert it to an int32.
std
::
vector
<
instruction_ref
>
converted_inputs
;
// process all inputs, if input is a fp32 or fp64, convert it
// to a int8 type by adding a convert operator and replace
// the operator with the corresponding int8 version
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
std
::
pair
<
float
,
float
>>
ins_quant_params
;
for
(
auto
input
:
inputs
)
{
// calculate the index of each instruction to be quantized
std
::
size_t
ins_index
=
(
map_ins_index
.
count
(
input
)
>
0
)
?
map_ins_index
[
input
]
:
quant_param_index
++
;
map_ins_index
[
input
]
=
ins_index
;
auto
param
=
quant_params
[
map_ins_index
[
input
]];
ins_quant_params
.
push_back
(
param
);
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape
::
type_t
quant_type
=
shape
::
int8_type
;
if
((
ins
->
name
()
==
"dot"
)
and
(
inputs
.
size
()
==
3
)
and
(
input
==
inputs
.
back
()))
{
quant_type
=
shape
::
int32_type
;
}
auto
s
=
input
->
get_shape
();
if
((
s
.
type
()
==
shape
::
float_type
or
s
.
type
()
==
shape
::
double_type
or
s
.
type
()
==
shape
::
int32_type
)
and
s
.
type
()
!=
quant_type
)
{
// if the input is a convert operator, uses its input
// as its current input
instruction_ref
quant_input
{};
if
(
input
->
name
()
==
"convert"
and
input
->
inputs
().
front
()
->
get_shape
().
type
()
==
quant_type
)
{
quant_input
=
input
->
inputs
().
front
();
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
converted_inputs
.
push_back
(
quant_input
);
}
else
{
converted_inputs
.
push_back
(
input
);
}
}
// no change for the input, go to the next instruction
if
(
inputs
==
converted_inputs
)
{
continue
;
}
quantize_ins
(
prog
,
ins
,
converted_inputs
,
ins_quant_params
);
}
}
if
(
quant_param_index
!=
quant_params
.
size
())
if
(
quant_param_index
!=
quant_params
.
size
())
...
@@ -462,7 +455,7 @@ std::size_t capture_arguments(program& prog,
...
@@ -462,7 +455,7 @@ std::size_t capture_arguments(program& prog,
size_t
num_quant_params
=
0
;
size_t
num_quant_params
=
0
;
// the int8 quantization only support dot and convolution
// the int8 quantization only support dot and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
,
"quant_dot"
,
"quant_convolution"
};
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
();
return
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
();
}))
}))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment