Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
e12ee1f8
Commit
e12ee1f8
authored
Aug 29, 2019
by
Shucai Xiao
Browse files
fix cppcheck error
parent
a3affafc
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
158 additions
and
165 deletions
+158
-165
src/include/migraphx/quantization.hpp
src/include/migraphx/quantization.hpp
+2
-2
src/quantization.cpp
src/quantization.cpp
+156
-163
No files found.
src/include/migraphx/quantization.hpp
View file @
e12ee1f8
...
@@ -24,11 +24,11 @@ std::size_t capture_arguments(program& prog,
...
@@ -24,11 +24,11 @@ std::size_t capture_arguments(program& prog,
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
);
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
);
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments_impl
(
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments_impl
(
program
&
prog
,
const
target
&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
});
program
&
prog
,
const
target
&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
,
"convolution"
});
template
<
class
T
>
template
<
class
T
>
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments
(
program
&
prog
,
T
&&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
})
capture_arguments
(
program
&
prog
,
T
&&
t
,
const
std
::
vector
<
std
::
string
>&
ins_names
=
{
"dot"
,
"convolution"
})
{
{
static_assert
(
std
::
is_same
<
std
::
remove_cv_t
<
std
::
remove_reference_t
<
T
>>
,
target
>
{}
&&
static_assert
(
std
::
is_same
<
std
::
remove_cv_t
<
std
::
remove_reference_t
<
T
>>
,
target
>
{}
&&
std
::
is_lvalue_reference
<
T
>
{},
std
::
is_lvalue_reference
<
T
>
{},
...
...
src/quantization.cpp
View file @
e12ee1f8
...
@@ -162,115 +162,12 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -162,115 +162,12 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
// int8 quantization is different from fp16 since int8 can only handle value
static
void
quantize_ins
(
program
&
prog
,
instruction_ref
ins
,
// -128 ~ 127. To convert the float or double to int8, we need a scale and
std
::
vector
<
instruction_ref
>&
converted_inputs
,
// a shift, then the convert can be done as v_int8 = fp * scale + shift.
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
ins_quant_params
)
// To simplify the changes, we consider shift as 0.0f for now.
void
quantize_int8
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
quant_params
)
{
{
for
(
size_t
i
=
0
;
i
<
quant_params
.
size
();
i
++
)
auto
orig_type
=
ins
->
get_shape
().
type
();
{
auto
param
=
quant_params
.
at
(
i
);
std
::
cout
<<
"index = "
<<
i
<<
", scale = "
<<
param
.
first
<<
"
\t
"
<<
param
.
second
<<
std
::
endl
;
}
std
::
cout
<<
std
::
endl
;
// For now, we only support the int8 quantization of gemm and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
(
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
());
}))
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: only support DOT and CONVOLUTION operation"
);
}
std
::
size_t
quant_param_index
=
0
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_quant_ins
;
std
::
unordered_map
<
instruction_ref
,
std
::
size_t
>
map_index
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
shape
::
type_t
orig_type
=
ins
->
get_shape
().
type
();
// for the dot operator, there could be 2 or 3 input arguments
// if the 3rd argument is available, convert it to an int32.
std
::
vector
<
instruction_ref
>
converted_inputs
;
// process all inputs, if input is a fp32 or fp64, convert it
// to a int8 type by adding a convert operator and replace
// the operator with the corresponding int8 version
auto
inputs
=
ins
->
inputs
();
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
std
::
pair
<
float
,
float
>>
ins_quant_params
;
for
(
auto
input
:
inputs
)
{
// calculate the index of each instruction to be quantized
if
(
map_index
.
count
(
input
)
==
0
)
{
map_index
[
input
]
=
quant_param_index
++
;
}
auto
param
=
quant_params
[
map_index
[
input
]];
ins_quant_params
.
push_back
(
param
);
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape
::
type_t
quant_type
=
shape
::
int8_type
;
if
(
ins
->
name
()
==
"dot"
and
inputs
.
size
()
==
3
and
input
==
inputs
.
back
())
{
quant_type
=
shape
::
int32_type
;
}
auto
s
=
input
->
get_shape
();
if
((
s
.
type
()
==
shape
::
float_type
or
s
.
type
()
==
shape
::
double_type
or
s
.
type
()
==
shape
::
int32_type
)
and
s
.
type
()
!=
quant_type
)
{
// if the input is a convert operator, uses its input
// as its current input
instruction_ref
quant_input
{};
if
(
input
->
name
()
==
"convert"
)
{
auto
tmp_ins
=
input
->
inputs
().
front
();
if
(
tmp_ins
->
get_shape
().
type
()
==
quant_type
)
{
quant_input
=
input
->
inputs
().
front
();
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
converted_inputs
.
push_back
(
quant_input
);
}
else
{
converted_inputs
.
push_back
(
input
);
}
}
// no change for the input, go to the next instruction
if
(
inputs
==
converted_inputs
)
{
continue
;
}
// When converting from other types to int8_type, there are parameters
// used as scale and shift(.0f), which will generate results diffrent from
// the original results. To adjust the output to be "correct(approximatly
// equal)", we need additional calculation for the adjustment
if
(
ins
->
name
()
==
"dot"
)
if
(
ins
->
name
()
==
"dot"
)
{
{
auto
dot_op
=
any_cast
<
op
::
dot
>
(
ins
->
get_operator
());
auto
dot_op
=
any_cast
<
op
::
dot
>
(
ins
->
get_operator
());
...
@@ -402,6 +299,102 @@ void quantize_int8(program& prog,
...
@@ -402,6 +299,102 @@ void quantize_int8(program& prog,
{
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: does not support operator"
+
ins
->
name
());
MIGRAPHX_THROW
(
"QUANTIZE_INT8: does not support operator"
+
ins
->
name
());
}
}
}
// int8 quantization is different from fp16 since int8 can only handle value
// -128 ~ 127. To convert the float or double to int8, we need a scale and
// a shift, then the convert can be done as v_int8 = fp * scale + shift.
// To simplify the changes, we consider shift as 0.0f for now.
void
quantize_int8
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>>&
quant_params
)
{
// for(size_t i = 0; i < quant_params.size(); i++)
// {
// auto param = quant_params.at(i);
// std::cout << "index = " << i << ", scale = " << param.first << "\t" << param.second
// << std::endl;
// }
// std::cout << std::endl;
// For now, we only support the int8 quantization of gemm and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
(
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
());
}))
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: only support DOT and CONVOLUTION operation"
);
}
std
::
size_t
quant_param_index
=
0
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_quant_ins
;
std
::
unordered_map
<
instruction_ref
,
std
::
size_t
>
map_ins_index
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
// for the dot operator, there could be 2 or 3 input arguments
// if the 3rd argument is available, convert it to an int32.
std
::
vector
<
instruction_ref
>
converted_inputs
;
// process all inputs, if input is a fp32 or fp64, convert it
// to a int8 type by adding a convert operator and replace
// the operator with the corresponding int8 version
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
std
::
pair
<
float
,
float
>>
ins_quant_params
;
for
(
auto
input
:
inputs
)
{
// calculate the index of each instruction to be quantized
std
::
size_t
ins_index
=
(
map_ins_index
.
count
(
input
)
>
0
)
?
map_ins_index
[
input
]
:
quant_param_index
++
;
map_ins_index
[
input
]
=
ins_index
;
auto
param
=
quant_params
[
map_ins_index
[
input
]];
ins_quant_params
.
push_back
(
param
);
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape
::
type_t
quant_type
=
shape
::
int8_type
;
if
((
ins
->
name
()
==
"dot"
)
and
(
inputs
.
size
()
==
3
)
and
(
input
==
inputs
.
back
()))
{
quant_type
=
shape
::
int32_type
;
}
auto
s
=
input
->
get_shape
();
if
((
s
.
type
()
==
shape
::
float_type
or
s
.
type
()
==
shape
::
double_type
or
s
.
type
()
==
shape
::
int32_type
)
and
s
.
type
()
!=
quant_type
)
{
// if the input is a convert operator, uses its input
// as its current input
instruction_ref
quant_input
{};
if
(
input
->
name
()
==
"convert"
and
input
->
inputs
().
front
()
->
get_shape
().
type
()
==
quant_type
)
{
quant_input
=
input
->
inputs
().
front
();
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
converted_inputs
.
push_back
(
quant_input
);
}
else
{
converted_inputs
.
push_back
(
input
);
}
}
// no change for the input, go to the next instruction
if
(
inputs
==
converted_inputs
)
{
continue
;
}
quantize_ins
(
prog
,
ins
,
converted_inputs
,
ins_quant_params
);
}
}
if
(
quant_param_index
!=
quant_params
.
size
())
if
(
quant_param_index
!=
quant_params
.
size
())
...
@@ -462,7 +455,7 @@ std::size_t capture_arguments(program& prog,
...
@@ -462,7 +455,7 @@ std::size_t capture_arguments(program& prog,
size_t
num_quant_params
=
0
;
size_t
num_quant_params
=
0
;
// the int8 quantization only support dot and convolution
// the int8 quantization only support dot and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
,
"quant_dot"
,
"quant_convolution"
};
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
();
return
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
();
}))
}))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment