gaoqiong / MIGraphX · Commit ffa6a45a
Authored Jun 05, 2019 by Shucai Xiao
Parent: e6158f10

Fix mismatch between CPU and GPU execution.
Showing 2 changed files with 27 additions and 125 deletions:

  src/quantization.cpp           +24  -124
  src/targets/cpu/lowering.cpp    +3    -1
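At a glance, judging from the hunks below: src/quantization.cpp drops the hand-rolled literal/mul/add chains that rescaled int8 dot and convolution results and instead folds the scale into convert instructions, while src/targets/cpu/lowering.cpp makes the reference int8 convolution accumulate its products in int32_t. Both changes appear to bring the CPU path's arithmetic in line with the GPU kernels.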
src/quantization.cpp

```diff
@@ -278,99 +278,33 @@ void quantize_int8(program& prog,
                 prog.replace_instruction(ins, op::convert{orig_type}, quant_dot);
             }
         }
-        // only alpha can be quantized, quantization of beta will cause
-        // big error, so we have to manually do the multiplication and
-        // addition
-        else if(fabs(new_alpha) >= threshold)
-        {
-            // truncate to the nearest integer
-            new_alpha = new_alpha > 0.0 ? new_alpha + 0.5 : new_alpha - 0.5;
-            int32_t quant_alpha = static_cast<int32_t>(new_alpha);
-            int32_t quant_beta  = 0;
-            if(orig_type == shape::int32_type)
-            {
-                if(inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                }
-                // if there are 3 inputs, we need to consider the third argument
-                else
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
-                    auto l_beta = prog.add_literal(literal{orig_type, vec_beta});
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, q_dot, beta_c);
-                }
-            }
-            else
-            {
-                if(inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    prog.replace_instruction(ins, op::convert{orig_type}, q_dot);
-                }
-                // if there are 3 inputs, we need to consider the third argument
-                else
-                {
-                    auto q_dot = prog.insert_instruction(
-                        ins, op::quant_dot{quant_alpha, quant_beta}, converted_inputs);
-                    auto oq_dot =
-                        prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
-                    std::vector<float> vec_beta(q_dot->get_shape().elements(), dot_op.beta);
-                    auto l_beta = prog.add_literal(literal{oq_dot->get_shape(), vec_beta});
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, oq_dot, beta_c);
-                }
-            }
-        }
+        // either alpha or beta cannot be quantized because of too big
+        // relative rounding error
         else
         {
             auto q_dot = prog.insert_instruction(ins, op::quant_dot{1, 0}, converted_inputs);
-            std::vector<float> vec_alpha(q_dot->get_shape().elements(), new_alpha);
-            if(orig_type == shape::int32_type)
+            if(inputs.size() == 3 and dot_op.beta != 0.0f)
             {
-                auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
-                if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(ins, op::mul{}, l_alpha, q_dot);
-                }
-                // case of 3 arguments
-                else
-                {
-                    std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
-                    auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
-                    auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, q_dot);
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
-                }
+                auto alpha_ab = prog.insert_instruction(
+                    ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
+                auto c_shape = q_dot->get_shape();
+                std::vector<float> vec_beta(c_shape.elements(), dot_op.beta);
+                auto l_beta =
+                    prog.add_literal(literal({shape::float_type, c_shape.lens()}, vec_beta));
+                instruction_ref beta_c{};
+                if(orig_type != shape::float_type)
+                {
+                    auto fp32_c = prog.insert_instruction(
+                        ins, op::convert{shape::float_type}, inputs.back());
+                    auto fp32_beta_c =
+                        prog.insert_instruction(ins, op::mul{}, l_beta, fp32_c);
+                    beta_c = prog.insert_instruction(ins, op::convert{orig_type}, fp32_beta_c);
+                }
+                else
+                {
+                    beta_c = prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
+                }
+                prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
             }
             else
             {
-                auto oq_dot  = prog.insert_instruction(ins, op::convert{orig_type}, q_dot);
-                auto l_alpha = prog.add_literal(literal(ins->get_shape(), vec_alpha));
-                if(converted_inputs.size() == 2 or dot_op.beta == 0.0f)
-                {
-                    prog.replace_instruction(ins, op::mul{}, l_alpha, oq_dot);
-                }
-                // case of 3 arguments
-                else
-                {
-                    std::vector<float> vec_beta(ins->get_shape().elements(), new_beta);
-                    auto l_beta   = prog.add_literal(literal(ins->get_shape(), vec_beta));
-                    auto alpha_ab = prog.insert_instruction(ins, op::mul{}, l_alpha, oq_dot);
-                    auto beta_c =
-                        prog.insert_instruction(ins, op::mul{}, l_beta, inputs.back());
-                    prog.replace_instruction(ins, op::add{}, alpha_ab, beta_c);
-                    // auto gemm_res = prog.insert_instruction(ins, op::add{}, alpha_ab,
-                    // beta_c); prog.replace_instruction(ins, op::capture{0, print_gemm_res},
-                    // gemm_res);
-                }
+                prog.replace_instruction(ins, op::convert{orig_type, new_alpha, 0.0f}, q_dot);
             }
         }
     }
```
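The rewritten fallback branch leans on a scaled convert: `quant_dot{1, 0}` now always produces the raw int32 `A·B`, alpha is applied while converting that result back (reading `op::convert{type, scale, shift}` as `y = scale * x + shift`, which is what the surrounding rewrite implies), and `beta * C` is computed in float before the final add. A minimal standalone sketch of that arithmetic, with made-up values and plain C++ in place of the MIGraphX graph API:

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    int32_t q_dot   = 12500;     // raw A.B from quant_dot{1, 0} on the int8 inputs
    float new_alpha = 0.02f;     // alpha, rescaled for the int8 quantization
    float beta = 0.5f, c = 3.0f; // third argument C and its coefficient

    // op::convert{orig_type, new_alpha, 0.0f}: apply alpha during conversion
    float alpha_ab = new_alpha * static_cast<float>(q_dot);
    // beta * C, done in float even when orig_type is not float (the diff
    // converts inputs.back() to float first, multiplies, then converts back)
    float beta_c = beta * c;

    std::cout << alpha_ab + beta_c << "\n"; // alpha*(A.B) + beta*C = 251.5
}
```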
```diff
@@ -384,49 +318,15 @@ void quantize_int8(program& prog,
             auto dilation      = conv_op.dilation;
             auto padding_mode  = conv_op.padding_mode;
             auto group         = conv_op.group;
-            auto adjust_factor = 1.0 / (ins_quant_params[0].first * ins_quant_params[1].first);
-            shape quant_shape  = compute_shape(
-                op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                converted_inputs);
-            std::vector<float> vec_factor(quant_shape.elements(), adjust_factor);
-            auto fl = prog.add_literal(literal{{orig_type, quant_shape.lens()}, vec_factor});
-            if(quant_shape.type() == orig_type)
-            {
-                if(adjust_factor == 1.0f)
-                {
-                    prog.replace_instruction(
-                        ins,
-                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                        converted_inputs);
-                }
-                else
-                {
-                    auto quant_conv = prog.insert_instruction(
-                        ins,
-                        op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                        converted_inputs);
-                    prog.replace_instruction(ins, op::mul{}, quant_conv, fl);
-                    // auto q_conv = prog.insert_instruction(ins, op::mul{}, quant_conv, fl);
-                    // prog.replace_instruction(ins, op::capture{10000, print_conv_res}, q_conv);
-                }
-            }
-            else
-            {
-                auto quant_conv = prog.insert_instruction(
-                    ins,
-                    op::quant_convolution{padding, stride, dilation, padding_mode, group},
-                    converted_inputs);
-                if(adjust_factor == 1.0f)
-                {
-                    prog.replace_instruction(ins, op::convert{orig_type}, quant_conv);
-                }
-                else
-                {
-                    auto oq_conv =
-                        prog.insert_instruction(ins, op::convert{orig_type}, quant_conv);
-                    prog.replace_instruction(ins, op::mul{}, oq_conv, fl);
-                }
-            }
+            auto adjust_factor = 1.0f / (ins_quant_params[0].first * ins_quant_params[1].first);
+            auto quant_conv    = prog.insert_instruction(
+                ins,
+                op::quant_convolution{padding, stride, dilation, padding_mode, group},
+                converted_inputs);
+            auto fp_conv = prog.insert_instruction(
+                ins, op::convert{shape::float_type, adjust_factor, 0.0f}, quant_conv);
+            prog.replace_instruction(ins, op::convert{orig_type, 1.0f, 0.0f}, fp_conv);
         }
         else
         {
```
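For convolution the same idea collapses the old literal/mul cleanup into two converts. The rescale factor comes from the input quantization scales (`ins_quant_params[0].first` and `ins_quant_params[1].first` in the hunk): if the two inputs were scaled by `s0` and `s1` before rounding to int8, the int32 convolution output is `s0 * s1` times the true result, so multiplying by `1 / (s0 * s1)` during the int32-to-float convert restores the original scale. A small self-contained check of that identity (values invented, not MIGraphX code):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    float s0 = 64.0f, s1 = 32.0f; // quantization scales of data and weights
    float a = 0.5f, w = 0.25f;    // one element of input and weight

    // quantize, then multiply in int32 as quant_convolution does
    int32_t q = static_cast<int32_t>(s0 * a) * static_cast<int32_t>(s1 * w);

    // op::convert{shape::float_type, adjust_factor, 0.0f} folds this
    // rescale into the int32 -> float conversion
    float adjust_factor = 1.0f / (s0 * s1);
    std::cout << adjust_factor * static_cast<float>(q) << "\n"; // 0.125 == a * w
}
```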
src/targets/cpu/lowering.cpp

```diff
@@ -9,6 +9,8 @@
 #include <migraphx/cpu/gemm.hpp>
 #include <unordered_map>
 #include <utility>
+#include <fstream>
+#include <iomanip>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -247,7 +249,7 @@ struct cpu_quant_convolution
                         const auto in_ch = group_id * wei_c + k;
                         if(in_x >= 0 && in_x < in_h && in_y >= 0 && in_y < in_w)
                         {
-                            acc += input(o, in_ch, in_x, in_y) * weights(w, k, x, y);
+                            acc += static_cast<int32_t>(input(o, in_ch, in_x, in_y)) * weights(w, k, x, y);
                         }
                     });
                     output(o, w, i, j) = acc;
```
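The one-line lowering change is the heart of the CPU/GPU mismatch fix: the inner loop now widens one operand so the multiply-accumulate is unambiguously carried out in int32_t, as on the GPU. In plain C++ two `int8_t` values already promote to `int` before multiplying, but `input(...)` and `weights(...)` are tensor-view accesses, and the explicit cast pins down the intended arithmetic. A sketch of the pattern (the `dot_i8` helper is illustrative, not MIGraphX code):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int32_t dot_i8(const std::vector<int8_t>& a, const std::vector<int8_t>& b)
{
    int32_t acc = 0;
    for(std::size_t i = 0; i < a.size(); ++i)
    {
        // widen before multiplying so the product is computed in int32_t
        acc += static_cast<int32_t>(a[i]) * b[i];
    }
    return acc;
}

int main()
{
    // every per-element product (100 * 100 = 10000) exceeds int8_t's range
    std::vector<int8_t> a(64, 100), b(64, 100);
    std::cout << dot_i8(a, b) << "\n"; // 640000
}
```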