gaoqiong / MIGraphX / Commits / 7f3a960b

Commit 7f3a960b, authored Jun 10, 2019 by Shucai Xiao
Parent: f8613dd1

some code refinement related to quantization.
Showing 4 changed files with 129 additions and 148 deletions:

    src/include/migraphx/op/convert.hpp       +3    -3
    src/quantization.cpp                      +79   -4
    src/targets/gpu/device/convert.cpp        +1    -1
    src/targets/gpu/quant_convolution.cpp     +46   -140
src/include/migraphx/op/convert.hpp
View file @ 7f3a960b

@@ -42,10 +42,10 @@ struct convert : unary<convert>
     float res = scale * x + shift;
     if(target_type == shape::int8_type)
     {
-        int factor = (res > 0) ? 1 : -1;
+        int factor = (res >= 0.0f) ? 1 : -1;
         res = res + factor * 0.5f;
-        res = res > 127.0 ? 127.0 : res;
-        res = res < -128.0 ? -128.0 : res;
+        res = res > 127.0f ? 127.0f : res;
+        res = res < -128.0f ? -128.0f : res;
     }
     return res;
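The change tightens the int8 rounding in two ways: the sign test becomes res >= 0.0f so that exactly-zero values take the positive branch, and the clamp bounds become float literals (127.0f instead of 127.0) so the comparisons stay in single precision instead of being promoted to double. A minimal standalone sketch of the rounding scheme follows; round_to_int8 is a hypothetical helper mirroring the body of apply(), not part of the file:

#include <cassert>
#include <cstdint>

// Round half away from zero, then clamp to the int8 range, exactly as the
// convert operator does: add +/-0.5 depending on sign and let the integer
// cast truncate toward zero.
std::int8_t round_to_int8(float res)
{
    int factor = (res >= 0.0f) ? 1 : -1;
    res        = res + factor * 0.5f;
    res        = res > 127.0f ? 127.0f : res;
    res        = res < -128.0f ? -128.0f : res;
    return static_cast<std::int8_t>(res);
}

int main()
{
    assert(round_to_int8(2.5f) == 3);    // halves round away from zero
    assert(round_to_int8(-2.5f) == -3);
    assert(round_to_int8(300.0f) == 127);   // clamped to the int8 range
    assert(round_to_int8(-300.0f) == -128);
}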
src/quantization.cpp
View file @ 7f3a960b

@@ -7,6 +7,7 @@
 #include <migraphx/op/mul.hpp>
 #include <migraphx/op/add.hpp>
 #include <migraphx/op/quant_dot.hpp>
+#include <migraphx/op/capture.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/multibroadcast.hpp>
@@ -124,6 +125,23 @@ void quantize(program& prog) { quantize(prog, {"all"}); }
 static std::vector<std::pair<float, float>> int8_quant_params;
 
+// function to compute the scale for each convert operator to convert to int8
+void calc_quant_params(std::size_t ins_index, std::vector<migraphx::argument> args)
+{
+    std::pair<float, float> param_pair{1.0f, 0.0f};
+
+    // scale and shift are needed only for the int8 type, and we do not
+    // consider shift, so set shift to 0
+    std::vector<float> vec_val;
+    args.front().visit([&](auto output) { vec_val.assign(output.begin(), output.end()); });
+    auto max_val = *std::max_element(vec_val.begin(), vec_val.end());
+    auto min_val = *std::min_element(vec_val.begin(), vec_val.end());
+    auto max_abs = std::max(std::fabs(max_val), std::fabs(min_val));
+    param_pair.first = 127.0f / max_abs;
+    int8_quant_params[ins_index] = param_pair;
+};
+
 // int8 quantization is different from fp16 since int8 can only handle values
 // in -128 ~ 127. To convert a float or double to int8, we need a scale and
 // a shift, then the convert can be done as v_int8 = fp * scale + shift.
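calc_quant_params derives a symmetric scale for each captured argument: the element of largest magnitude maps to ±127 and the shift stays 0. A self-contained sketch of the same computation; compute_int8_scale is a hypothetical name used here for illustration:

#include <algorithm>
#include <cmath>
#include <vector>

// Symmetric int8 scale: 127 divided by the largest absolute value, so the
// extreme element quantizes to +/-127. (As in the original, an all-zero
// input would divide by zero.)
float compute_int8_scale(const std::vector<float>& vals)
{
    auto max_val = *std::max_element(vals.begin(), vals.end());
    auto min_val = *std::min_element(vals.begin(), vals.end());
    auto max_abs = std::max(std::fabs(max_val), std::fabs(min_val));
    return 127.0f / max_abs;
}

For example, values in [-0.5, 2.0] give max_abs = 2.0 and scale = 63.5, so 2.0 quantizes to 127 and -0.5 to -32.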
@@ -309,9 +327,7 @@ void quantize_int8(program& prog,
             ins,
             op::quant_convolution{padding, stride, dilation, padding_mode, group},
             converted_inputs);
-        auto fp_conv = prog.insert_instruction(
-            ins, op::convert{shape::float_type, adjust_factor, 0.0f}, quant_conv);
-        prog.replace_instruction(ins, op::convert{orig_type, 1.0f, 0.0f}, fp_conv);
+        prog.replace_instruction(ins, op::convert{orig_type, adjust_factor, 0.0f}, quant_conv);
     }
     else
     {
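The fold works because the two removed converts only composed two affine maps, so one convert with scale adjust_factor suffices. Assuming adjust_factor is the reciprocal of the product of the two input scales (which is what maps an int32 accumulator of scaled-int8 products back to the original range; the commit itself only shows the fold), the arithmetic being undone looks roughly like this sketch:

#include <cstdint>

// If x ~ qx / sx and w ~ qw / sw elementwise, the int32 accumulator
// acc = sum(qx * qw) approximates sum(x * w) * sx * sw. One multiply by
// adjust_factor = 1 / (sx * sw) therefore recovers the float result with
// no intermediate float-to-float convert. Illustrative only.
float dequantize_acc(std::int32_t acc, float sx, float sw)
{
    float adjust_factor = 1.0f / (sx * sw);
    return acc * adjust_factor;
}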
@@ -333,7 +349,66 @@ void quantize_int8(program& prog, const std::vector<std::string>& ins_names)
 void quantize_int8(program& prog)
 {
     std::vector<std::string> ins_names = {"dot", "convolution"};
-    quantize_int8(prog, ins_names);
+    quantize_int8(prog, ins_names, int8_quant_params);
 }
 
+// For each input argument, we need to insert a capture operator
+// to compute the scale and shift
+void capture_arguments(program& prog,
+                       const std::vector<std::string>& ins_names,
+                       std::function<void(std::size_t, std::vector<argument>)> func)
+{
+    size_t num_quant_params = 0;
+    // the int8 quantization only supports dot and convolution
+    std::vector<std::string> op_names = {"dot", "convolution", "quant_dot", "quant_convolution"};
+    if(!std::all_of(ins_names.begin(), ins_names.end(), [&](auto name) {
+           return std::find(op_names.begin(), op_names.end(), name) != op_names.end();
+       }))
+    {
+        MIGRAPHX_THROW("CAPTURE_ARGUMENTS: input operator is not supported");
+    }
+
+    std::unordered_map<instruction_ref, instruction_ref> ins_map;
+    for(auto ins : iterator_for(prog))
+    {
+        if(not contains(ins_names, ins->name()))
+        {
+            continue;
+        }
+        auto inputs = ins->inputs();
+        std::vector<instruction_ref> new_args;
+        for(auto input : inputs)
+        {
+            instruction_ref new_ins{};
+            if(ins_map.count(input) > 0)
+            {
+                new_ins = ins_map[input];
+            }
+            else
+            {
+                new_ins = prog.insert_instruction(
+                    std::next(input), op::capture{num_quant_params++, func}, input);
+                ins_map[input] = new_ins;
+            }
+            new_args.push_back(new_ins);
+        }
+        instruction::replace(ins, ins->get_operator(), ins->get_shape(), new_args);
+    }
+
+    // set one pair of parameters for each argument
+    int8_quant_params.resize(num_quant_params, std::make_pair(-1.0f, -1.0f));
+}
+
+void capture_arguments(program& prog, const std::vector<std::string>& ins_names)
+{
+    capture_arguments(prog, ins_names, calc_quant_params);
+}
+
+void capture_arguments(program& prog)
+{
+    std::vector<std::string> ins_names = {"dot", "convolution"};
+    capture_arguments(prog, ins_names);
+}
+
 } // namespace MIGRAPHX_INLINE_NS
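capture_arguments wraps every input of the instrumented instructions in an op::capture, so that during evaluation each argument is handed to the callback (calc_quant_params above) together with its parameter index. A stripped-down sketch of the same observer pattern, with all names illustrative rather than the MIGraphX API:

#include <cstddef>
#include <functional>
#include <vector>

// A pass-through hook: observe the value (e.g. to compute its quantization
// scale, keyed by index), then forward it unchanged to the consumer.
struct capture_hook
{
    std::size_t index;
    std::function<void(std::size_t, const std::vector<float>&)> func;

    std::vector<float> operator()(std::vector<float> vals) const
    {
        func(index, vals); // record statistics for this argument
        return vals;       // pass the data through unchanged
    }
};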
src/targets/gpu/device/convert.cpp
View file @ 7f3a960b

@@ -21,7 +21,7 @@ void convert(hipStream_t stream,
 {
     gs_launch(stream, result.get_shape().elements())([=](auto i) {
         float res = input_ptr[i] * scale + shift;
-        int factor = (res > 0) ? 1 : -1;
+        int factor = (res >= 0.0f) ? 1 : -1;
         output_ptr[i] = static_cast<int8_t>(
             std::min<float>(std::max<float>(-128.0f, res + factor * 0.5), 127.0f));
     });
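The GPU kernel applies the same round-half-away-from-zero-and-clamp as the convert operator. A host-side mirror of the per-element math can be useful for checking kernel output; quantize_reference is a hypothetical helper, not part of the file:

#include <algorithm>
#include <cstdint>
#include <vector>

// CPU reference for the per-element math in the kernel above.
std::vector<std::int8_t>
quantize_reference(const std::vector<float>& input, float scale, float shift)
{
    std::vector<std::int8_t> out(input.size());
    std::transform(input.begin(), input.end(), out.begin(), [&](float v) {
        float res  = v * scale + shift;
        int factor = (res >= 0.0f) ? 1 : -1; // round half away from zero
        return static_cast<std::int8_t>(
            std::min<float>(std::max<float>(-128.0f, res + factor * 0.5f), 127.0f));
    });
    return out;
}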
src/targets/gpu/quant_convolution.cpp
View file @ 7f3a960b

#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/device/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(5).standard();
    return op.compute_shape({inputs.at(0), inputs.at(1)});
}

argument miopen_quant_convolution::compute(context& ctx,
                                           const shape& output_shape,
                                           const std::vector<argument>& args) const
{
    auto x_desc      = make_tensor(args[0].get_shape());
    auto x_desc_vec4 = make_tensor(args[0].get_shape(), true);
    auto w_desc      = make_tensor(args[1].get_shape());
    auto w_desc_vec4 = make_tensor(args[1].get_shape(), true);
    shape tmp_output_shape{shape::float_type, output_shape.lens()};
    auto y_desc = make_tensor(tmp_output_shape);
    float alpha = 1;
    float beta  = 0;

    // pack input to vec4 format
    auto status = miopenTransformTensor(ctx.get_stream().get_miopen(),
                                        &alpha,
                                        x_desc.get(),
                                        args[0].implicit(),
                                        &beta,
                                        x_desc_vec4.get(),
                                        arg_vec4_x.implicit());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: transform input tensor failed");
    }

    // pack weights to vec4 format
    status = miopenTransformTensor(ctx.get_stream().get_miopen(),
                                   &alpha,
                                   w_desc.get(),
                                   args[1].implicit(),
                                   &beta,
                                   w_desc_vec4.get(),
                                   arg_vec4_w.implicit());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: transform weight tensor failed");
    }

    status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                      &alpha,
                                      x_desc_vec4.get(),
                                      arg_vec4_x.implicit(),
                                      w_desc_vec4.get(),
                                      arg_vec4_w.implicit(),
                                      cd.get(),
                                      algo,
                                      &beta,
                                      y_desc.get(),
                                      args[3].implicit(),
                                      args[2].implicit(),
                                      args[2].get_shape().bytes());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
    }

    // Add a conversion from float to int32_t
    device::convert(ctx.get_stream().get(), args[4], args[3], 1.0f, 0.0f, shape::int32_type);

    return args[4];
}

shape miopen_quant_convolution::compile(context& ctx,
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
{
    shape workspace_shape{};

    auto x_desc = make_tensor(inputs[0], true);
    auto w_desc = make_tensor(inputs[1], true);
    shape tmp_output_shape{shape::float_type, output_shape.lens()};
    auto y_desc = make_tensor(tmp_output_shape);

    std::size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

    arg_vec4_x     = to_gpu(generate_argument(pack_int8_shape(inputs[0])));
    arg_vec4_w     = to_gpu(generate_argument(pack_int8_shape(inputs[1])));
    auto y         = allocate_gpu(tmp_output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                        x_desc.get(),
                                                        arg_vec4_x.implicit(),
                                                        w_desc.get(),
                                                        arg_vec4_w.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        y.implicit(),
                                                        1,
                                                        &algo_count,
                                                        &perf,
                                                        workspace.implicit(),
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: find convolution failed");
    }
    handle = ctx.get_stream().get_miopen();
    algo   = perf.fwd_algo;
    return shape{shape::int8_type, {perf.memory}};
}

void miopen_quant_convolution::finalize(context& ctx,
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
{
    if(handle == ctx.get_stream().get_miopen())
        return;
    // Check that workspace hasn't changed
    auto size = inputs.at(2).bytes();
    auto ws   = compile(ctx, output_shape, std::move(inputs));
    if(ws.bytes() > size)
        MIGRAPHX_THROW("Workspace has changed during finalization.");
}

shape miopen_quant_convolution::pack_int8_shape(shape& s)
{
    if(s.type() != shape::int8_type)
    {
        MIGRAPHX_THROW("PACK_INT8_SHAPE: only process int8_type");
    }

    auto lens    = s.lens();
    auto strides = s.strides();
    lens[1]      = (lens[1] + 3) / 4 * 4;
    strides[0]   = strides[1] * lens[1];

    return {s.type(), lens, strides};
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
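pack_int8_shape pads the channel dimension (lens[1]) up to a multiple of 4 because MIOpen's int8 convolution consumes channels in vec4 groups, and strides[0] is then recomputed from the padded channel count. The round-up is plain integer arithmetic; round_up_to_vec4 is a hypothetical helper for illustration:

#include <cassert>
#include <cstddef>

// (c + 3) / 4 * 4 rounds an extent up to the next multiple of 4, relying on
// truncating integer division.
std::size_t round_up_to_vec4(std::size_t c) { return (c + 3) / 4 * 4; }

int main()
{
    assert(round_up_to_vec4(1) == 4);
    assert(round_up_to_vec4(4) == 4);
    assert(round_up_to_vec4(5) == 8);
}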
src/include/migraphx/op/convert.hpp
View file @ 7f3a960b

#ifndef MIGRAPHX_GUARD_OPERATORS_CONVERT_HPP
#define MIGRAPHX_GUARD_OPERATORS_CONVERT_HPP

#include <array>
#include <migraphx/op/unary.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {

struct convert : unary<convert>
{
    shape::type_t target_type = shape::half_type;
    float scale               = 1.0f;
    float shift               = 0.0f;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(
            f(self.target_type, "target_type"), f(self.scale, "scale"), f(self.shift, "shift"));
    }

    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs, *this}.has(1);
        return {target_type, inputs.at(0).lens(), inputs.at(0).strides()};
    }

    auto apply() const
    {
        return [&](auto x) {
            float res = scale * x + shift;
            if(target_type == shape::int8_type)
            {
                int factor = (res >= 0.0f) ? 1 : -1;
                res        = res + factor * 0.5f;
                res        = res > 127.0f ? 127.0f : res;
                res        = res < -128.0f ? -128.0f : res;
            }
            return res;
        };
    }

    convert(shape::type_t t) : target_type{t} {}
    convert(shape::type_t t, float sle, float sft) : target_type{t}, scale{sle}, shift{sft} {}
    convert() {}
};

} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif