Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
3003b4e3
Commit
3003b4e3
authored
May 14, 2019
by
Shucai Xiao
Browse files
temp code backup
parent
e762116b
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
213 additions
and
40 deletions
+213
-40
src/include/migraphx/quantization.hpp
src/include/migraphx/quantization.hpp
+42
-8
src/quantization.cpp
src/quantization.cpp
+132
-21
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+1
-0
src/targets/gpu/convert.cpp
src/targets/gpu/convert.cpp
+26
-0
src/targets/gpu/device/convert.cpp
src/targets/gpu/device/convert.cpp
+2
-2
src/targets/gpu/include/migraphx/gpu/convert.hpp
src/targets/gpu/include/migraphx/gpu/convert.hpp
+9
-8
src/targets/gpu/include/migraphx/gpu/device/convert.hpp
src/targets/gpu/include/migraphx/gpu/device/convert.hpp
+1
-1
No files found.
src/include/migraphx/quantization.hpp
View file @
3003b4e3
#ifndef MIGRAPHX_GUARD_
RTGLIB_QUANTIZATION
_HPP
#ifndef MIGRAPHX_GUARD_
OPERATORS_CONVERT
_HPP
#define MIGRAPHX_GUARD_
RTGLIB_QUANTIZATION
_HPP
#define MIGRAPHX_GUARD_
OPERATORS_CONVERT
_HPP
#include <string>
#include <array>
#include <vector>
#include <migraphx/op/unary.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
struct
program
;
struct
convert
:
unary
<
convert
>
{
shape
::
type_t
target_type
=
shape
::
half_type
;
float
scale
=
1.0
f
;
float
shift
=
0.0
f
;
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
);
template
<
class
Self
,
class
F
>
void
quantize
(
program
&
prog
);
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
target_type
,
"target_type"
),
f
(
self
.
scale
,
"scale"
),
f
(
self
.
shift
,
"shift"
));
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
1
);
return
{
target_type
,
inputs
.
at
(
0
).
lens
(),
inputs
.
at
(
0
).
strides
()};
}
auto
apply
()
const
{
// return [&](auto x) { return (target_type == shape::int8_type) ? static_cast<int8_t>(x * scale + shift) : x; };
return
[
&
](
auto
x
)
{
return
scale
*
x
+
shift
;
};
}
convert
(
shape
::
type_t
t
)
:
target_type
{
t
}
{}
convert
(
shape
::
type_t
t
,
float
sle
,
float
sft
)
:
target_type
{
t
},
scale
(
sle
),
shift
(
sft
)
{}
convert
()
{}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/quantization.cpp
View file @
3003b4e3
...
@@ -10,25 +10,32 @@
...
@@ -10,25 +10,32 @@
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
instruction_ref
insert_
fp16
(
program
&
prog
,
instruction_ref
insert_
quant_ins
(
program
&
prog
,
instruction_ref
&
ins
,
instruction_ref
&
ins
,
shape
::
type_t
type
,
shape
::
type_t
type
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>&
map_fp16
)
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>&
map_ins
,
float
scale
=
1.0
f
,
float
shift
=
0.0
f
)
{
{
if
(
map_
fp16
.
count
(
ins
)
>
0
)
if
(
map_
ins
.
count
(
ins
)
>
0
)
{
{
return
map_
fp16
[
ins
];
return
map_
ins
[
ins
];
}
}
assert
(
ins
->
get_shape
().
type
()
==
shape
::
float_type
||
assert
(
ins
->
get_shape
().
type
()
==
shape
::
float_type
||
ins
->
get_shape
().
type
()
==
shape
::
double_type
);
ins
->
get_shape
().
type
()
==
shape
::
double_type
||
instruction_ref
ins_fp16
{};
ins
->
get_shape
().
type
()
==
shape
::
int32_type
);
ins_fp16
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
type
},
ins
);
instruction_ref
quant_ins
{};
map_fp16
[
ins
]
=
ins_fp16
;
// Forward the caller-supplied scale and shift to the convert operator.
// Previously both parameters were accepted but silently dropped, so the
// inserted convert always used its default scale/shift regardless of what
// the caller requested.
quant_ins = prog.insert_instruction(std::next(ins), op::convert{type, scale, shift}, ins);
map_ins
[
ins
]
=
quant_ins
;
return
ins_fp16
;
return
quant_ins
;
}
}
// This function converts any instructions specified in the input
// from double or float to float16 by inserting a convert operator.
// The conversion could overflow, but that is very rare in the area
// of deep learning, so we simply truncate the input to get the
// fp16 value.
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
{
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_fp16
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_fp16
;
...
@@ -59,7 +66,7 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -59,7 +66,7 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
}
}
else
else
{
{
input_fp16
=
insert_
fp16
(
prog
,
input
,
shape
::
half_type
,
map_fp16
);
input_fp16
=
insert_
quant_ins
(
prog
,
input
,
shape
::
half_type
,
map_fp16
);
}
}
converted_inputs
.
push_back
(
input_fp16
);
converted_inputs
.
push_back
(
input_fp16
);
}
}
...
@@ -79,29 +86,133 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -79,29 +86,133 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
if
(
ins_shape
.
type
()
!=
orig_type
)
if
(
ins_shape
.
type
()
!=
orig_type
)
{
{
// insert another convert instruction to convert it back
// check the dead code case to avoid assert
if
(
ins
==
std
::
prev
(
prog
.
end
()))
bool
output_empty
=
ins
->
outputs
().
empty
();
auto
ins_orig_type
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
orig_type
},
ins
);
if
(
!
output_empty
)
{
{
prog
.
add
_instruction
(
op
::
convert
{
orig_type
},
ins
);
prog
.
replace
_instruction
(
ins
,
ins_
orig_type
);
}
}
else
}
prog
.
replace_instruction
(
ins
,
op
,
converted_inputs
);
}
}
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
// int8 quantization is different from fp16 since int8 can only represent
// values in the range -128 ~ 127. To convert a float or double to int8, we
// need a scale and a shift; the conversion is then
// v_int8 = fp * scale + shift.
// To simplify the changes, we consider shift as 0.0f for now.
void
quantize_int8
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
// For now, we only support the int8 quantization of gemm and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
);
}))
{
MIGRAPHX_THROW
(
"QUANTIZE_INT8: only support DOT and CONVOLUTION operation"
);
}
// temporary values used just for testing
std
::
vector
<
std
::
pair
<
float
,
float
>>
int8_param
{{
1.0
f
,
0.0
f
},
{
1.0
f
,
0.0
f
},
{
1.0
f
,
0.0
f
}};
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_quant_ins
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
shape
::
type_t
orig_type
=
ins
->
get_shape
().
type
();
// for the dot operator, there could be 2 or 3 input arguments
// if the 3rd argument is available, convert it to an int32.
std
::
vector
<
instruction_ref
>
converted_inputs
;
// process all inputs; if an input is fp32 or fp64, convert it
// to an int8 type by adding a convert operator and replace
// the operator with the corresponding int8 version
auto
inputs
=
ins
->
inputs
();
std
::
size_t
param_index
=
0
;
for
(
auto
input
:
inputs
)
{
// In general, the target_type is int8, but for the dot
// operation, if it has 3 inputs, then the last one should
// be converted to int32_type
shape
::
type_t
quant_type
=
shape
::
int8_type
;
if
(
ins
->
name
()
==
"dot"
and
inputs
.
size
()
==
3
and
input
==
inputs
.
back
())
{
{
// check the dead code case to avoid assert
quant_type
=
shape
::
int32_type
;
bool
output_empty
=
ins
->
outputs
().
empty
();
}
auto
ins_orig_type
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
orig_type
},
ins
);
auto
param
=
int8_param
[
param_index
++
];
if
(
!
output_empty
)
auto
s
=
input
->
get_shape
();
if
(
s
.
type
()
==
shape
::
float_type
||
s
.
type
()
==
shape
::
double_type
||
s
.
type
()
==
shape
::
int32_type
)
{
// if the input is a convert operator, use that operator's input
// as the current input
instruction_ref
quant_input
{};
if
(
input
->
name
()
==
"convert"
)
{
auto
tmp_ins
=
input
->
inputs
().
front
();
if
(
tmp_ins
->
get_shape
().
type
()
==
quant_type
)
{
quant_input
=
input
->
inputs
().
front
();
}
else
{
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
}
else
{
{
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
quant_input
=
insert_quant_ins
(
prog
,
input
,
quant_type
,
map_quant_ins
,
param
.
first
,
param
.
second
);
}
}
converted_inputs
.
push_back
(
quant_input
);
}
else
{
converted_inputs
.
push_back
(
input
);
}
}
// no change for the input, go to the next instruction
if
(
inputs
==
converted_inputs
)
{
continue
;
}
auto
op
=
ins
->
get_operator
();
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
if
(
ins_shape
.
type
()
!=
orig_type
)
{
// check the dead code case to avoid assert
bool
output_empty
=
ins
->
outputs
().
empty
();
// this conversion can be only from int32 to float or double
auto
ins_orig_type
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
orig_type
},
ins
);
if
(
!
output_empty
)
{
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
}
}
}
}
// When converting from other types to int8_type, there are parameters
// used as scale and shift (0.0f), which will generate results different from
// the original results. To adjust the output to be "correct" (approximately
// equal), we need additional calculation for that.
prog
.
replace_instruction
(
ins
,
op
,
converted_inputs
);
prog
.
replace_instruction
(
ins
,
op
,
converted_inputs
);
}
}
}
}
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/gpu/CMakeLists.txt
View file @
3003b4e3
...
@@ -68,6 +68,7 @@ add_library(migraphx_gpu
...
@@ -68,6 +68,7 @@ add_library(migraphx_gpu
elu.cpp
elu.cpp
pad.cpp
pad.cpp
gather.cpp
gather.cpp
convert.cpp
lrn.cpp
lrn.cpp
schedule_model.cpp
schedule_model.cpp
adjust_allocation.cpp
adjust_allocation.cpp
...
...
src/targets/gpu/convert.cpp
0 → 100644
View file @
3003b4e3
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Computes the output shape of the GPU convert operator.
//
// `inputs` is expected to hold two shapes: the source tensor followed by the
// output buffer (compute() writes its result into args[1]). The trailing
// shape is dropped and the remaining one is forwarded to op::convert.
shape hip_convert::compute_shape(std::vector<shape> inputs) const
{
    // Validate the arity before touching the vector: pop_back() on an empty
    // vector is undefined behaviour, so report a shape error instead.
    check_shapes{inputs, *this}.has(2);
    inputs.pop_back();
    // Passing *this lets check_shapes include the operator name in errors.
    check_shapes{inputs, *this}.packed();
    return op.compute_shape(inputs);
}
// Launches the device convert on the context's stream, reading from args[0]
// and writing into args[1] with op.scale and op.shift passed through, then
// returns args[1] as the result.
argument hip_convert::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const argument& source      = args[0];
    const argument& destination = args[1];

    device::convert(ctx.get_stream().get(), destination, source, op.scale, op.shift);

    return destination;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/convert.cpp
View file @
3003b4e3
...
@@ -6,14 +6,14 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -6,14 +6,14 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
void
convert
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
convert
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
float
scale
,
float
shift
)
{
{
result
.
visit
([
&
](
auto
output
)
{
result
.
visit
([
&
](
auto
output
)
{
arg
.
visit
([
&
](
auto
input
)
{
arg
.
visit
([
&
](
auto
input
)
{
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
gs_launch
(
stream
,
gs_launch
(
stream
,
result
.
get_shape
().
elements
())([
=
](
auto
i
)
{
output_ptr
[
i
]
=
input_ptr
[
i
];
});
result
.
get_shape
().
elements
())([
=
](
auto
i
)
{
output_ptr
[
i
]
=
input_ptr
[
i
]
*
scale
+
shift
;
});
});
});
});
});
}
}
...
...
src/targets/gpu/include/migraphx/gpu/convert.hpp
View file @
3003b4e3
...
@@ -3,8 +3,6 @@
...
@@ -3,8 +3,6 @@
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -12,7 +10,7 @@ namespace gpu {
...
@@ -12,7 +10,7 @@ namespace gpu {
struct
context
;
struct
context
;
struct
hip_convert
:
unary_device
<
hip_convert
,
device
::
convert
>
struct
hip_convert
{
{
op
::
convert
op
;
op
::
convert
op
;
...
@@ -22,13 +20,16 @@ struct hip_convert : unary_device<hip_convert, device::convert>
...
@@ -22,13 +20,16 @@ struct hip_convert : unary_device<hip_convert, device::convert>
return
migraphx
::
reflect
(
self
.
op
,
f
);
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
}
hip_convert
(
op
::
convert
oper
)
:
op
(
oper
)
{}
std
::
string
name
()
const
{
return
"gpu::convert"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
inputs
.
pop_back
();
return
shapes
.
size
()
-
1
;
check_shapes
{
inputs
}.
packed
();
return
op
.
compute_shape
(
inputs
);
}
}
};
};
...
...
src/targets/gpu/include/migraphx/gpu/device/convert.hpp
View file @
3003b4e3
...
@@ -11,7 +11,7 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -11,7 +11,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
void
convert
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
convert
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
float
scale
,
float
shift
);
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment