Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
98fd5e1d
Commit
98fd5e1d
authored
Aug 27, 2019
by
Paul
Browse files
Merge branch 'develop' into eliminate-more-contiguous
parents
f7a6d87f
a1c7e7a5
Changes
76
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
522 additions
and
109 deletions
+522
-109
src/include/migraphx/verify.hpp
src/include/migraphx/verify.hpp
+1
-0
src/onnx/onnx.cpp
src/onnx/onnx.cpp
+49
-18
src/opt/memory_coloring_impl.cpp
src/opt/memory_coloring_impl.cpp
+3
-0
src/opt/memory_coloring_impl.hpp
src/opt/memory_coloring_impl.hpp
+1
-1
src/program.cpp
src/program.cpp
+1
-1
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+1
-0
src/quantization.cpp
src/quantization.cpp
+127
-27
src/rewrite_rnn.cpp
src/rewrite_rnn.cpp
+0
-1
src/simplify_algebra.cpp
src/simplify_algebra.cpp
+7
-5
src/targets/cpu/gemm.cpp
src/targets/cpu/gemm.cpp
+26
-19
src/targets/cpu/include/migraphx/cpu/gemm.hpp
src/targets/cpu/include/migraphx/cpu/gemm.hpp
+5
-0
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+143
-18
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+9
-1
src/targets/gpu/convert.cpp
src/targets/gpu/convert.cpp
+24
-0
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
...targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
+7
-18
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+1
-0
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+77
-0
src/targets/gpu/device/round.cpp
src/targets/gpu/device/round.cpp
+18
-0
src/targets/gpu/device/sigmoid.cpp
src/targets/gpu/device/sigmoid.cpp
+18
-0
src/targets/gpu/gemm.cpp
src/targets/gpu/gemm.cpp
+4
-0
No files found.
src/include/migraphx/verify.hpp
View file @
98fd5e1d
...
@@ -168,6 +168,7 @@ bool verify_range(R1&& r1, R2&& r2, double tolerance = 80, double* out_error = n
...
@@ -168,6 +168,7 @@ bool verify_range(R1&& r1, R2&& r2, double tolerance = 80, double* out_error = n
{
{
double
threshold
=
std
::
numeric_limits
<
range_value
<
R1
>>::
epsilon
()
*
tolerance
;
double
threshold
=
std
::
numeric_limits
<
range_value
<
R1
>>::
epsilon
()
*
tolerance
;
auto
error
=
rms_range
(
r1
,
r2
);
auto
error
=
rms_range
(
r1
,
r2
);
// cppcheck-suppress uninitvar
if
(
out_error
!=
nullptr
)
if
(
out_error
!=
nullptr
)
*
out_error
=
error
;
*
out_error
=
error
;
return
error
<=
threshold
;
return
error
<=
threshold
;
...
...
src/onnx/onnx.cpp
View file @
98fd5e1d
...
@@ -55,6 +55,7 @@ struct onnx_parser
...
@@ -55,6 +55,7 @@ struct onnx_parser
add_generic_op
(
"Acos"
,
op
::
acos
{});
add_generic_op
(
"Acos"
,
op
::
acos
{});
add_generic_op
(
"Atan"
,
op
::
atan
{});
add_generic_op
(
"Atan"
,
op
::
atan
{});
add_generic_op
(
"Sqrt"
,
op
::
sqrt
{});
add_generic_op
(
"Sqrt"
,
op
::
sqrt
{});
add_generic_op
(
"Round"
,
op
::
round
{});
add_generic_op
(
"Sign"
,
op
::
sign
{});
add_generic_op
(
"Sign"
,
op
::
sign
{});
add_binary_op
(
"Add"
,
op
::
add
{});
add_binary_op
(
"Add"
,
op
::
add
{});
...
@@ -206,6 +207,16 @@ struct onnx_parser
...
@@ -206,6 +207,16 @@ struct onnx_parser
return
out_lens
;
return
out_lens
;
}
}
instruction_ref
make_contiguous
(
instruction_ref
ins
)
{
if
(
ins
->
get_shape
().
standard
())
{
return
ins
;
}
return
prog
.
add_instruction
(
op
::
contiguous
{},
ins
);
}
template
<
class
T
>
template
<
class
T
>
instruction_ref
add_broadcastable_binary_op
(
instruction_ref
arg0
,
instruction_ref
arg1
,
T
x
)
instruction_ref
add_broadcastable_binary_op
(
instruction_ref
arg0
,
instruction_ref
arg1
,
T
x
)
{
{
...
@@ -313,7 +324,11 @@ struct onnx_parser
...
@@ -313,7 +324,11 @@ struct onnx_parser
{
{
if
(
contains
(
attributes
,
"auto_pad"
))
if
(
contains
(
attributes
,
"auto_pad"
))
{
{
MIGRAPHX_THROW
(
"auto_pad and padding cannot be specified simultaneously"
);
auto
s
=
attributes
[
"auto_pad"
].
s
();
if
(
contains
(
attributes
,
"pads"
)
and
to_upper
(
s
)
!=
"NOTSET"
)
{
MIGRAPHX_THROW
(
"auto_pad and padding cannot be specified simultaneously"
);
}
}
}
std
::
vector
<
std
::
int64_t
>
padding
;
std
::
vector
<
std
::
int64_t
>
padding
;
copy
(
attributes
[
"pads"
].
ints
(),
std
::
back_inserter
(
padding
));
copy
(
attributes
[
"pads"
].
ints
(),
std
::
back_inserter
(
padding
));
...
@@ -361,7 +376,7 @@ struct onnx_parser
...
@@ -361,7 +376,7 @@ struct onnx_parser
if
(
args
.
size
()
==
3
)
if
(
args
.
size
()
==
3
)
{
{
uint64_t
axis
=
1
;
uint64_t
axis
=
1
;
auto
l1
=
prog
.
add_instruction
(
op
,
args
[
0
]
,
args
[
1
]);
auto
l1
=
prog
.
add_instruction
(
op
,
l0
,
args
[
1
]);
auto
l2
=
prog
.
add_instruction
(
op
::
broadcast
{
axis
,
l1
->
get_shape
().
lens
()},
args
[
2
]);
auto
l2
=
prog
.
add_instruction
(
op
::
broadcast
{
axis
,
l1
->
get_shape
().
lens
()},
args
[
2
]);
return
prog
.
add_instruction
(
op
::
add
{},
l1
,
l2
);
return
prog
.
add_instruction
(
op
::
add
{},
l1
,
l2
);
}
}
...
@@ -437,12 +452,7 @@ struct onnx_parser
...
@@ -437,12 +452,7 @@ struct onnx_parser
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
dims
));
});
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
dims
));
});
}
}
if
(
!
args
[
0
]
->
get_shape
().
standard
())
return
prog
.
add_instruction
(
op
,
make_contiguous
(
args
[
0
]));
{
args
[
0
]
=
prog
.
add_instruction
(
op
::
contiguous
{},
args
[
0
]);
}
return
prog
.
add_instruction
(
op
,
args
[
0
]);
}
}
instruction_ref
instruction_ref
...
@@ -490,23 +500,41 @@ struct onnx_parser
...
@@ -490,23 +500,41 @@ struct onnx_parser
{
{
axis
=
parse_value
(
attributes
.
at
(
"axis"
)).
at
<
int
>
();
axis
=
parse_value
(
attributes
.
at
(
"axis"
)).
at
<
int
>
();
}
}
op
::
gather
op
{
axis
};
op
::
gather
op
{
axis
};
return
prog
.
add_instruction
(
op
,
std
::
move
(
args
));
return
prog
.
add_instruction
(
op
,
make_contiguous
(
args
[
0
]),
make_contiguous
(
args
[
1
]
));
}
}
instruction_ref
instruction_ref
parse_slice
(
const
std
::
string
&
,
attribute_map
attributes
,
std
::
vector
<
instruction_ref
>
args
)
parse_slice
(
const
std
::
string
&
,
attribute_map
attributes
,
std
::
vector
<
instruction_ref
>
args
)
{
{
op
::
slice
op
;
op
::
slice
op
;
std
::
vector
<
size_t
>
dims
=
args
[
0
]
->
get_shape
().
lens
();
size_t
num_dims
=
dims
.
size
();
if
(
contains
(
attributes
,
"axes"
))
if
(
contains
(
attributes
,
"axes"
))
{
{
literal
s
=
parse_value
(
attributes
.
at
(
"axes"
));
literal
s
=
parse_value
(
attributes
.
at
(
"axes"
));
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
axes
));
});
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
axes
));
});
}
}
else
{
op
.
axes
=
std
::
vector
<
int64_t
>
(
num_dims
);
std
::
iota
(
op
.
axes
.
begin
(),
op
.
axes
.
end
(),
0
);
}
if
(
contains
(
attributes
,
"ends"
))
{
{
literal
s
=
parse_value
(
attributes
.
at
(
"ends"
));
literal
s
=
parse_value
(
attributes
.
at
(
"ends"
));
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
ends
));
});
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
ends
));
});
for
(
size_t
i
=
0
;
i
<
num_dims
;
i
++
)
{
if
(
static_cast
<
size_t
>
(
op
.
ends
[
i
])
>
dims
[
i
])
{
op
.
ends
[
i
]
=
dims
[
i
];
}
}
}
}
if
(
contains
(
attributes
,
"starts"
))
{
{
literal
s
=
parse_value
(
attributes
.
at
(
"starts"
));
literal
s
=
parse_value
(
attributes
.
at
(
"starts"
));
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
starts
));
});
s
.
visit
([
&
](
auto
v
)
{
copy
(
v
,
std
::
back_inserter
(
op
.
starts
));
});
...
@@ -1011,9 +1039,10 @@ struct onnx_parser
...
@@ -1011,9 +1039,10 @@ struct onnx_parser
}
}
std
::
vector
<
operation
>
vec_actv_funcs
(
vec_names
.
size
());
std
::
vector
<
operation
>
vec_actv_funcs
(
vec_names
.
size
());
std
::
transform
(
vec_names
.
begin
(),
vec_names
.
end
(),
vec_actv_funcs
.
begin
(),
[
&
](
auto
&
fn
)
{
std
::
transform
(
vec_names
.
begin
(),
return
map_actv_funcs
[
fn
];
vec_names
.
end
(),
});
vec_actv_funcs
.
begin
(),
[
&
](
const
auto
&
fn
)
{
return
map_actv_funcs
[
fn
];
});
// To be added later
// To be added later
float
clip
=
0.0
;
float
clip
=
0.0
;
...
@@ -1127,9 +1156,10 @@ struct onnx_parser
...
@@ -1127,9 +1156,10 @@ struct onnx_parser
}
}
std
::
vector
<
operation
>
vec_actv_funcs
(
vec_names
.
size
());
std
::
vector
<
operation
>
vec_actv_funcs
(
vec_names
.
size
());
std
::
transform
(
vec_names
.
begin
(),
vec_names
.
end
(),
vec_actv_funcs
.
begin
(),
[
&
](
auto
&
name
)
{
std
::
transform
(
vec_names
.
begin
(),
return
map_actv_funcs
[
name
];
vec_names
.
end
(),
});
vec_actv_funcs
.
begin
(),
[
&
](
const
auto
&
name
)
{
return
map_actv_funcs
[
name
];
});
float
clip
=
0.0
;
float
clip
=
0.0
;
if
(
contains
(
attributes
,
"clip"
))
if
(
contains
(
attributes
,
"clip"
))
...
@@ -1299,9 +1329,10 @@ struct onnx_parser
...
@@ -1299,9 +1329,10 @@ struct onnx_parser
}
}
std
::
vector
<
operation
>
vec_actv_funcs
(
vec_names
.
size
());
std
::
vector
<
operation
>
vec_actv_funcs
(
vec_names
.
size
());
std
::
transform
(
vec_names
.
begin
(),
vec_names
.
end
(),
vec_actv_funcs
.
begin
(),
[
&
](
auto
&
name
)
{
std
::
transform
(
vec_names
.
begin
(),
return
map_actv_funcs
[
name
];
vec_names
.
end
(),
});
vec_actv_funcs
.
begin
(),
[
&
](
const
auto
&
name
)
{
return
map_actv_funcs
[
name
];
});
float
clip
=
0.0
;
float
clip
=
0.0
;
if
(
contains
(
attributes
,
"clip"
))
if
(
contains
(
attributes
,
"clip"
))
...
...
src/opt/memory_coloring_impl.cpp
View file @
98fd5e1d
...
@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
...
@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
offset
+=
(
element_size
-
(
offset
%
element_size
));
offset
+=
(
element_size
-
(
offset
%
element_size
));
conflict_queue
.
pop
();
conflict_queue
.
pop
();
}
}
// when int8 type is used, the offset could be any number
// if not 4-byte aligned, miopen int8 convolution can crash
offset
=
(
offset
+
3
)
/
4
*
4
;
segment
.
offset
=
offset
;
segment
.
offset
=
offset
;
MIGRAPHX_DEBUG
(
segment
.
dump
());
MIGRAPHX_DEBUG
(
segment
.
dump
());
required_bytes
=
std
::
max
(
required_bytes
,
offset
+
segment
.
size
);
required_bytes
=
std
::
max
(
required_bytes
,
offset
+
segment
.
size
);
...
...
src/opt/memory_coloring_impl.hpp
View file @
98fd5e1d
...
@@ -107,7 +107,7 @@ struct memory_coloring_impl
...
@@ -107,7 +107,7 @@ struct memory_coloring_impl
return
ins
->
name
()
==
"check_context"
;
return
ins
->
name
()
==
"check_context"
;
}
}
static
bool
is_disjoin
(
live_range
&
range1
,
live_range
&
range2
)
static
bool
is_disjoin
(
const
live_range
&
range1
,
const
live_range
&
range2
)
{
{
if
((
range1
.
size
==
0
)
||
(
range2
.
size
==
0
))
if
((
range1
.
size
==
0
)
||
(
range2
.
size
==
0
))
return
false
;
return
false
;
...
...
src/program.cpp
View file @
98fd5e1d
...
@@ -241,7 +241,7 @@ instruction_ref program::remove_instructions(instruction_ref first, instruction_
...
@@ -241,7 +241,7 @@ instruction_ref program::remove_instructions(instruction_ref first, instruction_
// TODO: Check every element
// TODO: Check every element
assert
(
has_instruction
(
first
));
assert
(
has_instruction
(
first
));
std
::
for_each
(
first
,
last
,
[
&
](
instruction
&
ins
)
{
ins
.
clear_arguments
();
});
std
::
for_each
(
first
,
last
,
[
&
](
instruction
&
ins
)
{
ins
.
clear_arguments
();
});
assert
(
std
::
all_of
(
first
,
last
,
[
&
](
instruction
&
ins
)
{
return
ins
.
outputs
().
empty
();
}));
assert
(
std
::
all_of
(
first
,
last
,
[
&
](
const
instruction
&
ins
)
{
return
ins
.
outputs
().
empty
();
}));
return
impl
->
instructions
.
erase
(
first
,
last
);
return
impl
->
instructions
.
erase
(
first
,
last
);
}
}
...
...
src/py/migraphx_py.cpp
View file @
98fd5e1d
...
@@ -156,6 +156,7 @@ PYBIND11_MODULE(migraphx, m)
...
@@ -156,6 +156,7 @@ PYBIND11_MODULE(migraphx, m)
py
::
class_
<
migraphx
::
target
>
(
m
,
"target"
);
py
::
class_
<
migraphx
::
target
>
(
m
,
"target"
);
py
::
class_
<
migraphx
::
program
>
(
m
,
"program"
)
py
::
class_
<
migraphx
::
program
>
(
m
,
"program"
)
.
def
(
"clone"
,
[](
migraphx
::
program
&
p
)
{
return
*
(
new
migraphx
::
program
(
p
));
})
.
def
(
"get_parameter_shapes"
,
&
migraphx
::
program
::
get_parameter_shapes
)
.
def
(
"get_parameter_shapes"
,
&
migraphx
::
program
::
get_parameter_shapes
)
.
def
(
"get_shape"
,
&
migraphx
::
program
::
get_shape
)
.
def
(
"get_shape"
,
&
migraphx
::
program
::
get_shape
)
.
def
(
"compile"
,
[](
migraphx
::
program
&
p
,
const
migraphx
::
target
&
t
)
{
p
.
compile
(
t
);
})
.
def
(
"compile"
,
[](
migraphx
::
program
&
p
,
const
migraphx
::
target
&
t
)
{
p
.
compile
(
t
);
})
...
...
src/quantization.cpp
View file @
98fd5e1d
...
@@ -3,32 +3,53 @@
...
@@ -3,32 +3,53 @@
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/capture.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/ranges.hpp>
#include <utility>
#include <utility>
#include <iomanip>
#include <fstream>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
instruction_ref
insert_
fp16
(
program
&
prog
,
instruction_ref
insert_
quant_ins
(
program
&
prog
,
instruction_ref
&
ins
,
instruction_ref
&
ins
,
shape
::
type_t
type
,
shape
::
type_t
type
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>&
map_
fp16
)
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>&
map_
ins
)
{
{
if
(
map_
fp16
.
count
(
ins
)
>
0
)
if
(
map_
ins
.
count
(
ins
)
>
0
)
{
{
return
map_fp16
[
ins
];
return
map_ins
[
ins
];
}
if
(
ins
->
name
()
==
"undefined"
)
{
return
ins
;
}
}
assert
(
ins
->
get_shape
().
type
()
==
shape
::
float_type
||
assert
(
ins
->
get_shape
().
type
()
==
shape
::
float_type
||
ins
->
get_shape
().
type
()
==
shape
::
double_type
);
ins
->
get_shape
().
type
()
==
shape
::
double_type
||
instruction_ref
ins_fp16
{};
ins
->
get_shape
().
type
()
==
shape
::
int32_type
);
ins_fp16
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
type
},
ins
);
instruction_ref
quant_ins
{};
map_fp16
[
ins
]
=
ins_fp16
;
quant_ins
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
type
},
ins
);
map_ins
[
ins
]
=
quant_ins
;
return
ins_fp16
;
return
quant_ins
;
}
}
// This function is to convert any instructions specified in the input
// from double or float to float16 by inserting a convert operator.
// For the conversion, there could be cases of overflowing, but it
// is very rare in the area of deeping learning, so we just do a
// truncate of the input to get the fp16.
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
{
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_fp16
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_fp16
;
...
@@ -53,13 +74,14 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -53,13 +74,14 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
// if the input is a convert operator, uses its input
// if the input is a convert operator, uses its input
// as its current input
// as its current input
instruction_ref
input_fp16
{};
instruction_ref
input_fp16
{};
if
(
input
->
name
()
==
"convert"
)
if
(
input
->
name
()
==
"convert"
and
input
->
inputs
().
front
()
->
get_shape
().
type
()
==
shape
::
half_type
)
{
{
input_fp16
=
input
->
inputs
().
front
();
input_fp16
=
input
->
inputs
().
front
();
}
}
else
else
{
{
input_fp16
=
insert_
fp16
(
prog
,
input
,
shape
::
half_type
,
map_fp16
);
input_fp16
=
insert_
quant_ins
(
prog
,
input
,
shape
::
half_type
,
map_fp16
);
}
}
converted_inputs
.
push_back
(
input_fp16
);
converted_inputs
.
push_back
(
input_fp16
);
}
}
...
@@ -79,21 +101,13 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -79,21 +101,13 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
if
(
ins_shape
.
type
()
!=
orig_type
)
if
(
ins_shape
.
type
()
!=
orig_type
)
{
{
// insert another convert instruction to convert it back
// check the dead code case to avoid assert
if
(
ins
==
std
::
prev
(
prog
.
end
()))
bool
output_empty
=
ins
->
outputs
().
empty
();
auto
ins_orig_type
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
orig_type
},
ins
);
if
(
!
output_empty
)
{
{
prog
.
add_instruction
(
op
::
convert
{
orig_type
},
ins
);
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
}
else
{
// check the dead code case to avoid assert
bool
output_empty
=
ins
->
outputs
().
empty
();
auto
ins_orig_type
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
orig_type
},
ins
);
if
(
!
output_empty
)
{
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
}
}
}
}
}
...
@@ -103,5 +117,91 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -103,5 +117,91 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
// For the input of each input argument, we need to insert a
// capture operator to compute the scale and shift
std
::
size_t
capture_arguments
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
)
{
size_t
num_quant_params
=
0
;
// the int8 quantization only support dot and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
();
}))
{
MIGRAPHX_THROW
(
"CAPTURE_ARGUMENTS: input operator is not supported"
);
}
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
ins_map
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
instruction_ref
>
new_args
;
for
(
auto
input
:
inputs
)
{
instruction_ref
new_ins
{};
if
(
ins_map
.
count
(
input
)
>
0
)
{
new_ins
=
ins_map
[
input
];
}
else
{
new_ins
=
prog
.
insert_instruction
(
std
::
next
(
input
),
op
::
capture
{
num_quant_params
++
,
func
},
input
);
ins_map
[
input
]
=
new_ins
;
}
new_args
.
push_back
(
new_ins
);
}
instruction
::
replace
(
ins
,
ins
->
get_operator
(),
ins
->
get_shape
(),
new_args
);
}
return
num_quant_params
;
}
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
int8_quant_params
=
std
::
make_shared
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
();
std
::
shared_ptr
<
std
::
vector
<
float
>>
max_abs_vals
=
std
::
make_shared
<
std
::
vector
<
float
>>
();
auto
calc_quant_params
=
[
int8_quant_params
,
max_abs_vals
](
std
::
size_t
ins_index
,
std
::
vector
<
migraphx
::
argument
>
args
)
{
std
::
pair
<
float
,
float
>
param_pair
{
64.0
f
,
0.0
f
};
// scale and shift is need for only int8 type, and we do not
// consider shift, so set shift to 0
std
::
vector
<
float
>
vec_val
;
args
.
front
().
visit
([
&
](
auto
output
)
{
vec_val
.
assign
(
output
.
begin
(),
output
.
end
());
});
auto
max_val
=
*
std
::
max_element
(
vec_val
.
begin
(),
vec_val
.
end
());
auto
min_val
=
*
std
::
min_element
(
vec_val
.
begin
(),
vec_val
.
end
());
auto
max_abs
=
std
::
max
(
std
::
fabs
(
max_val
),
std
::
fabs
(
min_val
));
max_abs_vals
->
at
(
ins_index
)
=
std
::
max
(
max_abs_vals
->
at
(
ins_index
),
max_abs
);
param_pair
.
first
=
127.0
f
/
max_abs_vals
->
at
(
ins_index
);
int8_quant_params
->
at
(
ins_index
)
=
param_pair
;
};
auto
num_params
=
capture_arguments
(
prog
,
ins_names
,
calc_quant_params
);
int8_quant_params
->
resize
(
num_params
,
std
::
pair
<
float
,
float
>
(
64.0
f
,
0.0
f
));
max_abs_vals
->
resize
(
num_params
,
0.0
f
);
return
int8_quant_params
;
}
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
capture_arguments
(
program
&
prog
)
{
std
::
vector
<
std
::
string
>
ins_names
=
{
"dot"
,
"convolution"
};
return
capture_arguments
(
prog
,
ins_names
);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/rewrite_rnn.cpp
View file @
98fd5e1d
...
@@ -674,7 +674,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
...
@@ -674,7 +674,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
std
::
vector
<
float
>
ihc_data
(
ihc_shape
.
elements
(),
0.0
);
std
::
vector
<
float
>
ihc_data
(
ihc_shape
.
elements
(),
0.0
);
migraphx
::
shape
pph_shape
{
type
,
{
1
,
3
*
hidden_size
}};
migraphx
::
shape
pph_shape
{
type
,
{
1
,
3
*
hidden_size
}};
std
::
vector
<
float
>
pph_data
(
pph_shape
.
elements
(),
0.0
);
auto
actv_funcs
=
lstm_actv_funcs
(
ins
);
auto
actv_funcs
=
lstm_actv_funcs
(
ins
);
auto
lstm_op
=
any_cast
<
op
::
lstm
>
(
ins
->
get_operator
());
auto
lstm_op
=
any_cast
<
op
::
lstm
>
(
ins
->
get_operator
());
...
...
src/simplify_algebra.cpp
View file @
98fd5e1d
...
@@ -52,6 +52,7 @@ struct find_mul_conv
...
@@ -52,6 +52,7 @@ struct find_mul_conv
}
}
};
};
// a * (x + b) => a * x + a * b
struct
find_mul_add
struct
find_mul_add
{
{
auto
matcher
()
const
auto
matcher
()
const
...
@@ -60,7 +61,7 @@ struct find_mul_add
...
@@ -60,7 +61,7 @@ struct find_mul_add
match
::
name
(
"add"
)(
match
::
name
(
"add"
)(
match
::
either_arg
(
0
,
1
)(
match
::
either_arg
(
0
,
1
)(
match
::
any
().
bind
(
"x"
),
match
::
any
().
bind
(
"x"
),
match
::
any_of
(
conv_const_weights
(),
match
::
is_constant
()).
bind
(
"
y
"
)),
match
::
any_of
(
conv_const_weights
(),
match
::
is_constant
()).
bind
(
"
b
"
)),
match
::
none_of
(
match
::
args
(
match
::
is_constant
(),
match
::
is_constant
())),
match
::
none_of
(
match
::
args
(
match
::
is_constant
(),
match
::
is_constant
())),
match
::
used_once
()),
match
::
used_once
()),
match
::
is_constant
().
bind
(
"a"
)));
match
::
is_constant
().
bind
(
"a"
)));
...
@@ -70,12 +71,13 @@ struct find_mul_add
...
@@ -70,12 +71,13 @@ struct find_mul_add
{
{
auto
ins
=
r
.
result
;
auto
ins
=
r
.
result
;
auto
a_ins
=
r
.
instructions
[
"a"
];
auto
a_ins
=
r
.
instructions
[
"a"
];
auto
b_ins
=
r
.
instructions
[
"b"
];
auto
x_ins
=
r
.
instructions
[
"x"
];
auto
x_ins
=
r
.
instructions
[
"x"
];
a
uto
y
_ins
=
r
.
instructions
[
"y"
]
;
a
ssert
(
x
_ins
!
=
b_ins
)
;
auto
x
a_ins
=
p
.
insert_instruction
(
ins
,
op
::
mul
{},
x
_ins
,
a
_ins
);
auto
a
x
_ins
=
p
.
insert_instruction
(
ins
,
op
::
mul
{},
a
_ins
,
x
_ins
);
auto
y
a_ins
=
p
.
insert_instruction
(
ins
,
op
::
mul
{},
y
_ins
,
a
_ins
);
auto
a
b
_ins
=
p
.
insert_instruction
(
ins
,
op
::
mul
{},
a
_ins
,
b
_ins
);
p
.
replace_instruction
(
ins
,
op
::
add
{},
x
a_ins
,
y
a_ins
);
p
.
replace_instruction
(
ins
,
op
::
add
{},
a
x
_ins
,
a
b
_ins
);
}
}
};
};
...
...
src/targets/cpu/gemm.cpp
View file @
98fd5e1d
...
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
...
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
{
{
};
};
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
void
migemm_impl
(
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
,
std
::
true_type
)
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
,
std
::
true_type
)
{
{
visit_mat
(
amat
,
[
&
](
const
auto
&
a
)
{
visit_mat
(
amat
,
[
&
](
const
auto
&
a
)
{
visit_mat
(
bmat
,
[
&
](
const
auto
&
b
)
{
visit_mat
(
bmat
,
[
&
](
const
auto
&
b
)
{
...
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
...
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
});
});
}
}
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
void
migemm_impl
(
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
,
std
::
false_type
)
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
,
std
::
false_type
)
{
{
std
::
size_t
n_dims
=
cmat
.
get_shape
().
lens
().
size
();
std
::
size_t
n_dims
=
cmat
.
get_shape
().
lens
().
size
();
std
::
size_t
dim_0
=
n_dims
-
2
;
std
::
size_t
dim_0
=
n_dims
-
2
;
...
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
...
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
});
});
}
}
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
)
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
)
{
{
auto
lens
=
amat
.
get_shape
().
lens
();
auto
lens
=
amat
.
get_shape
().
lens
();
bool
batch_mul
=
bool
batch_mul
=
...
@@ -113,13 +104,29 @@ void migemm_impl(
...
@@ -113,13 +104,29 @@ void migemm_impl(
}
}
}
}
void
migemm
(
template
<
class
F
>
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
)
void
migemm_tpl
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
F
alpha
,
F
beta
)
{
{
visit_all
(
c_arg
,
a_arg
,
b_arg
)(
visit_all
(
c_arg
,
a_arg
,
b_arg
)(
[
&
](
auto
cmat
,
auto
amat
,
auto
bmat
)
{
migemm_impl
(
cmat
,
amat
,
bmat
,
alpha
,
beta
);
});
[
&
](
auto
cmat
,
auto
amat
,
auto
bmat
)
{
migemm_impl
(
cmat
,
amat
,
bmat
,
alpha
,
beta
);
});
}
}
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
)
{
migemm_tpl
(
c_arg
,
a_arg
,
b_arg
,
alpha
,
beta
);
}
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
int32_t
alpha
,
int32_t
beta
)
{
migemm_tpl
(
c_arg
,
a_arg
,
b_arg
,
alpha
,
beta
);
}
}
// namespace cpu
}
// namespace cpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/cpu/include/migraphx/cpu/gemm.hpp
View file @
98fd5e1d
...
@@ -10,6 +10,11 @@ namespace cpu {
...
@@ -10,6 +10,11 @@ namespace cpu {
void
migemm
(
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
);
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
);
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
int32_t
alpha
,
int32_t
beta
);
}
// namespace cpu
}
// namespace cpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/cpu/lowering.cpp
View file @
98fd5e1d
...
@@ -4,7 +4,9 @@
...
@@ -4,7 +4,9 @@
#include <migraphx/dfor.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/leaky_relu.hpp>
...
@@ -216,6 +218,61 @@ struct cpu_convolution
...
@@ -216,6 +218,61 @@ struct cpu_convolution
}
}
};
};
struct
cpu_quant_convolution
{
op
::
quant_convolution
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::quant_convolution"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
auto
output
=
result
.
get
<
int32_t
>
();
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
input
,
auto
weights
)
{
auto
in
=
input
.
get_shape
().
lens
();
auto
in_h
=
in
[
2
];
auto
in_w
=
in
[
3
];
auto
wei
=
weights
.
get_shape
().
lens
();
auto
wei_n
=
wei
[
0
];
auto
wei_c
=
wei
[
1
];
auto
wei_h
=
wei
[
2
];
auto
wei_w
=
wei
[
3
];
par_dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
const
auto
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
auto
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
auto
group_id
=
w
/
(
wei_n
/
op
.
group
);
int32_t
acc
=
0
;
dfor
(
wei_c
,
wei_h
,
wei_w
)([
&
](
std
::
size_t
k
,
std
::
size_t
x
,
std
::
size_t
y
)
{
const
auto
in_x
=
start_x
+
x
;
const
auto
in_y
=
start_y
+
y
;
const
auto
in_ch
=
group_id
*
wei_c
+
k
;
if
(
in_x
>=
0
&&
in_x
<
in_h
&&
in_y
>=
0
&&
in_y
<
in_w
)
{
acc
+=
static_cast
<
int32_t
>
(
input
(
o
,
in_ch
,
in_x
,
in_y
))
*
weights
(
w
,
k
,
x
,
y
);
}
});
output
(
o
,
w
,
i
,
j
)
=
acc
;
});
});
return
result
;
}
};
struct
cpu_im2col
struct
cpu_im2col
{
{
op
::
im2col
op
;
op
::
im2col
op
;
...
@@ -245,17 +302,17 @@ struct cpu_im2col
...
@@ -245,17 +302,17 @@ struct cpu_im2col
const
std
::
size_t
&
stride_h
=
op
.
stride
[
0
];
const
std
::
size_t
&
stride_h
=
op
.
stride
[
0
];
const
std
::
size_t
&
stride_w
=
op
.
stride
[
1
];
const
std
::
size_t
&
stride_w
=
op
.
stride
[
1
];
auto
kdiv2_h
=
kernel_h
/
2
;
long
kdiv2_h
=
long
(
kernel_h
)
/
2
;
auto
kdiv2_w
=
kernel_w
/
2
;
long
kdiv2_w
=
long
(
kernel_w
)
/
2
;
// calculate output sizes
// calculate output sizes
const
std
::
size_t
col_height
=
(
height
-
kernel_h
+
2
*
pad_h
)
/
stride_h
+
1
;
const
std
::
size_t
col_height
=
(
height
-
kernel_h
+
2
*
pad_h
)
/
stride_h
+
1
;
const
std
::
size_t
col_width
=
(
width
-
kernel_w
+
2
*
pad_w
)
/
stride_w
+
1
;
const
std
::
size_t
col_width
=
(
width
-
kernel_w
+
2
*
pad_w
)
/
stride_w
+
1
;
// account for padding for the starting position of the input pixels
// account for padding for the starting position of the input pixels
std
::
size_t
iinput
=
kdiv2_h
-
pad_h
;
long
iinput
=
kdiv2_h
-
long
(
pad_h
)
;
// loop over output pixels (ioutput, joutput)
// loop over output pixels (ioutput, joutput)
for
(
std
::
size_t
ioutput
=
0
;
ioutput
<
col_height
;
ioutput
++
,
iinput
+=
stride_h
)
for
(
std
::
size_t
ioutput
=
0
;
ioutput
<
col_height
;
ioutput
++
,
iinput
+=
stride_h
)
{
{
std
::
size_t
jinput
=
kdiv2_w
-
pad_w
;
long
jinput
=
kdiv2_w
-
long
(
pad_w
)
;
for
(
std
::
size_t
joutput
=
0
;
joutput
<
col_width
;
joutput
++
,
jinput
+=
stride_w
)
for
(
std
::
size_t
joutput
=
0
;
joutput
<
col_width
;
joutput
++
,
jinput
+=
stride_w
)
{
{
// compute linear index for output
// compute linear index for output
...
@@ -264,8 +321,8 @@ struct cpu_im2col
...
@@ -264,8 +321,8 @@ struct cpu_im2col
dfor
(
channels
,
dfor
(
channels
,
kernel_h
,
kernel_h
,
kernel_w
)([
&
](
std
::
size_t
c
,
std
::
size_t
koffset
,
std
::
size_t
loffset
)
{
kernel_w
)([
&
](
std
::
size_t
c
,
std
::
size_t
koffset
,
std
::
size_t
loffset
)
{
auto
idx
=
iinput
+
koffset
-
kdiv2_h
;
auto
idx
=
iinput
+
long
(
koffset
)
-
kdiv2_h
;
auto
jdx
=
jinput
+
loffset
-
kdiv2_w
;
auto
jdx
=
jinput
+
long
(
loffset
)
-
kdiv2_w
;
col
(
ldx
,
p
)
=
((
idx
>=
0
)
&&
(
idx
<
height
)
&&
(
jdx
>=
0
)
&&
(
jdx
<
width
))
col
(
ldx
,
p
)
=
((
idx
>=
0
)
&&
(
idx
<
height
)
&&
(
jdx
>=
0
)
&&
(
jdx
<
width
))
?
input
(
0
,
c
,
idx
,
jdx
)
?
input
(
0
,
c
,
idx
,
jdx
)
:
0
;
:
0
;
...
@@ -433,7 +490,7 @@ struct cpu_gemm
...
@@ -433,7 +490,7 @@ struct cpu_gemm
{
{
argument
result
{
output_shape
};
argument
result
{
output_shape
};
// 3 inputs, it is alpha * A * B + beta * C, then
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrics, and C is
broadcastable to
A * B
// A and B are matric
e
s, and C is
of the same shape as
A * B
if
(
args
.
size
()
==
3
)
if
(
args
.
size
()
==
3
)
{
{
// no need to consider the value of args[2]
// no need to consider the value of args[2]
...
@@ -460,13 +517,79 @@ struct cpu_gemm
...
@@ -460,13 +517,79 @@ struct cpu_gemm
}
}
};
};
// CPU implementation of op::quant_dot: computes alpha * A * B (+ beta * C)
// through migemm after copying both operands into int32 buffers.
struct cpu_quant_gemm
{
    op::quant_dot op;

    // Reflection is forwarded to the wrapped operator so that its attributes
    // are the ones exposed.
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return migraphx::reflect(self.op, f);
    }

    std::string name() const { return "cpu::quant_dot"; }

    // With three inputs, the third one (C) must not be a broadcasted shape;
    // the resulting shape is whatever the wrapped operator computes.
    shape compute_shape(const std::vector<shape>& inputs) const
    {
        if(inputs.size() == 3)
        {
            auto bias_shape = inputs.at(2);
            check_shapes{{bias_shape}}.not_broadcasted();
        }
        return op.compute_shape(inputs);
    }

    // Evaluates the quantized GEMM.
    //   args[0], args[1] : the A and B operands
    //   args[2]          : optional C operand, same shape as A * B
    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};

        // Copy an operand element-wise into a freshly allocated int32 argument
        // with the same dimensions (the operands are expected to be int8).
        auto widen = [](argument& src) {
            argument widened{{shape::int32_type, {src.get_shape().lens()}}};
            widened.visit([&](auto output) {
                src.visit(
                    [&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
            });
            return widened;
        };

        argument arg_0 = widen(args.at(0));
        argument arg_1 = widen(args.at(1));

        // Two inputs only: plain alpha * A * B with a zero beta.
        if(args.size() != 3)
        {
            migemm(result, arg_0, arg_1, op.alpha, int32_t{0});
            return result;
        }

        // Three inputs: seed the result with C, unless beta is zero, in which
        // case the value of args[2] is irrelevant and the result is zeroed.
        if(op.beta == 0)
        {
            result.visit([&](auto output) { std::fill(output.begin(), output.end(), 0); });
        }
        else
        {
            visit_all(result, args[2])([&](auto output, auto input) {
                std::copy(input.begin(), input.end(), output.begin());
            });
        }

        migemm(result, arg_0, arg_1, op.alpha, op.beta);
        return result;
    }
};
struct
leaky_relu_op
struct
leaky_relu_op
{
{
op
::
leaky_relu
op
;
op
::
leaky_relu
op
;
std
::
string
name
()
const
{
return
"cpu::leaky_relu"
;
}
std
::
string
name
()
const
{
return
"cpu::leaky_relu"
;
}
auto
fcn
()
const
auto
fcn
()
const
{
{
auto
&
a
=
op
.
alpha
;
auto
a
=
op
.
alpha
;
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
x
*
a
;
};
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
x
*
a
;
};
}
}
};
};
...
@@ -477,7 +600,7 @@ struct elu_op
...
@@ -477,7 +600,7 @@ struct elu_op
std
::
string
name
()
const
{
return
"cpu::elu"
;
}
std
::
string
name
()
const
{
return
"cpu::elu"
;
}
auto
fcn
()
const
auto
fcn
()
const
{
{
auto
&
a
=
op
.
alpha
;
auto
a
=
op
.
alpha
;
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
a
*
std
::
expm1
(
x
);
};
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
a
*
std
::
expm1
(
x
);
};
}
}
};
};
...
@@ -671,15 +794,17 @@ struct cpu_apply
...
@@ -671,15 +794,17 @@ struct cpu_apply
{
{
apply_map
[
"batch_norm_inference"
]
=
apply_map
[
"batch_norm_inference"
]
=
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"quant_dot"
]
=
extend_op
<
cpu_quant_gemm
,
op
::
quant_dot
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"quant_convolution"
]
=
extend_op
<
cpu_quant_convolution
,
op
::
quant_convolution
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"softmax"
]
=
extend_op
<
cpu_softmax
,
op
::
softmax
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"softmax"
]
=
extend_op
<
cpu_softmax
,
op
::
softmax
>
();
}
}
void
apply
()
void
apply
()
...
...
src/targets/gpu/CMakeLists.txt
View file @
98fd5e1d
...
@@ -34,16 +34,19 @@ add_library(migraphx_device
...
@@ -34,16 +34,19 @@ add_library(migraphx_device
device/contiguous.cpp
device/contiguous.cpp
device/logsoftmax.cpp
device/logsoftmax.cpp
device/softmax.cpp
device/softmax.cpp
device/sigmoid.cpp
device/convert.cpp
device/convert.cpp
device/mul.cpp
device/mul.cpp
device/concat.cpp
device/concat.cpp
device/pad.cpp
device/pad.cpp
device/gather.cpp
device/gather.cpp
device/sub.cpp
device/sub.cpp
device/int8_gemm_pack.cpp
device/div.cpp
device/div.cpp
device/clip.cpp
device/clip.cpp
device/reduce_sum.cpp
device/reduce_sum.cpp
device/rsqrt.cpp
device/rsqrt.cpp
device/round.cpp
device/sqrt.cpp
device/sqrt.cpp
device/reduce_mean.cpp
device/reduce_mean.cpp
device/pow.cpp
device/pow.cpp
...
@@ -65,8 +68,10 @@ add_library(migraphx_gpu
...
@@ -65,8 +68,10 @@ add_library(migraphx_gpu
target.cpp
target.cpp
lowering.cpp
lowering.cpp
gemm.cpp
gemm.cpp
quant_gemm.cpp
pooling.cpp
pooling.cpp
convolution.cpp
convolution.cpp
quant_convolution.cpp
softmax.cpp
softmax.cpp
logsoftmax.cpp
logsoftmax.cpp
contiguous.cpp
contiguous.cpp
...
@@ -75,17 +80,20 @@ add_library(migraphx_gpu
...
@@ -75,17 +80,20 @@ add_library(migraphx_gpu
batchnorm.cpp
batchnorm.cpp
write_literals.cpp
write_literals.cpp
rocblas.cpp
rocblas.cpp
sigmoid.cpp
abs.cpp
abs.cpp
elu.cpp
elu.cpp
pad.cpp
pad.cpp
gather.cpp
gather.cpp
convert.cpp
lrn.cpp
lrn.cpp
schedule_model.cpp
schedule_model.cpp
adjust_allocation.cpp
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
clip.cpp
reduce_sum.cpp
reduce_sum.cpp
reduce_mean.cpp
reduce_mean.cpp
int8_gemm_pack.cpp
int8_conv_pack.cpp
)
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/convert.cpp
0 → 100644
View file @
98fd5e1d
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// Computes the output shape of the GPU convert operator.
// The last entry of `inputs` is dropped before the remaining shapes are
// checked for being packed and handed to the wrapped operator
// (presumably that last entry is the pre-allocated output buffer - confirm).
shape hip_convert::compute_shape(std::vector<shape> inputs) const
{
    inputs.erase(inputs.end() - 1);
    check_shapes{inputs}.packed();
    return op.compute_shape(inputs);
}
// Runs the device-side conversion on the context's stream, reading from
// args[0] and writing into args[1], which is also the returned argument.
argument hip_convert::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    const argument& source      = args[0];
    const argument& destination = args[1];
    device::convert(ctx.get_stream().get(), destination, source);
    return destination;
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
View file @
98fd5e1d
...
@@ -155,8 +155,8 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -155,8 +155,8 @@ __device__ void dpp_reduce(T& in, Op op)
__device__
inline
void
dpp_reduce
(
float
&
x
,
sum
)
__device__
inline
void
dpp_reduce
(
float
&
x
,
sum
)
{
{
#ifdef
MIGRAPHX_USE_CLANG_TIDY
#if
def
ined(
MIGRAPHX_USE_CLANG_TIDY
) || defined(CPPCHECK)
(
void
)
x
;
x
=
1
;
#else
#else
__asm__
volatile
(
"s_nop 4
\n
"
__asm__
volatile
(
"s_nop 4
\n
"
"v_add_f32 %0 %0 %0 row_shr:1
\n
"
"v_add_f32 %0 %0 %0 row_shr:1
\n
"
...
@@ -245,8 +245,7 @@ void reduce_standard_impl(hipStream_t stream,
...
@@ -245,8 +245,7 @@ void reduce_standard_impl(hipStream_t stream,
T
init
,
T
init
,
Input
read_input
,
Input
read_input
,
Output
read_output
,
Output
read_output
,
std
::
size_t
relements
,
std
::
size_t
relements
)
std
::
size_t
stride
)
{
{
hip_visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
hip_visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
auto
nelements
=
result
.
get_shape
().
elements
();
auto
nelements
=
result
.
get_shape
().
elements
();
...
@@ -255,7 +254,7 @@ void reduce_standard_impl(hipStream_t stream,
...
@@ -255,7 +254,7 @@ void reduce_standard_impl(hipStream_t stream,
const
std
::
size_t
block_size
=
compute_block_size
(
relements
,
max_block_size
);
const
std
::
size_t
block_size
=
compute_block_size
(
relements
,
max_block_size
);
gs_launch
(
stream
,
nelements
*
block_size
,
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
gs_launch
(
stream
,
nelements
*
block_size
,
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
const
auto
out_idx
=
i
/
block_size
;
const
auto
out_idx
=
i
/
block_size
;
const
auto
base_idx
=
out_idx
*
stride
;
const
auto
base_idx
=
out_idx
*
relements
;
auto
r
=
block_reduce
<
max_block_size
>
(
idx
,
op
,
init
,
relements
,
[
&
](
auto
j
)
__device__
{
auto
r
=
block_reduce
<
max_block_size
>
(
idx
,
op
,
init
,
relements
,
[
&
](
auto
j
)
__device__
{
return
read_input
(
input
.
data
()[
base_idx
+
j
]);
return
read_input
(
input
.
data
()[
base_idx
+
j
]);
});
});
...
@@ -276,25 +275,15 @@ void reduce(hipStream_t stream,
...
@@ -276,25 +275,15 @@ void reduce(hipStream_t stream,
{
{
auto
&&
output_shape
=
result
.
get_shape
();
auto
&&
output_shape
=
result
.
get_shape
();
auto
&&
input_shape
=
arg
.
get_shape
();
auto
&&
input_shape
=
arg
.
get_shape
();
assert
(
output_shape
.
lens
().
size
()
==
input_shape
.
lens
().
size
());
if
(
input_shape
.
standard
()
and
output_shape
.
standard
()
and
if
(
input_shape
.
standard
()
and
output_shape
.
standard
()
and
output_shape
.
lens
().
back
()
!=
input_shape
.
lens
().
back
()
and
output_shape
.
lens
().
back
()
!=
input_shape
.
lens
().
back
()
and
std
::
equal
(
output_shape
.
lens
().
begin
(),
std
::
equal
(
output_shape
.
lens
().
begin
(),
std
::
prev
(
output_shape
.
lens
().
end
()),
std
::
prev
(
output_shape
.
lens
().
end
()),
input_shape
.
lens
().
begin
()))
input_shape
.
lens
().
begin
()))
{
{
std
::
size_t
stride
=
std
::
accumulate
(
input_shape
.
strides
().
begin
(),
reduce_standard_impl
(
input_shape
.
strides
().
end
(),
stream
,
result
,
arg
,
op
,
init
,
read_input
,
read_output
,
input_shape
.
lens
().
back
());
1
,
std
::
multiplies
<
size_t
>
());
reduce_standard_impl
(
stream
,
result
,
arg
,
op
,
init
,
read_input
,
read_output
,
input_shape
.
lens
().
back
(),
stride
);
}
}
else
else
{
{
...
...
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
98fd5e1d
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
result
[
is
]
=
tidx
/
strides
[
is
];
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
}
}
return
result
;
return
result
;
}
}
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
0 → 100644
View file @
98fd5e1d
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Rearranges the elements of `arg` into `result` for the int8 GEMM.
// Within every 2-D slice spanned by the last two dimensions, the element at
// (i_k, i_m) is moved so that groups of 4 consecutive i_k values become
// adjacent in memory; `lda` is the stride of the second-to-last dimension.
void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg)
{
    auto src_shape        = arg.get_shape();
    auto lens             = src_shape.lens();
    auto k_axis           = lens.size() - 2;
    auto m_axis           = lens.size() - 1;
    std::size_t lda       = src_shape.strides()[k_axis];
    std::size_t mat_elems = lens[k_axis] * lens[m_axis];

    visit_all(result, arg)([&](auto output, auto input) {
        std::size_t total = src_shape.elements();
        auto* dst         = device_cast(output.data());
        auto* src         = device_cast(input.data());

        visit_tensor_size(lens.size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> desc(src_shape);
            // One work item per element of the input.
            gs_launch(stream, total, 256)([=](auto ii) {
                const std::size_t nb = 4;
                auto multi_idx       = desc.multi(ii);
                std::size_t i_m      = multi_idx[m_axis];
                std::size_t i_k      = multi_idx[k_axis];
                // First linear index of the 2-D slice this element belongs to.
                std::size_t slice_base = ii / mat_elems * mat_elems;
                std::size_t src_pos    = i_m + i_k * lda + slice_base;
                std::size_t dst_pos    = i_k % nb + (i_m + (i_k / nb) * lda) * nb + slice_base;
                dst[dst_pos]           = src[src_pos];
            });
        });
    });
}
// Rearranges the elements of `arg` into `result` for the int8 GEMM.
// Work items are enumerated over a shape whose last two dimensions are
// swapped relative to `arg`; groups of 4 consecutive i_k values become
// adjacent in memory. `ldb` is the stride of the last dimension of `arg`.
void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument& arg)
{
    auto src_shape  = arg.get_shape();
    auto lens       = src_shape.lens();
    auto k_axis     = lens.size() - 2;
    auto n_axis     = lens.size() - 1;
    std::size_t ldb = src_shape.strides()[n_axis];

    // Shape used only for index decomposition: last two lengths exchanged.
    auto swapped_lens = lens;
    std::swap(swapped_lens[k_axis], swapped_lens[n_axis]);
    shape index_shape{src_shape.type(), swapped_lens};

    std::size_t mat_elems = lens[k_axis] * lens[n_axis];

    visit_all(result, arg)([&](auto output, auto input) {
        std::size_t total = index_shape.elements();
        auto* dst         = device_cast(output.data());
        auto* src         = device_cast(input.data());

        visit_tensor_size(lens.size(), [&](auto ndim) {
            hip_tensor_descriptor<ndim> desc(index_shape);
            // One work item per element.
            gs_launch(stream, total, 256)([=](auto ii) {
                const std::size_t nb = 4;
                auto multi_idx       = desc.multi(ii);
                std::size_t i_n      = multi_idx[n_axis];
                std::size_t i_k      = multi_idx[k_axis];
                // First linear index of the 2-D slice this element belongs to.
                std::size_t slice_base = ii / mat_elems * mat_elems;
                std::size_t src_pos    = i_n + i_k * ldb + slice_base;
                std::size_t dst_pos    = i_k % nb + (i_n + (i_k / nb) * ldb) * nb + slice_base;
                dst[dst_pos]           = src[src_pos];
            });
        });
    });
}
// Calls hipStreamSynchronize on `stream`.
// The status returned by the HIP call is deliberately discarded.
void sync_stream(hipStream_t stream)
{
    static_cast<void>(hipStreamSynchronize(stream));
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/round.cpp
0 → 100644
View file @
98fd5e1d
#include <migraphx/gpu/device/round.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Element-wise rounding on the device: result[i] = ::round(arg[i]).
void round(hipStream_t stream, const argument& result, const argument& arg)
{
    // Per-element functor applied by nary.
    auto round_element = [](auto x) { return ::round(to_hip_type(x)); };
    nary(stream, result, arg)(round_element);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/sigmoid.cpp
0 → 100644
View file @
98fd5e1d
#include <migraphx/gpu/device/sigmoid.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Element-wise logistic sigmoid on the device: result[i] = 1 / (1 + exp(-arg[i])).
void sigmoid(hipStream_t stream, const argument& result, const argument& arg)
{
    // Per-element functor applied by nary.
    auto sigmoid_element = [](auto x) {
        const auto exp_neg_x = ::exp(to_hip_type(-x));
        return 1.f / (1.f + exp_neg_x);
    };
    nary(stream, result, arg)(sigmoid_element);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/gemm.cpp
View file @
98fd5e1d
...
@@ -233,6 +233,10 @@ argument miopen_gemm::compute(context& ctx,
...
@@ -233,6 +233,10 @@ argument miopen_gemm::compute(context& ctx,
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
if
(
num_matrices
==
1
)
if
(
num_matrices
==
1
)
{
{
// The rocblas_gemm API treats its input and output matrices as
// column-major. To compute C = A * B we therefore actually compute
// C^T = (B^T) * (A^T), which is why args[1] is passed as A and
// args[0] as B when calling rocblas_gemm.
generic_rocblas_gemm
(
as
,
generic_rocblas_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment