Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
50174953
"vscode:/vscode.git/clone" did not exist on "af80021301485595e81867eb000d06d78d4c0669"
Unverified
Commit
50174953
authored
Aug 19, 2019
by
mvermeulen
Committed by
GitHub
Aug 19, 2019
Browse files
Merge branch 'develop' into conv_fusion_fix
parents
8409c08d
3ec62e53
Changes
41
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
673 additions
and
78 deletions
+673
-78
src/include/migraphx/argument.hpp
src/include/migraphx/argument.hpp
+1
-1
src/include/migraphx/op/binary.hpp
src/include/migraphx/op/binary.hpp
+14
-8
src/include/migraphx/op/capture.hpp
src/include/migraphx/op/capture.hpp
+52
-0
src/include/migraphx/op/quant_convolution.hpp
src/include/migraphx/op/quant_convolution.hpp
+79
-0
src/include/migraphx/op/quant_dot.hpp
src/include/migraphx/op/quant_dot.hpp
+92
-0
src/include/migraphx/op/unary.hpp
src/include/migraphx/op/unary.hpp
+21
-13
src/include/migraphx/operators.hpp
src/include/migraphx/operators.hpp
+3
-0
src/include/migraphx/program.hpp
src/include/migraphx/program.hpp
+3
-0
src/include/migraphx/quantization.hpp
src/include/migraphx/quantization.hpp
+8
-0
src/opt/memory_coloring_impl.cpp
src/opt/memory_coloring_impl.cpp
+3
-0
src/program.cpp
src/program.cpp
+2
-1
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+6
-0
src/quantization.cpp
src/quantization.cpp
+114
-26
src/targets/cpu/gemm.cpp
src/targets/cpu/gemm.cpp
+26
-19
src/targets/cpu/include/migraphx/cpu/gemm.hpp
src/targets/cpu/include/migraphx/cpu/gemm.hpp
+5
-0
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+135
-10
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+7
-0
src/targets/gpu/convert.cpp
src/targets/gpu/convert.cpp
+24
-0
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+1
-0
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+77
-0
No files found.
src/include/migraphx/argument.hpp
View file @
50174953
...
@@ -36,7 +36,7 @@ struct argument : raw_data<argument>
...
@@ -36,7 +36,7 @@ struct argument : raw_data<argument>
}
}
/// Provides a raw pointer to the data
/// Provides a raw pointer to the data
std
::
function
<
char
*
()
>
data
;
std
::
function
<
char
*
()
>
data
=
nullptr
;
/// Whether data is available
/// Whether data is available
bool
empty
()
const
{
return
not
data
;
}
bool
empty
()
const
{
return
not
data
;
}
...
...
src/include/migraphx/op/binary.hpp
View file @
50174953
...
@@ -30,23 +30,29 @@ struct binary : op_name<Derived>
...
@@ -30,23 +30,29 @@ struct binary : op_name<Derived>
argument
result
{
output_shape
};
argument
result
{
output_shape
};
auto
s1
=
args
[
0
].
get_shape
();
auto
s1
=
args
[
0
].
get_shape
();
auto
s2
=
args
[
1
].
get_shape
();
auto
s2
=
args
[
1
].
get_shape
();
visit_all
(
result
,
args
[
0
],
args
[
1
])([
&
](
auto
output
,
auto
input1
,
auto
input2
)
{
if
(
s1
==
s2
and
s1
.
packed
())
if
(
s1
==
s2
and
input1
.
get_shape
().
packed
()
and
input2
.
get_shape
().
packed
())
{
{
shape
std_shape
{
s1
.
type
(),
s1
.
lens
()};
argument
std_result
{
std_shape
,
result
.
data
()};
argument
std_arg0
{
std_shape
,
args
[
0
].
data
()};
argument
std_arg1
{
std_shape
,
args
[
1
].
data
()};
visit_all
(
std_result
,
std_arg0
,
std_arg1
)([
&
](
auto
output
,
auto
input1
,
auto
input2
)
{
std
::
transform
(
input1
.
begin
(),
std
::
transform
(
input1
.
begin
(),
input1
.
end
(),
input1
.
end
(),
input2
.
begin
(),
input2
.
begin
(),
output
.
begin
(),
output
.
begin
(),
static_cast
<
const
Derived
&>
(
*
this
).
apply
());
static_cast
<
const
Derived
&>
(
*
this
).
apply
());
});
}
}
else
else
{
{
visit_all
(
result
,
args
[
0
],
args
[
1
])([
&
](
auto
output
,
auto
input1
,
auto
input2
)
{
shape_for_each
(
output
.
get_shape
(),
[
&
](
const
auto
&
idx
)
{
shape_for_each
(
output
.
get_shape
(),
[
&
](
const
auto
&
idx
)
{
output
(
idx
.
begin
(),
idx
.
end
())
=
static_cast
<
const
Derived
&>
(
*
this
).
apply
()(
output
(
idx
.
begin
(),
idx
.
end
())
=
static_cast
<
const
Derived
&>
(
*
this
).
apply
()(
input1
(
idx
.
begin
(),
idx
.
end
()),
input2
(
idx
.
begin
(),
idx
.
end
()));
input1
(
idx
.
begin
(),
idx
.
end
()),
input2
(
idx
.
begin
(),
idx
.
end
()));
});
});
}
});
});
}
return
result
;
return
result
;
}
}
...
...
src/include/migraphx/op/capture.hpp
0 → 100644
View file @
50174953
#ifndef MIGRAPHX_GUARD_OPERATORS_CAPTURE_HPP
#define MIGRAPHX_GUARD_OPERATORS_CAPTURE_HPP
#include <array>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
struct
capture
{
std
::
size_t
ins_index
;
std
::
function
<
void
(
std
::
size_t
ins_index
,
std
::
vector
<
argument
>
)
>
f
{};
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
ins_index
,
"ins_index"
));
}
std
::
string
name
()
const
{
return
"capture"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
return
inputs
.
front
();
}
argument
compute
(
const
shape
&
,
std
::
vector
<
argument
>
args
)
const
{
if
(
f
)
{
f
(
ins_index
,
args
);
}
else
{
MIGRAPHX_THROW
(
"CAPTURE: callback function is not callable!"
);
}
return
args
.
front
();
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/include/migraphx/op/quant_convolution.hpp
0 → 100644
View file @
50174953
#ifndef MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_OPERATORS_QUANT_CONVOLUTION_HPP
#include <array>
#include <migraphx/op/common.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
struct
quant_convolution
{
std
::
array
<
std
::
size_t
,
2
>
padding
=
{{
0
,
0
}};
std
::
array
<
std
::
size_t
,
2
>
stride
=
{{
1
,
1
}};
std
::
array
<
std
::
size_t
,
2
>
dilation
=
{{
1
,
1
}};
padding_mode_t
padding_mode
=
default_
;
int
group
=
1
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
self
.
padding
,
"padding"
),
f
(
self
.
stride
,
"stride"
),
f
(
self
.
dilation
,
"dilation"
),
f
(
self
.
padding_mode
,
"padding_mode"
),
f
(
self
.
group
,
"group"
));
}
std
::
string
name
()
const
{
return
"quant_convolution"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
same_type
().
same_ndims
().
only_dims
(
4
);
const
shape
&
input
=
inputs
.
at
(
0
);
const
shape
&
weights
=
inputs
.
at
(
1
);
auto
t
=
input
.
type
();
// all input type must be int8_type and output is float_type
if
(
t
!=
shape
::
int8_type
)
{
MIGRAPHX_THROW
(
"QUANT_CONVOLUTION: only accept input and weights of type int8_t"
);
}
t
=
shape
::
int32_type
;
return
{
t
,
{
input
.
lens
()[
0
],
weights
.
lens
()[
0
],
std
::
size_t
(
std
::
max
<
std
::
ptrdiff_t
>
(
1
,
(
input
.
lens
()[
2
]
-
(
1
+
dilation
[
0
]
*
(
weights
.
lens
()[
2
]
-
1
))
+
2
*
padding
[
0
])
/
stride
[
0
]
+
1
)),
std
::
size_t
(
std
::
max
<
std
::
ptrdiff_t
>
(
1
,
(
input
.
lens
()[
3
]
-
(
1
+
dilation
[
1
]
*
(
weights
.
lens
()[
3
]
-
1
))
+
2
*
padding
[
1
])
/
stride
[
1
]
+
1
)),
}};
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/include/migraphx/op/quant_dot.hpp
0 → 100644
View file @
50174953
#ifndef MIGRAPHX_GUARD_OPERATORS_QUANT_DOT_HPP
#define MIGRAPHX_GUARD_OPERATORS_QUANT_DOT_HPP
#include <array>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
op
{
struct
quant_dot
{
int32_t
alpha
=
1
;
int32_t
beta
=
1
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
pack
(
f
(
as_number
(
self
.
alpha
),
"alpha"
),
f
(
as_number
(
self
.
beta
),
"beta"
));
}
std
::
string
name
()
const
{
return
"quant_dot"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
),
inputs
.
at
(
1
)},
*
this
}.
same_type
();
const
shape
&
a
=
inputs
.
at
(
0
);
const
shape
&
b
=
inputs
.
at
(
1
);
auto
t
=
a
.
type
();
if
(
t
!=
shape
::
int8_type
)
{
MIGRAPHX_THROW
(
"QUANT_DOT: only support data type int8_t"
);
}
if
(
!
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[](
auto
s
)
{
return
s
.
lens
().
size
()
>=
2
;
}))
{
MIGRAPHX_THROW
(
"QUANT_DOT: dot only accept 2 or more dims operands"
);
}
// only handle the case that the batch size of a and b are the same
if
(
!
std
::
equal
(
a
.
lens
().
rbegin
()
+
2
,
a
.
lens
().
rend
(),
b
.
lens
().
rbegin
()
+
2
,
b
.
lens
().
rend
()))
{
MIGRAPHX_THROW
(
"QUANT_DOT: batch size of A and B mismatch: {"
+
to_string_range
(
a
.
lens
())
+
"} x {"
+
to_string_range
(
b
.
lens
())
+
"}"
);
}
std
::
size_t
dim_0
=
a
.
lens
().
size
()
-
2
;
std
::
size_t
dim_1
=
a
.
lens
().
size
()
-
1
;
if
(
a
.
lens
()[
dim_1
]
!=
b
.
lens
()[
dim_0
])
{
MIGRAPHX_THROW
(
"QUANT_DOT: inner dimensions do not match: {"
+
to_string_range
(
a
.
lens
())
+
"} x {"
+
to_string_range
(
b
.
lens
())
+
"}"
);
}
// k be multiple of 4
if
((
a
.
lens
()[
dim_1
]
%
4
)
!=
0
)
{
MIGRAPHX_THROW
(
"QUANT_DOT: size of A {"
+
to_string_range
(
a
.
lens
())
+
"} and B {"
+
to_string_range
(
b
.
lens
())
+
"} must be multiple of 4 for int8 type"
);
}
auto
out_lens
=
a
.
lens
();
out_lens
[
dim_1
]
=
b
.
lens
()[
dim_1
];
if
(
inputs
.
size
()
==
3
&&
out_lens
!=
inputs
.
at
(
2
).
lens
())
{
MIGRAPHX_THROW
(
"QUANT_DOT: dimension mismatch, operand C: {"
+
to_string_range
(
inputs
.
at
(
2
).
lens
())
+
"}, cannot add to operand A * B: {"
+
to_string_range
(
out_lens
)
+
"}"
);
}
if
(
inputs
.
size
()
==
3
&&
inputs
.
at
(
2
).
type
()
!=
shape
::
int32_type
)
{
MIGRAPHX_THROW
(
"QUANT_DOT: operand C type must be int32"
);
}
return
{
shape
::
int32_type
,
out_lens
};
}
};
}
// namespace op
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/include/migraphx/op/unary.hpp
View file @
50174953
...
@@ -27,26 +27,34 @@ struct unary : op_name<Derived>
...
@@ -27,26 +27,34 @@ struct unary : op_name<Derived>
argument
compute
(
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
argument
compute
(
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
{
argument
result
{
output_shape
};
argument
result
{
output_shape
};
result
.
visit
([
&
](
auto
output
)
{
auto
in_shape
=
args
[
0
].
get_shape
();
args
[
0
].
visit
([
&
](
auto
input
)
{
if
(
in_shape
.
packed
())
if
(
input
.
get_shape
().
packed
())
{
{
shape
std_in_shape
{
in_shape
.
type
(),
in_shape
.
lens
()};
shape
std_out_shape
{
output_shape
.
type
(),
output_shape
.
lens
()};
argument
arg_in
{
std_in_shape
,
args
[
0
].
data
()};
argument
arg_out
{
std_out_shape
,
result
.
data
()};
arg_out
.
visit
([
&
](
auto
output
)
{
arg_in
.
visit
([
&
](
auto
input
)
{
std
::
transform
(
input
.
begin
(),
std
::
transform
(
input
.
begin
(),
input
.
end
(),
input
.
end
(),
output
.
begin
(),
output
.
begin
(),
static_cast
<
const
Derived
&>
(
*
this
).
apply
());
static_cast
<
const
Derived
&>
(
*
this
).
apply
());
return
result
;
});
});
}
}
else
{
result
.
visit
([
&
](
auto
output
)
{
args
[
0
].
visit
([
&
](
auto
input
)
{
shape_for_each
(
output
.
get_shape
(),
[
&
](
const
auto
&
idx
)
{
shape_for_each
(
output
.
get_shape
(),
[
&
](
const
auto
&
idx
)
{
output
(
idx
.
begin
(),
idx
.
end
())
=
output
(
idx
.
begin
(),
idx
.
end
())
=
static_cast
<
const
Derived
&>
(
*
this
).
apply
()(
static_cast
<
const
Derived
&>
(
*
this
).
apply
()(
input
(
idx
.
begin
(),
idx
.
end
()));
input
(
idx
.
begin
(),
idx
.
end
()));
});
});
return
result
;
});
});
});
});
}
return
result
;
return
result
;
}
}
...
...
src/include/migraphx/operators.hpp
View file @
50174953
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/binary.hpp>
#include <migraphx/op/binary.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/broadcast.hpp>
#include <migraphx/op/capture.hpp>
#include <migraphx/op/clip.hpp>
#include <migraphx/op/clip.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/op/concat.hpp>
#include <migraphx/op/concat.hpp>
...
@@ -45,6 +46,8 @@
...
@@ -45,6 +46,8 @@
#include <migraphx/op/outline.hpp>
#include <migraphx/op/outline.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pad.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/pooling.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/pow.hpp>
#include <migraphx/op/pow.hpp>
#include <migraphx/op/reduce_sum.hpp>
#include <migraphx/op/reduce_sum.hpp>
#include <migraphx/op/reduce_mean.hpp>
#include <migraphx/op/reduce_mean.hpp>
...
...
src/include/migraphx/program.hpp
View file @
50174953
...
@@ -126,6 +126,9 @@ struct program
...
@@ -126,6 +126,9 @@ struct program
friend
bool
operator
==
(
const
program
&
x
,
const
program
&
y
);
friend
bool
operator
==
(
const
program
&
x
,
const
program
&
y
);
friend
bool
operator
!=
(
const
program
&
x
,
const
program
&
y
)
{
return
!
(
x
==
y
);
}
friend
bool
operator
!=
(
const
program
&
x
,
const
program
&
y
)
{
return
!
(
x
==
y
);
}
std
::
shared_ptr
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
int8_quant_params
=
std
::
make_shared
<
std
::
vector
<
std
::
pair
<
float
,
float
>>>
();
private:
private:
void
assign
(
const
program
&
p
);
void
assign
(
const
program
&
p
);
...
...
src/include/migraphx/quantization.hpp
View file @
50174953
...
@@ -15,6 +15,14 @@ struct program;
...
@@ -15,6 +15,14 @@ struct program;
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
);
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
);
void
quantize
(
program
&
prog
);
void
quantize
(
program
&
prog
);
// insert the capture operator for the inputs of each operator to be quantized
// to int8
void
capture_arguments
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
);
void
capture_arguments
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
);
void
capture_arguments
(
program
&
prog
);
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
...
...
src/opt/memory_coloring_impl.cpp
View file @
50174953
...
@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
...
@@ -85,6 +85,9 @@ bool memory_coloring_impl::allocate(interval_ptr interval)
offset
+=
(
element_size
-
(
offset
%
element_size
));
offset
+=
(
element_size
-
(
offset
%
element_size
));
conflict_queue
.
pop
();
conflict_queue
.
pop
();
}
}
// when int8 type is used, the offset could be any number
// if not 4-byte aligned, miopen int8 convolution can crash
offset
=
(
offset
+
3
)
/
4
*
4
;
segment
.
offset
=
offset
;
segment
.
offset
=
offset
;
MIGRAPHX_DEBUG
(
segment
.
dump
());
MIGRAPHX_DEBUG
(
segment
.
dump
());
required_bytes
=
std
::
max
(
required_bytes
,
offset
+
segment
.
size
);
required_bytes
=
std
::
max
(
required_bytes
,
offset
+
segment
.
size
);
...
...
src/program.cpp
View file @
50174953
...
@@ -113,6 +113,7 @@ void program::assign(const program& p)
...
@@ -113,6 +113,7 @@ void program::assign(const program& p)
impl
->
instructions
.
clear
();
impl
->
instructions
.
clear
();
}
}
impl
->
ctx
=
p
.
impl
->
ctx
;
impl
->
ctx
=
p
.
impl
->
ctx
;
int8_quant_params
=
p
.
int8_quant_params
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
ins_map
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
ins_map
;
for
(
auto
ins
:
iterator_for
(
p
))
for
(
auto
ins
:
iterator_for
(
p
))
...
...
src/py/migraphx_py.cpp
View file @
50174953
...
@@ -156,6 +156,7 @@ PYBIND11_MODULE(migraphx, m)
...
@@ -156,6 +156,7 @@ PYBIND11_MODULE(migraphx, m)
py
::
class_
<
migraphx
::
target
>
(
m
,
"target"
);
py
::
class_
<
migraphx
::
target
>
(
m
,
"target"
);
py
::
class_
<
migraphx
::
program
>
(
m
,
"program"
)
py
::
class_
<
migraphx
::
program
>
(
m
,
"program"
)
.
def
(
"clone"
,
[](
migraphx
::
program
&
p
)
{
return
*
(
new
migraphx
::
program
(
p
));
})
.
def
(
"get_parameter_shapes"
,
&
migraphx
::
program
::
get_parameter_shapes
)
.
def
(
"get_parameter_shapes"
,
&
migraphx
::
program
::
get_parameter_shapes
)
.
def
(
"get_shape"
,
&
migraphx
::
program
::
get_shape
)
.
def
(
"get_shape"
,
&
migraphx
::
program
::
get_shape
)
.
def
(
"compile"
,
[](
migraphx
::
program
&
p
,
const
migraphx
::
target
&
t
)
{
p
.
compile
(
t
);
})
.
def
(
"compile"
,
[](
migraphx
::
program
&
p
,
const
migraphx
::
target
&
t
)
{
p
.
compile
(
t
);
})
...
@@ -186,6 +187,11 @@ PYBIND11_MODULE(migraphx, m)
...
@@ -186,6 +187,11 @@ PYBIND11_MODULE(migraphx, m)
migraphx
::
quantize
(
p
,
ins_names
);
migraphx
::
quantize
(
p
,
ins_names
);
});
});
m
.
def
(
"quantize"
,
[](
migraphx
::
program
&
p
)
{
migraphx
::
quantize
(
p
,
{
"all"
});
});
m
.
def
(
"quantize"
,
[](
migraphx
::
program
&
p
)
{
migraphx
::
quantize
(
p
,
{
"all"
});
});
m
.
def
(
"capture_arguments"
,
[](
migraphx
::
program
&
p
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
migraphx
::
capture_arguments
(
p
,
ins_names
);
});
m
.
def
(
"capture_arguments"
,
[](
migraphx
::
program
&
p
)
{
migraphx
::
capture_arguments
(
p
);
});
#ifdef HAVE_GPU
#ifdef HAVE_GPU
m
.
def
(
"allocate_gpu"
,
&
migraphx
::
gpu
::
allocate_gpu
,
py
::
arg
(
"s"
),
py
::
arg
(
"host"
)
=
false
);
m
.
def
(
"allocate_gpu"
,
&
migraphx
::
gpu
::
allocate_gpu
,
py
::
arg
(
"s"
),
py
::
arg
(
"host"
)
=
false
);
...
...
src/quantization.cpp
View file @
50174953
...
@@ -3,32 +3,53 @@
...
@@ -3,32 +3,53 @@
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/mul.hpp>
#include <migraphx/op/add.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/capture.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/multibroadcast.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/ranges.hpp>
#include <utility>
#include <utility>
#include <iomanip>
#include <fstream>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
instruction_ref
insert_
fp16
(
program
&
prog
,
instruction_ref
insert_
quant_ins
(
program
&
prog
,
instruction_ref
&
ins
,
instruction_ref
&
ins
,
shape
::
type_t
type
,
shape
::
type_t
type
,
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>&
map_
fp16
)
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>&
map_
ins
)
{
{
if
(
map_
fp16
.
count
(
ins
)
>
0
)
if
(
map_
ins
.
count
(
ins
)
>
0
)
{
{
return
map_fp16
[
ins
];
return
map_ins
[
ins
];
}
if
(
ins
->
name
()
==
"undefined"
)
{
return
ins
;
}
}
assert
(
ins
->
get_shape
().
type
()
==
shape
::
float_type
||
assert
(
ins
->
get_shape
().
type
()
==
shape
::
float_type
||
ins
->
get_shape
().
type
()
==
shape
::
double_type
);
ins
->
get_shape
().
type
()
==
shape
::
double_type
||
instruction_ref
ins_fp16
{};
ins
->
get_shape
().
type
()
==
shape
::
int32_type
);
ins_fp16
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
type
},
ins
);
instruction_ref
quant_ins
{};
map_fp16
[
ins
]
=
ins_fp16
;
quant_ins
=
prog
.
insert_instruction
(
std
::
next
(
ins
),
op
::
convert
{
type
},
ins
);
map_ins
[
ins
]
=
quant_ins
;
return
ins_fp16
;
return
quant_ins
;
}
}
// This function is to convert any instructions specified in the input
// from double or float to float16 by inserting a convert operator.
// For the conversion, there could be cases of overflowing, but it
// is very rare in the area of deeping learning, so we just do a
// truncate of the input to get the fp16.
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
void
quantize
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
{
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_fp16
;
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
map_fp16
;
...
@@ -59,7 +80,7 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -59,7 +80,7 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
}
}
else
else
{
{
input_fp16
=
insert_
fp16
(
prog
,
input
,
shape
::
half_type
,
map_fp16
);
input_fp16
=
insert_
quant_ins
(
prog
,
input
,
shape
::
half_type
,
map_fp16
);
}
}
converted_inputs
.
push_back
(
input_fp16
);
converted_inputs
.
push_back
(
input_fp16
);
}
}
...
@@ -78,13 +99,6 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -78,13 +99,6 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
auto
op
=
ins
->
get_operator
();
auto
op
=
ins
->
get_operator
();
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
auto
ins_shape
=
compute_shape
(
op
,
converted_inputs
);
if
(
ins_shape
.
type
()
!=
orig_type
)
if
(
ins_shape
.
type
()
!=
orig_type
)
{
// insert another convert instruction to convert it back
if
(
ins
==
std
::
prev
(
prog
.
end
()))
{
prog
.
add_instruction
(
op
::
convert
{
orig_type
},
ins
);
}
else
{
{
// check the dead code case to avoid assert
// check the dead code case to avoid assert
bool
output_empty
=
ins
->
outputs
().
empty
();
bool
output_empty
=
ins
->
outputs
().
empty
();
...
@@ -95,7 +109,6 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -95,7 +109,6 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
prog
.
replace_instruction
(
ins
,
ins_orig_type
);
}
}
}
}
}
prog
.
replace_instruction
(
ins
,
op
,
converted_inputs
);
prog
.
replace_instruction
(
ins
,
op
,
converted_inputs
);
}
}
...
@@ -103,5 +116,80 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
...
@@ -103,5 +116,80 @@ void quantize(program& prog, const std::vector<std::string>& ins_names)
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
void
quantize
(
program
&
prog
)
{
quantize
(
prog
,
{
"all"
});
}
// For the input of each input argument, we need to insert a
// capture operator to compute the scale and shift
void
capture_arguments
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
,
const
std
::
function
<
void
(
std
::
size_t
,
std
::
vector
<
argument
>
)
>&
func
)
{
size_t
num_quant_params
=
0
;
// the int8 quantization only support dot and convolution
std
::
vector
<
std
::
string
>
op_names
=
{
"dot"
,
"convolution"
};
if
(
!
std
::
all_of
(
ins_names
.
begin
(),
ins_names
.
end
(),
[
&
](
auto
name
)
{
return
std
::
find
(
op_names
.
begin
(),
op_names
.
end
(),
name
)
!=
op_names
.
end
();
}))
{
MIGRAPHX_THROW
(
"CAPTURE_ARGUMENTS: input operator is not supported"
);
}
std
::
unordered_map
<
instruction_ref
,
instruction_ref
>
ins_map
;
for
(
auto
ins
:
iterator_for
(
prog
))
{
if
(
not
contains
(
ins_names
,
ins
->
name
()))
{
continue
;
}
auto
inputs
=
ins
->
inputs
();
std
::
vector
<
instruction_ref
>
new_args
;
for
(
auto
input
:
inputs
)
{
instruction_ref
new_ins
{};
if
(
ins_map
.
count
(
input
)
>
0
)
{
new_ins
=
ins_map
[
input
];
}
else
{
new_ins
=
prog
.
insert_instruction
(
std
::
next
(
input
),
op
::
capture
{
num_quant_params
++
,
func
},
input
);
ins_map
[
input
]
=
new_ins
;
}
new_args
.
push_back
(
new_ins
);
}
instruction
::
replace
(
ins
,
ins
->
get_operator
(),
ins
->
get_shape
(),
new_args
);
}
// set one pair of parameter for each argument
prog
.
int8_quant_params
->
resize
(
num_quant_params
,
std
::
make_pair
(
-
1.0
f
,
-
1.0
f
));
}
void
capture_arguments
(
program
&
prog
,
const
std
::
vector
<
std
::
string
>&
ins_names
)
{
auto
calc_quant_params
=
[
&
](
std
::
size_t
ins_index
,
std
::
vector
<
migraphx
::
argument
>
args
)
{
std
::
pair
<
float
,
float
>
param_pair
{
1.0
f
,
0.0
f
};
// scale and shift is need for only int8 type, and we do not
// consider shift, so set shift to 0
std
::
vector
<
float
>
vec_val
;
args
.
front
().
visit
([
&
](
auto
output
)
{
vec_val
.
assign
(
output
.
begin
(),
output
.
end
());
});
auto
max_val
=
*
std
::
max_element
(
vec_val
.
begin
(),
vec_val
.
end
());
auto
min_val
=
*
std
::
min_element
(
vec_val
.
begin
(),
vec_val
.
end
());
auto
max_abs
=
std
::
max
(
std
::
fabs
(
max_val
),
std
::
fabs
(
min_val
));
param_pair
.
first
=
127.0
f
/
max_abs
;
(
*
prog
.
int8_quant_params
)[
ins_index
]
=
param_pair
;
};
capture_arguments
(
prog
,
ins_names
,
calc_quant_params
);
}
void
capture_arguments
(
program
&
prog
)
{
std
::
vector
<
std
::
string
>
ins_names
=
{
"dot"
,
"convolution"
};
capture_arguments
(
prog
,
ins_names
);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/cpu/gemm.cpp
View file @
50174953
...
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
...
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
{
{
};
};
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
void
migemm_impl
(
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
,
std
::
true_type
)
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
,
std
::
true_type
)
{
{
visit_mat
(
amat
,
[
&
](
const
auto
&
a
)
{
visit_mat
(
amat
,
[
&
](
const
auto
&
a
)
{
visit_mat
(
bmat
,
[
&
](
const
auto
&
b
)
{
visit_mat
(
bmat
,
[
&
](
const
auto
&
b
)
{
...
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
...
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
});
});
}
}
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
void
migemm_impl
(
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
,
std
::
false_type
)
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
,
std
::
false_type
)
{
{
std
::
size_t
n_dims
=
cmat
.
get_shape
().
lens
().
size
();
std
::
size_t
n_dims
=
cmat
.
get_shape
().
lens
().
size
();
std
::
size_t
dim_0
=
n_dims
-
2
;
std
::
size_t
dim_0
=
n_dims
-
2
;
...
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
...
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
});
});
}
}
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
)
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
)
{
{
auto
lens
=
amat
.
get_shape
().
lens
();
auto
lens
=
amat
.
get_shape
().
lens
();
bool
batch_mul
=
bool
batch_mul
=
...
@@ -113,13 +104,29 @@ void migemm_impl(
...
@@ -113,13 +104,29 @@ void migemm_impl(
}
}
}
}
void
migemm
(
template
<
class
F
>
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
)
void
migemm_tpl
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
F
alpha
,
F
beta
)
{
{
visit_all
(
c_arg
,
a_arg
,
b_arg
)(
visit_all
(
c_arg
,
a_arg
,
b_arg
)(
[
&
](
auto
cmat
,
auto
amat
,
auto
bmat
)
{
migemm_impl
(
cmat
,
amat
,
bmat
,
alpha
,
beta
);
});
[
&
](
auto
cmat
,
auto
amat
,
auto
bmat
)
{
migemm_impl
(
cmat
,
amat
,
bmat
,
alpha
,
beta
);
});
}
}
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
)
{
migemm_tpl
(
c_arg
,
a_arg
,
b_arg
,
alpha
,
beta
);
}
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
int32_t
alpha
,
int32_t
beta
)
{
migemm_tpl
(
c_arg
,
a_arg
,
b_arg
,
alpha
,
beta
);
}
}
// namespace cpu
}
// namespace cpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/cpu/include/migraphx/cpu/gemm.hpp
View file @
50174953
...
@@ -10,6 +10,11 @@ namespace cpu {
...
@@ -10,6 +10,11 @@ namespace cpu {
void
migemm
(
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
);
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
);
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
int32_t
alpha
,
int32_t
beta
);
}
// namespace cpu
}
// namespace cpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/cpu/lowering.cpp
View file @
50174953
...
@@ -4,7 +4,9 @@
...
@@ -4,7 +4,9 @@
#include <migraphx/dfor.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/leaky_relu.hpp>
...
@@ -216,6 +218,61 @@ struct cpu_convolution
...
@@ -216,6 +218,61 @@ struct cpu_convolution
}
}
};
};
struct
cpu_quant_convolution
{
op
::
quant_convolution
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::quant_convolution"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
auto
output
=
result
.
get
<
int32_t
>
();
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
input
,
auto
weights
)
{
auto
in
=
input
.
get_shape
().
lens
();
auto
in_h
=
in
[
2
];
auto
in_w
=
in
[
3
];
auto
wei
=
weights
.
get_shape
().
lens
();
auto
wei_n
=
wei
[
0
];
auto
wei_c
=
wei
[
1
];
auto
wei_h
=
wei
[
2
];
auto
wei_w
=
wei
[
3
];
par_dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
const
auto
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
auto
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
auto
group_id
=
w
/
(
wei_n
/
op
.
group
);
int32_t
acc
=
0
;
dfor
(
wei_c
,
wei_h
,
wei_w
)([
&
](
std
::
size_t
k
,
std
::
size_t
x
,
std
::
size_t
y
)
{
const
auto
in_x
=
start_x
+
x
;
const
auto
in_y
=
start_y
+
y
;
const
auto
in_ch
=
group_id
*
wei_c
+
k
;
if
(
in_x
>=
0
&&
in_x
<
in_h
&&
in_y
>=
0
&&
in_y
<
in_w
)
{
acc
+=
static_cast
<
int32_t
>
(
input
(
o
,
in_ch
,
in_x
,
in_y
))
*
weights
(
w
,
k
,
x
,
y
);
}
});
output
(
o
,
w
,
i
,
j
)
=
acc
;
});
});
return
result
;
}
};
struct
cpu_im2col
struct
cpu_im2col
{
{
op
::
im2col
op
;
op
::
im2col
op
;
...
@@ -433,7 +490,7 @@ struct cpu_gemm
...
@@ -433,7 +490,7 @@ struct cpu_gemm
{
{
argument
result
{
output_shape
};
argument
result
{
output_shape
};
// 3 inputs, it is alpha * A * B + beta * C, then
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrics, and C is
broadcastable to
A * B
// A and B are matric
e
s, and C is
of the same shape as
A * B
if
(
args
.
size
()
==
3
)
if
(
args
.
size
()
==
3
)
{
{
// no need to consider the value of args[2]
// no need to consider the value of args[2]
...
@@ -460,6 +517,72 @@ struct cpu_gemm
...
@@ -460,6 +517,72 @@ struct cpu_gemm
}
}
};
};
struct
cpu_quant_gemm
{
op
::
quant_dot
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::quant_dot"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
if
(
inputs
.
size
()
==
3
)
{
auto
c_shape
=
inputs
.
at
(
2
);
check_shapes
{{
c_shape
}}.
not_broadcasted
();
}
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrices, and C is of the same shape to A * B
// first, convert the args[0] and args[1] from int8_t to int32_t
argument
arg_0
{{
shape
::
int32_type
,
{
args
.
at
(
0
).
get_shape
().
lens
()}}};
argument
arg_1
{{
shape
::
int32_type
,
{
args
.
at
(
1
).
get_shape
().
lens
()}}};
arg_0
.
visit
([
&
](
auto
output
)
{
args
.
at
(
0
).
visit
(
[
&
](
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
});
arg_1
.
visit
([
&
](
auto
output
)
{
args
.
at
(
1
).
visit
(
[
&
](
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
});
if
(
args
.
size
()
==
3
)
{
// no need to consider the value of args[2]
if
(
op
.
beta
==
0
)
{
result
.
visit
([
&
](
auto
output
)
{
std
::
fill
(
output
.
begin
(),
output
.
end
(),
0
);
});
}
else
{
visit_all
(
result
,
args
[
2
])([
&
](
auto
output
,
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
}
migemm
(
result
,
arg_0
,
arg_1
,
op
.
alpha
,
op
.
beta
);
return
result
;
}
// 2 input arguments
migemm
(
result
,
arg_0
,
arg_1
,
op
.
alpha
,
int32_t
{
0
});
return
result
;
}
};
struct
leaky_relu_op
struct
leaky_relu_op
{
{
op
::
leaky_relu
op
;
op
::
leaky_relu
op
;
...
@@ -673,6 +796,8 @@ struct cpu_apply
...
@@ -673,6 +796,8 @@ struct cpu_apply
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"quant_dot"
]
=
extend_op
<
cpu_quant_gemm
,
op
::
quant_dot
>
();
apply_map
[
"quant_convolution"
]
=
extend_op
<
cpu_quant_convolution
,
op
::
quant_convolution
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
...
...
src/targets/gpu/CMakeLists.txt
View file @
50174953
...
@@ -39,6 +39,7 @@ add_library(migraphx_device
...
@@ -39,6 +39,7 @@ add_library(migraphx_device
device/pad.cpp
device/pad.cpp
device/gather.cpp
device/gather.cpp
device/sub.cpp
device/sub.cpp
device/int8_gemm_pack.cpp
device/div.cpp
device/div.cpp
device/clip.cpp
device/clip.cpp
device/reduce_sum.cpp
device/reduce_sum.cpp
...
@@ -64,8 +65,10 @@ add_library(migraphx_gpu
...
@@ -64,8 +65,10 @@ add_library(migraphx_gpu
target.cpp
target.cpp
lowering.cpp
lowering.cpp
gemm.cpp
gemm.cpp
quant_gemm.cpp
pooling.cpp
pooling.cpp
convolution.cpp
convolution.cpp
quant_convolution.cpp
softmax.cpp
softmax.cpp
logsoftmax.cpp
logsoftmax.cpp
contiguous.cpp
contiguous.cpp
...
@@ -79,12 +82,16 @@ add_library(migraphx_gpu
...
@@ -79,12 +82,16 @@ add_library(migraphx_gpu
elu.cpp
elu.cpp
pad.cpp
pad.cpp
gather.cpp
gather.cpp
convert.cpp
lrn.cpp
lrn.cpp
schedule_model.cpp
schedule_model.cpp
adjust_allocation.cpp
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
clip.cpp
reduce_sum.cpp
reduce_sum.cpp
reduce_mean.cpp
reduce_mean.cpp
int8_gemm_pack.cpp
int8_conv_pack.cpp
)
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/convert.cpp
0 → 100644
View file @
50174953
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_convert
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
inputs
.
pop_back
();
check_shapes
{
inputs
}.
packed
();
return
op
.
compute_shape
(
inputs
);
}
argument
hip_convert
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
convert
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
50174953
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
result
[
is
]
=
tidx
/
strides
[
is
];
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
}
}
return
result
;
return
result
;
}
}
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
0 → 100644
View file @
50174953
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
comp_shape
=
arg
.
get_shape
();
auto
out_lens
=
comp_shape
.
lens
();
auto
dim_0
=
out_lens
.
size
()
-
2
;
auto
dim_1
=
out_lens
.
size
()
-
1
;
std
::
size_t
lda
=
comp_shape
.
strides
()[
dim_0
];
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_m
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_m
+
(
i_k
/
nb
)
*
lda
)
*
nb
+
offset
]
=
in_ptr
[
i_m
+
i_k
*
lda
+
offset
];
});
});
});
}
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
trans_shape
=
arg
.
get_shape
();
auto
out_lens
=
trans_shape
.
lens
();
auto
dim_0
=
trans_shape
.
lens
().
size
()
-
2
;
auto
dim_1
=
trans_shape
.
lens
().
size
()
-
1
;
std
::
size_t
ldb
=
trans_shape
.
strides
()[
dim_1
];
auto
wrap_lens
=
out_lens
;
std
::
swap
(
wrap_lens
[
dim_0
],
wrap_lens
[
dim_1
]);
shape
comp_shape
{
trans_shape
.
type
(),
wrap_lens
};
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_n
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_n
+
(
i_k
/
nb
)
*
ldb
)
*
nb
+
offset
]
=
in_ptr
[
i_n
+
i_k
*
ldb
+
offset
];
});
});
});
}
void
sync_stream
(
hipStream_t
stream
)
{
hipStreamSynchronize
(
stream
);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment