Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
704d3b1c
"src/vscode:/vscode.git/clone" did not exist on "e862412f4ace207e00f90b2efa71e513194ed14d"
Unverified
Commit
704d3b1c
authored
Aug 19, 2019
by
mvermeulen
Committed by
GitHub
Aug 19, 2019
Browse files
Merge branch 'develop' into pooling
parents
cffdc27c
3ec62e53
Changes
50
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
598 additions
and
52 deletions
+598
-52
src/rewrite_rnn.cpp
src/rewrite_rnn.cpp
+0
-1
src/targets/cpu/gemm.cpp
src/targets/cpu/gemm.cpp
+26
-19
src/targets/cpu/include/migraphx/cpu/gemm.hpp
src/targets/cpu/include/migraphx/cpu/gemm.hpp
+5
-0
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+143
-18
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+7
-0
src/targets/gpu/convert.cpp
src/targets/gpu/convert.cpp
+24
-0
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
...targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
+2
-2
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+1
-0
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+77
-0
src/targets/gpu/gemm.cpp
src/targets/gpu/gemm.cpp
+4
-0
src/targets/gpu/include/migraphx/gpu/convert.hpp
src/targets/gpu/include/migraphx/gpu/convert.hpp
+8
-8
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
...argets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+22
-0
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+29
-0
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
+40
-0
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+24
-4
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+24
-0
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+47
-0
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
+38
-0
src/targets/gpu/int8_conv_pack.cpp
src/targets/gpu/int8_conv_pack.cpp
+40
-0
src/targets/gpu/int8_gemm_pack.cpp
src/targets/gpu/int8_gemm_pack.cpp
+37
-0
No files found.
src/rewrite_rnn.cpp
View file @
704d3b1c
...
@@ -674,7 +674,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
...
@@ -674,7 +674,6 @@ void rewrite_rnn::apply_lstm(program& prog, instruction_ref ins) const
std
::
vector
<
float
>
ihc_data
(
ihc_shape
.
elements
(),
0.0
);
std
::
vector
<
float
>
ihc_data
(
ihc_shape
.
elements
(),
0.0
);
migraphx
::
shape
pph_shape
{
type
,
{
1
,
3
*
hidden_size
}};
migraphx
::
shape
pph_shape
{
type
,
{
1
,
3
*
hidden_size
}};
std
::
vector
<
float
>
pph_data
(
pph_shape
.
elements
(),
0.0
);
auto
actv_funcs
=
lstm_actv_funcs
(
ins
);
auto
actv_funcs
=
lstm_actv_funcs
(
ins
);
auto
lstm_op
=
any_cast
<
op
::
lstm
>
(
ins
->
get_operator
());
auto
lstm_op
=
any_cast
<
op
::
lstm
>
(
ins
->
get_operator
());
...
...
src/targets/cpu/gemm.cpp
View file @
704d3b1c
...
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
...
@@ -44,13 +44,9 @@ struct is_fast_gemm_type<float> : std::true_type
{
{
};
};
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
void
migemm_impl
(
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
,
std
::
true_type
)
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
,
std
::
true_type
)
{
{
visit_mat
(
amat
,
[
&
](
const
auto
&
a
)
{
visit_mat
(
amat
,
[
&
](
const
auto
&
a
)
{
visit_mat
(
bmat
,
[
&
](
const
auto
&
b
)
{
visit_mat
(
bmat
,
[
&
](
const
auto
&
b
)
{
...
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
...
@@ -66,13 +62,9 @@ void migemm_impl(tensor_view<T> cmat,
});
});
}
}
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
void
migemm_impl
(
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
,
std
::
false_type
)
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
,
std
::
false_type
)
{
{
std
::
size_t
n_dims
=
cmat
.
get_shape
().
lens
().
size
();
std
::
size_t
n_dims
=
cmat
.
get_shape
().
lens
().
size
();
std
::
size_t
dim_0
=
n_dims
-
2
;
std
::
size_t
dim_0
=
n_dims
-
2
;
...
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
...
@@ -95,9 +87,8 @@ void migemm_impl(tensor_view<T> cmat,
});
});
}
}
template
<
class
T
>
template
<
class
T
,
class
F
>
void
migemm_impl
(
void
migemm_impl
(
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
F
alpha
,
F
beta
)
tensor_view
<
T
>
cmat
,
tensor_view
<
T
>
amat
,
tensor_view
<
T
>
bmat
,
float
alpha
,
float
beta
)
{
{
auto
lens
=
amat
.
get_shape
().
lens
();
auto
lens
=
amat
.
get_shape
().
lens
();
bool
batch_mul
=
bool
batch_mul
=
...
@@ -113,13 +104,29 @@ void migemm_impl(
...
@@ -113,13 +104,29 @@ void migemm_impl(
}
}
}
}
void
migemm
(
template
<
class
F
>
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
)
void
migemm_tpl
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
F
alpha
,
F
beta
)
{
{
visit_all
(
c_arg
,
a_arg
,
b_arg
)(
visit_all
(
c_arg
,
a_arg
,
b_arg
)(
[
&
](
auto
cmat
,
auto
amat
,
auto
bmat
)
{
migemm_impl
(
cmat
,
amat
,
bmat
,
alpha
,
beta
);
});
[
&
](
auto
cmat
,
auto
amat
,
auto
bmat
)
{
migemm_impl
(
cmat
,
amat
,
bmat
,
alpha
,
beta
);
});
}
}
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
)
{
migemm_tpl
(
c_arg
,
a_arg
,
b_arg
,
alpha
,
beta
);
}
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
int32_t
alpha
,
int32_t
beta
)
{
migemm_tpl
(
c_arg
,
a_arg
,
b_arg
,
alpha
,
beta
);
}
}
// namespace cpu
}
// namespace cpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
}
// namespace migraphx
src/targets/cpu/include/migraphx/cpu/gemm.hpp
View file @
704d3b1c
...
@@ -10,6 +10,11 @@ namespace cpu {
...
@@ -10,6 +10,11 @@ namespace cpu {
void
migemm
(
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
);
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
float
alpha
,
float
beta
);
void
migemm
(
const
argument
&
c_arg
,
const
argument
&
a_arg
,
const
argument
&
b_arg
,
int32_t
alpha
,
int32_t
beta
);
}
// namespace cpu
}
// namespace cpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/cpu/lowering.cpp
View file @
704d3b1c
...
@@ -4,7 +4,9 @@
...
@@ -4,7 +4,9 @@
#include <migraphx/dfor.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
#include <migraphx/op/leaky_relu.hpp>
...
@@ -216,6 +218,61 @@ struct cpu_convolution
...
@@ -216,6 +218,61 @@ struct cpu_convolution
}
}
};
};
struct
cpu_quant_convolution
{
op
::
quant_convolution
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::quant_convolution"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
auto
output
=
result
.
get
<
int32_t
>
();
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
input
,
auto
weights
)
{
auto
in
=
input
.
get_shape
().
lens
();
auto
in_h
=
in
[
2
];
auto
in_w
=
in
[
3
];
auto
wei
=
weights
.
get_shape
().
lens
();
auto
wei_n
=
wei
[
0
];
auto
wei_c
=
wei
[
1
];
auto
wei_h
=
wei
[
2
];
auto
wei_w
=
wei
[
3
];
par_dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
const
auto
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
auto
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
auto
group_id
=
w
/
(
wei_n
/
op
.
group
);
int32_t
acc
=
0
;
dfor
(
wei_c
,
wei_h
,
wei_w
)([
&
](
std
::
size_t
k
,
std
::
size_t
x
,
std
::
size_t
y
)
{
const
auto
in_x
=
start_x
+
x
;
const
auto
in_y
=
start_y
+
y
;
const
auto
in_ch
=
group_id
*
wei_c
+
k
;
if
(
in_x
>=
0
&&
in_x
<
in_h
&&
in_y
>=
0
&&
in_y
<
in_w
)
{
acc
+=
static_cast
<
int32_t
>
(
input
(
o
,
in_ch
,
in_x
,
in_y
))
*
weights
(
w
,
k
,
x
,
y
);
}
});
output
(
o
,
w
,
i
,
j
)
=
acc
;
});
});
return
result
;
}
};
struct
cpu_im2col
struct
cpu_im2col
{
{
op
::
im2col
op
;
op
::
im2col
op
;
...
@@ -245,17 +302,17 @@ struct cpu_im2col
...
@@ -245,17 +302,17 @@ struct cpu_im2col
const
std
::
size_t
&
stride_h
=
op
.
stride
[
0
];
const
std
::
size_t
&
stride_h
=
op
.
stride
[
0
];
const
std
::
size_t
&
stride_w
=
op
.
stride
[
1
];
const
std
::
size_t
&
stride_w
=
op
.
stride
[
1
];
auto
kdiv2_h
=
kernel_h
/
2
;
long
kdiv2_h
=
long
(
kernel_h
)
/
2
;
auto
kdiv2_w
=
kernel_w
/
2
;
long
kdiv2_w
=
long
(
kernel_w
)
/
2
;
// calculate output sizes
// calculate output sizes
const
std
::
size_t
col_height
=
(
height
-
kernel_h
+
2
*
pad_h
)
/
stride_h
+
1
;
const
std
::
size_t
col_height
=
(
height
-
kernel_h
+
2
*
pad_h
)
/
stride_h
+
1
;
const
std
::
size_t
col_width
=
(
width
-
kernel_w
+
2
*
pad_w
)
/
stride_w
+
1
;
const
std
::
size_t
col_width
=
(
width
-
kernel_w
+
2
*
pad_w
)
/
stride_w
+
1
;
// account for padding for the starting position of the input pixels
// account for padding for the starting position of the input pixels
std
::
size_t
iinput
=
kdiv2_h
-
pad_h
;
long
iinput
=
kdiv2_h
-
long
(
pad_h
)
;
// loop over output pixels (ioutput, joutput)
// loop over output pixels (ioutput, joutput)
for
(
std
::
size_t
ioutput
=
0
;
ioutput
<
col_height
;
ioutput
++
,
iinput
+=
stride_h
)
for
(
std
::
size_t
ioutput
=
0
;
ioutput
<
col_height
;
ioutput
++
,
iinput
+=
stride_h
)
{
{
std
::
size_t
jinput
=
kdiv2_w
-
pad_w
;
long
jinput
=
kdiv2_w
-
long
(
pad_w
)
;
for
(
std
::
size_t
joutput
=
0
;
joutput
<
col_width
;
joutput
++
,
jinput
+=
stride_w
)
for
(
std
::
size_t
joutput
=
0
;
joutput
<
col_width
;
joutput
++
,
jinput
+=
stride_w
)
{
{
// compute linear index for output
// compute linear index for output
...
@@ -264,8 +321,8 @@ struct cpu_im2col
...
@@ -264,8 +321,8 @@ struct cpu_im2col
dfor
(
channels
,
dfor
(
channels
,
kernel_h
,
kernel_h
,
kernel_w
)([
&
](
std
::
size_t
c
,
std
::
size_t
koffset
,
std
::
size_t
loffset
)
{
kernel_w
)([
&
](
std
::
size_t
c
,
std
::
size_t
koffset
,
std
::
size_t
loffset
)
{
auto
idx
=
iinput
+
koffset
-
kdiv2_h
;
auto
idx
=
iinput
+
long
(
koffset
)
-
kdiv2_h
;
auto
jdx
=
jinput
+
loffset
-
kdiv2_w
;
auto
jdx
=
jinput
+
long
(
loffset
)
-
kdiv2_w
;
col
(
ldx
,
p
)
=
((
idx
>=
0
)
&&
(
idx
<
height
)
&&
(
jdx
>=
0
)
&&
(
jdx
<
width
))
col
(
ldx
,
p
)
=
((
idx
>=
0
)
&&
(
idx
<
height
)
&&
(
jdx
>=
0
)
&&
(
jdx
<
width
))
?
input
(
0
,
c
,
idx
,
jdx
)
?
input
(
0
,
c
,
idx
,
jdx
)
:
0
;
:
0
;
...
@@ -433,7 +490,7 @@ struct cpu_gemm
...
@@ -433,7 +490,7 @@ struct cpu_gemm
{
{
argument
result
{
output_shape
};
argument
result
{
output_shape
};
// 3 inputs, it is alpha * A * B + beta * C, then
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrics, and C is
broadcastable to
A * B
// A and B are matric
e
s, and C is
of the same shape as
A * B
if
(
args
.
size
()
==
3
)
if
(
args
.
size
()
==
3
)
{
{
// no need to consider the value of args[2]
// no need to consider the value of args[2]
...
@@ -460,13 +517,79 @@ struct cpu_gemm
...
@@ -460,13 +517,79 @@ struct cpu_gemm
}
}
};
};
struct
cpu_quant_gemm
{
op
::
quant_dot
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::quant_dot"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
if
(
inputs
.
size
()
==
3
)
{
auto
c_shape
=
inputs
.
at
(
2
);
check_shapes
{{
c_shape
}}.
not_broadcasted
();
}
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrices, and C is of the same shape to A * B
// first, convert the args[0] and args[1] from int8_t to int32_t
argument
arg_0
{{
shape
::
int32_type
,
{
args
.
at
(
0
).
get_shape
().
lens
()}}};
argument
arg_1
{{
shape
::
int32_type
,
{
args
.
at
(
1
).
get_shape
().
lens
()}}};
arg_0
.
visit
([
&
](
auto
output
)
{
args
.
at
(
0
).
visit
(
[
&
](
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
});
arg_1
.
visit
([
&
](
auto
output
)
{
args
.
at
(
1
).
visit
(
[
&
](
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
});
if
(
args
.
size
()
==
3
)
{
// no need to consider the value of args[2]
if
(
op
.
beta
==
0
)
{
result
.
visit
([
&
](
auto
output
)
{
std
::
fill
(
output
.
begin
(),
output
.
end
(),
0
);
});
}
else
{
visit_all
(
result
,
args
[
2
])([
&
](
auto
output
,
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
}
migemm
(
result
,
arg_0
,
arg_1
,
op
.
alpha
,
op
.
beta
);
return
result
;
}
// 2 input arguments
migemm
(
result
,
arg_0
,
arg_1
,
op
.
alpha
,
int32_t
{
0
});
return
result
;
}
};
struct
leaky_relu_op
struct
leaky_relu_op
{
{
op
::
leaky_relu
op
;
op
::
leaky_relu
op
;
std
::
string
name
()
const
{
return
"cpu::leaky_relu"
;
}
std
::
string
name
()
const
{
return
"cpu::leaky_relu"
;
}
auto
fcn
()
const
auto
fcn
()
const
{
{
auto
&
a
=
op
.
alpha
;
auto
a
=
op
.
alpha
;
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
x
*
a
;
};
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
x
*
a
;
};
}
}
};
};
...
@@ -477,7 +600,7 @@ struct elu_op
...
@@ -477,7 +600,7 @@ struct elu_op
std
::
string
name
()
const
{
return
"cpu::elu"
;
}
std
::
string
name
()
const
{
return
"cpu::elu"
;
}
auto
fcn
()
const
auto
fcn
()
const
{
{
auto
&
a
=
op
.
alpha
;
auto
a
=
op
.
alpha
;
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
a
*
std
::
expm1
(
x
);
};
return
[
a
](
auto
x
)
{
return
x
>
0
?
x
:
a
*
std
::
expm1
(
x
);
};
}
}
};
};
...
@@ -671,15 +794,17 @@ struct cpu_apply
...
@@ -671,15 +794,17 @@ struct cpu_apply
{
{
apply_map
[
"batch_norm_inference"
]
=
apply_map
[
"batch_norm_inference"
]
=
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"quant_dot"
]
=
extend_op
<
cpu_quant_gemm
,
op
::
quant_dot
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"quant_convolution"
]
=
extend_op
<
cpu_quant_convolution
,
op
::
quant_convolution
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"softmax"
]
=
extend_op
<
cpu_softmax
,
op
::
softmax
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"softmax"
]
=
extend_op
<
cpu_softmax
,
op
::
softmax
>
();
}
}
void
apply
()
void
apply
()
...
...
src/targets/gpu/CMakeLists.txt
View file @
704d3b1c
...
@@ -39,6 +39,7 @@ add_library(migraphx_device
...
@@ -39,6 +39,7 @@ add_library(migraphx_device
device/pad.cpp
device/pad.cpp
device/gather.cpp
device/gather.cpp
device/sub.cpp
device/sub.cpp
device/int8_gemm_pack.cpp
device/div.cpp
device/div.cpp
device/clip.cpp
device/clip.cpp
device/reduce_sum.cpp
device/reduce_sum.cpp
...
@@ -64,8 +65,10 @@ add_library(migraphx_gpu
...
@@ -64,8 +65,10 @@ add_library(migraphx_gpu
target.cpp
target.cpp
lowering.cpp
lowering.cpp
gemm.cpp
gemm.cpp
quant_gemm.cpp
pooling.cpp
pooling.cpp
convolution.cpp
convolution.cpp
quant_convolution.cpp
softmax.cpp
softmax.cpp
logsoftmax.cpp
logsoftmax.cpp
contiguous.cpp
contiguous.cpp
...
@@ -79,12 +82,16 @@ add_library(migraphx_gpu
...
@@ -79,12 +82,16 @@ add_library(migraphx_gpu
elu.cpp
elu.cpp
pad.cpp
pad.cpp
gather.cpp
gather.cpp
convert.cpp
lrn.cpp
lrn.cpp
schedule_model.cpp
schedule_model.cpp
adjust_allocation.cpp
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
clip.cpp
reduce_sum.cpp
reduce_sum.cpp
reduce_mean.cpp
reduce_mean.cpp
int8_gemm_pack.cpp
int8_conv_pack.cpp
)
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/convert.cpp
0 → 100644
View file @
704d3b1c
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_convert
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
inputs
.
pop_back
();
check_shapes
{
inputs
}.
packed
();
return
op
.
compute_shape
(
inputs
);
}
argument
hip_convert
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
convert
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
View file @
704d3b1c
...
@@ -155,8 +155,8 @@ __device__ void dpp_reduce(T& in, Op op)
...
@@ -155,8 +155,8 @@ __device__ void dpp_reduce(T& in, Op op)
__device__
inline
void
dpp_reduce
(
float
&
x
,
sum
)
__device__
inline
void
dpp_reduce
(
float
&
x
,
sum
)
{
{
#ifdef
MIGRAPHX_USE_CLANG_TIDY
#if
def
ined(
MIGRAPHX_USE_CLANG_TIDY
) || defined(CPPCHECK)
(
void
)
x
;
x
=
1
;
#else
#else
__asm__
volatile
(
"s_nop 4
\n
"
__asm__
volatile
(
"s_nop 4
\n
"
"v_add_f32 %0 %0 %0 row_shr:1
\n
"
"v_add_f32 %0 %0 %0 row_shr:1
\n
"
...
...
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
704d3b1c
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
result
[
is
]
=
tidx
/
strides
[
is
];
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
}
}
return
result
;
return
result
;
}
}
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
0 → 100644
View file @
704d3b1c
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
comp_shape
=
arg
.
get_shape
();
auto
out_lens
=
comp_shape
.
lens
();
auto
dim_0
=
out_lens
.
size
()
-
2
;
auto
dim_1
=
out_lens
.
size
()
-
1
;
std
::
size_t
lda
=
comp_shape
.
strides
()[
dim_0
];
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_m
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_m
+
(
i_k
/
nb
)
*
lda
)
*
nb
+
offset
]
=
in_ptr
[
i_m
+
i_k
*
lda
+
offset
];
});
});
});
}
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
trans_shape
=
arg
.
get_shape
();
auto
out_lens
=
trans_shape
.
lens
();
auto
dim_0
=
trans_shape
.
lens
().
size
()
-
2
;
auto
dim_1
=
trans_shape
.
lens
().
size
()
-
1
;
std
::
size_t
ldb
=
trans_shape
.
strides
()[
dim_1
];
auto
wrap_lens
=
out_lens
;
std
::
swap
(
wrap_lens
[
dim_0
],
wrap_lens
[
dim_1
]);
shape
comp_shape
{
trans_shape
.
type
(),
wrap_lens
};
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_n
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_n
+
(
i_k
/
nb
)
*
ldb
)
*
nb
+
offset
]
=
in_ptr
[
i_n
+
i_k
*
ldb
+
offset
];
});
});
});
}
void
sync_stream
(
hipStream_t
stream
)
{
hipStreamSynchronize
(
stream
);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/gemm.cpp
View file @
704d3b1c
...
@@ -233,6 +233,10 @@ argument miopen_gemm::compute(context& ctx,
...
@@ -233,6 +233,10 @@ argument miopen_gemm::compute(context& ctx,
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
if
(
num_matrices
==
1
)
if
(
num_matrices
==
1
)
{
{
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
generic_rocblas_gemm
(
as
,
generic_rocblas_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
...
...
src/targets/gpu/include/migraphx/gpu/convert.hpp
View file @
704d3b1c
...
@@ -3,8 +3,6 @@
...
@@ -3,8 +3,6 @@
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -12,7 +10,7 @@ namespace gpu {
...
@@ -12,7 +10,7 @@ namespace gpu {
struct
context
;
struct
context
;
struct
hip_convert
:
unary_device
<
hip_convert
,
device
::
convert
>
struct
hip_convert
{
{
op
::
convert
op
;
op
::
convert
op
;
...
@@ -22,13 +20,15 @@ struct hip_convert : unary_device<hip_convert, device::convert>
...
@@ -22,13 +20,15 @@ struct hip_convert : unary_device<hip_convert, device::convert>
return
migraphx
::
reflect
(
self
.
op
,
f
);
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
}
hip_convert
(
op
::
convert
oper
)
:
op
(
oper
)
{
}
std
::
string
name
()
const
{
return
"gpu::convert"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
{
inputs
.
pop_back
();
return
shapes
.
size
()
-
1
;
check_shapes
{
inputs
}.
packed
();
return
op
.
compute_shape
(
inputs
);
}
}
};
};
...
...
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
0 → 100644
View file @
704d3b1c
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
0 → 100644
View file @
704d3b1c
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_int8_conv_pack
{
std
::
string
name
()
const
{
return
"gpu::int8_conv_pack"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
0 → 100644
View file @
704d3b1c
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_int8_gemm_pack_a
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_a"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
hip_int8_gemm_pack_b
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_b"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
704d3b1c
...
@@ -34,11 +34,11 @@ Result make_obj(F f, Ts... xs)
...
@@ -34,11 +34,11 @@ Result make_obj(F f, Ts... xs)
auto
status
=
f
(
&
x
,
xs
...);
auto
status
=
f
(
&
x
,
xs
...);
Result
r
{
x
};
Result
r
{
x
};
if
(
status
!=
miopenStatusSuccess
)
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen call failed"
);
MIGRAPHX_THROW
(
"
MAKE_OBJ:
MIOpen call failed"
);
return
r
;
return
r
;
}
}
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
s
)
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
s
,
bool
pack
=
false
)
{
{
auto
t
=
make_obj
<
tensor_descriptor
>
(
&
miopenCreateTensorDescriptor
);
auto
t
=
make_obj
<
tensor_descriptor
>
(
&
miopenCreateTensorDescriptor
);
// Convert to ints
// Convert to ints
...
@@ -49,13 +49,33 @@ inline tensor_descriptor make_tensor(const migraphx::shape& s)
...
@@ -49,13 +49,33 @@ inline tensor_descriptor make_tensor(const migraphx::shape& s)
d
=
miopenFloat
;
d
=
miopenFloat
;
else
if
(
s
.
type
()
==
shape
::
half_type
)
else
if
(
s
.
type
()
==
shape
::
half_type
)
d
=
miopenHalf
;
d
=
miopenHalf
;
else
if
(
s
.
type
()
==
shape
::
int32_type
)
d
=
miopenInt32
;
else
if
(
s
.
type
()
==
shape
::
int8_type
)
{
if
(
pack
)
{
// update the lens and corresponding strides
d
=
miopenInt8x4
;
lens
[
1
]
=
((
lens
[
1
]
+
3
)
/
4
)
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
}
else
{
d
=
miopenInt8
;
}
}
else
else
MIGRAPHX_THROW
(
"Unsupported type"
);
{
MIGRAPHX_THROW
(
"MAKE_TENSOR: unsupported type"
);
}
miopenSetTensorDescriptor
(
t
.
get
(),
d
,
s
.
lens
().
size
(),
lens
.
data
(),
strides
.
data
());
miopenSetTensorDescriptor
(
t
.
get
(),
d
,
s
.
lens
().
size
(),
lens
.
data
(),
strides
.
data
());
return
t
;
return
t
;
}
}
inline
convolution_descriptor
make_conv
(
const
migraphx
::
op
::
convolution
&
op
)
template
<
class
T
>
inline
convolution_descriptor
make_conv
(
const
T
&
op
)
{
{
auto
c
=
make_obj
<
convolution_descriptor
>
(
&
miopenCreateConvolutionDescriptor
);
auto
c
=
make_obj
<
convolution_descriptor
>
(
&
miopenCreateConvolutionDescriptor
);
miopenConvolutionMode_t
c_mode
=
miopenConvolution
;
miopenConvolutionMode_t
c_mode
=
miopenConvolution
;
...
...
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
0 → 100644
View file @
704d3b1c
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
pack_int8_args
{
std
::
string
name
()
const
{
return
"gpu::pack_int8_args"
;
}
void
apply
(
program
&
p
)
const
;
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
0 → 100644
View file @
704d3b1c
#ifndef MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_quant_convolution
{
op
::
quant_convolution
op
;
shared
<
convolution_descriptor
>
cd
;
miopenConvFwdAlgorithm_t
algo
{};
miopenHandle_t
handle
=
nullptr
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
// TODO: Add algo
return
op
::
quant_convolution
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::quant_convolution"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
private:
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
0 → 100644
View file @
704d3b1c
#ifndef MIGRAPHX_GUARD_RTGLIB_QUANT_GEMM_HPP
#define MIGRAPHX_GUARD_RTGLIB_QUANT_GEMM_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/quant_dot.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
rocblas_quant_gemm
{
op
::
quant_dot
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::quant_gemm"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
void
batch_not_transposed
(
const
std
::
vector
<
std
::
size_t
>&
strides
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/int8_conv_pack.cpp
0 → 100644
View file @
704d3b1c
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
miopen_int8_conv_pack
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
standard
();
return
inputs
.
at
(
0
);
}
argument
miopen_int8_conv_pack
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
arg_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
arg_desc_vec4
=
make_tensor
(
args
[
0
].
get_shape
(),
true
);
float
alpha
=
1
;
float
beta
=
0
;
// pack input to vec4 format
auto
status
=
miopenTransformTensor
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
arg_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
arg_desc_vec4
.
get
(),
args
[
1
].
implicit
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"INT8_CONV_PACK: transform input tensor failed"
);
}
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/int8_gemm_pack.cpp
0 → 100644
View file @
704d3b1c
#include <migraphx/gpu/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_int8_gemm_pack_a
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_a
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_a
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
shape
hip_int8_gemm_pack_b
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_b
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_b
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment