Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
00b0396b
"vscode:/vscode.git/clone" did not exist on "f41abee544d751b4b3ce44b3deda2f9777a3f1b3"
Commit
00b0396b
authored
Aug 19, 2019
by
Khalique
Browse files
adjust tests
parents
78bcbe2c
0628e570
Changes
51
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
669 additions
and
45 deletions
+669
-45
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+135
-10
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+7
-0
src/targets/gpu/convert.cpp
src/targets/gpu/convert.cpp
+24
-0
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
+6
-0
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
...targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
+85
-21
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+1
-0
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+77
-0
src/targets/gpu/fuse_ops.cpp
src/targets/gpu/fuse_ops.cpp
+0
-2
src/targets/gpu/gemm.cpp
src/targets/gpu/gemm.cpp
+4
-0
src/targets/gpu/include/migraphx/gpu/convert.hpp
src/targets/gpu/include/migraphx/gpu/convert.hpp
+8
-8
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
...argets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+22
-0
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+29
-0
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
+40
-0
src/targets/gpu/include/migraphx/gpu/miopen.hpp
src/targets/gpu/include/migraphx/gpu/miopen.hpp
+24
-4
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+24
-0
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+47
-0
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
+38
-0
src/targets/gpu/int8_conv_pack.cpp
src/targets/gpu/int8_conv_pack.cpp
+40
-0
src/targets/gpu/int8_gemm_pack.cpp
src/targets/gpu/int8_gemm_pack.cpp
+37
-0
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+21
-0
No files found.
src/targets/cpu/lowering.cpp
View file @
00b0396b
...
...
@@ -4,7 +4,9 @@
#include <migraphx/dfor.hpp>
#include <migraphx/op/batch_norm.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/elu.hpp>
#include <migraphx/op/im2col.hpp>
#include <migraphx/op/leaky_relu.hpp>
...
...
@@ -216,6 +218,61 @@ struct cpu_convolution
}
};
struct
cpu_quant_convolution
{
op
::
quant_convolution
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::quant_convolution"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
shape
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
auto
output
=
result
.
get
<
int32_t
>
();
visit_all
(
args
[
0
],
args
[
1
])([
&
](
auto
input
,
auto
weights
)
{
auto
in
=
input
.
get_shape
().
lens
();
auto
in_h
=
in
[
2
];
auto
in_w
=
in
[
3
];
auto
wei
=
weights
.
get_shape
().
lens
();
auto
wei_n
=
wei
[
0
];
auto
wei_c
=
wei
[
1
];
auto
wei_h
=
wei
[
2
];
auto
wei_w
=
wei
[
3
];
par_dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
[
&
](
std
::
size_t
o
,
std
::
size_t
w
,
std
::
size_t
i
,
std
::
size_t
j
)
{
const
auto
start_x
=
i
*
op
.
stride
[
0
]
-
op
.
padding
[
0
];
const
auto
start_y
=
j
*
op
.
stride
[
1
]
-
op
.
padding
[
1
];
const
auto
group_id
=
w
/
(
wei_n
/
op
.
group
);
int32_t
acc
=
0
;
dfor
(
wei_c
,
wei_h
,
wei_w
)([
&
](
std
::
size_t
k
,
std
::
size_t
x
,
std
::
size_t
y
)
{
const
auto
in_x
=
start_x
+
x
;
const
auto
in_y
=
start_y
+
y
;
const
auto
in_ch
=
group_id
*
wei_c
+
k
;
if
(
in_x
>=
0
&&
in_x
<
in_h
&&
in_y
>=
0
&&
in_y
<
in_w
)
{
acc
+=
static_cast
<
int32_t
>
(
input
(
o
,
in_ch
,
in_x
,
in_y
))
*
weights
(
w
,
k
,
x
,
y
);
}
});
output
(
o
,
w
,
i
,
j
)
=
acc
;
});
});
return
result
;
}
};
struct
cpu_im2col
{
op
::
im2col
op
;
...
...
@@ -433,7 +490,7 @@ struct cpu_gemm
{
argument
result
{
output_shape
};
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrics, and C is
broadcastable to
A * B
// A and B are matric
e
s, and C is
of the same shape as
A * B
if
(
args
.
size
()
==
3
)
{
// no need to consider the value of args[2]
...
...
@@ -460,6 +517,72 @@ struct cpu_gemm
}
};
struct
cpu_quant_gemm
{
op
::
quant_dot
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::quant_dot"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
if
(
inputs
.
size
()
==
3
)
{
auto
c_shape
=
inputs
.
at
(
2
);
check_shapes
{{
c_shape
}}.
not_broadcasted
();
}
return
op
.
compute_shape
(
inputs
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
// 3 inputs, it is alpha * A * B + beta * C, then
// A and B are matrices, and C is of the same shape to A * B
// first, convert the args[0] and args[1] from int8_t to int32_t
argument
arg_0
{{
shape
::
int32_type
,
{
args
.
at
(
0
).
get_shape
().
lens
()}}};
argument
arg_1
{{
shape
::
int32_type
,
{
args
.
at
(
1
).
get_shape
().
lens
()}}};
arg_0
.
visit
([
&
](
auto
output
)
{
args
.
at
(
0
).
visit
(
[
&
](
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
});
arg_1
.
visit
([
&
](
auto
output
)
{
args
.
at
(
1
).
visit
(
[
&
](
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
});
if
(
args
.
size
()
==
3
)
{
// no need to consider the value of args[2]
if
(
op
.
beta
==
0
)
{
result
.
visit
([
&
](
auto
output
)
{
std
::
fill
(
output
.
begin
(),
output
.
end
(),
0
);
});
}
else
{
visit_all
(
result
,
args
[
2
])([
&
](
auto
output
,
auto
input
)
{
std
::
copy
(
input
.
begin
(),
input
.
end
(),
output
.
begin
());
});
}
migemm
(
result
,
arg_0
,
arg_1
,
op
.
alpha
,
op
.
beta
);
return
result
;
}
// 2 input arguments
migemm
(
result
,
arg_0
,
arg_1
,
op
.
alpha
,
int32_t
{
0
});
return
result
;
}
};
struct
leaky_relu_op
{
op
::
leaky_relu
op
;
...
...
@@ -671,15 +794,17 @@ struct cpu_apply
{
apply_map
[
"batch_norm_inference"
]
=
extend_op
<
cpu_batch_norm_inference
,
op
::
batch_norm_inference
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"softmax"
]
=
extend_op
<
cpu_softmax
,
op
::
softmax
>
();
apply_map
[
"convolution"
]
=
extend_op
<
cpu_convolution
,
op
::
convolution
>
();
apply_map
[
"dot"
]
=
extend_op
<
cpu_gemm
,
op
::
dot
>
();
apply_map
[
"quant_dot"
]
=
extend_op
<
cpu_quant_gemm
,
op
::
quant_dot
>
();
apply_map
[
"quant_convolution"
]
=
extend_op
<
cpu_quant_convolution
,
op
::
quant_convolution
>
();
apply_map
[
"elu"
]
=
extend_op
<
cpu_unary
<
elu_op
>
,
op
::
elu
>
();
apply_map
[
"im2col"
]
=
extend_op
<
cpu_im2col
,
op
::
im2col
>
();
apply_map
[
"leaky_relu"
]
=
extend_op
<
cpu_unary
<
leaky_relu_op
>
,
op
::
leaky_relu
>
();
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"softmax"
]
=
extend_op
<
cpu_softmax
,
op
::
softmax
>
();
}
void
apply
()
...
...
src/targets/gpu/CMakeLists.txt
View file @
00b0396b
...
...
@@ -39,6 +39,7 @@ add_library(migraphx_device
device/pad.cpp
device/gather.cpp
device/sub.cpp
device/int8_gemm_pack.cpp
device/div.cpp
device/clip.cpp
device/reduce_sum.cpp
...
...
@@ -64,8 +65,10 @@ add_library(migraphx_gpu
target.cpp
lowering.cpp
gemm.cpp
quant_gemm.cpp
pooling.cpp
convolution.cpp
quant_convolution.cpp
softmax.cpp
logsoftmax.cpp
contiguous.cpp
...
...
@@ -79,12 +82,16 @@ add_library(migraphx_gpu
elu.cpp
pad.cpp
gather.cpp
convert.cpp
lrn.cpp
schedule_model.cpp
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
reduce_sum.cpp
reduce_mean.cpp
int8_gemm_pack.cpp
int8_conv_pack.cpp
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/convert.cpp
0 → 100644
View file @
00b0396b
#include <migraphx/gpu/convert.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_convert
::
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
{
inputs
.
pop_back
();
check_shapes
{
inputs
}.
packed
();
return
op
.
compute_shape
(
inputs
);
}
argument
hip_convert
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
convert
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
View file @
00b0396b
...
...
@@ -16,6 +16,12 @@ struct hip_array
MIGRAPHX_DEVICE_CONSTEXPR
T
&
operator
[](
std
::
size_t
i
)
{
return
d
[
i
];
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
&
operator
[](
std
::
size_t
i
)
const
{
return
d
[
i
];
}
MIGRAPHX_DEVICE_CONSTEXPR
T
&
front
()
{
return
d
[
0
];
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
&
front
()
const
{
return
d
[
0
];
}
MIGRAPHX_DEVICE_CONSTEXPR
T
&
back
()
{
return
d
[
N
-
1
];
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
&
back
()
const
{
return
d
[
N
-
1
];
}
MIGRAPHX_DEVICE_CONSTEXPR
T
*
data
()
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
*
data
()
const
{
return
d
;
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
View file @
00b0396b
...
...
@@ -209,28 +209,15 @@ constexpr std::size_t compute_block_size(std::size_t n, std::size_t max_block_si
}
template
<
class
Op
,
class
T
,
class
Input
,
class
Output
>
void
reduce
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
Op
op
,
T
init
,
Input
read_input
,
Output
read_output
)
void
reduce_multi_impl
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
Op
op
,
T
init
,
Input
read_input
,
Output
read_output
,
const
shape
&
reduce_slice
)
{
auto
&&
output_shape
=
result
.
get_shape
();
auto
&&
input_shape
=
arg
.
get_shape
();
std
::
vector
<
std
::
size_t
>
reduce_lens
;
std
::
transform
(
output_shape
.
lens
().
begin
(),
output_shape
.
lens
().
end
(),
input_shape
.
lens
().
begin
(),
std
::
back_inserter
(
reduce_lens
),
[](
auto
x
,
auto
y
)
->
std
::
size_t
{
if
(
x
==
y
)
return
1
;
else
return
y
;
});
shape
reduce_slice
{
output_shape
.
type
(),
reduce_lens
};
hip_visit_all
(
result
,
arg
,
reduce_slice
)([
&
](
auto
output
,
auto
input
,
auto
reduce_shape
)
{
auto
nelements
=
result
.
get_shape
().
elements
();
auto
relements
=
reduce_slice
.
elements
();
...
...
@@ -250,6 +237,83 @@ void reduce(hipStream_t stream,
});
}
template
<
class
Op
,
class
T
,
class
Input
,
class
Output
>
void
reduce_standard_impl
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
Op
op
,
T
init
,
Input
read_input
,
Output
read_output
,
std
::
size_t
relements
,
std
::
size_t
stride
)
{
hip_visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
auto
nelements
=
result
.
get_shape
().
elements
();
const
std
::
size_t
max_block_size
=
256
;
const
std
::
size_t
block_size
=
compute_block_size
(
relements
,
max_block_size
);
gs_launch
(
stream
,
nelements
*
block_size
,
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
const
auto
out_idx
=
i
/
block_size
;
const
auto
base_idx
=
out_idx
*
stride
;
auto
r
=
block_reduce
<
max_block_size
>
(
idx
,
op
,
init
,
relements
,
[
&
](
auto
j
)
__device__
{
return
read_input
(
input
.
data
()[
base_idx
+
j
]);
});
if
(
idx
.
local
==
0
)
output
.
data
()[
out_idx
]
=
read_output
(
r
);
});
});
}
template
<
class
Op
,
class
T
,
class
Input
,
class
Output
>
void
reduce
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
Op
op
,
T
init
,
Input
read_input
,
Output
read_output
)
{
auto
&&
output_shape
=
result
.
get_shape
();
auto
&&
input_shape
=
arg
.
get_shape
();
if
(
input_shape
.
standard
()
and
output_shape
.
standard
()
and
output_shape
.
lens
().
back
()
!=
input_shape
.
lens
().
back
()
and
std
::
equal
(
output_shape
.
lens
().
begin
(),
std
::
prev
(
output_shape
.
lens
().
end
()),
input_shape
.
lens
().
begin
()))
{
std
::
size_t
stride
=
std
::
accumulate
(
input_shape
.
strides
().
begin
(),
input_shape
.
strides
().
end
(),
1
,
std
::
multiplies
<
size_t
>
());
reduce_standard_impl
(
stream
,
result
,
arg
,
op
,
init
,
read_input
,
read_output
,
input_shape
.
lens
().
back
(),
stride
);
}
else
{
std
::
vector
<
std
::
size_t
>
reduce_lens
;
std
::
transform
(
output_shape
.
lens
().
begin
(),
output_shape
.
lens
().
end
(),
input_shape
.
lens
().
begin
(),
std
::
back_inserter
(
reduce_lens
),
[](
auto
x
,
auto
y
)
->
std
::
size_t
{
if
(
x
==
y
)
return
1
;
else
return
y
;
});
shape
reduce_slice
{
output_shape
.
type
(),
reduce_lens
};
reduce_multi_impl
(
stream
,
result
,
arg
,
op
,
init
,
read_input
,
read_output
,
reduce_slice
);
}
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
00b0396b
...
...
@@ -31,6 +31,7 @@ struct hip_tensor_descriptor
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
}
return
result
;
}
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
...
...
src/targets/gpu/device/int8_gemm_pack.cpp
0 → 100644
View file @
00b0396b
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
comp_shape
=
arg
.
get_shape
();
auto
out_lens
=
comp_shape
.
lens
();
auto
dim_0
=
out_lens
.
size
()
-
2
;
auto
dim_1
=
out_lens
.
size
()
-
1
;
std
::
size_t
lda
=
comp_shape
.
strides
()[
dim_0
];
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_m
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_m
+
(
i_k
/
nb
)
*
lda
)
*
nb
+
offset
]
=
in_ptr
[
i_m
+
i_k
*
lda
+
offset
];
});
});
});
}
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
trans_shape
=
arg
.
get_shape
();
auto
out_lens
=
trans_shape
.
lens
();
auto
dim_0
=
trans_shape
.
lens
().
size
()
-
2
;
auto
dim_1
=
trans_shape
.
lens
().
size
()
-
1
;
std
::
size_t
ldb
=
trans_shape
.
strides
()[
dim_1
];
auto
wrap_lens
=
out_lens
;
std
::
swap
(
wrap_lens
[
dim_0
],
wrap_lens
[
dim_1
]);
shape
comp_shape
{
trans_shape
.
type
(),
wrap_lens
};
std
::
size_t
m_size
=
out_lens
[
dim_0
]
*
out_lens
[
dim_1
];
visit_all
(
result
,
arg
)([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
comp_shape
.
elements
();
auto
*
out_ptr
=
device_cast
(
output
.
data
());
auto
*
in_ptr
=
device_cast
(
input
.
data
());
visit_tensor_size
(
out_lens
.
size
(),
[
&
](
auto
out_dim
)
{
hip_tensor_descriptor
<
out_dim
>
desc
(
comp_shape
);
gs_launch
(
stream
,
nelements
,
256
)([
=
](
auto
ii
)
{
const
size_t
nb
=
4
;
auto
idx
=
desc
.
multi
(
ii
);
std
::
size_t
i_n
=
idx
[
dim_1
];
std
::
size_t
i_k
=
idx
[
dim_0
];
std
::
size_t
offset
=
ii
/
m_size
*
m_size
;
out_ptr
[
i_k
%
nb
+
(
i_n
+
(
i_k
/
nb
)
*
ldb
)
*
nb
+
offset
]
=
in_ptr
[
i_n
+
i_k
*
ldb
+
offset
];
});
});
});
}
void
sync_stream
(
hipStream_t
stream
)
{
hipStreamSynchronize
(
stream
);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/fuse_ops.cpp
View file @
00b0396b
...
...
@@ -134,8 +134,6 @@ MIGRAPHX_PRED_MATCHER(fusable_conv, instruction_ref ins)
auto
conv
=
any_cast
<
miopen_convolution
>
(
ins
->
get_operator
());
if
(
conv
.
op
.
group
>
1
)
return
false
;
if
(
conv
.
op
.
padding_mode
!=
op
::
padding_mode_t
::
default_
)
return
false
;
if
(
wei
.
lens
()[
1
]
>
512
and
conv
.
algo
!=
miopenConvolutionFwdAlgoWinograd
)
return
false
;
auto
op
=
conv
.
op
;
...
...
src/targets/gpu/gemm.cpp
View file @
00b0396b
...
...
@@ -233,6 +233,10 @@ argument miopen_gemm::compute(context& ctx,
auto
to_pointer
=
[
&
](
auto
&&
arg
)
{
return
to_rocblas_type
(
as
.
from
(
arg
.
data
()));
};
if
(
num_matrices
==
1
)
{
// the rocblas_gemm API handles inputs and output matrices as
// column-major format. When doing a C = A * B, we actually do
// C^T = (B^T) * (A^T). That is the reason we input args[1] as
// A and args[0] as B in calling the rocblas_gemm.
generic_rocblas_gemm
(
as
,
ctx
.
get_stream
().
get_rocblas
(),
transb
?
rocblas_operation_transpose
:
rocblas_operation_none
,
...
...
src/targets/gpu/include/migraphx/gpu/convert.hpp
View file @
00b0396b
...
...
@@ -3,8 +3,6 @@
#include <migraphx/shape.hpp>
#include <migraphx/op/convert.hpp>
#include <migraphx/gpu/oper.hpp>
#include <migraphx/gpu/device/convert.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -12,7 +10,7 @@ namespace gpu {
struct
context
;
struct
hip_convert
:
unary_device
<
hip_convert
,
device
::
convert
>
struct
hip_convert
{
op
::
convert
op
;
...
...
@@ -22,13 +20,15 @@ struct hip_convert : unary_device<hip_convert, device::convert>
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
hip_convert
(
op
::
convert
oper
)
:
op
(
oper
)
{
}
std
::
string
name
()
const
{
return
"gpu::convert"
;
}
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
shape
compute_shape
(
std
::
vector
<
shape
>
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
inputs
.
pop_back
();
check_shapes
{
inputs
}.
packed
();
return
op
.
compute_shape
(
inputs
);
return
shapes
.
size
()
-
1
;
}
};
...
...
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
0 → 100644
View file @
00b0396b
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
int8_gemm_pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
0 → 100644
View file @
00b0396b
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_int8_conv_pack
{
std
::
string
name
()
const
{
return
"gpu::int8_conv_pack"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
0 → 100644
View file @
00b0396b
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_int8_gemm_pack_a
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_a"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
hip_int8_gemm_pack_b
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_b"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/miopen.hpp
View file @
00b0396b
...
...
@@ -34,11 +34,11 @@ Result make_obj(F f, Ts... xs)
auto
status
=
f
(
&
x
,
xs
...);
Result
r
{
x
};
if
(
status
!=
miopenStatusSuccess
)
MIGRAPHX_THROW
(
"MIOpen call failed"
);
MIGRAPHX_THROW
(
"
MAKE_OBJ:
MIOpen call failed"
);
return
r
;
}
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
s
)
inline
tensor_descriptor
make_tensor
(
const
migraphx
::
shape
&
s
,
bool
pack
=
false
)
{
auto
t
=
make_obj
<
tensor_descriptor
>
(
&
miopenCreateTensorDescriptor
);
// Convert to ints
...
...
@@ -49,13 +49,33 @@ inline tensor_descriptor make_tensor(const migraphx::shape& s)
d
=
miopenFloat
;
else
if
(
s
.
type
()
==
shape
::
half_type
)
d
=
miopenHalf
;
else
if
(
s
.
type
()
==
shape
::
int32_type
)
d
=
miopenInt32
;
else
if
(
s
.
type
()
==
shape
::
int8_type
)
{
if
(
pack
)
{
// update the lens and corresponding strides
d
=
miopenInt8x4
;
lens
[
1
]
=
((
lens
[
1
]
+
3
)
/
4
)
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
}
else
{
d
=
miopenInt8
;
}
}
else
MIGRAPHX_THROW
(
"Unsupported type"
);
{
MIGRAPHX_THROW
(
"MAKE_TENSOR: unsupported type"
);
}
miopenSetTensorDescriptor
(
t
.
get
(),
d
,
s
.
lens
().
size
(),
lens
.
data
(),
strides
.
data
());
return
t
;
}
inline
convolution_descriptor
make_conv
(
const
migraphx
::
op
::
convolution
&
op
)
template
<
class
T
>
inline
convolution_descriptor
make_conv
(
const
T
&
op
)
{
auto
c
=
make_obj
<
convolution_descriptor
>
(
&
miopenCreateConvolutionDescriptor
);
miopenConvolutionMode_t
c_mode
=
miopenConvolution
;
...
...
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
0 → 100644
View file @
00b0396b
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
pack_int8_args
{
std
::
string
name
()
const
{
return
"gpu::pack_int8_args"
;
}
void
apply
(
program
&
p
)
const
;
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
0 → 100644
View file @
00b0396b
#ifndef MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#define MIGRAPHX_GUARD_RTGLIB_QUANT_CONVOLUTION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/gpu/miopen.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_quant_convolution
{
op
::
quant_convolution
op
;
shared
<
convolution_descriptor
>
cd
;
miopenConvFwdAlgorithm_t
algo
{};
miopenHandle_t
handle
=
nullptr
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
// TODO: Add algo
return
op
::
quant_convolution
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::quant_convolution"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
shape
compile
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
void
finalize
(
context
&
ctx
,
const
shape
&
output_shape
,
std
::
vector
<
shape
>
inputs
);
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
private:
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
0 → 100644
View file @
00b0396b
#ifndef MIGRAPHX_GUARD_RTGLIB_QUANT_GEMM_HPP
#define MIGRAPHX_GUARD_RTGLIB_QUANT_GEMM_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/quant_dot.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
rocblas_quant_gemm
{
op
::
quant_dot
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"gpu::quant_gemm"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
void
batch_not_transposed
(
const
std
::
vector
<
std
::
size_t
>&
strides
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/int8_conv_pack.cpp
0 → 100644
View file @
00b0396b
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
miopen_int8_conv_pack
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
standard
();
return
inputs
.
at
(
0
);
}
argument
miopen_int8_conv_pack
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
arg_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
arg_desc_vec4
=
make_tensor
(
args
[
0
].
get_shape
(),
true
);
float
alpha
=
1
;
float
beta
=
0
;
// pack input to vec4 format
auto
status
=
miopenTransformTensor
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
arg_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
arg_desc_vec4
.
get
(),
args
[
1
].
implicit
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"INT8_CONV_PACK: transform input tensor failed"
);
}
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/int8_gemm_pack.cpp
0 → 100644
View file @
00b0396b
#include <migraphx/gpu/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_int8_gemm_pack_a
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_a
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_a
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
shape
hip_int8_gemm_pack_b
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_b
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_b
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/lowering.cpp
View file @
00b0396b
...
...
@@ -16,6 +16,7 @@
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/contiguous.hpp>
#include <migraphx/gpu/relu.hpp>
#include <migraphx/gpu/sigmoid.hpp>
...
...
@@ -46,6 +47,7 @@
#include <migraphx/gpu/batchnorm.hpp>
#include <migraphx/gpu/pooling.hpp>
#include <migraphx/gpu/gemm.hpp>
#include <migraphx/gpu/quant_gemm.hpp>
#include <migraphx/gpu/concat.hpp>
#include <migraphx/gpu/pad.hpp>
#include <migraphx/gpu/gather.hpp>
...
...
@@ -58,6 +60,7 @@
#include <migraphx/gpu/reduce_mean.hpp>
#include <migraphx/gpu/pow.hpp>
#include <migraphx/gpu/sqdiff.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <utility>
#include <functional>
#include <algorithm>
...
...
@@ -115,6 +118,7 @@ struct miopen_apply
add_generic_op
<
hip_sign
>
(
"sign"
);
add_extend_op
<
miopen_gemm
,
op
::
dot
>
(
"dot"
);
add_extend_op
<
rocblas_quant_gemm
,
op
::
quant_dot
>
(
"quant_dot"
);
add_extend_op
<
miopen_contiguous
,
op
::
contiguous
>
(
"contiguous"
);
add_extend_op
<
hip_concat
,
op
::
concat
>
(
"concat"
);
add_extend_op
<
hip_softmax
,
op
::
softmax
>
(
"softmax"
);
...
...
@@ -130,6 +134,8 @@ struct miopen_apply
add_lrn_op
();
add_convolution_op
();
add_quant_convolution_op
();
// add_quant_dot_op();
add_pooling_op
();
add_batch_norm_inference_op
();
}
...
...
@@ -176,6 +182,21 @@ struct miopen_apply
});
}
void
add_quant_convolution_op
()
{
apply_map
.
emplace
(
"quant_convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_convolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_quant_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
compile
(
ctx
,
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
args
=
ins
->
inputs
();
auto
workspace
=
insert_allocation
(
ins
,
ws
,
"workspace"
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
prog
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
});
}
void
add_pooling_op
()
{
apply_map
.
emplace
(
"pooling"
,
[
=
](
instruction_ref
ins
)
{
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment