Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
eab3cafb
Commit
eab3cafb
authored
Aug 06, 2019
by
Shucai Xiao
Browse files
merge changes from int8_miopen_call
parents
5edf61dc
93b7fb54
Changes
17
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
370 additions
and
146 deletions
+370
-146
CMakeLists.txt
CMakeLists.txt
+3
-1
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+4
-1
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+4
-4
src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
...argets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+4
-6
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+29
-0
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
+40
-0
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+24
-0
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
+1
-3
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
+2
-1
src/targets/gpu/int8_conv_pack.cpp
src/targets/gpu/int8_conv_pack.cpp
+40
-0
src/targets/gpu/int8_gemm_pack.cpp
src/targets/gpu/int8_gemm_pack.cpp
+37
-0
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+5
-21
src/targets/gpu/pack_int8_args.cpp
src/targets/gpu/pack_int8_args.cpp
+74
-0
src/targets/gpu/quant_convolution.cpp
src/targets/gpu/quant_convolution.cpp
+21
-49
src/targets/gpu/quant_gemm.cpp
src/targets/gpu/quant_gemm.cpp
+31
-30
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+3
-0
test/gpu/miopen.cpp
test/gpu/miopen.cpp
+48
-30
No files found.
CMakeLists.txt
View file @
eab3cafb
...
...
@@ -32,7 +32,9 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
endif
()
endif
()
if
(
CMAKE_CXX_COMPILER MATCHES
".*hcc"
)
include
(
CheckCXXCompilerFlag
)
check_cxx_compiler_flag
(
"--cuda-host-only -x hip"
HAS_HIP
)
if
(
HAS_HIP
)
message
(
STATUS
"Enable miopen backend"
)
set
(
MIGRAPHX_ENABLE_GPU On CACHE BOOL
""
)
else
()
...
...
src/targets/gpu/CMakeLists.txt
View file @
eab3cafb
...
...
@@ -37,7 +37,7 @@ add_library(migraphx_device
device/pad.cpp
device/gather.cpp
device/sub.cpp
device/pack.cpp
device/
int8_gemm_
pack.cpp
device/div.cpp
device/clip.cpp
device/reduce_sum.cpp
...
...
@@ -85,9 +85,12 @@ add_library(migraphx_gpu
lrn.cpp
schedule_model.cpp
adjust_allocation.cpp
pack_int8_args.cpp
clip.cpp
reduce_sum.cpp
reduce_mean.cpp
int8_gemm_pack.cpp
int8_conv_pack.cpp
)
set_target_properties
(
migraphx_gpu PROPERTIES EXPORT_NAME gpu
)
rocm_clang_tidy_check
(
migraphx_gpu
)
...
...
src/targets/gpu/device/pack.cpp
→
src/targets/gpu/device/
int8_gemm_
pack.cpp
View file @
eab3cafb
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/pack.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
...
...
@@ -11,7 +11,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
device
{
void
pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
int8_gemm_
pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
comp_shape
=
arg
.
get_shape
();
auto
out_lens
=
comp_shape
.
lens
();
...
...
@@ -38,7 +38,7 @@ void pack_a(hipStream_t stream, const argument& result, const argument& arg)
});
}
void
pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
void
int8_gemm_
pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
auto
trans_shape
=
arg
.
get_shape
();
auto
out_lens
=
trans_shape
.
lens
();
...
...
src/targets/gpu/include/migraphx/gpu/device/pack.hpp
→
src/targets/gpu/include/migraphx/gpu/device/
int8_gemm_
pack.hpp
View file @
eab3cafb
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_PACK_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_
INT8_GEMM_
PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_
INT8_GEMM_
PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
...
...
@@ -10,11 +10,9 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
device
{
void
pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
int8_gemm_
pack_a
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
void
sync_stream
(
hipStream_t
stream
);
void
int8_gemm_pack_b
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
);
}
// namespace device
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
0 → 100644
View file @
eab3cafb
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
miopen_int8_conv_pack
{
std
::
string
name
()
const
{
return
"gpu::int8_conv_pack"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/int8_gemm_pack.hpp
0 → 100644
View file @
eab3cafb
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
context
;
struct
hip_int8_gemm_pack_a
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_a"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
struct
hip_int8_gemm_pack_b
{
std
::
string
name
()
const
{
return
"gpu::int8_gemm_pack_b"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
0 → 100644
View file @
eab3cafb
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
struct
pack_int8_args
{
std
::
string
name
()
const
{
return
"gpu::pack_int8_args"
;
}
void
apply
(
program
&
p
)
const
;
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/include/migraphx/gpu/quant_convolution.hpp
View file @
eab3cafb
...
...
@@ -17,8 +17,6 @@ struct miopen_quant_convolution
shared
<
convolution_descriptor
>
cd
;
miopenConvFwdAlgorithm_t
algo
{};
miopenHandle_t
handle
=
nullptr
;
argument
arg_vec4_x
{};
argument
arg_vec4_w
{};
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
...
...
@@ -39,7 +37,7 @@ struct miopen_quant_convolution
}
private:
shape
pack_int8_shape
(
shape
&
s
);
shape
pack_int8_shape
(
const
shape
&
s
)
const
;
};
}
// namespace gpu
...
...
src/targets/gpu/include/migraphx/gpu/quant_gemm.hpp
View file @
eab3cafb
...
...
@@ -10,7 +10,7 @@ namespace gpu {
struct
context
;
struct
miopen
_quant_gemm
struct
rocblas
_quant_gemm
{
op
::
quant_dot
op
;
...
...
@@ -24,6 +24,7 @@ struct miopen_quant_gemm
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
;
argument
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
;
void
batch_not_transposed
(
const
std
::
vector
<
std
::
size_t
>&
strides
)
const
;
std
::
ptrdiff_t
output_alias
(
const
std
::
vector
<
shape
>&
shapes
)
const
{
return
shapes
.
size
()
-
1
;
...
...
src/targets/gpu/int8_conv_pack.cpp
0 → 100644
View file @
eab3cafb
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
miopen_int8_conv_pack
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
standard
();
return
inputs
.
at
(
0
);
}
argument
miopen_int8_conv_pack
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
arg_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
arg_desc_vec4
=
make_tensor
(
args
[
0
].
get_shape
(),
true
);
float
alpha
=
1
;
float
beta
=
0
;
// pack input to vec4 format
auto
status
=
miopenTransformTensor
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
arg_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
arg_desc_vec4
.
get
(),
args
[
1
].
implicit
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"INT8_CONV_PACK: transform input tensor failed"
);
}
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/int8_gemm_pack.cpp
0 → 100644
View file @
eab3cafb
#include <migraphx/gpu/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_int8_gemm_pack_a
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_a
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_a
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
shape
hip_int8_gemm_pack_b
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{{
inputs
.
at
(
0
)},
*
this
}.
has
(
1
).
not_broadcasted
().
packed
();
return
inputs
.
at
(
0
);
}
argument
hip_int8_gemm_pack_b
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
int8_gemm_pack_b
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
]);
return
args
[
1
];
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/lowering.cpp
View file @
eab3cafb
...
...
@@ -59,6 +59,7 @@
#include <migraphx/gpu/reduce_mean.hpp>
#include <migraphx/gpu/pow.hpp>
#include <migraphx/gpu/sqdiff.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <utility>
#include <functional>
#include <algorithm>
...
...
@@ -115,6 +116,7 @@ struct miopen_apply
add_generic_op
<
hip_sqdiff
>
(
"sqdiff"
);
add_extend_op
<
miopen_gemm
,
op
::
dot
>
(
"dot"
);
add_extend_op
<
rocblas_quant_gemm
,
op
::
quant_dot
>
(
"quant_dot"
);
add_extend_op
<
miopen_contiguous
,
op
::
contiguous
>
(
"contiguous"
);
add_extend_op
<
hip_concat
,
op
::
concat
>
(
"concat"
);
add_extend_op
<
hip_softmax
,
op
::
softmax
>
(
"softmax"
);
...
...
@@ -131,7 +133,7 @@ struct miopen_apply
add_lrn_op
();
add_convolution_op
();
add_quant_convolution_op
();
add_quant_dot_op
();
//
add_quant_dot_op();
add_pooling_op
();
add_batch_norm_inference_op
();
}
...
...
@@ -182,32 +184,14 @@ struct miopen_apply
{
apply_map
.
emplace
(
"quant_convolution"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_convolution
>
(
ins
->
get_operator
());
auto
conv
=
miopen_quant_convolution
{
op
,
make_conv
(
op
)};
auto
ws
=
conv
.
compile
(
ctx
,
ins
->
get_shape
(),
to_shapes
(
ins
->
inputs
()));
auto
args
=
ins
->
inputs
();
auto
workspace
=
insert_allocation
(
ins
,
ws
,
"workspace"
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
return
prog
->
replace_instruction
(
ins
,
conv
,
ins
->
inputs
().
at
(
0
),
ins
->
inputs
().
at
(
1
),
workspace
,
output
);
});
}
void
add_quant_dot_op
()
{
apply_map
.
emplace
(
"quant_dot"
,
[
=
](
instruction_ref
ins
)
{
auto
&&
op
=
any_cast
<
op
::
quant_dot
>
(
ins
->
get_operator
());
auto
inputs
=
ins
->
inputs
();
auto
in_shapes
=
to_shapes
(
inputs
);
auto
pack_a
=
insert_allocation
(
ins
,
in_shapes
[
0
],
"pack_a"
);
auto
pack_b
=
insert_allocation
(
ins
,
in_shapes
[
1
],
"pack_b"
);
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
inputs
.
push_back
(
pack_a
);
inputs
.
push_back
(
pack_b
);
inputs
.
push_back
(
output
);
return
prog
->
replace_instruction
(
ins
,
miopen_quant_gemm
{
op
},
inputs
);
return
prog
->
replace_instruction
(
ins
,
conv
,
args
[
0
],
args
[
1
],
workspace
,
output
);
});
}
...
...
src/targets/gpu/pack_int8_args.cpp
0 → 100644
View file @
eab3cafb
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/gpu/int8_gemm_pack.hpp>
#include <migraphx/gpu/int8_conv_pack.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/iterator_for.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
void
pack_int8_args
::
apply
(
program
&
p
)
const
{
for
(
auto
ins
:
iterator_for
(
p
))
{
if
(
ins
->
name
()
==
"gpu::quant_gemm"
)
{
auto
inputs
=
ins
->
inputs
();
bool
transa
=
inputs
[
0
]
->
get_shape
().
transposed
();
bool
transb
=
inputs
[
1
]
->
get_shape
().
transposed
();
if
(
!
transb
)
{
auto
packed_b
=
p
.
insert_instruction
(
ins
,
hip_allocate
{
inputs
[
1
]
->
get_shape
()});
auto
output_b
=
p
.
insert_instruction
(
ins
,
hip_int8_gemm_pack_a
{},
{
inputs
[
1
],
packed_b
});
instruction
::
replace_argument
(
ins
,
inputs
[
1
],
output_b
);
}
if
(
transa
)
{
auto
packed_a
=
p
.
insert_instruction
(
ins
,
hip_allocate
{
inputs
[
0
]
->
get_shape
()});
auto
output_a
=
p
.
insert_instruction
(
ins
,
hip_int8_gemm_pack_b
{},
{
inputs
[
0
],
packed_a
});
instruction
::
replace_argument
(
ins
,
inputs
[
0
],
output_a
);
}
}
else
if
(
ins
->
name
()
==
"gpu::quant_convolution"
)
{
auto
inputs
=
ins
->
inputs
();
auto
packed_x
=
p
.
insert_instruction
(
ins
,
hip_allocate
{
pack_int8_shape
(
inputs
[
0
]
->
get_shape
())});
auto
output_x
=
p
.
insert_instruction
(
ins
,
miopen_int8_conv_pack
{},
{
inputs
[
0
],
packed_x
});
instruction
::
replace_argument
(
ins
,
inputs
[
0
],
output_x
);
auto
packed_w
=
p
.
insert_instruction
(
ins
,
hip_allocate
{
pack_int8_shape
(
inputs
[
1
]
->
get_shape
())});
auto
output_w
=
p
.
insert_instruction
(
ins
,
miopen_int8_conv_pack
{},
{
inputs
[
1
],
packed_w
});
instruction
::
replace_argument
(
ins
,
inputs
[
1
],
output_w
);
}
}
}
shape
pack_int8_args
::
pack_int8_shape
(
const
shape
&
s
)
const
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
MIGRAPHX_THROW
(
"PACK_INT8_ARGS: only process int8_type"
);
}
auto
lens
=
s
.
lens
();
auto
strides
=
s
.
strides
();
lens
[
1
]
=
(
lens
[
1
]
+
3
)
/
4
*
4
;
strides
[
0
]
=
strides
[
1
]
*
lens
[
1
];
return
{
s
.
type
(),
lens
,
strides
};
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/quant_convolution.cpp
View file @
eab3cafb
...
...
@@ -16,54 +16,26 @@ argument miopen_quant_convolution::compute(context& ctx,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
());
auto
x_desc_vec4
=
make_tensor
(
args
[
0
].
get_shape
(),
true
);
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
());
auto
w_desc_vec4
=
make_tensor
(
args
[
1
].
get_shape
(),
true
);
auto
y_desc
=
make_tensor
(
output_shape
);
auto
x_desc
=
make_tensor
(
args
[
0
].
get_shape
(),
true
);
auto
w_desc
=
make_tensor
(
args
[
1
].
get_shape
(),
true
);
auto
y_desc
=
make_tensor
(
output_shape
);
float
alpha
=
1
;
float
beta
=
0
;
// pack input to vec4 format
auto
status
=
miopenTransformTensor
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
x_desc
.
get
(),
args
[
0
].
implicit
(),
&
beta
,
x_desc_vec4
.
get
(),
arg_vec4_x
.
implicit
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"QUANT_CONVOLUTION: transform input tensor failed"
);
}
// pack input to vec4 format
status
=
miopenTransformTensor
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
w_desc
.
get
(),
args
[
1
].
implicit
(),
&
beta
,
w_desc_vec4
.
get
(),
arg_vec4_w
.
implicit
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"QUANT_CONVOLUTION: transform weight tensor failed"
);
}
status
=
miopenConvolutionForward
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
x_desc_vec4
.
get
(),
arg_vec4_x
.
implicit
(),
w_desc_vec4
.
get
(),
arg_vec4_w
.
implicit
(),
cd
.
get
(),
algo
,
&
beta
,
y_desc
.
get
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
args
[
2
].
get_shape
().
bytes
());
auto
status
=
miopenConvolutionForward
(
ctx
.
get_stream
().
get_miopen
(),
&
alpha
,
x_desc
.
get
(),
args
[
0
].
implicit
(),
w_desc
.
get
(),
args
[
1
].
implicit
(),
cd
.
get
(),
algo
,
&
beta
,
y_desc
.
get
(),
args
[
3
].
implicit
(),
args
[
2
].
implicit
(),
args
[
2
].
get_shape
().
bytes
());
if
(
status
!=
miopenStatusSuccess
)
{
MIGRAPHX_THROW
(
"QUANT_CONVOLUTION: run convolution forward failed"
);
...
...
@@ -90,10 +62,10 @@ shape miopen_quant_convolution::compile(context& ctx,
&
workspace_size
);
workspace_shape
=
shape
{
shape
::
int8_type
,
{
workspace_size
}};
arg_vec4_x
=
to_gpu
(
generate_argument
(
pack_int8_shape
(
inputs
[
0
])));
arg_vec4_w
=
to_gpu
(
generate_argument
(
pack_int8_shape
(
inputs
[
1
])));
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
auto
arg_vec4_x
=
to_gpu
(
generate_argument
(
pack_int8_shape
(
inputs
[
0
])));
auto
arg_vec4_w
=
to_gpu
(
generate_argument
(
pack_int8_shape
(
inputs
[
1
])));
auto
y
=
allocate_gpu
(
output_shape
);
auto
workspace
=
allocate_gpu
(
workspace_shape
);
int
algo_count
=
1
;
miopenConvAlgoPerf_t
perf
;
...
...
@@ -133,7 +105,7 @@ void miopen_quant_convolution::finalize(context& ctx,
MIGRAPHX_THROW
(
"Workspace has changed during finalization."
);
}
shape
miopen_quant_convolution
::
pack_int8_shape
(
shape
&
s
)
shape
miopen_quant_convolution
::
pack_int8_shape
(
const
shape
&
s
)
const
{
if
(
s
.
type
()
!=
shape
::
int8_type
)
{
...
...
src/targets/gpu/quant_gemm.cpp
View file @
eab3cafb
#include <migraphx/gpu/quant_gemm.hpp>
#include <migraphx/gpu/device/pack.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>
#include <fstream>
...
...
@@ -54,43 +53,46 @@ rb_type<T>* to_rocblas_type(T* x)
return
reinterpret_cast
<
rb_type
<
T
>*>
(
x
);
}
shape
miopen
_quant_gemm
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
shape
rocblas
_quant_gemm
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
std
::
vector
<
shape
>
in_shapes
(
inputs
);
in_shapes
.
erase
(
in_shapes
.
begin
()
+
in_shapes
.
size
()
-
3
,
in_shapes
.
end
()
);
in_shapes
.
pop_back
(
);
check_shapes
{
in_shapes
}.
not_broadcasted
();
batch_not_transposed
(
inputs
[
0
].
strides
());
batch_not_transposed
(
inputs
[
1
].
strides
());
return
op
.
compute_shape
(
in_shapes
);
}
argument
miopen_quant_gemm
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
void
rocblas_quant_gemm
::
batch_not_transposed
(
const
std
::
vector
<
std
::
size_t
>&
strides
)
const
{
if
(
strides
.
size
()
<=
2
)
return
;
auto
dim_0
=
strides
.
size
()
-
2
;
auto
matrix_size
=
std
::
max
(
strides
[
dim_0
],
strides
[
dim_0
+
1
]);
std
::
vector
<
std
::
size_t
>
batch
(
strides
.
begin
(),
strides
.
begin
()
+
dim_0
);
if
(
std
::
adjacent_find
(
batch
.
begin
(),
batch
.
end
(),
[
&
](
auto
i
,
auto
j
)
{
return
(
i
<
j
or
i
<
matrix_size
or
j
<
matrix_size
);
})
!=
batch
.
end
())
{
MIGRAPHX_THROW
(
"QUANT_DOT: batch size {"
+
to_string_range
(
strides
)
+
"} is transposed!"
);
}
}
argument
rocblas_quant_gemm
::
compute
(
context
&
ctx
,
const
shape
&
output_shape
,
const
std
::
vector
<
argument
>&
args
)
const
{
bool
transa
=
args
[
0
].
get_shape
().
transposed
();
bool
transb
=
args
[
1
].
get_shape
().
transposed
();
auto
n_dim
=
output_shape
.
lens
().
size
();
auto
dim_1
=
n_dim
-
1
;
auto
dim_0
=
n_dim
-
2
;
auto
arg_num
=
args
.
size
();
rocblas_int
lda
=
args
[
0
].
get_shape
().
strides
()[
transa
?
dim_1
:
dim_0
];
rocblas_int
ldb
=
args
[
1
].
get_shape
().
strides
()[
transb
?
dim_1
:
dim_0
];
rocblas_int
ldc
=
args
[
arg_num
-
1
].
get_shape
().
strides
()[
dim_0
];
if
(
!
transb
)
{
device
::
pack_a
(
ctx
.
get_stream
().
get
(),
args
[
arg_num
-
2
],
args
[
1
]);
}
// need to pack A in this scenario, use the algorithm to pack B in the
// comment of the API
if
(
transa
)
{
device
::
pack_b
(
ctx
.
get_stream
().
get
(),
args
[
arg_num
-
3
],
args
[
0
]);
}
device
::
sync_stream
(
ctx
.
get_stream
().
get
());
rocblas_int
ldc
=
args
[
2
].
get_shape
().
strides
()[
dim_0
];
bool
is_3inputs
=
(
arg
_num
==
6
);
bool
is_3inputs
=
(
arg
s
.
size
()
==
4
);
int32_t
beta
=
0
;
if
(
is_3inputs
)
{
...
...
@@ -124,18 +126,17 @@ argument miopen_quant_gemm::compute(context& ctx,
m
,
k
,
&
alpha_r
,
(
!
transb
)
?
to_pointer
(
args
[
arg_num
-
2
])
:
to_pointer
(
args
.
at
(
1
)),
to_pointer
(
args
.
at
(
1
)),
rocblas_datatype_i8_r
,
ldb
,
transa
?
to_pointer
(
args
[
arg_num
-
3
])
:
to_pointer
(
args
.
at
(
0
)),
to_pointer
(
args
.
at
(
0
)),
rocblas_datatype_i8_r
,
lda
,
&
beta_r
,
to_pointer
(
args
[
2
]),
rocblas_datatype_i32_r
,
ldc
,
to_pointer
(
args
[
arg_num
-
1
]),
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
]),
rocblas_datatype_i32_r
,
ldc
,
rocblas_datatype_i32_r
,
...
...
@@ -155,11 +156,11 @@ argument miopen_quant_gemm::compute(context& ctx,
m
,
k
,
&
alpha_r
,
(
!
transb
)
?
to_pointer
(
args
[
arg_num
-
2
])
:
to_pointer
(
args
.
at
(
1
)),
to_pointer
(
args
.
at
(
1
)),
rocblas_datatype_i8_r
,
ldb
,
k
*
n
,
transa
?
to_pointer
(
args
[
arg_num
-
3
])
:
to_pointer
(
args
.
at
(
0
)),
to_pointer
(
args
.
at
(
0
)),
rocblas_datatype_i8_r
,
lda
,
m
*
k
,
...
...
@@ -168,7 +169,7 @@ argument miopen_quant_gemm::compute(context& ctx,
rocblas_datatype_i32_r
,
ldc
,
m
*
n
,
to_pointer
(
args
[
arg_num
-
1
]),
is_3inputs
?
to_pointer
(
args
[
3
])
:
to_pointer
(
args
[
2
]),
rocblas_datatype_i32_r
,
ldc
,
m
*
n
,
...
...
@@ -182,7 +183,7 @@ argument miopen_quant_gemm::compute(context& ctx,
}
});
return
args
[
arg_num
-
1
];
return
is_3inputs
?
args
[
3
]
:
args
[
2
];
}
}
// namespace gpu
...
...
src/targets/gpu/target.cpp
View file @
eab3cafb
...
...
@@ -21,6 +21,7 @@
#include <migraphx/gpu/concat_gpu_opt.hpp>
#include <migraphx/gpu/schedule_model.hpp>
#include <migraphx/gpu/adjust_allocation.hpp>
#include <migraphx/gpu/pack_int8_args.hpp>
#include <migraphx/eliminate_pad.hpp>
#include <migraphx/schedule.hpp>
...
...
@@ -62,6 +63,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
dead_code_elimination
{},
adjust_allocation
{},
dead_code_elimination
{},
pack_int8_args
{},
dead_code_elimination
{},
fuse_ops
{
&
ctx
},
dead_code_elimination
{},
write_literals
{
&
ctx
},
...
...
test/gpu/miopen.cpp
View file @
eab3cafb
...
...
@@ -663,13 +663,13 @@ struct test_softmax2 : verify_program<test_softmax2>
}
};
template
<
int
Axis
>
struct
test_softmax
:
verify_program
<
test_softmax
<
Axis
>>
template
<
int
Axis
,
migraphx
::
shape
::
type_t
T
>
struct
test_softmax
:
verify_program
<
test_softmax
<
Axis
,
T
>>
{
migraphx
::
program
create_program
()
const
{
migraphx
::
program
p
;
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
3
,
4
,
5
,
6
}};
migraphx
::
shape
s
{
T
,
{
512
,
4
,
1067
,
6
}};
auto
param
=
p
.
add_parameter
(
"0"
,
s
);
p
.
add_instruction
(
migraphx
::
op
::
softmax
{
Axis
},
param
);
...
...
@@ -677,10 +677,38 @@ struct test_softmax : verify_program<test_softmax<Axis>>
}
};
template
struct
test_softmax
<
0
>;
template
struct
test_softmax
<
1
>;
template
struct
test_softmax
<
2
>;
template
struct
test_softmax
<
3
>;
template
struct
test_softmax
<
0
,
migraphx
::
shape
::
float_type
>;
template
struct
test_softmax
<
2
,
migraphx
::
shape
::
float_type
>;
template
struct
test_softmax
<
1
,
migraphx
::
shape
::
double_type
>;
template
struct
test_softmax
<
3
,
migraphx
::
shape
::
double_type
>;
template
struct
test_softmax
<
0
,
migraphx
::
shape
::
half_type
>;
template
struct
test_softmax
<
1
,
migraphx
::
shape
::
half_type
>;
template
struct
test_softmax
<
2
,
migraphx
::
shape
::
half_type
>;
template
struct
test_softmax
<
3
,
migraphx
::
shape
::
half_type
>;
template
<
class
T
,
int
Axis
>
struct
test_arg_ops
:
verify_program
<
test_arg_ops
<
T
,
Axis
>>
{
migraphx
::
program
create_program
()
const
{
migraphx
::
program
p
;
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
2
,
3
,
4
,
1025
}};
auto
param
=
p
.
add_parameter
(
"data"
,
s
);
p
.
add_instruction
(
T
{
Axis
},
param
);
return
p
;
}
};
template
struct
test_arg_ops
<
migraphx
::
op
::
argmax
,
0
>;
template
struct
test_arg_ops
<
migraphx
::
op
::
argmax
,
1
>;
template
struct
test_arg_ops
<
migraphx
::
op
::
argmax
,
2
>;
template
struct
test_arg_ops
<
migraphx
::
op
::
argmax
,
3
>;
template
struct
test_arg_ops
<
migraphx
::
op
::
argmin
,
0
>;
template
struct
test_arg_ops
<
migraphx
::
op
::
argmin
,
1
>;
template
struct
test_arg_ops
<
migraphx
::
op
::
argmin
,
2
>;
template
struct
test_arg_ops
<
migraphx
::
op
::
argmin
,
3
>;
struct
test_conv
:
verify_program
<
test_conv
>
{
...
...
@@ -3601,32 +3629,13 @@ struct test_lstm_bidirct_default_actv2 : verify_program<test_lstm_bidirct_defaul
}
};
template
<
int
Axis
>
struct
test_logsoftmax
:
verify_program
<
test_logsoftmax
<
Axis
>>
{
migraphx
::
program
create_program
()
const
{
migraphx
::
program
p
;
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
3
,
4
,
5
,
6
}};
auto
param
=
p
.
add_parameter
(
"0"
,
s
);
p
.
add_instruction
(
migraphx
::
op
::
logsoftmax
{
Axis
},
param
);
return
p
;
}
};
template
struct
test_logsoftmax
<
0
>;
template
struct
test_logsoftmax
<
1
>;
template
struct
test_logsoftmax
<
2
>;
template
struct
test_logsoftmax
<
3
>;
template
<
int
Axis
>
struct
test_logsoftmax_1
:
verify_program
<
test_logsoftmax_1
<
Axis
>>
template
<
int
Axis
,
migraphx
::
shape
::
type_t
T
>
struct
test_logsoftmax
:
verify_program
<
test_logsoftmax
<
Axis
,
T
>>
{
migraphx
::
program
create_program
()
const
{
migraphx
::
program
p
;
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
3
}};
migraphx
::
shape
s
{
T
,
{
10
,
4
,
2080
,
6
}};
auto
param
=
p
.
add_parameter
(
"0"
,
s
);
p
.
add_instruction
(
migraphx
::
op
::
logsoftmax
{
Axis
},
param
);
...
...
@@ -3634,7 +3643,16 @@ struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
}
};
template
struct
test_logsoftmax_1
<
0
>;
template
struct
test_logsoftmax
<
0
,
migraphx
::
shape
::
float_type
>;
template
struct
test_logsoftmax
<
1
,
migraphx
::
shape
::
float_type
>;
template
struct
test_logsoftmax
<
2
,
migraphx
::
shape
::
float_type
>;
template
struct
test_logsoftmax
<
3
,
migraphx
::
shape
::
float_type
>;
template
struct
test_logsoftmax
<
1
,
migraphx
::
shape
::
double_type
>;
template
struct
test_logsoftmax
<
3
,
migraphx
::
shape
::
double_type
>;
template
struct
test_logsoftmax
<
1
,
migraphx
::
shape
::
half_type
>;
template
struct
test_logsoftmax
<
0
,
migraphx
::
shape
::
half_type
>;
template
struct
test_logsoftmax
<
2
,
migraphx
::
shape
::
half_type
>;
template
struct
test_logsoftmax
<
3
,
migraphx
::
shape
::
half_type
>;
struct
test_fp32_fp16_lall
:
verify_program
<
test_fp32_fp16_lall
>
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment