gaoqiong / MIGraphX / Commits / 7f97b8ef

Commit 7f97b8ef (unverified)
Authored Oct 07, 2022 by Ted Themistokleous; committed by GitHub, Oct 07, 2022

Merge branch 'simplify_1_mul_div_ops' into divide_by_zero_check

Parents: 2ba401f0, d1fed367

Showing 20 changed files with 739 additions and 244 deletions (+739 −244).
src/targets/gpu/lowering.cpp           +36  −82
src/targets/gpu/mlir.cpp               +48  −11
src/targets/gpu/pack_int8_args.cpp     +1   −1
src/targets/gpu/perfdb.cpp             +128 −0
src/targets/gpu/prefuse_ops.cpp        +59  −37
src/targets/gpu/quant_convolution.cpp  +0   −1
src/targets/gpu/rocblas.cpp            +33  −0
src/targets/gpu/softmax.cpp            +0   −49
src/targets/gpu/target.cpp             +5   −2
src/targets/ref/lowering.cpp           +53  −1
src/tf/parse_conv.cpp                  +1   −1
src/tf/parse_depthwiseconv.cpp         +1   −1
src/tf/parse_pooling.cpp               +1   −1
src/tf/parse_relu6.cpp                 +3   −2
src/tf/tf_parser.cpp                   +3   −3
src/tmp_dir.cpp                        +1   −1
src/value.cpp                          +5   −12
test/api/test_custom_op.cpp            +43  −0
test/api/test_custom_op_gpu.cpp        +258 −37
test/api/test_gpu.cpp                  +60  −2
src/targets/gpu/lowering.cpp  (View file @ 7f97b8ef)

@@ -26,43 +26,27 @@
 #include <migraphx/manage_ptr.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/instruction_ref.hpp>
+#include <migraphx/stringutils.hpp>
-#include <migraphx/op/abs.hpp>
-#include <migraphx/op/batch_norm_inference.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/deconvolution.hpp>
 #include <migraphx/op/dot.hpp>
-#include <migraphx/op/elu.hpp>
 #include <migraphx/op/if_op.hpp>
-#include <migraphx/op/leaky_relu.hpp>
-#include <migraphx/op/lrn.hpp>
-#include <migraphx/op/pooling.hpp>
 #include <migraphx/op/reshape.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/quant_dot.hpp>
-#include <migraphx/gpu/abs.hpp>
 #include <migraphx/gpu/batch_norm_inference.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/convolution.hpp>
 #include <migraphx/gpu/deconvolution.hpp>
 #include <migraphx/gpu/device_name.hpp>
-#include <migraphx/gpu/elu.hpp>
-#include <migraphx/gpu/equal.hpp>
 #include <migraphx/gpu/gemm.hpp>
-#include <migraphx/gpu/greater.hpp>
 #include <migraphx/gpu/int8_conv_pack.hpp>
-#include <migraphx/gpu/leaky_relu.hpp>
-#include <migraphx/gpu/less.hpp>
-#include <migraphx/gpu/logical_and.hpp>
-#include <migraphx/gpu/logical_or.hpp>
-#include <migraphx/gpu/logical_xor.hpp>
-#include <migraphx/gpu/lrn.hpp>
 #include <migraphx/gpu/miopen.hpp>
 #include <migraphx/gpu/quant_convolution.hpp>
 #include <migraphx/gpu/rocblas.hpp>
-#include <migraphx/gpu/unary_not.hpp>
-#include <migraphx/gpu/where.hpp>
 #include <migraphx/gpu/compiler.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/program.hpp>
...
@@ -99,78 +83,21 @@ struct miopen_apply
         (void)i;
     }

-    const std::unordered_set<std::string>& get_rocblas_fp32_archs()
-    {
-        static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
-        return supported_archs;
-    }

     void init()
     {
         assert(mod != nullptr);
         assert(pass != nullptr);

-#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
         auto& ctx = get_context();
+        int8_x4_format = get_int8_x4_format(ctx);
+        compute_fp32   = get_compute_fp32_flag();
-        const auto device_name = trim(split_string(get_device_name(), ':').front());
-        if(contains(get_rocblas_fp32_archs(), device_name))
-            compute_fp32 = true;
-        rocblas_gemm_flags flag;
-        rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
-        int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
-#endif

         offload_copy = (mod->name() == "main") ? pass->offload_copy : false;

-        add_generic_op("acos");
-        add_generic_op("acosh");
-        add_generic_op("add");
-        add_generic_op("asin");
-        add_generic_op("asinh");
-        add_generic_op("atan");
-        add_generic_op("atanh");
-        add_generic_op("ceil");
         add_generic_op("contiguous");
-        add_generic_op("cos");
-        add_generic_op("cosh");
-        add_generic_op("div");
-        add_generic_op("equal");
-        add_generic_op("erf");
-        add_generic_op("exp");
-        add_generic_op("floor");
-        add_generic_op("greater");
-        add_generic_op("less");
-        add_generic_op("log");
-        add_generic_op("logical_and");
-        add_generic_op("logical_or");
-        add_generic_op("logical_xor");
-        add_generic_op("max");
-        add_generic_op("min");
-        add_generic_op("mul");
-        add_generic_op("not");
-        add_generic_op("pow");
-        add_generic_op("prelu");
-        add_generic_op("recip");
-        add_generic_op("relu");
-        add_generic_op("round");
-        add_generic_op("rsqrt");
-        add_generic_op("sigmoid");
-        add_generic_op("sign");
-        add_generic_op("sin");
-        add_generic_op("sinh");
-        add_generic_op("sqdiff");
-        add_generic_op("sqrt");
-        add_generic_op("sub");
-        add_generic_op("tan");
-        add_generic_op("tanh");
-        add_generic_op("where");

-        add_extend_op("abs");
         add_extend_op("argmax");
         add_extend_op("argmin");
-        add_extend_op("clip");
-        add_extend_op("concat");
-        add_extend_op("convert");
         add_extend_op("elu");
         add_extend_op("gather");
         add_extend_op("leaky_relu");
...
@@ -246,7 +173,8 @@ struct miopen_apply
         init();
         for(auto it = mod->begin(); it != mod->end(); it++)
         {
             auto s = it->get_shape();
+            auto attrs = it->get_operator().attributes();
             if(apply_map.count(it->name()) > 0)
             {
                 check_shape(s, apply_map.at(it->name())(it));
...
@@ -255,11 +183,37 @@ struct miopen_apply
             {
                 check_shape(s, insert_precompile_op(it));
             }
+            else if(attrs.contains("target"))
+            {
+                check_shape(s, insert_custom_op(it, attrs));
+            }
         }
         copy_params();
     }

+    instruction_ref insert_custom_op(instruction_ref ins, const value& attrs) const
+    {
+        const auto& custom_op = ins->get_operator();
+        if(attrs.at("target") == "cpu")
+        {
+            auto s = ins->get_shape();
+            std::vector<instruction_ref> cpu_inputs;
+            auto inputs = ins->inputs();
+            auto output = inputs.back();
+            std::transform(
+                inputs.begin(), inputs.end(), std::back_inserter(cpu_inputs), [&](auto in) {
+                    return mod->insert_instruction(ins, make_op("hip::copy_from_gpu"), in);
+                });
+            cpu_inputs.front() =
+                mod->insert_instruction(ins, make_op("hip::sync_stream"), cpu_inputs);
+            auto cpu_out = mod->insert_instruction(ins, custom_op, cpu_inputs);
+            auto gpu_out =
+                mod->insert_instruction(ins, make_op("hip::copy_to_gpu"), cpu_out, output);
+            return mod->replace_instruction(ins, gpu_out);
+        }
+        return ins;
+    }

     instruction_ref insert_precompile_op(instruction_ref ins) const
     {
         auto output = insert_allocation(ins, ins->get_shape());
...
@@ -341,7 +295,7 @@ struct miopen_apply
         catch(migraphx::exception&)
        {
             // In case no solver supports the default format, retry using the other format.
-            compile_quant_conv_with_format(!int8_x4_format);
+            compile_quant_conv_with_format(not int8_x4_format);
         }
         auto args = ins->inputs();
...
...
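The new insert_custom_op hook above rewrites any instruction whose operator attributes carry a "target" entry equal to "cpu" into a copy-out/compute/copy-back sequence around the original operator. A hand-written sketch of the resulting instruction stream (the custom-op and tensor names are illustrative; the hip:: operator names are the ones used in the hunk):

    in1'  = hip::copy_from_gpu(in1)
    in2'  = hip::copy_from_gpu(in2)          // one copy per input
    in1'' = hip::sync_stream(in1', in2')     // first input gated on the stream
    out   = my_cpu_custom_op(in1'', in2')    // original operator, now fed host buffers
    gout  = hip::copy_to_gpu(out, output)    // result copied back into the GPU output buffer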
src/targets/gpu/mlir.cpp  (View file @ 7f97b8ef)

@@ -44,9 +44,14 @@
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/device_name.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/gpu/perfdb.hpp>
 #include <deque>
 #include <variant>

+#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
+#define MIGRAPHX_MLIR_BARE_POINTER
+#endif

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
...
@@ -73,7 +78,7 @@ struct mlir_handle
     friend bool operator==(ptr x, ptr y) { return x.get_value() == y.get_value(); }
-    friend bool operator!=(ptr x, ptr y) { return !(x == y); }
+    friend bool operator!=(ptr x, ptr y) { return not(x == y); }
     T obj{};
 };
...
@@ -145,6 +150,12 @@ std::string mlir_print(F f, T x)
     return ss.str();
 }

+const std::unordered_set<std::string>& get_xdlops_archs()
+{
+    static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
+    return supported_archs;
+}

 struct mlir_program
 {
     mlir_program()
...
@@ -487,6 +498,17 @@ struct mlir_program
         ops.add_attribute_value(get_operator_value(ins->get_operator()));
         if(ins->name() != "@return")
             ops.add_results({get_shape(ins)});

+        if(ins->name() == "convolution")
+        {
+            pp = problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
+            std::string tuned = get_tune_params();
+            if(not tuned.empty())
+                ops.add_attributes({{"perf_config", tuned}});
+            // check if HW supports xdlops
+            if(contains(get_xdlops_archs(), target_name))
+                ops.add_attributes({{"xdlopsV2", true}});
+        }

         std::vector<MlirValue> inputs;
         transform(
...
@@ -508,14 +530,7 @@ struct mlir_program
         // 1st pipeline to call
         mlirMIGraphXAddHighLevelPipeline(pm.get());
         // 2nd pipeline to call
-        std::string tname = get_device_name();
-        // HACK: Since MLIR can't handle the full target name
-        auto hacked_tname = tname.substr(0, tname.find(':'));
-        if(tname.size() != hacked_tname.size())
-            std::cout << "*************** WARNING: MLIR may not compile the correct target features for: "
-                      << tname << std::endl;
-        mlirMIGraphXAddBackendPipeline(pm.get(), hacked_tname.c_str(), "amdgcn-amd-amdhsa", "");
+        mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
         mlirPassManagerRun(pm.get(), mmodule.get());

         code_object_op op{};
...
@@ -525,6 +540,17 @@ struct mlir_program
         return op;
     }

+    void find_target()
+    {
+        std::string tname = get_device_name();
+        // HACK: Since MLIR can't handle the full target name
+        target_name = trim(split_string(tname, ':').front());
+        if(tname.size() != target_name.size())
+            std::cout << "*************** WARNING: MLIR may not compile the correct target features for: "
+                      << tname << std::endl;
+    }

     std::pair<std::size_t, std::size_t> get_launch_params() const
     {
         uint32_t attrs[2];
...
@@ -545,10 +571,14 @@ struct mlir_program
         MIGRAPHX_THROW("Failed to compile mlir program");
     }

+    std::string get_tune_params() { return get_mlir_perf_for_conv(pp); }

     mlir_context ctx;
     MlirLocation location;
     mlir_module mmodule;
+    problem_params pp;
     std::deque<std::string> strings{};
+    std::string target_name;
 };

 std::string dump_mlir(const module& m)
...
@@ -565,6 +595,7 @@ code_object_op compile_mlir(const context&, const module& m)
     if(trace)
         std::cout << m << std::endl;
     mlir_program mp;
+    mp.find_target();
     mp.parse(m);
     auto mod_op = mlirModuleGetOperation(mp.mmodule.get());
     if(trace)
...
@@ -579,9 +610,15 @@ instruction_ref insert_mlir(module& m,
                             code_object_op co,
                             const std::vector<instruction_ref>& inputs)
 {
     std::vector<instruction_ref> refs;
+    std::size_t last = 0;
+#ifdef MIGRAPHX_MLIR_BARE_POINTER
+    refs.reserve(inputs.size());
+    std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs));
+    last = refs.size() - 1;
+#else
     refs.reserve(inputs.size() * 15);
     std::unordered_map<uint64_t, instruction_ref> literal_map{};
     auto get_literal = [&](uint64_t value) {
         auto fi = literal_map.find(value);
...
@@ -592,7 +629,6 @@ instruction_ref insert_mlir(module& m,
         return lit;
     };

-    std::size_t last = 0;
     for(auto input : inputs)
     {
         const size_t offset = 0;
...
@@ -616,6 +652,7 @@ instruction_ref insert_mlir(module& m,
                   [&](const auto& lval) { return get_literal(lval); });
         // refs.push_back(get_literal(1)); // G
     }
+#endif
     co.expected_inputs = to_shapes(refs);
     co.output_arg      = last;
     return m.insert_instruction(ins, co, refs);
...
...
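find_target() reduces a full device name such as "gfx90a:sramecc+:xnack-" to its architecture prefix before handing it to the MLIR backend pipeline. A minimal standalone sketch of the same trimming using only the standard library (the device-name string is illustrative, not queried from hardware):

    #include <iostream>
    #include <string>

    // Keep everything before the first ':', mirroring
    // trim(split_string(tname, ':').front()) in the hunk above.
    std::string arch_prefix(const std::string& tname)
    {
        return tname.substr(0, tname.find(':'));
    }

    int main()
    {
        std::string tname = "gfx90a:sramecc+:xnack-"; // illustrative device name
        std::cout << arch_prefix(tname) << "\n";      // prints "gfx90a"
    }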
src/targets/gpu/pack_int8_args.cpp  (View file @ 7f97b8ef)

@@ -154,7 +154,7 @@ void pack_int8_args::apply(module& m) const
         bool transa = inputs[0]->get_shape().transposed();
         bool transb = inputs[1]->get_shape().transposed();
-        if(!transb)
+        if(not transb)
         {
             auto packed_b = m.insert_instruction(
                 ins, make_op("hip::allocate", {{"shape", to_value(inputs[1]->get_shape())}}));
...
...
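Many hunks in this commit replace `!`, `&&`, and `||` with the alternative tokens `not`, `and`, and `or`. These are part of standard C++ (not macros from <ciso646>) and compile to exactly the same operators, so the changes are purely stylistic. A quick self-contained check:

    #include <cassert>

    int main()
    {
        bool transb = false;
        // The alternative tokens are exact synonyms for the punctuators.
        assert((not transb) == (!transb));
        assert((true and false) == (true && false));
        assert((true or false) == (true || false));
    }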
src/targets/gpu/perfdb.cpp  (new file, 0 → 100644; View file @ 7f97b8ef)

/* The MIT License (MIT) - Copyright (c) 2015-2022 Advanced Micro Devices, Inc.
 * All rights reserved. (Standard MIT permission and warranty text.) */
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/value.hpp>
#include <migraphx/sqlite.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/permutation.hpp>
#include <fstream>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

namespace {

std::string get_layout(const shape& s, std::string labels)
{
    auto result = labels;
    auto p      = find_permutation(s);
    std::transform(p.begin(), p.end(), result.begin(), [&](auto i) { return labels[i]; });
    return "'" + result + "'";
}

std::string get_type(const shape& s)
{
    static const std::unordered_map<shape::type_t, std::string> m = {
        {shape::float_type, "'FP32'"},
        {shape::half_type, "'FP16'"},
        {shape::double_type, "'FP64'"},
        {shape::int8_type, "'INT8'"},
        {shape::int32_type, "'INT32'"},
    };
    auto it = m.find(s.type());
    if(it == m.end())
        return "UNKNOWN";
    return it->second;
}

std::string generate_miopen_config(const problem_params& pp)
{
    value v       = pp.op.to_value();
    auto input    = pp.inputs[0].lens();
    auto weights  = pp.inputs[1].lens();
    auto padding  = v["padding"].to_vector<std::size_t>();
    auto stride   = v["stride"].to_vector<std::size_t>();
    auto dilation = v["dilation"].to_vector<std::size_t>();
    if(padding.size() != stride.size())
        padding.erase(padding.begin() + padding.size() / 2, padding.end());
    return to_string_range({std::string{" C.in_channels="}, to_string(input[1]),
                            std::string{" AND C.in_h="}, to_string(input[2]),
                            std::string{" AND C.in_w="}, to_string(input[3]),
                            std::string{" AND C.fil_h="}, to_string(weights[2]),
                            std::string{" AND C.fil_w="}, to_string(weights[3]),
                            std::string{" AND C.out_channels="}, to_string(weights[0]),
                            std::string{" AND C.batchsize="}, to_string(input[0]),
                            std::string{" AND C.pad_h="}, to_string(padding[0]),
                            std::string{" AND C.pad_w="}, to_string(padding[2]),
                            std::string{" AND C.dilation_h="}, to_string(dilation[0]),
                            std::string{" AND C.dilation_w="}, to_string(dilation[1]),
                            std::string{" AND C.conv_stride_h="}, to_string(stride[0]),
                            std::string{" AND C.conv_stride_w="}, to_string(stride[1]),
                            std::string{" AND C.layout="}, get_layout(pp.inputs[0], "NCHW"),
                            std::string{" AND C.data_type="}, get_type(pp.inputs[0]),
                            std::string{" AND C.direction="}, std::string{"'F'"}},
                           " ");
}

auto query_miopen_db(const std::string& query)
{
    // TODO: Store db as a static variable
    const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db";
    // Check if db file exists.
    std::ifstream dbs(dbpath);
    if(dbs.is_open())
    {
        dbs.close();
    }
    else
    {
        std::vector<std::unordered_map<std::string, std::string>> empty;
        return empty;
    }
    auto db = sqlite::read(dbpath);
    return db.execute(query);
}

} // namespace

std::string get_mlir_perf_for_conv(const problem_params& pp)
{
    std::string query = "select P.* \
        from perf_db P, config C \
        where P.config = C.id AND \
        P.solver = 'ConvMlirIgemmFwdXdlops' AND \
        ${config}";
    auto results =
        query_miopen_db(interpolate_string(query, {{"config", generate_miopen_config(pp)}}));
    if(results.empty())
        return "";
    return results.front().at("params");
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
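For a concrete picture of what query_miopen_db receives, here is the interpolated query for a hypothetical NCHW FP32 forward convolution (batch 1, 64→128 channels, 56×56 input, 3×3 filter, stride 1, pad 1, dilation 1; the numbers are illustrative, the column names come from generate_miopen_config above):

    select P.* from perf_db P, config C
    where P.config = C.id AND P.solver = 'ConvMlirIgemmFwdXdlops' AND
      C.in_channels=64 AND C.in_h=56 AND C.in_w=56 AND
      C.fil_h=3 AND C.fil_w=3 AND C.out_channels=128 AND C.batchsize=1 AND
      C.pad_h=1 AND C.pad_w=1 AND C.dilation_h=1 AND C.dilation_w=1 AND
      C.conv_stride_h=1 AND C.conv_stride_w=1 AND
      C.layout='NCHW' AND C.data_type='FP32' AND C.direction='F'

The "params" column of the first matching row becomes the perf_config attribute attached to the MLIR convolution in mlir.cpp above.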
src/targets/gpu/prefuse_ops.cpp  (View file @ 7f97b8ef)

@@ -23,13 +23,62 @@
 */
 #include <migraphx/gpu/prefuse_ops.hpp>
 #include <migraphx/match/layernorm.hpp>
+#include <migraphx/check_shapes.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/register_op.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 namespace {

+template <class Derived, std::size_t N>
+struct layernorm_base
+{
+    float epsilon = 1e-12f;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.epsilon, "epsilon"));
+    }
+    shape compute_shape(std::vector<shape> inputs, std::vector<module_ref> mods) const
+    {
+        std::size_t nargs = 1;
+        if(not mods.empty())
+        {
+            auto* pm = mods.front();
+            nargs    = pm->get_parameter_names().size();
+        }
+        check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs + N);
+        auto s = inputs.at(0);
+        if(s.scalar())
+        {
+            return s;
+        }
+        else if(s.broadcasted())
+        {
+            return {s.type(), s.lens()};
+        }
+        else
+        {
+            return s.with_lens(s.lens());
+        }
+    }
+};
+
+struct layernorm : layernorm_base<layernorm, 0>
+{
+    std::string name() const { return "gpu::prelayernorm"; }
+};
+MIGRAPHX_REGISTER_OP(layernorm);
+
+struct add_layernorm : layernorm_base<add_layernorm, 1>
+{
+    std::string name() const { return "gpu::preadd_layernorm"; }
+};
+MIGRAPHX_REGISTER_OP(add_layernorm);

 struct find_layernorm
 {
     auto matcher() const { return match::layernorm(); }
...
@@ -38,60 +87,33 @@ struct find_layernorm
     {
         auto ins   = r.result;
         auto x_ins = r.instructions["x"];
+        auto eps   = r.instructions["eps"]->eval().at<float>();

-        if(not x_ins->get_shape().standard())
-            x_ins = m.insert_instruction(ins, make_op("contiguous"), x_ins);
-        auto relements = x_ins->get_shape().lens().back();
-        if(relements > 1024 or (relements % 4 != 0 and relements > 256))
-            return;
-        auto a = m.insert_instruction(
-            ins, make_op("hip::allocate", {{"shape", to_value(x_ins->get_shape())}}));
-        m.replace_instruction(ins, make_op("gpu::layernorm"), x_ins, a);
+        m.replace_instruction(ins, layernorm{eps}, x_ins);
     }
 };

-struct find_triadd_layernorm
+struct find_add_layernorm
 {
     auto matcher() const
     {
-        auto add1 =
-            match::name("add")(match::none_of(match::is_constant()),
-                               match::args(match::any().bind("z1"), match::any().bind("z2")));
-        auto add2 = match::name("add")(match::either_arg(0, 1)(add1, match::any().bind("z3")));
-        return match::layernorm()(match::var("x")(add2));
+        return match::layernorm()(match::var("x")(match::name("add").bind("add")));
     }

     void apply(module& m, const match::matcher_result& r) const
     {
         auto ins     = r.result;
-        auto x_ins   = r.instructions["z1"];
-        auto y_ins   = r.instructions["z2"];
-        auto z_ins   = r.instructions["z3"];
-        for(auto* pins : {&x_ins, &y_ins, &z_ins})
-        {
-            if(not(*pins)->get_shape().standard())
-                *pins = m.insert_instruction(ins, make_op("contiguous"), *pins);
-        }
-        auto relements = x_ins->get_shape().lens().back();
-        if(relements > 1024 or (relements % 4 != 0 and relements > 256))
-            return;
-        auto a = m.insert_instruction(
-            ins, make_op("hip::allocate", {{"shape", to_value(x_ins->get_shape())}}));
-        m.replace_instruction(ins, make_op("gpu::triadd_layernorm"), x_ins, y_ins, z_ins, a);
+        auto add_ins = r.instructions["add"];
+        auto eps     = r.instructions["eps"]->eval().at<float>();
+        m.replace_instruction(ins, add_layernorm{eps}, add_ins->inputs());
     }
 };

 } // namespace

 void prefuse_ops::apply(module& m) const
 {
-    match::find_matches(m, find_triadd_layernorm{}, find_layernorm{});
+    match::find_matches(m, find_add_layernorm{}, find_layernorm{});
 }

 } // namespace gpu
...
...
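The gpu::prelayernorm and gpu::preadd_layernorm ops introduced above carry only an epsilon attribute; the computation they stand for is ordinary layer normalization over the last axis (for preadd_layernorm, applied to the sum of the add's inputs). A scalar reference sketch of that math, not the MIGraphX kernel, assuming reduction over a single row:

    #include <cmath>
    #include <vector>

    // y[i] = (x[i] - mean) / sqrt(variance + epsilon), reduced over one row.
    std::vector<float> layernorm_row(const std::vector<float>& x, float epsilon = 1e-12f)
    {
        float mean = 0.0f;
        for(float v : x)
            mean += v;
        mean /= x.size();
        float var = 0.0f;
        for(float v : x)
            var += (v - mean) * (v - mean);
        var /= x.size();
        std::vector<float> y;
        for(float v : x)
            y.push_back((v - mean) / std::sqrt(var + epsilon));
        return y;
    }

The default epsilon of 1e-12f matches the member initializer in layernorm_base.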
src/targets/gpu/quant_convolution.cpp  (View file @ 7f97b8ef)

@@ -22,7 +22,6 @@
  * THE SOFTWARE.
  */
 #include <migraphx/gpu/quant_convolution.hpp>
-#include <migraphx/gpu/device/convert.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/generate.hpp>
...
...
src/targets/gpu/rocblas.cpp  (View file @ 7f97b8ef)

@@ -21,7 +21,13 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include <unordered_set>
+#include <migraphx/ranges.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/gpu/device_name.hpp>
 #include <migraphx/gpu/rocblas.hpp>
+#include <migraphx/gpu/context.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
...
@@ -41,6 +47,33 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
     return rb;
 }

+const std::unordered_set<std::string>& get_rocblas_fp32_archs()
+{
+    static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
+    return supported_archs;
+}
+
+bool get_compute_fp32_flag()
+{
+    bool compute_fp32 = false;
+#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
+    const auto device_name = trim(split_string(get_device_name(), ':').front());
+    if(contains(get_rocblas_fp32_archs(), device_name))
+        compute_fp32 = true;
+#endif
+    return compute_fp32;
+}
+
+bool get_int8_x4_format(context& ctx)
+{
+    bool int8_x4_format = true;
+#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
+    rocblas_gemm_flags flag;
+    rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
+    int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
+#endif
+    return int8_x4_format;
+}

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
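These helpers consolidate logic that previously lived inline in the GPU lowering pass, so the rocBLAS-version preprocessor gate now exists in one place. The new init() in lowering.cpp (see the hunk above) consumes them in roughly two lines:

    auto& ctx      = get_context();
    int8_x4_format = get_int8_x4_format(ctx);  // ask rocBLAS which int8 GEMM layout it expects
    compute_fp32   = get_compute_fp32_flag();  // true on gfx908/gfx90a with rocBLAS >= 2.38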
src/targets/gpu/softmax.cpp  (deleted, 100644 → 0; View file @ 2ba401f0)

/* The MIT License (MIT) - Copyright (c) 2015-2022 Advanced Micro Devices, Inc.
 * All rights reserved. (Standard MIT permission and warranty text.) */
#include <migraphx/gpu/softmax.hpp>
#include <migraphx/gpu/device/softmax.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/tune_axis.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape hip_softmax::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(2).standard();
    return op.normalize_compute_shape({inputs.at(0)});
}

argument hip_softmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    auto n_dim      = args.front().get_shape().lens().size();
    auto tuned_axis = tune_axis(n_dim, op.axis, op.name());
    device::softmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
    return args.back();
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/target.cpp  (View file @ 7f97b8ef)

@@ -42,6 +42,7 @@
 #include <migraphx/register_target.hpp>
 #include <migraphx/replace_allocate.hpp>
 #include <migraphx/rewrite_batchnorm.hpp>
+#include <migraphx/rewrite_gelu.hpp>
 #include <migraphx/rewrite_pooling.hpp>
 #include <migraphx/rewrite_quantization.hpp>
 #include <migraphx/rewrite_rnn.hpp>
...
@@ -116,6 +117,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         inline_module{},
         rewrite_pooling{},
         dead_code_elimination{},
+        rewrite_gelu{},
+        dead_code_elimination{},
         eliminate_common_subexpression{},
         dead_code_elimination{},
         simplify_algebra{},
...
@@ -134,8 +137,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         lowering{&ctx, options.offload_copy},
         eliminate_contiguous{"gpu::contiguous"},
         dead_code_elimination{},
-        replace_allocate{gpu_allocation_model{}, options.offload_copy},
-        dead_code_elimination{},
         eliminate_concat{concat_gpu_optimization{}},
         dead_code_elimination{},
         pack_int8_args{},
...
@@ -144,6 +145,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         fuse_ops{&ctx, options.fast_math},
         dead_code_elimination{},
+        replace_allocate{gpu_allocation_model{}, options.offload_copy},
+        dead_code_elimination{},
         compile_ops{&ctx},
         dead_code_elimination{},
         write_literals{&ctx},
...
...
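Two scheduling changes are visible here: rewrite_gelu (plus a dead-code sweep) joins the early simplification passes, and replace_allocate moves from right after lowering to after fuse_ops. rewrite_gelu, as its name indicates, targets GELU activation patterns in the graph. For reference, the erf-based GELU definition is GELU(x) = 0.5 · x · (1 + erf(x / √2)); a one-line reference implementation (the pass matches this pattern in IR, it does not call code like this):

    #include <cmath>

    double gelu(double x) { return 0.5 * x * (1.0 + std::erf(x / std::sqrt(2.0))); }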
src/targets/ref/lowering.cpp  (View file @ 7f97b8ef)

@@ -51,6 +51,8 @@
 #include <migraphx/register_op.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/tune_axis.hpp>
+#include <migraphx/pad_calc.hpp>
 #include <unordered_map>
 #include <utility>
 #include <iostream>
...
@@ -231,8 +233,30 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
     {
         return op.normalize_compute_shape(inputs);
     }

     argument compute(context&, shape output_shape, std::vector<argument> args) const
     {
+        std::vector<std::size_t> padding;
+        if(op.use_dynamic_same_auto_pad)
+        {
+            auto input_lens = args[0].get_shape().lens();
+            std::vector<std::size_t> img_lens{input_lens.begin() + 2, input_lens.end()};
+            auto weights_lens = args[1].get_shape().lens();
+            std::vector<std::size_t> k_lens{weights_lens.begin() + 2, weights_lens.end()};
+            padding      = calc_dyn_auto_pad(img_lens, k_lens, op.stride, op.dilation);
+            output_shape =
+                compute_padded_shape({args.at(0).get_shape(), args.at(1).get_shape()}, padding);
+        }
+        else
+        {
+            padding = op.padding;
+            if(output_shape.dynamic())
+            {
+                output_shape = op.normalize_compute_shape(
+                    {args.at(0).get_shape(), args.at(1).get_shape()});
+            }
+        }
         argument result{output_shape};
         visit_quantize(result, args[0], args[1])([&](auto output, auto input, auto weights) {
             auto in_lens = input.get_shape().lens();
...
@@ -252,7 +276,7 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
                 {
                     auto d_2 = dim - 2;
                     win_start.push_back(std::ptrdiff_t(idx_o[dim] * op.stride[d_2]) -
-                                        std::ptrdiff_t(op.padding[d_2]));
+                                        std::ptrdiff_t(padding[d_2]));
                 }
                 const auto group_id = w / (wei_n / op.group);
...
@@ -289,6 +313,34 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
         });
         return result;
     }

+    private:
+    /*!
+     * Used for dynamic auto padding since padding needs to be computed at evaluation time.
+     * \param inputs two fixed shape inputs [input_tensor, weights]
+     * \param padding from auto_pad calculation
+     */
+    shape compute_padded_shape(const std::vector<shape>& inputs,
+                               const std::vector<std::size_t>& padding) const
+    {
+        const shape& input            = inputs.at(0);
+        const shape& weights          = inputs.at(1);
+        const size_t num_spatial_dims = input.lens().size() - 2;
+        std::vector<size_t> output_lens{input.lens()[0], weights.lens()[0]};
+        // calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
+        for(size_t i = 0; i < num_spatial_dims; i++)
+        {
+            auto padding_factor = padding[i] + padding[i + num_spatial_dims];
+            output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(
+                1,
+                (input.lens()[i + 2] - (1 + op.dilation[i] * (weights.lens()[i + 2] - 1)) +
+                 padding_factor) /
+                        op.stride[i] +
+                    1)));
+        }
+        return inputs[0].with_lens(output_lens);
+    }
 };

 struct ref_im2col
...
...
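compute_padded_shape applies the usual convolution arithmetic per spatial dimension: out = ((W − K_eff + P_total) / S) + 1, where K_eff = 1 + dilation·(K − 1) and P_total sums the leading and trailing pads. A standalone check under assumed values (W = 56, K = 3, dilation = 1, pads 1 + 1, stride 2, so (56 − 3 + 2)/2 + 1 = 28):

    #include <algorithm>
    #include <cstddef>
    #include <iostream>

    // Same formula as compute_padded_shape above, for one spatial dimension.
    std::size_t conv_out_len(std::size_t w, std::size_t k, std::size_t pad_total,
                             std::size_t stride, std::size_t dilation)
    {
        auto k_eff = 1 + dilation * (k - 1);
        return std::max<std::ptrdiff_t>(1, (w - k_eff + pad_total) / stride + 1);
    }

    int main() { std::cout << conv_out_len(56, 3, 2, 2, 1) << "\n"; } // prints 28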
src/tf/parse_conv.cpp  (View file @ 7f97b8ef)

@@ -100,7 +100,7 @@ struct parse_conv : op_parser<parse_conv>
         {
             MIGRAPHX_THROW("padding should have 4 values");
         }
-        if(padding[0] != padding[2] || padding[1] != padding[3])
+        if(padding[0] != padding[2] or padding[1] != padding[3])
         {
             MIGRAPHX_THROW("migraphx does not support asymetric padding");
         }
...
...
src/tf/parse_depthwiseconv.cpp  (View file @ 7f97b8ef)

@@ -90,7 +90,7 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
         calculate_padding(0, pads, input_dims[2], op.stride[0], op.dilation[0], weight_h);
         calculate_padding(1, pads, input_dims[3], op.stride[1], op.dilation[1], weight_w);

-        if(pads[0] != pads[2] || pads[1] != pads[3])
+        if(pads[0] != pads[2] or pads[1] != pads[3])
         {
             std::vector<int64_t> padding = {0, 0, pads[0], pads[1], 0, 0, pads[2], pads[3]};
             l0 = info.add_instruction(migraphx::make_op("pad", {{"pads", padding}}), l0);
...
...
src/tf/parse_pooling.cpp  (View file @ 7f97b8ef)

@@ -42,7 +42,7 @@ struct parse_pooling : op_parser<parse_pooling>
                           tf_parser::node_info info,
                           std::vector<instruction_ref> args) const
     {
-        if(!starts_with(opd.tf_name, "Max") && !starts_with(opd.tf_name, "Av"))
+        if(not starts_with(opd.tf_name, "Max") and not starts_with(opd.tf_name, "Av"))
         {
             MIGRAPHX_THROW("tf pooling mode must be Max or Average");
         }
...
...
src/tf/parse_relu6.cpp  (View file @ 7f97b8ef)

@@ -41,8 +41,9 @@ struct parse_relu6 : op_parser<parse_relu6>
                           const tf_parser::node_info& info,
                           std::vector<instruction_ref> args) const
     {
-        auto min_val = info.add_literal(0.0f);
-        auto max_val = info.add_literal(6.0f);
+        shape::type_t output_type = args[0]->get_shape().type();
+        auto min_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {0.0f}});
+        auto max_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {6.0f}});
         return info.add_common_op("clip", args[0], min_val, max_val);
     }
...
...
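The new version builds the clip bounds with the input's own element type instead of bare float literals, so a reduced-precision graph keeps a type-homogeneous clip rather than picking up float promotions through the common-op machinery; that reading is inferred from the change, not stated in it. The pattern at a glance, assuming a half-precision input (output_type would be shape::half_type):

    shape::type_t output_type = args[0]->get_shape().type(); // e.g. shape::half_type
    auto min_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {0.0f}});
    auto max_val = info.add_literal(migraphx::literal{migraphx::shape{output_type}, {6.0f}});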
src/tf/tf_parser.cpp  (View file @ 7f97b8ef)

@@ -347,7 +347,7 @@ void tf_parser::parse_node(const std::string& name)
             // input was from a node with multiple outputs
             if(contains(input_name, ':'))
             {
-                input_name = input_name.substr(0, input.find(':'));
+                input_name.resize(input.find(':'));
             }
             else
             {
...
@@ -371,7 +371,7 @@ void tf_parser::parse_node(const std::string& name)
         {
             result = ops[node.op()](*this, {get_attributes(node), node.op(), mm}, args);
         }
-        assert(!result.empty());
+        assert(not result.empty());
         // First output has no ":" delimiter
         instructions[name] = result.front();
         for(size_t i = 1; i < result.size(); i++)
...
@@ -458,7 +458,7 @@ literal tf_parser::parse_tensor(const tensorflow::TensorProto& t) const
     {
         std::vector<size_t> dims = parse_dims(t.tensor_shape());
         size_t shape_size =
             std::accumulate(dims.begin(), dims.end(), 1, std::multiplies<size_t>());
-        if(!t.tensor_content().empty()) // has raw data
+        if(not t.tensor_content().empty()) // has raw data
         {
             const std::string& s = t.tensor_content();
             switch(t.dtype())
...
...
src/tmp_dir.cpp  (View file @ 7f97b8ef)

@@ -78,7 +78,7 @@ void tmp_dir::execute(const std::string& exe, const std::string& args) const
 tmp_dir::~tmp_dir()
 {
-    if(!enabled(MIGRAPHX_DEBUG_SAVE_TEMP_DIR{}))
+    if(not enabled(MIGRAPHX_DEBUG_SAVE_TEMP_DIR{}))
     {
         fs::remove_all(this->path);
     }
...
...
src/value.cpp  (View file @ 7f97b8ef)

@@ -400,7 +400,7 @@ std::pair<value*, bool> value::insert(const value& v)
 {
     if(v.key.empty())
     {
-        if(!x)
+        if(not x)
             x = std::make_shared<array_value_holder>();
         get_array_impl(x).push_back(v);
         assert(this->if_array());
...
@@ -408,7 +408,7 @@ std::pair<value*, bool> value::insert(const value& v)
     }
     else
     {
-        if(!x)
+        if(not x)
             x = std::make_shared<object_value_holder>();
         auto p = x->if_object()->emplace(v.key, get_array_impl(x).size());
         if(p.second)
...
@@ -420,7 +420,7 @@ std::pair<value*, bool> value::insert(const value& v)
 value* value::insert(const value* pos, const value& v)
 {
     assert(v.key.empty());
-    if(!x)
+    if(not x)
         x = std::make_shared<array_value_holder>();
     auto&& a = get_array_impl(x);
     auto it  = a.insert(a.begin() + (pos - begin()), v);
...
@@ -466,7 +466,7 @@ bool compare(const value& x, const value& y, F f)
 value::type_t value::get_type() const
 {
-    if(!x)
+    if(not x)
         return null_type;
     return x->get_type();
 }
...
@@ -511,14 +511,7 @@ void print_value(std::ostream& os, const std::vector<value>& x)
     os << "}";
 }

 void print_value(std::ostream& os, const value::binary& x)
-{
-    // Convert binary to integers
-    std::vector<int> v(x.begin(), x.end());
-    os << "{";
-    os << to_string_range(v);
-    os << "}";
-}
+{ os << x; }

 std::ostream& operator<<(std::ostream& os, const value& d)
 {
...
...
test/api/test_custom_op.cpp  (View file @ 7f97b8ef)

@@ -43,6 +43,8 @@ struct sigmoid_custom_op final : migraphx::experimental_custom_op_base
         return inputs[1];
     }

+    virtual bool runs_on_offload_target() const override { return true; }

     virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
     {
         if(inputs.size() != 2)
...
@@ -111,4 +113,45 @@ TEST_CASE(run_sigmoid_with_incorrect_shape)
         "Error in compute_shape of: sigmoid_custom_op: op must have two inputs"));
 }

+struct identity_custom_op final : migraphx::experimental_custom_op_base
+{
+    virtual std::string name() const override { return "identity_custom_op"; }
+    virtual migraphx::argument
+    compute(migraphx::context, migraphx::shape, migraphx::arguments inputs) const override
+    {
+        return inputs[0];
+    }
+
+    virtual bool runs_on_offload_target() const override { return true; }
+
+    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
+    {
+        if(inputs.size() != 1)
+        {
+            throw std::runtime_error("Identity op must have only one input");
+        }
+        return inputs.back();
+    }
+
+    virtual std::vector<size_t> output_alias(migraphx::shapes) const override { return {0, 1}; }
+};
+
+TEST_CASE(run_custom_op_with_invalid_output_alias)
+{
+    identity_custom_op i_op;
+    migraphx::register_experimental_custom_op(i_op);
+    auto op = migraphx::operation("identity_custom_op");
+    EXPECT(op.name() == "identity_custom_op");
+    migraphx::program p;
+    migraphx::shape s{migraphx_shape_float_type, {12}};
+    migraphx::module m = p.get_main_module();
+    auto x             = m.add_parameter("x", s);
+    auto i_ins = m.add_instruction(migraphx::operation("identity_custom_op"), {x});
+    migraphx_test_private_disable_exception_catch(true);
+    EXPECT(test::throws<std::exception>(
+        [&] { p.compile(migraphx::target("ref")); },
+        "Currently, CustomOps in MIGraphX only supports one output_alias"));
+}

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
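The new test registers an op whose output_alias returns two indices, {0, 1}, and expects compilation to fail with the message above: custom ops currently support at most one aliased output. A conforming override aliases a single input, as the stride_two op in the GPU test file below does:

    // Alias the output onto input 0; at most one entry is currently supported.
    virtual std::vector<size_t> output_alias(migraphx::shapes) const override { return {0}; }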
test/api/test_custom_op_gpu.cpp
View file @
7f97b8ef
...
@@ -24,40 +24,89 @@
...
@@ -24,40 +24,89 @@
#include <hip/hip_runtime_api.h>
#include <hip/hip_runtime_api.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>
#include <migraphx/migraphx.hpp>
#include <numeric>
#include <stdexcept>
#include <stdexcept>
#include "test.hpp"
#include "test.hpp"
#define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
#define MIGRAPHX_HIP_ASSERT(x) (EXPECT(x == hipSuccess))
struct
simple_custom_op
final
:
migraphx
::
experimental_custom_op_base
struct
half_copy_host
final
:
migraphx
::
experimental_custom_op_base
{
{
virtual
std
::
string
name
()
const
override
{
return
"simple_custom_op"
;
}
virtual
std
::
string
name
()
const
override
{
return
"half_copy_host"
;
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
false
;
}
virtual
migraphx
::
argument
virtual
migraphx
::
argument
compute
(
migraphx
::
context
ctx
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
compute
(
migraphx
::
context
ctx
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
{
// sets first half size_bytes of the input 0, and rest of the half bytes are copied.
// This custom op simply sets first half size_bytes of the input to 0, and rest of the half
int
*
h_output
=
nullptr
;
// bytes are copied. for this custom_op, it does its computation on the host. Therefore,
auto
*
d_output
=
reinterpret_cast
<
int
*>
(
inputs
[
0
].
data
());
// `runs_on_offload_target()` is set to false. MIGraphX would inject necessary buffer copies
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
// to and from GPU to Host based on `runs_on_offload_targe()` flag for input buffers as well
auto
*
output_ptr
=
inputs
[
1
].
data
();
// as the output buffers
auto
copy_bytes
=
input_bytes
/
2
;
auto
*
input_buffer_ptr
=
inputs
[
0
].
data
();
auto
*
output_buffer_ptr
=
inputs
[
1
].
data
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
copy_bytes
=
input_bytes
/
2
;
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipHostMalloc
(
&
h_output
,
input_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipMemcpyAsync
(
output_buffer_ptr
,
MIGRAPHX_HIP_ASSERT
(
hipMemcpyAsync
(
input_buffer_ptr
,
h_output
,
d_output
,
input_bytes
,
hipMemcpyDeviceToHost
,
ctx
.
get_queue
<
hipStream_t
>
()));
input_bytes
,
hipMemcpyHostToHost
,
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
h_
output
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
output
_buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemcpy
(
output_ptr
,
h_output
,
input_bytes
,
hipMemcpyHostToDevice
));
return
inputs
[
1
];
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
if
(
not
inputs
[
0
].
standard
()
or
not
inputs
[
1
].
standard
())
{
throw
std
::
runtime_error
(
"Input args must be standard shaped"
);
}
if
(
inputs
.
size
()
!=
2
)
{
throw
std
::
runtime_error
(
"number of inputs must be 2"
);
}
return
inputs
.
back
();
}
};
struct
half_copy_device
final
:
migraphx
::
experimental_custom_op_base
{
virtual
std
::
string
name
()
const
override
{
return
"half_copy_device"
;
}
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
migraphx
::
argument
compute
(
migraphx
::
context
ctx
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
// This custom op simply sets first half size_bytes of the input to 0, and rest of the half
// bytes are copied. for this custom_op, it does its computation on the "GPU". Therefore,
// `runs_on_offload_target()` is set to "true".
auto
*
input_buffer_ptr
=
inputs
[
0
].
data
();
auto
*
output_buffer_ptr
=
inputs
[
1
].
data
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
copy_bytes
=
input_bytes
/
2
;
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipMemcpyAsync
(
output_buffer_ptr
,
input_buffer_ptr
,
input_bytes
,
hipMemcpyDeviceToDevice
,
ctx
.
get_queue
<
hipStream_t
>
()));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
output_buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
MIGRAPHX_HIP_ASSERT
(
hipHostFree
(
h_output
));
return
inputs
[
1
];
return
inputs
[
1
];
}
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
{
if
(
!
inputs
[
0
].
standard
())
if
(
not
inputs
[
0
].
standard
()
or
not
inputs
[
1
].
standard
()
)
{
{
throw
std
::
runtime_error
(
"
firs
t arg must be standard shaped"
);
throw
std
::
runtime_error
(
"
Inpu
t arg
s
must be standard shaped"
);
}
}
if
(
inputs
.
size
()
!=
2
)
if
(
inputs
.
size
()
!=
2
)
{
{
...
@@ -67,36 +116,208 @@ struct simple_custom_op final : migraphx::experimental_custom_op_base
...
@@ -67,36 +116,208 @@ struct simple_custom_op final : migraphx::experimental_custom_op_base
}
}
};
};
TEST_CASE
(
run_simple_custom_op
)
// overwrites input buffer
struct
half_copy_device_same_buffer
final
:
migraphx
::
experimental_custom_op_base
{
{
simple_custom_op
simple_op
;
virtual
std
::
string
name
()
const
override
{
return
"half_copy_device_same_buffer"
;
}
migraphx
::
register_experimental_custom_op
(
simple_op
);
virtual
bool
runs_on_offload_target
()
const
override
{
return
true
;
}
virtual
migraphx
::
argument
compute
(
migraphx
::
context
,
migraphx
::
shape
,
migraphx
::
arguments
inputs
)
const
override
{
// This custom op simply sets first half size_bytes of the input 0, and rest of the half
// bytes are copied. for this custom_op, it does its computation on the "device". Therefore,
// `runs_on_offload_target()` is set to "true"
auto
*
buffer_ptr
=
inputs
[
0
].
data
();
auto
input_bytes
=
inputs
[
0
].
get_shape
().
bytes
();
auto
copy_bytes
=
input_bytes
/
2
;
MIGRAPHX_HIP_ASSERT
(
hipSetDevice
(
0
));
MIGRAPHX_HIP_ASSERT
(
hipMemset
(
buffer_ptr
,
0
,
copy_bytes
));
MIGRAPHX_HIP_ASSERT
(
hipDeviceSynchronize
());
return
inputs
[
0
];
}
virtual
migraphx
::
shape
compute_shape
(
migraphx
::
shapes
inputs
)
const
override
{
if
(
not
inputs
[
0
].
standard
())
{
throw
std
::
runtime_error
(
"Input arg must be standard shaped"
);
}
return
inputs
.
front
();
}
};
TEST_CASE
(
register_half_copy_op
)
{
half_copy_host
hch
;
migraphx
::
register_experimental_custom_op
(
hch
);
auto
op
=
migraphx
::
operation
(
"half_copy_host"
);
EXPECT
(
op
.
name
()
==
"half_copy_host"
);
half_copy_device
hcd
;
migraphx
::
register_experimental_custom_op
(
hcd
);
op
=
migraphx
::
operation
(
"half_copy_device"
);
EXPECT
(
op
.
name
()
==
"half_copy_device"
);
half_copy_device_same_buffer
hcdsb
;
migraphx
::
register_experimental_custom_op
(
hcdsb
);
op
=
migraphx
::
operation
(
"half_copy_device_same_buffer"
);
EXPECT
(
op
.
name
()
==
"half_copy_device_same_buffer"
);
}
TEST_CASE(half_copy_custom_op_test)
{
    auto run_test_prog = [](const std::string& op_name, bool buffer_alloc) {
        migraphx::program p;
        migraphx::module m = p.get_main_module();
        migraphx::shape s{migraphx_shape_float_type, {4, 3}};
        auto x = m.add_parameter("x", s);
        migraphx::instructions inputs = {x};
        if(buffer_alloc)
        {
            auto alloc = m.add_allocation(s);
            inputs     = {x, alloc};
        }
        auto half_copy_ins = m.add_instruction(migraphx::operation(op_name.c_str()), inputs);
        m.add_return({half_copy_ins});
        migraphx::compile_options options;
        options.set_offload_copy();
        p.compile(migraphx::target("gpu"), options);
        migraphx::program_parameters pp;
        std::vector<float> x_data(12);
        std::iota(x_data.begin(), x_data.end(), 0);
        pp.add("x", migraphx::argument(s, x_data.data()));
        auto results    = p.eval(pp);
        auto result     = results[0];
        auto result_vec = result.as_vector<float>();
        std::vector<float> expected_result(12, 0);
        std::iota(expected_result.begin() + 6, expected_result.end(), 6);
        EXPECT(bool{result == migraphx::argument(s, expected_result.data())});
    };
    // register all the ops
    half_copy_host hch;
    migraphx::register_experimental_custom_op(hch);
    half_copy_device hcd;
    migraphx::register_experimental_custom_op(hcd);
    half_copy_device_same_buffer hcdsb;
    migraphx::register_experimental_custom_op(hcdsb);
    std::vector<std::pair<std::string, bool>> tests_config = {
        {"half_copy_host", true},
        {"half_copy_device", true},
        {"half_copy_device_same_buffer", false}};
    for(const auto& i : tests_config)
    {
        run_test_prog(i.first, i.second);
    }
}
struct stride_two final : migraphx::experimental_custom_op_base
{
    virtual std::string name() const override { return "stride_two"; }
    virtual migraphx::argument
    compute(migraphx::context, migraphx::shape out_shape, migraphx::arguments inputs) const override
    {
        return {out_shape, inputs[0].data()};
    }
    virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
    {
        if(inputs.size() != 1)
        {
            throw std::runtime_error("stride_two op must have only one input argument");
        }
        if(not inputs[0].standard())
        {
            throw std::runtime_error("stride_two op only works on standard input shapes");
        }
        migraphx::shape input_s       = inputs[0];
        std::vector<size_t> dims      = input_s.lengths();
        std::vector<size_t> strides   = input_s.strides();
        std::vector<size_t> new_dims;
        std::vector<size_t> new_strides;
        std::for_each(dims.begin(), dims.end(), [&](auto i) { new_dims.push_back(i / 2); });
        std::for_each(strides.begin(), strides.end(), [&](auto i) { new_strides.push_back(i * 2); });
        migraphx::shape output_shape{input_s.type(), new_dims, new_strides};
        return output_shape;
    }
    virtual bool runs_on_offload_target() const override { return true; }
    virtual std::vector<size_t> output_alias(migraphx::shapes) const override { return {0}; }
};
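As a sanity check of the shape arithmetic (a standalone sketch, not part of the commit): a standard {4, 4, 4} float input has strides {16, 4, 1}, so compute_shape returns dims {2, 2, 2} with strides {32, 8, 2}, i.e. a view of every other element along each axis of the same buffer:

#include <cstddef>
#include <cstdio>
#include <vector>

int main()
{
    std::vector<size_t> dims    = {4, 4, 4};
    std::vector<size_t> strides = {16, 4, 1}; // standard row-major strides
    // stride_two halves every dimension and doubles every stride; the linear
    // offsets visited by the resulting {2, 2, 2} view are:
    for(size_t i0 = 0; i0 < dims[0] / 2; ++i0)
        for(size_t i1 = 0; i1 < dims[1] / 2; ++i1)
            for(size_t i2 = 0; i2 < dims[2] / 2; ++i2)
                std::printf("%zu ", 2 * (i0 * strides[0] + i1 * strides[1] + i2 * strides[2]));
    std::printf("\n"); // 0 2 8 10 32 34 40 42
}

Because the test below feeds iota data, these offsets are exactly the values it expects back.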
TEST_CASE(stride_two_custom_op_test)
{
    stride_two st;
    migraphx::register_experimental_custom_op(st);
    migraphx::program p;
    migraphx::module m = p.get_main_module();
    migraphx::shape s{migraphx_shape_float_type, {4, 4, 4}};
    auto x              = m.add_parameter("x", s);
    auto stride_two_ins = m.add_instruction(migraphx::operation("stride_two"), {x});
    m.add_return({stride_two_ins});
    migraphx::compile_options options;
    options.set_offload_copy();
    p.compile(migraphx::target("gpu"), options);
    migraphx::program_parameters pp;
    std::vector<float> x_data(64);
    std::iota(x_data.begin(), x_data.end(), 0);
    pp.add("x", migraphx::argument(s, x_data.data()));
    auto results    = p.eval(pp);
    auto result     = results[0];
    auto result_vec = result.as_vector<float>();
    std::vector<float> expected_result = {0, 2, 8, 10, 32, 34, 40, 42};
    EXPECT(result_vec == expected_result);
}
TEST_CASE(custom_op_with_pre_and_post_subgraph_test)
{
    half_copy_host hco;
    migraphx::register_experimental_custom_op(hco);
    stride_two st;
    migraphx::register_experimental_custom_op(st);
    migraphx::program p;
-    migraphx::shape s{migraphx_shape_int32_type, {4, 3}};
-    migraphx::shape trans_shape{migraphx_shape_int32_type, {3, 4}};
    migraphx::shape s{migraphx_shape_float_type, {4, 6}};
    migraphx::module m = p.get_main_module();
    auto x = m.add_parameter("x", s);
-    auto neg   = m.add_instruction(migraphx::operation("neg"), x);
-    auto alloc = m.add_allocation(trans_shape);
-    auto neg_trans =
-        m.add_instruction(migraphx::operation("transpose", "{permutation: [1, 0]}"), {neg});
-    auto neg_cont = m.add_instruction(migraphx::operation("contiguous"), {neg_trans});
-    auto custom_kernel =
-        m.add_instruction(migraphx::operation("simple_custom_op"), {neg_cont, alloc});
-    auto relu = m.add_instruction(migraphx::operation("relu"), custom_kernel);
-    m.add_return({relu});
    // pre-subgraph
    auto neg_ins = m.add_instruction(migraphx::operation("neg"), x);
    auto trans_ins =
        m.add_instruction(migraphx::operation("transpose", "{permutation: [1, 0]}"), {neg_ins});
    auto cont_ins = m.add_instruction(migraphx::operation("contiguous"), {trans_ins});
    // custom_op
    migraphx::shape trans_shape{migraphx_shape_float_type, {6, 4}};
    auto alloc         = m.add_allocation(trans_shape);
    auto half_copy_ins = m.add_instruction(migraphx::operation("half_copy_host"), {cont_ins, alloc});
    // post-subgraph
    auto abs_ins = m.add_instruction(migraphx::operation("abs"), {half_copy_ins});
    // another custom_op
    auto stride_two_ins = m.add_instruction(migraphx::operation("stride_two"), {abs_ins});
    // post-subgraph
    auto relu_ins = m.add_instruction(migraphx::operation("relu"), {stride_two_ins});
    m.add_return({relu_ins});
    migraphx::compile_options options;
    options.set_offload_copy();
    p.compile(migraphx::target("gpu"), options);
    migraphx::program_parameters pp;
-    std::vector<int> x_data(12, -3);
    std::vector<float> x_data(s.elements());
    std::iota(x_data.begin(), x_data.end(), 0);
    pp.add("x", migraphx::argument(s, x_data.data()));
    auto results = p.eval(pp);
    auto result  = results[0];
-    auto result_vec = result.as_vector<int>();
-    std::vector<int> expected_result(12, 0);
-    std::fill(expected_result.begin() + 6, expected_result.end(), 3);
-    EXPECT(bool{result == migraphx::argument(trans_shape, expected_result.data())});
    auto result_vec = result.as_vector<float>();
    std::vector<float> expected_result = {0, 0, 0, 0, 4, 16};
    EXPECT(bool{result == migraphx::argument(migraphx::shape{migraphx_shape_float_type, {3, 2}},
                                             expected_result.data())});
}
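Tracing the new expected values by hand (a standalone sketch, not part of the commit): x holds 0..23 in a 4x6 row-major layout; neg, transpose, and contiguous leave -(row*6 + col) at contiguous position col*4 + row of a 6x4 buffer; half_copy_host zeroes the first 12 of the 24 floats; abs drops the signs; stride_two then views offsets {0, 2, 8, 10, 16, 18}; and relu passes the non-negative values through:

#include <cstdio>
#include <vector>

int main()
{
    std::vector<float> buf(24);
    for(int r = 0; r < 4; ++r)  // neg + transpose + contiguous
        for(int c = 0; c < 6; ++c)
            buf[c * 4 + r] = -float(r * 6 + c);
    for(int i = 0; i < 12; ++i) // half_copy_host: zero first half of the bytes
        buf[i] = 0;
    for(auto& v : buf)          // abs
        v = (v < 0) ? -v : v;
    for(int a = 0; a < 3; ++a)  // stride_two view: dims {3, 2}, strides {8, 2}
        for(int b = 0; b < 2; ++b)
        {
            float v = buf[a * 8 + b * 2];
            std::printf("%g ", (v > 0) ? v : 0); // relu
        }
    std::printf("\n"); // 0 0 0 0 4 16
}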
int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/api/test_gpu.cpp
View file @
7f97b8ef
...
@@ -25,6 +25,8 @@
#include <hip/hip_runtime_api.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>
+#include <migraphx/manage_ptr.hpp>
#include "test.hpp"

TEST_CASE(load_and_run)
...
@@ -44,11 +46,67 @@ TEST_CASE(load_and_run)
    {
        pp.add(name, migraphx::argument::generate(param_shapes[name]));
    }
    auto outputs = p.eval(pp);
    CHECK(shapes_before.size() == outputs.size());
    CHECK(bool{shapes_before.front() == outputs.front().get_shape()});
}
using hip_ptr    = MIGRAPHX_MANAGE_PTR(void, hipFree);
using stream_ptr = MIGRAPHX_MANAGE_PTR(hipStream_t, hipStreamDestroy);

stream_ptr get_stream()
{
    hipStream_t stream;
    auto err = hipStreamCreateWithFlags(&stream, 0);
    EXPECT(err == hipSuccess);
    return stream_ptr{stream};
}

hip_ptr get_hip_buffer(size_t size)
{
    void* ptr;
    auto err = hipMalloc(&ptr, size);
    EXPECT(err == hipSuccess);
    return hip_ptr{ptr};
}
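Assuming MIGRAPHX_MANAGE_PTR expands to a std::unique_ptr keyed to the given release function (as manage_ptr.hpp's name suggests), the stream and the device buffers are freed automatically when the test scope unwinds. A hedged usage sketch of the helpers above:

void demo()
{
    stream_ptr stream = get_stream();         // hipStreamDestroy runs at scope exit
    hip_ptr buf       = get_hip_buffer(1024); // hipFree runs at scope exit
    // ... enqueue work on stream.get(), reading/writing buf.get() ...
}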
TEST_CASE(load_and_run_async)
{
    auto p             = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
    auto shapes_before = p.get_output_shapes();
    migraphx::compile_options options;
    options.set_offload_copy(false);
    p.compile(migraphx::target("gpu"), options);
    auto shapes_after = p.get_output_shapes();
    CHECK(shapes_before.size() == 1);
    CHECK(shapes_before.size() == shapes_after.size());
    CHECK(bool{shapes_before.front() == shapes_after.front()});
    migraphx::program_parameters pp;
    auto param_shapes = p.get_parameter_shapes();
    stream_ptr stream = get_stream();
    std::vector<hip_ptr> buffs;
    std::vector<migraphx::argument> args;
    for(auto&& name : param_shapes.names())
    {
        args.push_back(migraphx::argument::generate(param_shapes[name]));
        buffs.push_back(get_hip_buffer(args.rbegin()->get_shape().bytes()));
        auto err = hipMemcpy(buffs.rbegin()->get(),
                             args.rbegin()->data(),
                             args.rbegin()->get_shape().bytes(),
                             hipMemcpyHostToDevice);
        EXPECT(err == hipSuccess);
        pp.add(name, migraphx::argument(args.rbegin()->get_shape(), buffs.rbegin()->get()));
    }
    auto outputs = p.run_async(pp, stream.get());
    CHECK(shapes_before.size() == outputs.size());
    CHECK(bool{shapes_before.front() == outputs.front().get_shape()});
}
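With offload copy disabled, the parameters and outputs stay in device memory, so inspecting a result on the host needs an explicit synchronize and copy. A hypothetical follow-up inside the test above (not part of the commit):

    std::vector<char> host(outputs.front().get_shape().bytes());
    EXPECT(hipStreamSynchronize(stream.get()) == hipSuccess); // run_async only enqueues work
    EXPECT(hipMemcpy(host.data(),
                     outputs.front().data(),
                     host.size(),
                     hipMemcpyDeviceToHost) == hipSuccess);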
TEST_CASE(load_and_run_ctx)
{
    auto p = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
...
@@ -82,10 +140,10 @@ TEST_CASE(if_pl_test)
    migraphx::program_parameters pp;
    auto param_shapes = p.get_parameter_shapes();
    auto xs           = param_shapes["x"];
-    std::vector<float> xd(xs.bytes() / sizeof(float), 1.0);
+    std::vector<float> xd(xs.elements(), 1.0);
    pp.add("x", migraphx::argument(xs, xd.data()));
    auto ys = param_shapes["y"];
-    std::vector<float> yd(ys.bytes() / sizeof(float), 2.0);
+    std::vector<float> yd(ys.elements(), 2.0);
    pp.add("y", migraphx::argument(ys, yd.data()));
    char ccond = cond;
    pp.add("cond", migraphx::argument(param_shapes["cond"], &ccond));
...
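The xd/yd change is a readability fix: for a packed float parameter shape the two expressions agree, but elements() states the intent without hard-coding the element size. A tiny standalone check (hypothetical, not in the commit):

#include <cassert>
#include <cstddef>

int main()
{
    const std::size_t bytes = 4 * 3 * sizeof(float); // e.g. a {4, 3} float parameter
    assert(bytes / sizeof(float) == 12);             // what elements() would return
}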