gaoqiong / MIGraphX / Commits / dae94657

Commit dae94657 (Unverified)
Authored Dec 14, 2022 by Chris Austen; committed by GitHub, Dec 14, 2022

    Merge branch 'develop' into jit-reduce-reg

Parents: b013d991, 56c43445

Changes: 201. Showing 20 changed files with 450 additions and 486 deletions (+450, -486).
src/targets/gpu/mlir.cpp                                  +95   -69
src/targets/gpu/perfdb.cpp                                +10   -5
src/targets/gpu/prefuse_ops.cpp                           +5    -2
src/targets/gpu/quant_convolution.cpp                     +0    -194
src/targets/gpu/target.cpp                                +10   -5
src/targets/ref/lowering.cpp                              +14   -185
src/tf/parse_batchnorm.cpp                                +27   -5
src/tf/parse_conv.cpp                                     +0    -5
src/tf/parse_depthwiseconv.cpp                            +0    -5
test/api/test_custom_op_gpu.cpp                           +7    -4
test/fuse_pointwise.cpp                                   +29   -0
test/gpu/hip.cpp                                          +50   -0
test/gpu/literal.cpp                                      +1    -1
test/gpu/mlir.cpp                                         +5    -5
test/gpu/quantization.cpp                                 +0    -1
test/instruction.cpp                                      +51   -0
test/layout_nhwc.cpp                                      +127  -0
test/literal_test.cpp                                     +19   -0
test/onnx/argmax_dyn_test.onnx                            +0    -0
test/onnx/averagepool_dyn_asym_padding_error_test.onnx    +0    -0
src/targets/gpu/mlir.cpp  View file @ dae94657

@@ -21,6 +21,7 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include "migraphx/make_op.hpp"
 #include <migraphx/gpu/mlir.hpp>
 #ifdef MIGRAPHX_MLIR
@@ -31,7 +32,13 @@
 #include <mlir-c/Dialect/MIGraphX.h>
 #include <mlir-c/IntegerSet.h>
 #include <mlir-c/Pass.h>
-#include <mlir-c/Registration.h>
+#include <mutex>
+#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
+#warning "Incompatible version of rocMLIR library used, disabling"
+#undef MIGRAPHX_MLIR
+#else
+#include <mlir-c/RegisterRocMLIR.h>
+#endif
 #endif
 #include <migraphx/env.hpp>
@@ -43,15 +50,12 @@
 #include <migraphx/gpu/code_object_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/device_name.hpp>
-#include <migraphx/iterator_for.hpp>
 #include <migraphx/gpu/perfdb.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/permutation.hpp>
 #include <deque>
 #include <variant>
-#if defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) && MLIR_MIGRAPHX_DIALECT_API_VERSION >= 2
-#define MIGRAPHX_MLIR_BARE_POINTER
-#endif

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
@@ -99,7 +103,10 @@ struct mlir_handle
     mlir_handle(T p) : handle(ptr{p}) {}

-    T get() const { return handle.get().get(); }
+    T get() const
+    {
+        return handle.get().get(); // NOLINT(readability-redundant-smartptr-get)
+    }

     T release() { return handle.release().get(); }
@@ -163,9 +170,11 @@ struct mlir_program
           location(mlirLocationUnknownGet(ctx.get())),
           mmodule(mlirModuleCreateEmpty(location))
     {
-        MlirDialectHandle mixr_handle = mlirGetDialectHandle__migraphx__();
-        mlirDialectHandleRegisterDialect(mixr_handle, ctx.get());
-        mlirRegisterAllDialects(ctx.get());
+        MlirDialectRegistry registry = mlirDialectRegistryCreate();
+        mlirRegisterRocMLIRDialects(registry);
+        mlirContextAppendDialectRegistry(ctx.get(), registry);
+        mlirContextLoadAllAvailableDialects(ctx.get());
+        mlirDialectRegistryDestroy(registry);
         mlirContextSetAllowUnregisteredDialects(ctx.get(), true /*allow*/);
     }
@@ -370,7 +379,11 @@ struct mlir_program
     mlir_operation_state& add_results(const std::vector<shape>& outputs)
     {
-        auto x = prog->make_tensors(outputs);
+        std::vector<shape> reshaped(outputs.size());
+        std::transform(outputs.begin(), outputs.end(), reshaped.begin(), [](const shape& r) {
+            return shape{r.type(), r.lens()};
+        });
+        auto x = prog->make_tensors(reshaped);
         mlirOperationStateAddResults(&op_state, x.size(), x.data());
         return *this;
     }
@@ -443,7 +456,8 @@ struct mlir_program
         auto ops = create_operation_state("func.func");
         ops.add_attributes({{"function_type", make_function_type(inputs, outputs)},
                             {"sym_name", std::string("main")},
-                            {"kernel", std::string("mixr")}});
+                            {"kernel", std::string("mixr")},
+                            {"arch", target_arch}});
         ops.add_region(std::move(region));
         insert(body, std::move(ops));
@@ -502,11 +516,13 @@ struct mlir_program
     {
         pp =
             problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
-        std::string tuned = get_tune_params();
+        // check if HW supports xdlops
+        auto target_chip  = trim(split_string(target_arch, ':').front());
+        bool xdlops       = contains(get_xdlops_archs(), target_chip);
+        std::string tuned = get_tune_params(xdlops);
         if(not tuned.empty())
             ops.add_attributes({{"perf_config", tuned}});
-        // check if HW supports xdlops
-        if(contains(get_xdlops_archs(), target_name))
+        if(xdlops)
             ops.add_attributes({{"xdlopsV2", true}});
     }
@@ -530,7 +546,7 @@ struct mlir_program
         // 1st pipeline to call
         mlirMIGraphXAddHighLevelPipeline(pm.get());
         // 2nd pipeline to call
-        mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
+        mlirMIGraphXAddBackendPipeline(pm.get(), target_arch.c_str());
         mlirPassManagerRun(pm.get(), mmodule.get());

         code_object_op op{};
@@ -540,16 +556,7 @@ struct mlir_program
         return op;
     }

-    void find_target()
-    {
-        std::string tname = get_device_name();
-        // HACK: Since MLIR can't handle the full target name
-        target_name = trim(split_string(tname, ':').front());
-        if(tname.size() != target_name.size())
-            std::cout << "*************** WARNING: MLIR may not compile the correct target features for: "
-                      << tname << std::endl;
-    }
+    void find_target() { target_arch = get_device_name(); }

     std::pair<std::size_t, std::size_t> get_launch_params() const
     {
@@ -571,14 +578,14 @@ struct mlir_program
         MIGRAPHX_THROW("Failed to compile mlir program");
     }

-    std::string get_tune_params() { return get_mlir_perf_for_conv(pp); }
+    std::string get_tune_params(bool xdlops) { return get_mlir_perf_for_conv(pp, xdlops); }

     mlir_context ctx;
     MlirLocation location;
     mlir_module mmodule;
     problem_params pp;
     std::deque<std::string> strings{};
-    std::string target_name;
+    std::string target_arch;
 };

 std::string dump_mlir(const module& m)
@@ -589,11 +596,61 @@ std::string dump_mlir(const module& m)
     return mlir_print(&mlirOperationPrint, mod_op);
 }

-code_object_op compile_mlir(const context&, const module& m)
+void adjust_param_shapes(module& m, const std::vector<instruction_ref>& inputs)
+{
+    auto names = m.get_parameter_names();
+    std::sort(names.begin(), names.end());
+    for(auto i : range(names.size()))
+    {
+        const auto& name  = names[i];
+        const auto& input = inputs[i]->get_shape();
+        auto param        = m.get_parameter(name);
+        if(input.standard())
+            continue;
+        auto lens    = input.lens();
+        auto strides = input.strides();
+        std::vector<operation> ops;
+        if(input.transposed())
+        {
+            auto perm  = find_permutation(input);
+            auto iperm = invert_permutation(perm);
+            lens       = reorder_dims(lens, iperm);
+            strides    = reorder_dims(strides, iperm);
+            ops.push_back(make_op("transpose", {{"permutation", perm}}));
+        }
+        if(input.broadcasted())
+        {
+            std::transform(lens.begin(),
+                           lens.end(),
+                           strides.begin(),
+                           lens.begin(),
+                           [](auto len, auto stride) -> std::size_t {
+                               if(stride == 0)
+                                   return 1;
+                               return len;
+                           });
+            ops.push_back(make_op("multibroadcast", {{"out_lens", input.lens()}}));
+        }
+        auto new_param = std::accumulate(
+            ops.begin(),
+            ops.end(),
+            m.add_parameter(name + ".0", shape{input.type(), lens}),
+            [&](auto x, auto op) { return m.insert_instruction(param, op, x); });
+        m.replace_instruction(param, new_param);
+        m.remove_instruction(param);
+    }
+}
+
+code_object_op compile_mlir(const context&, module m, const std::vector<instruction_ref>& inputs)
 {
+    adjust_param_shapes(m, inputs);
     const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
+
     if(trace)
         std::cout << m << std::endl;
+
+    // set mutex while llvm thread support is disabled.
+    static std::mutex g_mlirc_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
+
     mlir_program mp;
     mp.find_target();
     mp.parse(m);
@@ -613,46 +670,9 @@ instruction_ref insert_mlir(module& m,
     std::vector<instruction_ref> refs;
     std::size_t last = 0;
-#ifdef MIGRAPHX_MLIR_BARE_POINTER
     refs.reserve(inputs.size());
     std::copy(inputs.begin(), inputs.end(), std::back_inserter(refs));
     last = refs.size() - 1;
-#else
-    refs.reserve(inputs.size() * 15);
-    std::unordered_map<uint64_t, instruction_ref> literal_map{};
-    auto get_literal = [&](uint64_t value) {
-        auto fi = literal_map.find(value);
-        if(fi != literal_map.end())
-            return fi->second;
-        auto lit = m.add_literal(value);
-        literal_map.emplace(value, lit);
-        return lit;
-    };
-
-    for(auto input : inputs)
-    {
-        const size_t offset = 0;
-        auto s = input->get_shape();
-        last   = refs.size();
-        refs.push_back(input);
-        refs.push_back(input);
-        refs.push_back(get_literal(offset)); // offset
-        // dim sizes
-        std::transform(s.lens().begin(),
-                       s.lens().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-        // dim strides
-        std::transform(s.strides().begin(),
-                       s.strides().end(),
-                       std::back_inserter(refs),
-                       [&](const auto& lval) { return get_literal(lval); });
-        // refs.push_back(get_literal(1)); // G
-    }
-#endif
     co.expected_inputs = to_shapes(refs);
     co.output_arg      = last;
     return m.insert_instruction(ins, co, refs);
@@ -662,13 +682,19 @@ instruction_ref insert_mlir(module& m,
 std::string dump_mlir(const module&) { return {}; }

-code_object_op compile_mlir(const context&, const module&) { return {}; }
-
 template <class T>
 void use(T&)
 {
 }

+// Disabling clang-tidy warning on non-real useage.
+// NOLINTBEGIN(performance-unnecessary-value-param)
+code_object_op compile_mlir(const context&, module, const std::vector<instruction_ref>&)
+{
+    return {};
+}
+// NOLINTEND(performance-unnecessary-value-param)
+
 instruction_ref
 // cppcheck-suppress funcArgNamesDifferent
 insert_mlir(module& m, instruction_ref, code_object_op co, const std::vector<instruction_ref>&)
...
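The adjust_param_shapes function added above collapses broadcast dimensions before handing a module to MLIR: any dimension whose stride is 0 was produced by a broadcast, so the parameter is rebuilt with length 1 there and re-expanded with multibroadcast. A minimal stand-alone sketch of that collapse (plain C++ with hypothetical lens/strides values, not MIGraphX code):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main()
{
    // A shape broadcast along dim 0: length 2 but stride 0.
    std::vector<std::size_t> lens    = {2, 3, 4};
    std::vector<std::size_t> strides = {0, 4, 1};
    // Same transform as in adjust_param_shapes: stride 0 => length 1.
    std::transform(lens.begin(), lens.end(), strides.begin(), lens.begin(),
                   [](auto len, auto stride) -> std::size_t { return stride == 0 ? 1 : len; });
    for(auto l : lens)
        std::cout << l << ' '; // prints: 1 3 4
    std::cout << '\n';
}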
src/targets/gpu/perfdb.cpp  View file @ dae94657

@@ -27,6 +27,7 @@
 #include <migraphx/stringutils.hpp>
 #include <migraphx/permutation.hpp>
 #include <fstream>
+#include <mutex>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -88,6 +89,9 @@ std::string generate_miopen_config(const problem_params& pp)
 auto query_miopen_db(const std::string& query)
 {
+    static std::mutex g_db_mutex; // NOLINT
+    const std::lock_guard<std::mutex> lock(g_db_mutex);
+
     // TODO: Store db as a static variable
     const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db";
     // Check if db file exists.
@@ -108,16 +112,17 @@ auto query_miopen_db(const std::string& query)
 } // namespace

-std::string get_mlir_perf_for_conv(const problem_params& pp)
+std::string get_mlir_perf_for_conv(const problem_params& pp, bool xdlops)
 {
-    std::string query = "select P.* \
+    std::string solver = xdlops ? "ConvMlirIgemmFwdXdlops" : "ConvMlirIgemmFwd";
+    std::string query  = "select P.* \
                 from perf_db P, config C \
                 where P.config = C.id AND \
-                P.solver = 'ConvMlirIgemmFwdXdlops' AND \
+                P.solver = '${solver}' AND \
                 ${config}";
     auto results =
         query_miopen_db(
-            interpolate_string(query, {{"config", generate_miopen_config(pp)}}));
+            interpolate_string(query, {{"config", generate_miopen_config(pp)}, {"solver", solver}}));
     if(results.empty())
         return "";
     return results.front().at("params");
...
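The query above is assembled with MIGraphX's interpolate_string helper, which substitutes the ${solver} and ${config} placeholders before the database lookup; with xdlops false the solver becomes 'ConvMlirIgemmFwd'. A minimal stand-in for that substitution (plain C++, not the actual MIGraphX helper) could look like:

#include <map>
#include <string>

std::string interpolate(std::string s, const std::map<std::string, std::string>& vars)
{
    for(const auto& [key, value] : vars)
    {
        const std::string token = "${" + key + "}";
        // Replace every occurrence of ${key} with its value.
        for(auto pos = s.find(token); pos != std::string::npos;
            pos      = s.find(token, pos + value.size()))
            s.replace(pos, token.size(), value);
    }
    return s;
}

// interpolate(query, {{"solver", "ConvMlirIgemmFwd"}, {"config", cfg}}) then yields
// "... P.solver = 'ConvMlirIgemmFwd' AND <cfg>" for the non-xdlops path.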
src/targets/gpu/prefuse_ops.cpp  View file @ dae94657

@@ -51,17 +51,20 @@ struct layernorm_base
         }
         check_shapes{inputs, static_cast<const Derived&>(*this)}.has(nargs + N);
         auto s = inputs.at(0);
+        auto t = s.type();
+        if(not mods.empty())
+            t = mods.front()->get_output_shapes().front().type();
         if(s.scalar())
         {
             return s;
         }
         else if(s.broadcasted())
         {
-            return {s.type(), s.lens()};
+            return {t, s.lens()};
         }
         else
         {
-            return s.with_lens(s.lens());
+            return s.with_lens(t, s.lens());
         }
     }
 };
...
src/targets/gpu/quant_convolution.cpp  deleted 100644 → 0  View file @ b013d991
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/quant_convolution.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/generate.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

shape miopen_quant_convolution::compute_shape(const std::vector<shape>& inputs) const
{
    check_shapes{inputs, *this}.has(4).standard();
    return op.normalize_compute_shape({inputs.at(0), inputs.at(1)});
}

argument miopen_quant_convolution::compute(context& ctx,
                                           const shape& output_shape,
                                           const std::vector<argument>& args) const
{
    auto x_desc = make_tensor(args[0].get_shape(), int8_x4_format);
    auto w_desc = make_tensor(args[1].get_shape(), int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    float alpha = 1;
    float beta  = 0;
    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                                           &alpha,
                                           x_desc.get(),
                                           args[0].implicit(),
                                           w_desc.get(),
                                           args[1].implicit(),
                                           cd.get(),
                                           algo,
                                           &beta,
                                           y_desc.get(),
                                           args[3].implicit(),
                                           args[2].implicit(),
                                           args[2].get_shape().bytes());
    if(status != miopenStatusSuccess)
    {
        MIGRAPHX_THROW("QUANT_CONVOLUTION: run convolution forward failed");
    }
    return args[3];
}

shape miopen_quant_convolution::find(context& ctx,
                                     const shape& output_shape,
                                     std::vector<shape> inputs)
{
    shape workspace_shape{};

    auto x_desc = make_tensor(inputs[0], int8_x4_format);
    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);

    std::size_t workspace_size = 0;
    miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
                                             w_desc.get(),
                                             x_desc.get(),
                                             cd.get(),
                                             y_desc.get(),
                                             &workspace_size);
    workspace_shape = shape{shape::int8_type, {workspace_size}};

    auto x_shape = inputs[0];
    auto w_shape = inputs[1];
    if(int8_x4_format)
    {
        x_shape = pack_int8_shape(x_shape);
        w_shape = pack_int8_shape(w_shape);
    }
    auto x         = to_gpu(generate_argument(x_shape));
    auto w         = to_gpu(generate_argument(w_shape));
    auto y         = allocate_gpu(output_shape);
    auto workspace = allocate_gpu(workspace_shape);

    int algo_count = 1;
    miopenConvAlgoPerf_t perf;
    auto status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
                                                        x_desc.get(),
                                                        x.implicit(),
                                                        w_desc.get(),
                                                        w.implicit(),
                                                        cd.get(),
                                                        y_desc.get(),
                                                        y.implicit(),
                                                        1,
                                                        &algo_count,
                                                        &perf,
                                                        workspace.implicit(),
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: find convolution failed");
    algo = perf.fwd_algo;

    size_t solution_count;
    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
                                                      w_desc.get(),
                                                      x_desc.get(),
                                                      cd.get(),
                                                      y_desc.get(),
                                                      &solution_count);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: get solution count failed");

    std::vector<miopenConvSolution_t> solutions(solution_count);
    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
                                                 w_desc.get(),
                                                 x_desc.get(),
                                                 cd.get(),
                                                 y_desc.get(),
                                                 solution_count,
                                                 &solution_count,
                                                 solutions.data());
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: get solution failed");
    solution_id = solutions.front().solution_id;

    return shape{shape::int8_type, {perf.memory}};
}

void miopen_quant_convolution::finalize(context& ctx,
                                        const shape& output_shape,
                                        std::vector<shape> inputs)
{
    if(cd == nullptr)
        cd = make_conv(op);
    if(solution_id == 0)
    {
        // Check that workspace hasn't changed
        auto size = inputs.at(2).bytes();
        auto ws   = find(ctx, output_shape, inputs);
        if(ws.bytes() > size)
            MIGRAPHX_THROW("MIOpen Quant Convolution: workspace has changed during finalization.");
    }
    auto x_desc = make_tensor(inputs[0], int8_x4_format);
    auto w_desc = make_tensor(inputs[1], int8_x4_format);
    auto y_desc = make_tensor(output_shape);
    auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
                                                          w_desc.get(),
                                                          x_desc.get(),
                                                          cd.get(),
                                                          y_desc.get(),
                                                          solution_id);
    if(status != miopenStatusSuccess)
        MIGRAPHX_THROW("MIOpen Quant Convolution: compile solution failed");
}

shape miopen_quant_convolution::pack_int8_shape(const shape& s) const
{
    if(s.type() != shape::int8_type)
    {
        MIGRAPHX_THROW("PACK_INT8_SHAPE: only process int8_type");
    }

    auto lens    = s.lens();
    auto strides = s.strides();
    lens[1]      = (lens[1] + 3) / 4 * 4;
    strides[0]   = strides[1] * lens[1];

    return {s.type(), lens, strides};
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
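The pack_int8_shape logic in the deleted file rounded the channel count up to a multiple of four for MIOpen's int8x4 format and recomputed the batch stride from the padded channel count. A worked example of that arithmetic (plain C++, illustrative values):

#include <cstddef>
#include <iostream>

int main()
{
    std::size_t channels     = 3;
    std::size_t chan_stride  = 9;                      // H * W for a 3x3 feature map
    std::size_t padded       = (channels + 3) / 4 * 4; // 3 -> 4
    std::size_t batch_stride = chan_stride * padded;   // 9 * 4 = 36
    std::cout << padded << ' ' << batch_stride << '\n'; // prints: 4 36
}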
src/targets/gpu/target.cpp  View file @ dae94657

@@ -35,13 +35,13 @@
 #include <migraphx/fuse_pointwise.hpp>
 #include <migraphx/inline_module.hpp>
 #include <migraphx/insert_pad.hpp>
+#include <migraphx/layout_nhwc.hpp>
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/normalize_ops.hpp>
 #include <migraphx/preallocate_param.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/register_target.hpp>
 #include <migraphx/replace_allocate.hpp>
-#include <migraphx/rewrite_batchnorm.hpp>
 #include <migraphx/rewrite_gelu.hpp>
 #include <migraphx/rewrite_pooling.hpp>
 #include <migraphx/rewrite_quantization.hpp>
@@ -51,6 +51,7 @@
 #include <migraphx/simplify_qdq.hpp>
 #include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/gpu/allocation_model.hpp>
+#include <migraphx/gpu/compile_miopen.hpp>
 #include <migraphx/gpu/compile_ops.hpp>
 #include <migraphx/gpu/concat_gpu_opt.hpp>
 #include <migraphx/gpu/context.hpp>
@@ -71,6 +72,7 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS)
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC)

 struct id_pass
 {
@@ -110,8 +112,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         insert_pad{},
         dead_code_elimination{},
-        rewrite_batchnorm{},
-        dead_code_elimination{},
         rewrite_rnn{},
         dead_code_elimination{},
         inline_module{},
@@ -123,6 +123,9 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         simplify_algebra{},
         simplify_reshapes{},
+        enable_pass(enabled(MIGRAPHX_ENABLE_NHWC{}), layout_nhwc{}),
+        dead_code_elimination{},
+        simplify_reshapes{},
         simplify_algebra{},
         prefuse_ops{},
         dead_code_elimination{},
@@ -139,14 +142,16 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         eliminate_concat{concat_gpu_optimization{}},
         dead_code_elimination{},
-        pack_int8_args{},
+        compile_miopen{&gctx},
+        dead_code_elimination{},
+        adjust_allocation{gpu_allocation_model{}},
+        pack_int8_args{},
         dead_code_elimination{},
         fuse_ops{&ctx, options.fast_math},
         dead_code_elimination{},
         replace_allocate{gpu_allocation_model{}, options.offload_copy},
         dead_code_elimination{},
-        adjust_allocation{gpu_allocation_model{}},
-        dead_code_elimination{},
         compile_ops{&ctx},
         dead_code_elimination{},
         write_literals{&ctx},
...
src/targets/ref/lowering.cpp  View file @ dae94657

@@ -26,15 +26,12 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/identity.hpp>
-#include <migraphx/op/batch_norm_inference.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/deconvolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/quant_dot.hpp>
-#include <migraphx/op/elu.hpp>
 #include <migraphx/op/im2col.hpp>
-#include <migraphx/op/leaky_relu.hpp>
 #include <migraphx/op/logsoftmax.hpp>
 #include <migraphx/op/loop.hpp>
 #include <migraphx/op/lrn.hpp>
@@ -75,84 +72,6 @@ typename std::conditional_t<std::is_integral<T>{}, std::make_signed<T>, std::ena
     return x;
 }

-//
-// ref implemenataion of batch norm for inference
-//
-// inputs are:
-// args[0] -> input data buffer
-// args[1] -> mini batch mean
-// args[2] -> mini batch variance
-// args[3] -> gamma
-// args[4] -> bias
-//
-// The equation to compute batch norm for inference is:
-//
-// output[i] = bias + gamma * (input[i] + mean) / sqrt(variance + epsilon)
-//
-// the input data format should be nchw
-//
-struct ref_batch_norm_inference
-{
-    op::batch_norm_inference op;
-
-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
-    {
-        return migraphx::reflect(self.op, f);
-    }
-
-    std::string name() const { return "ref::batch_norm_inference"; }
-
-    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        argument output{output_shape};
-        double epsilon           = op.epsilon;
-        auto input               = args[0];
-        auto arg_gamma           = args[1];
-        auto arg_bias            = args[2];
-        auto mini_batch_mean     = args[3];
-        auto mini_batch_variance = args[4];
-
-        if(op.bn_mode == op::batch_norm_inference::spatial)
-        {
-            visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
-                [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    par_for(output_shape.elements(), [&](auto i) {
-                        auto idx = output_shape.multi(i);
-                        auto c   = idx[1];
-                        assert((variance[c] + epsilon) > 0);
-                        result[i] = gamma[c] * (buffer[i] - mean[c]) /
-                                        std::sqrt(variance[c] + epsilon) +
-                                    bias[c];
-                    });
-                });
-        }
-
-        if(op.bn_mode == op::batch_norm_inference::per_activation)
-        {
-            visit_all(output, input, mini_batch_mean, mini_batch_variance, arg_gamma, arg_bias)(
-                [&](auto result, auto buffer, auto mean, auto variance, auto gamma, auto bias) {
-                    par_for(output_shape.elements(), [&](auto i) {
-                        auto idx   = output_shape.multi(i);
-                        idx[0]     = 0;
-                        auto index = output_shape.index(idx);
-                        assert((variance[index] + epsilon) > 0);
-                        result[i] = gamma[index] * (buffer[i] - mean[index]) /
-                                        std::sqrt(variance[index] + epsilon) +
-                                    bias[index];
-                    });
-                });
-        }
-        return output;
-    }
-};
-MIGRAPHX_REGISTER_OP(ref_batch_norm_inference)
-
 struct ref_lrn
 {
     op::lrn op;
@@ -237,15 +156,16 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
     argument compute(context&, shape output_shape, std::vector<argument> args) const
     {
         std::vector<std::size_t> padding;
-        if(op.use_dynamic_same_auto_pad)
+        if(op.padding_mode != op::padding_mode_t::default_)
         {
             auto input_lens = args[0].get_shape().lens();
-            std::vector<std::size_t> img_lens{input_lens.begin() + 2, input_lens.end()};
             auto weights_lens = args[1].get_shape().lens();
-            std::vector<std::size_t> k_lens{weights_lens.begin() + 2, weights_lens.end()};
-            padding =
-                calc_dyn_auto_pad(img_lens, k_lens, op.stride, op.dilation);
-            output_shape =
-                compute_padded_shape({args.at(0).get_shape(), args.at(1).get_shape()}, padding);
+            padding =
+                op.padding_mode == op::same_upper
+                    ? calc_dyn_auto_pad(input_lens, weights_lens, op.stride, op.dilation, true)
+                    : calc_dyn_auto_pad(input_lens, weights_lens, op.stride, op.dilation, false);
+            output_shape = compute_padded_shape(
+                args[0].get_shape(), args[1].get_shape(), padding, op.stride, op.dilation);
         }
         else
         {
@@ -313,34 +233,6 @@ struct ref_convolution : auto_register_op<ref_convolution<Op>>
         });
         return result;
     }

-    private:
-    /*!
-     * Used for dynamic auto padding since padding needs to be computed at evaulation time.
-     * \param inputs two fixed shape inputs [input_tensor, weights]
-     * \param padding from auto_pad calculation
-     */
-    shape compute_padded_shape(const std::vector<shape>& inputs,
-                               const std::vector<std::size_t>& padding) const
-    {
-        const shape& input            = inputs.at(0);
-        const shape& weights          = inputs.at(1);
-        const size_t num_spatial_dims = input.lens().size() - 2;
-        std::vector<size_t> output_lens{input.lens()[0], weights.lens()[0]};
-        // calculate the output shape of the convolution: ((W - K + 2P) / S) + 1
-        for(size_t i = 0; i < num_spatial_dims; i++)
-        {
-            auto padding_factor = padding[i] + padding[i + num_spatial_dims];
-            output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(
-                1,
-                (input.lens()[i + 2] - (1 + op.dilation[i] * (weights.lens()[i + 2] - 1)) +
-                 padding_factor) /
-                        op.stride[i] +
-                    1)));
-        }
-        return inputs[0].with_lens(output_lens);
-    }
 };

 struct ref_im2col
@@ -491,9 +383,9 @@ struct ref_gemm
     std::string name() const { return "ref::dot"; }
     shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
         migemm(result, args[0], args[1], 1.0f, 0.0f);

         return result;
@@ -537,65 +429,6 @@ struct ref_quant_gemm
 };
 MIGRAPHX_REGISTER_OP(ref_gemm)

-struct leaky_relu_op
-{
-    op::leaky_relu op;
-    std::string name() const { return "ref::leaky_relu"; }
-    auto fcn() const
-    {
-        auto a = op.alpha;
-        return [a](auto x) { return x > 0 ? x : x * a; };
-    }
-};
-
-struct elu_op
-{
-    op::elu op;
-    std::string name() const { return "ref::elu"; }
-    auto fcn() const
-    {
-        auto a = op.alpha;
-        return [a](auto x) { return x > 0 ? x : a * std::expm1(x); };
-    }
-};
-
-template <typename Op>
-struct ref_unary : auto_register_op<ref_unary<Op>>
-{
-    ref_unary() = default;
-
-    template <class T>
-    ref_unary(T pop) : op(Op{std::move(pop)})
-    {
-    }
-
-    Op op;
-
-    template <class Self, class F>
-    static auto reflect(Self& self, F f)
-    {
-        return migraphx::reflect(self.op.op, f);
-    }
-
-    std::string name() const { return op.name(); }
-    shape compute_shape(const std::vector<shape>& inputs) const
-    {
-        check_shapes{inputs, *this}.has(1);
-        const auto& s = inputs.at(0);
-        return {s.type(), s.lens()};
-    }
-
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
-    {
-        argument result{output_shape};
-        visit_all(result, args[0])([&](auto output, auto input) {
-            assert(input.get_shape().standard());
-            std::transform(input.begin(), input.end(), output.begin(), op.fcn());
-        });
-        return result;
-    }
-};
-
 template <class Op>
 struct ref_softmax : auto_register_op<ref_softmax<Op>>
 {
@@ -616,10 +449,10 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
     {
         return op.normalize_compute_shape(inputs);
     }

-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
     {
-        argument result{output_shape};
-        auto batch_lens = output_shape.lens();
+        argument result{dyn_out.computed_shape};
+        auto batch_lens    = dyn_out.computed_shape.lens();
         int64_t tuned_axis = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name());
         std::size_t n_dims = batch_lens[tuned_axis];
         batch_lens[tuned_axis] = 1;
@@ -642,7 +475,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
             for(std::size_t j = 0; j < n_dims; ++j)
             {
                 idx[tuned_axis] = j;
-                std::size_t index = output_shape.index(idx);
+                std::size_t index = dyn_out.computed_shape.index(idx);
                 output[index] = std::exp(input[index] - batch_max[i]);
             }
@@ -731,16 +564,12 @@ struct ref_apply
     void init()
     {
-        apply_map["batch_norm_inference"] =
-            extend_op<ref_batch_norm_inference, op::batch_norm_inference>();
         apply_map["convolution"] = extend_op<ref_convolution<op::convolution>, op::convolution>();
         apply_map["dot"]         = extend_op<ref_gemm, op::dot>();
         apply_map["quant_dot"]   = extend_op<ref_quant_gemm, op::quant_dot>();
         apply_map["quant_convolution"] =
             extend_op<ref_convolution<op::quant_convolution>, op::quant_convolution>();
-        apply_map["elu"]        = extend_op<ref_unary<elu_op>, op::elu>();
         apply_map["im2col"]     = extend_op<ref_im2col, op::im2col>();
-        apply_map["leaky_relu"] = extend_op<ref_unary<leaky_relu_op>, op::leaky_relu>();
         apply_map["logsoftmax"] = extend_op<ref_softmax<op::logsoftmax>, op::logsoftmax>();
         apply_map["lrn"]        = extend_op<ref_lrn, op::lrn>();
         apply_map["pad"]        = extend_op<ref_pad, op::pad>();
...
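The deleted compute_padded_shape member encoded the usual convolution output-size rule; written out, the spatial output length it computed per dimension is

$$ W_{\text{out}} = \max\!\left(1,\; \left\lfloor \frac{W - \bigl(1 + d\,(K - 1)\bigr) + p_{\text{begin}} + p_{\text{end}}}{S} \right\rfloor + 1\right), $$

where $W$ is the input length, $K$ the kernel length, $d$ the dilation, $S$ the stride, and $p_{\text{begin}} + p_{\text{end}}$ the padding_factor from the removed loop. The new code path presumably delegates the same rule to the free compute_padded_shape it now calls with padding, stride, and dilation.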
src/tf/parse_batchnorm.cpp  View file @ dae94657

@@ -23,6 +23,7 @@
 */
 #include <migraphx/tf/op_parser.hpp>
 #include <migraphx/tf/tf_parser.hpp>
+#include <migraphx/instruction.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/make_op.hpp>
@@ -38,16 +39,37 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
     instruction_ref parse(const op_desc& /*opd*/,
                           const tf_parser& /*parser*/,
                           tf_parser::node_info info,
                           const std::vector<instruction_ref>& args) const
     {
-        float epsilon  = 1e-5f;
-        float momentum = 0.9f;
+        // different default epsilon than from ONNX
+        float epsilon = 1e-4f;
         if(contains(info.attributes, "epsilon"))
         {
             epsilon = info.attributes.at("epsilon").f();
         }
-        auto op = make_op("batch_norm_inference", {{"epsilon", epsilon}, {"momentum", momentum}});
-        return info.add_instruction(op, args);
+        auto x_lens = args[0]->get_shape().lens();
+        auto x_type = args[0]->get_shape().type();
+        // unsqueeze tensors of shape (C) to broadcast correctly
+        auto rt  = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
+        auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
+        auto scale_unsqueeze =
+            info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[1]);
+        auto bias_unsqueeze =
+            info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[2]);
+        auto mean_unsqueeze =
+            info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[3]);
+        auto var_unsqueeze =
+            info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[4]);
+        auto numer   = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
+        auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
+        auto denom   = info.add_broadcastable_binary_op("pow", var_eps, rt);
+        auto div0    = info.add_broadcastable_binary_op("div", numer, denom);
+        auto r0      = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
+        return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
     }
 };
...
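The rewritten TF parser no longer emits a monolithic batch_norm_inference op; it builds the inference formula from primitive ops. The sub/add/pow/div/mul/add chain above computes

$$ y = \gamma \cdot \frac{x - \mu}{\left(\sigma^2 + \epsilon\right)^{1/2}} + \beta, $$

with the square root expressed as pow(variance + epsilon, 0.5) via the rt = 0.5 literal, and the per-channel tensors of shape (C) unsqueezed to (C, 1, 1) so they broadcast over the NCHW input.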
src/tf/parse_conv.cpp  View file @ dae94657

@@ -75,7 +75,6 @@ struct parse_conv : op_parser<parse_conv>
         const std::string& pad_mode = info.attributes.at("padding").s();
         if(pad_mode.find("SAME") != std::string::npos)
         {
-            op.padding_mode                 = op::padding_mode_t::same;
             std::vector<size_t> weight_dims = weights->get_shape().lens();
             size_t weight_h                 = weight_dims[2];
             size_t weight_w                 = weight_dims[3];
@@ -87,10 +86,6 @@ struct parse_conv : op_parser<parse_conv>
             op.padding = std::vector<size_t>(pads.begin(), pads.end());
         }
-        else if(pad_mode.find("VALID") != std::string::npos)
-        {
-            op.padding_mode = op::padding_mode_t::valid;
-        }
         else if(pad_mode.find("EXPLICIT") != std::string::npos)
         {
             std::vector<size_t> padding;
...
src/tf/parse_depthwiseconv.cpp  View file @ dae94657

@@ -80,7 +80,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
         if(pad_mode.find("SAME") != std::string::npos)
         {
-            op.padding_mode                 = op::padding_mode_t::same;
             std::vector<size_t> weight_dims = weights->get_shape().lens();
             size_t weight_h                 = weight_dims[2];
             size_t weight_w                 = weight_dims[3];
@@ -101,10 +100,6 @@ struct parse_depthwiseconv : op_parser<parse_depthwiseconv>
                 op.padding[1] = pads[1];
             }
         }
-        else if(pad_mode.find("VALID") != std::string::npos)
-        {
-            op.padding_mode = op::padding_mode_t::valid;
-        }

         std::vector<int64_t> new_weights_shape;
...
test/api/test_custom_op_gpu.cpp  View file @ dae94657

@@ -55,7 +55,8 @@ struct half_copy_host final : migraphx::experimental_custom_op_base
                                         hipMemcpyHostToHost,
                                         ctx.get_queue<hipStream_t>()));
         MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
-        MIGRAPHX_HIP_ASSERT(hipMemset(output_buffer_ptr, 0, copy_bytes));
+        MIGRAPHX_HIP_ASSERT(
+            hipMemsetAsync(output_buffer_ptr, 0, copy_bytes, ctx.get_queue<hipStream_t>()));
         MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
         return inputs[1];
     }
@@ -97,7 +98,8 @@ struct half_copy_device final : migraphx::experimental_custom_op_base
                                         hipMemcpyDeviceToDevice,
                                         ctx.get_queue<hipStream_t>()));
         MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
-        MIGRAPHX_HIP_ASSERT(hipMemset(output_buffer_ptr, 0, copy_bytes));
+        MIGRAPHX_HIP_ASSERT(
+            hipMemsetAsync(output_buffer_ptr, 0, copy_bytes, ctx.get_queue<hipStream_t>()));
         MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
         return inputs[1];
     }
@@ -124,7 +126,7 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
     virtual bool runs_on_offload_target() const override { return true; }
     virtual migraphx::argument
-    compute(migraphx::context, migraphx::shape, migraphx::arguments inputs) const override
+    compute(migraphx::context ctx, migraphx::shape, migraphx::arguments inputs) const override
     {
         // This custom op simply sets first half size_bytes of the input 0, and rest of the half
         // bytes are copied. for this custom_op, it does its computation on the "device". Therefore,
@@ -133,7 +135,8 @@ struct half_copy_device_same_buffer final : migraphx::experimental_custom_op_bas
         auto input_bytes = inputs[0].get_shape().bytes();
         auto copy_bytes  = input_bytes / 2;
         MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
-        MIGRAPHX_HIP_ASSERT(hipMemset(buffer_ptr, 0, copy_bytes));
+        MIGRAPHX_HIP_ASSERT(
+            hipMemsetAsync(buffer_ptr, 0, copy_bytes, ctx.get_queue<hipStream_t>()));
         MIGRAPHX_HIP_ASSERT(hipDeviceSynchronize());
         return inputs[0];
     }
...
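Each fix above replaces a blocking hipMemset with hipMemsetAsync enqueued on the op's own stream, so the memset is ordered after the preceding async copy on that stream rather than racing it on the default stream. A minimal sketch of the pattern (zero_half_on_stream is a hypothetical helper mirroring the test, assuming a valid stream):

#include <hip/hip_runtime.h>
#include <cstddef>

void zero_half_on_stream(void* ptr, std::size_t bytes, hipStream_t stream)
{
    // dst, value, sizeBytes, stream -- the same call shape used in the test above.
    (void)hipMemsetAsync(ptr, 0, bytes / 2, stream);
    // The test synchronizes the whole device afterwards.
    (void)hipDeviceSynchronize();
}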
test/fuse_pointwise.cpp  View file @ dae94657

@@ -272,6 +272,35 @@ TEST_CASE(contiguous_input)
     EXPECT(p1 == p2);
 }

+TEST_CASE(contiguous_boolean_input)
+{
+    migraphx::shape s{migraphx::shape::bool_type, {2, 3}};
+    migraphx::shape s_lit{migraphx::shape::bool_type, {1}, {0}};
+    migraphx::program p1;
+    {
+        auto* mm = p1.get_main_module();
+        auto x   = mm->add_parameter("x", s);
+        auto one = mm->add_literal(migraphx::literal(s_lit, {1.0}));
+        auto yb =
+            mm->add_instruction(migraphx::make_op("multibroadcast", {{"out_lens", s.lens()}}), one);
+        auto y    = mm->add_instruction(migraphx::make_op("contiguous"), yb);
+        auto xor1 = mm->add_instruction(migraphx::make_op("logical_xor"), x, y);
+        mm->add_return({xor1});
+    }
+    run_pass(p1);
+    migraphx::program p2;
+    {
+        auto* mm  = p2.get_main_module();
+        auto x    = mm->add_parameter("x", s);
+        auto xor1 = add_pointwise(p2, "main:pointwise0", {x}, [=](auto* pm, const auto& inputs) {
+            auto y = pm->add_literal(migraphx::literal(s_lit, {1}));
+            return pm->add_instruction(migraphx::make_op("logical_xor"), inputs[0], y);
+        });
+        mm->add_return({xor1});
+    }
+}
+
 TEST_CASE(all_scalar_input)
 {
     migraphx::shape s{migraphx::shape::float_type};
...
src/targets/gpu/elu.cpp → test/gpu/hip.cpp  View file @ dae94657

@@ -21,44 +21,30 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <migraphx/gpu/elu.hpp>
-#include <migraphx/gpu/context.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-shape miopen_elu::compute_shape(const std::vector<shape>& inputs) const
-{
-    check_shapes{inputs, *this}.has(2).not_broadcasted();
-    return inputs.at(1);
-}
-
-argument miopen_elu::compute(context& ctx,
-                             const shape& output_shape,
-                             const std::vector<argument>& args) const
-{
-    float alpha = 1;
-    float beta  = 0;
-    auto x_desc = make_tensor(args[0].get_shape());
-    auto y_desc = make_tensor(output_shape);
-    miopenActivationForward(ctx.get_stream().get_miopen(),
-                            ad.get(),
-                            &alpha,
-                            x_desc.get(),
-                            args[0].implicit(),
-                            &beta,
-                            y_desc.get(),
-                            args[1].implicit());
-
-    return args[1];
-}
-
-void miopen_elu::finalize(context&, const shape&, const std::vector<shape>&)
-{
-    ad = make_elu(op.alpha);
-}
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
+#include <test.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/gpu/target.hpp>
+
+TEST_CASE(tuple_to_from_gpu)
+{
+    migraphx::shape s1{migraphx::shape::float_type, {2, 3}};
+    migraphx::shape s2{migraphx::shape::int32_type, {2, 4}};
+    std::vector<float> p1_data = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6};
+    std::vector<int> p2_data   = {1, 2, 3, 4, 5, 6, 7, 8};
+    auto p1      = migraphx::argument{s1, p1_data.data()};
+    auto p2      = migraphx::argument{s2, p2_data.data()};
+    auto p1_gpu  = migraphx::gpu::to_gpu(p1);
+    auto p2_gpu  = migraphx::gpu::to_gpu(p2);
+    auto p_tuple = migraphx::gpu::from_gpu(migraphx::argument({p1_gpu, p2_gpu}));
+    std::vector<migraphx::argument> results = p_tuple.get_sub_objects();
+    std::vector<float> result1;
+    results[0].visit([&](auto output) { result1.assign(output.begin(), output.end()); });
+    std::vector<int> result2;
+    results[1].visit([&](auto output) { result2.assign(output.begin(), output.end()); });
+    EXPECT(result1 == p1_data);
+    EXPECT(result2 == p2_data);
+}
+
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/gpu/literal.cpp  View file @ dae94657

@@ -48,4 +48,4 @@ void gpu_literal_test()
     }
 }

-int main() { gpu_literal_test(); }
+int main() { gpu_literal_test(); } // NOLINT (bugprone-exception-escape)
test/gpu/mlir.cpp  View file @ dae94657

@@ -84,7 +84,7 @@ migraphx::program create_program_from_mlir(const migraphx::module& mmlir)
     inputs.push_back(mm->add_parameter("output", mmlir.get_output_shapes().front()));

     migraphx::gpu::context ctx;
-    migraphx::gpu::insert_mlir(*mm, mm->end(), compile_mlir(ctx, mmlir), inputs);
+    migraphx::gpu::insert_mlir(*mm, mm->end(), compile_mlir(ctx, mmlir, inputs), inputs);
     return p;
 }
@@ -140,8 +140,8 @@ TEST_CASE(conv)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
-    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
+  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
+    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     return %0 : tensor<1x2x2x2xf32>
   }
 }
@@ -163,8 +163,8 @@ TEST_CASE(conv_add_relu)
 {
     const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
-    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1], use_dynamic_same_auto_pad = 0 : i64} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
+  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
+    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
     %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
     %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
     return %2 : tensor<1x2x2x2xf32>
...
test/gpu/quantization.cpp  View file @ dae94657

@@ -30,7 +30,6 @@
 #include <migraphx/ref/target.hpp>
 #include <migraphx/gpu/target.hpp>
 #include <migraphx/verify.hpp>
-#include <migraphx/quantization.hpp>
 #include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/pass_manager.hpp>
...
test/instruction.cpp  new file 0 → 100644  View file @ dae94657
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include "test.hpp"
TEST_CASE(check_undefined)
{
    migraphx::module m;
    // a bare "undefined" instruction and two ops computed only from it
    auto und = m.add_instruction(migraphx::make_op("undefined"));
    auto cov = m.add_instruction(
        migraphx::make_op("convert", {{"target_type", migraphx::shape::half_type}}), und);
    auto abs = m.add_instruction(migraphx::make_op("abs"), cov);
    // a literal and an op computed from defined values
    migraphx::shape xs{migraphx::shape::float_type, {2, 3}};
    std::vector<float> datax = {1, 2, 3, 4, 5, 6};
    auto lit = m.add_literal(migraphx::literal(xs, datax));
    auto mul = m.add_instruction(migraphx::make_op("mul"), lit, lit);
    // undefined-ness propagates through ops fed only by the undefined value
    EXPECT(und->is_undefined());
    EXPECT(cov->is_undefined());
    EXPECT(abs->is_undefined());
    EXPECT(not lit->is_undefined());
    EXPECT(not mul->is_undefined());
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }
test/layout_nhwc.cpp
0 → 100644
View file @ dae94657
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/layout_nhwc.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <basic_ops.hpp>
#include <migraphx/make_op.hpp>
#include <test.hpp>
// run the layout_nhwc pass followed by dead-code elimination
void run_pass(migraphx::module& m)
{
    migraphx::run_passes(m, {migraphx::layout_nhwc{}, migraphx::dead_code_elimination{}});
}

migraphx::operation layout(std::vector<int64_t> permutation = {0, 1, 2, 3})
{
    return migraphx::make_op("layout", {{"permutation", permutation}});
}

// insert a layout instruction that relays ins out as NHWC
migraphx::instruction_ref add_layout_nhwc(migraphx::module& m, migraphx::instruction_ref ins)
{
    return m.add_instruction(layout({0, 2, 3, 1}), ins);
}

TEST_CASE(conv_relu)
{
    // m1: the graph as written, in NCHW
    migraphx::module m1;
    {
        auto x    = m1.add_parameter("x", {migraphx::shape::float_type, {1, 8, 16, 16}});
        auto w    = m1.add_literal(
            migraphx::generate_literal({migraphx::shape::float_type, {16, 8, 3, 3}}));
        auto conv = m1.add_instruction(
            migraphx::make_op("convolution",
                              {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
            x,
            w);
        m1.add_instruction(migraphx::make_op("relu"), conv);
    }
    run_pass(m1);

    // m2: the expected result, with NHWC layouts inserted around the convolution
    migraphx::module m2;
    {
        auto x = add_layout_nhwc(
            m2, m2.add_parameter("x", {migraphx::shape::float_type, {1, 8, 16, 16}}));
        auto w = add_layout_nhwc(
            m2,
            m2.add_literal(
                migraphx::generate_literal({migraphx::shape::float_type, {16, 8, 3, 3}})));
        auto conv = m2.add_instruction(
            migraphx::make_op("convolution",
                              {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
            x,
            w);
        auto conv_layout = m2.add_instruction(layout(), conv);
        m2.add_instruction(migraphx::make_op("relu"), conv_layout);
    }
    EXPECT(m1.sort() == m2.sort());
}

TEST_CASE(conv_add)
{
    // m1: convolution plus a broadcast bias add, in NCHW
    migraphx::module m1;
    {
        auto x    = m1.add_parameter("x", {migraphx::shape::float_type, {1, 8, 16, 16}});
        auto w    = m1.add_literal(
            migraphx::generate_literal({migraphx::shape::float_type, {16, 8, 3, 3}}));
        auto y    = m1.add_literal(migraphx::generate_literal({migraphx::shape::float_type, {16}}));
        auto conv = m1.add_instruction(
            migraphx::make_op("convolution",
                              {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
            x,
            w);
        auto b = m1.add_instruction(
            migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", conv->get_shape().lens()}}),
            y);
        m1.add_instruction(migraphx::make_op("add"), conv, b);
    }
    run_pass(m1);

    // m2: the expected result after the pass
    migraphx::module m2;
    {
        auto x = add_layout_nhwc(
            m2, m2.add_parameter("x", {migraphx::shape::float_type, {1, 8, 16, 16}}));
        auto w = add_layout_nhwc(
            m2,
            m2.add_literal(
                migraphx::generate_literal({migraphx::shape::float_type, {16, 8, 3, 3}})));
        auto y    = m2.add_literal(migraphx::generate_literal({migraphx::shape::float_type, {16}}));
        auto conv = m2.add_instruction(
            migraphx::make_op("convolution",
                              {{"padding", {1, 1}}, {"stride", {2, 2}}, {"dilation", {1, 1}}}),
            x,
            w);
        auto conv_layout = m2.add_instruction(layout(), conv);
        auto b           = m2.add_instruction(
            migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", conv->get_shape().lens()}}),
            y);
        m2.add_instruction(migraphx::make_op("add"), conv_layout, b);
    }
    EXPECT(m1.sort() == m2.sort());
}

int main(int argc, const char* argv[]) { test::run(argc, argv); }
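If the layout op's permutation is read as the in-memory ordering of dimensions from outermost to innermost (an assumption here, not something the diff states), then {0, 2, 3, 1} keeps the logical NCHW shape but packs it as NHWC. A standalone sketch of that stride computation, applied to the x parameter above:

#include <cstdint>
#include <iostream>
#include <vector>

// given lens (in logical index order) and a permutation listing dimensions from
// outermost to innermost in memory, compute the strides of the permuted layout
std::vector<int64_t> permuted_strides(const std::vector<int64_t>& lens,
                                      const std::vector<int64_t>& perm)
{
    std::vector<int64_t> strides(lens.size());
    int64_t s = 1;
    for(auto it = perm.rbegin(); it != perm.rend(); ++it)
    {
        strides[*it] = s;
        s *= lens[*it];
    }
    return strides;
}

int main()
{
    // NCHW lens {1, 8, 16, 16} relaid out with permutation {0, 2, 3, 1} (NHWC)
    for(auto s : permuted_strides({1, 8, 16, 16}, {0, 2, 3, 1}))
        std::cout << s << ' '; // prints: 2048 1 128 8
    std::cout << std::endl;
}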
test/literal_test.cpp
View file @ dae94657
...
@@ -49,6 +49,25 @@ TEST_CASE(literal_test)
     EXPECT(l4.empty());
 }
 
+TEST_CASE(literal_nstd_shape_vector)
+{
+    migraphx::shape nstd_shape{migraphx::shape::float_type, {1, 3, 2, 2}, {12, 1, 6, 3}};
+    std::vector<float> data(12);
+    std::iota(data.begin(), data.end(), 0);
+    auto l0 = migraphx::literal{nstd_shape, data};
+    // check data buffer is read in correctly
+    std::vector<float> expected_buffer = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
+    const auto* start = reinterpret_cast<const float*>(l0.data());
+    std::vector<float> l0_data{start, start + 12};
+    EXPECT(l0_data == expected_buffer);
+    // check that using visit() (that uses a tensor view) gives data in correct order
+    std::vector<float> results_vector(12);
+    l0.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+    EXPECT(results_vector == data);
+}
+
 TEST_CASE(literal_os1)
 {
     migraphx::literal l{1};
...
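The expected_buffer in the new test is plain stride arithmetic: element (n, c, h, w) of the {1, 3, 2, 2} shape sits at offset n*12 + c*1 + h*6 + w*3, and the literal constructor, as the test asserts, scatters the row-major input values to those offsets. A standalone sketch, independent of MIGraphX, that reproduces the buffer:

#include <array>
#include <iostream>
#include <vector>

int main()
{
    std::array<int, 4> lens{1, 3, 2, 2};
    std::array<int, 4> strides{12, 1, 6, 3}; // non-standard (channels-last-style) strides
    std::vector<float> buffer(12);
    int value = 0; // values 0..11 generated in row-major index order
    for(int n = 0; n < lens[0]; n++)
        for(int c = 0; c < lens[1]; c++)
            for(int h = 0; h < lens[2]; h++)
                for(int w = 0; w < lens[3]; w++)
                    buffer[n * strides[0] + c * strides[1] + h * strides[2] + w * strides[3]] =
                        value++;
    for(float f : buffer)
        std::cout << f << ' '; // prints: 0 4 8 1 5 9 2 6 10 3 7 11
    std::cout << std::endl;
}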
test/onnx/argmax_dyn_test.onnx
0 → 100644
View file @ dae94657
File added
test/onnx/averagepool_dyn_asym_padding_error_test.onnx
0 → 100644
View file @ dae94657
File added