Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
fe493c28
Commit
fe493c28
authored
Apr 10, 2023
by
Alan Turner
Browse files
Merge remote-tracking branch 'origin/develop' into ck-gsg
parents
ba0b3794
cce35871
Changes
121
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1198 additions
and
577 deletions
+1198
-577
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp
+0
-1
src/targets/gpu/include/migraphx/gpu/reduce_op.hpp
src/targets/gpu/include/migraphx/gpu/reduce_op.hpp
+0
-1
src/targets/gpu/jit/concat.cpp
src/targets/gpu/jit/concat.cpp
+3
-1
src/targets/gpu/jit/mlir.cpp
src/targets/gpu/jit/mlir.cpp
+1
-1
src/targets/gpu/jit/reduce.cpp
src/targets/gpu/jit/reduce.cpp
+134
-44
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
...rgets/gpu/kernels/include/migraphx/kernels/functional.hpp
+8
-0
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
+6
-0
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
+45
-40
src/targets/gpu/mlir.cpp
src/targets/gpu/mlir.cpp
+103
-13
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+10
-3
src/targets/ref/CMakeLists.txt
src/targets/ref/CMakeLists.txt
+1
-2
test/CMakeLists.txt
test/CMakeLists.txt
+9
-2
test/api/CMakeLists.txt
test/api/CMakeLists.txt
+3
-3
test/check_shapes_test.cpp
test/check_shapes_test.cpp
+1
-1
test/fuse_reduce.cpp
test/fuse_reduce.cpp
+330
-0
test/gpu/hip.cpp
test/gpu/hip.cpp
+20
-1
test/gpu/mlir.cpp
test/gpu/mlir.cpp
+28
-2
test/include/pointwise.hpp
test/include/pointwise.hpp
+10
-1
test/onnx/onnx_test.cpp
test/onnx/onnx_test.cpp
+143
-160
test/op_shape_test.cpp
test/op_shape_test.cpp
+343
-301
No files found.
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp
View file @
fe493c28
...
@@ -33,7 +33,6 @@
...
@@ -33,7 +33,6 @@
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/type_name.hpp>
#include <utility>
#include <utility>
#include <iostream>
#include <iostream>
...
...
src/targets/gpu/include/migraphx/gpu/reduce_op.hpp
View file @
fe493c28
...
@@ -31,7 +31,6 @@
...
@@ -31,7 +31,6 @@
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/type_name.hpp>
#include <utility>
#include <utility>
#include <iostream>
#include <iostream>
...
...
src/targets/gpu/jit/concat.cpp
View file @
fe493c28
...
@@ -78,7 +78,9 @@ struct concat_compiler : compiler<concat_compiler>
...
@@ -78,7 +78,9 @@ struct concat_compiler : compiler<concat_compiler>
options
.
params
=
"-Wno-float-equal"
;
options
.
params
=
"-Wno-float-equal"
;
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"concat_kernel"
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
axis
=
find_fast_axis
(
options
.
inputs
);
auto
vec
=
vectorize
::
elements
(
ctx
,
axis
,
options
.
inputs
);
vectorize
vec
{};
if
(
axis
!=
v
.
at
(
"axis"
).
to
<
std
::
size_t
>
())
vec
=
vectorize
::
elements
(
ctx
,
axis
,
options
.
inputs
);
options
.
set_launch_params
(
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
v
,
compute_global_for
(
ctx
,
get_concat_elements
(
options
.
inputs
)
/
vec
.
size
,
256
));
auto
src
=
interpolate_string
(
auto
src
=
interpolate_string
(
...
...
src/targets/gpu/jit/mlir.cpp
View file @
fe493c28
...
@@ -32,7 +32,7 @@ namespace gpu {
...
@@ -32,7 +32,7 @@ namespace gpu {
struct
mlir_compiler
:
compiler
<
mlir_compiler
>
struct
mlir_compiler
:
compiler
<
mlir_compiler
>
{
{
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"gpu::mlir_
conv
"
};
}
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"gpu::mlir_
op
"
};
}
operation
compile_op
(
context
&
,
const
std
::
vector
<
shape
>&
,
const
value
&
)
const
{
return
{};
}
operation
compile_op
(
context
&
,
const
std
::
vector
<
shape
>&
,
const
value
&
)
const
{
return
{};
}
...
...
src/targets/gpu/jit/reduce.cpp
View file @
fe493c28
...
@@ -60,15 +60,6 @@ __global__ void reduce_kernel(void* input_p, void* output_p)
...
@@ -60,15 +60,6 @@ __global__ void reduce_kernel(void* input_p, void* output_p)
)__migraphx__"
;
)__migraphx__"
;
static
std
::
size_t
get_reduce_elements
(
const
std
::
vector
<
shape
>&
inputs
)
{
return
inputs
.
front
().
elements
()
/
inputs
.
back
().
elements
();
}
static
std
::
size_t
get_reduce_elements
(
const
std
::
vector
<
instruction_ref
>&
inputs
)
{
return
get_reduce_elements
(
to_shapes
(
inputs
));
}
static
std
::
vector
<
std
::
size_t
>
get_reduce_lens
(
const
std
::
vector
<
std
::
size_t
>&
input_lens
,
static
std
::
vector
<
std
::
size_t
>
get_reduce_lens
(
const
std
::
vector
<
std
::
size_t
>&
input_lens
,
const
std
::
vector
<
std
::
size_t
>&
output_lens
)
const
std
::
vector
<
std
::
size_t
>&
output_lens
)
{
{
...
@@ -86,9 +77,28 @@ static std::vector<std::size_t> get_reduce_lens(const std::vector<std::size_t>&
...
@@ -86,9 +77,28 @@ static std::vector<std::size_t> get_reduce_lens(const std::vector<std::size_t>&
return
reduce_lens
;
return
reduce_lens
;
}
}
static
std
::
string
get_reduce_algo
(
const
std
::
vector
<
shape
>&
inputs
)
template
<
class
T
>
static
shape
get_reduced_shape
(
const
shape
&
s
,
const
std
::
vector
<
T
>&
axes
)
{
auto
lens
=
s
.
lens
();
std
::
fill
(
lens
.
begin
(),
lens
.
end
(),
1
);
for
(
const
auto
&
axis
:
axes
)
lens
[
axis
]
=
s
.
lens
()[
axis
];
return
shape
{
s
.
type
(),
lens
};
}
template
<
class
T
>
static
shape
get_output_shape
(
const
shape
&
s
,
const
std
::
vector
<
T
>&
axes
)
{
auto
lens
=
s
.
lens
();
for
(
const
auto
&
axis
:
axes
)
lens
[
axis
]
=
1
;
return
shape
{
s
.
type
(),
lens
};
}
template
<
class
ReduceLens
>
static
std
::
string
get_reduce_algo
(
const
std
::
vector
<
shape
>&
inputs
,
ReduceLens
rlens
)
{
{
auto
rlens
=
get_reduce_lens
(
inputs
.
front
().
lens
(),
inputs
.
back
().
lens
());
const
auto
init
=
std
::
numeric_limits
<
std
::
size_t
>::
max
();
const
auto
init
=
std
::
numeric_limits
<
std
::
size_t
>::
max
();
// The minimum stride
// The minimum stride
auto
min_stride
=
std
::
inner_product
(
auto
min_stride
=
std
::
inner_product
(
...
@@ -103,11 +113,27 @@ static std::string get_reduce_algo(const std::vector<shape>& inputs)
...
@@ -103,11 +113,27 @@ static std::string get_reduce_algo(const std::vector<shape>& inputs)
return
"block"
;
return
"block"
;
}
}
struct
reduce_compiler
:
compiler
<
reduce_compiler
>
static
std
::
string
get_reduce_algo
(
const
std
::
vector
<
shape
>&
inputs
)
{
auto
rlens
=
get_reduce_lens
(
inputs
.
front
().
lens
(),
inputs
.
back
().
lens
());
return
get_reduce_algo
(
inputs
,
rlens
);
}
struct
simple_reduce_compiler
:
compiler
<
simple_reduce_compiler
>
{
{
std
::
vector
<
std
::
string
>
names
()
const
std
::
vector
<
std
::
string
>
names
()
const
{
{
return
{
"reduce"
,
"reduce_sum"
,
"reduce_mean"
,
"reduce_max"
,
"reduce_min"
,
"reduce_prod"
};
return
{
"simple_reduce"
,
"reduce_sum"
,
"reduce_mean"
,
"reduce_max"
,
"reduce_min"
,
"reduce_prod"
};
}
static
std
::
size_t
get_reduce_elements
(
const
std
::
vector
<
shape
>&
inputs
)
{
return
inputs
.
front
().
elements
()
/
inputs
.
back
().
elements
();
}
}
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
...
@@ -157,44 +183,108 @@ struct reduce_compiler : compiler<reduce_compiler>
...
@@ -157,44 +183,108 @@ struct reduce_compiler : compiler<reduce_compiler>
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
{
value
v
=
value
::
object
{};
value
v
=
value
::
object
{};
if
(
op
.
name
()
==
"reduce_sum"
)
reduce_op
r
{};
{
r
.
set
(
ins
,
op
);
v
[
"reduction"
]
=
"op::sum{}"
;
v
[
"reduction"
]
=
r
.
reduction
;
}
v
[
"read"
]
=
r
.
read
;
else
if
(
op
.
name
()
==
"reduce_mean"
)
v
[
"write"
]
=
r
.
write
;
{
v
[
"init"
]
=
r
.
init
;
auto
reduce_elements
=
get_reduce_elements
(
ins
->
inputs
());
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
v
));
auto
reduce_type
=
ins
->
inputs
().
front
()
->
get_shape
().
type
();
}
v
[
"reduction"
]
=
"op::sum{}"
;
};
std
::
string
mean
=
"op::mean<"
+
std
::
to_string
(
reduce_elements
)
+
">{}"
;
// Use float accumulator when reduction size is too large for half
static
const
char
*
const
fused_reduce_kernel
=
R"__migraphx__(
if
(
reduce_type
==
shape
::
half_type
and
reduce_elements
>
16384
)
#include <migraphx/kernels/index.hpp>
v
[
"read"
]
=
"compose("
+
mean
+
", op::convert_to<float>{})"
;
#include <migraphx/kernels/reduce.hpp>
else
if
(
contains
({
shape
::
float_type
,
shape
::
half_type
,
shape
::
double_type
},
#include <migraphx/kernels/pointwise.hpp>
reduce_type
))
#include <migraphx/kernels/vectorize.hpp>
v
[
"read"
]
=
mean
;
#include <args.hpp>
else
v
[
"write"
]
=
mean
;
namespace migraphx {
}
else
if
(
op
.
name
()
==
"reduce_max"
)
${preamble}
{
v
[
"reduction"
]
=
"op::max{}"
;
extern "C" {
v
[
"init"
]
=
"lowest{}"
;
MIGRAPHX_GLOBAL void ${kernel}(${params})
}
{
else
if
(
op
.
name
()
==
"reduce_min"
)
transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
fused_reduce<reduce::${algo}, ${reduced}>(y, partial(${lambda})(xs...));
});
}
}
} // namespace migraphx
)__migraphx__"
;
struct
fused_reduce_compiler
:
compiler
<
fused_reduce_compiler
>
{
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"fused_reduce"
};
}
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
auto
axes
=
v
.
at
(
"axes"
).
to_vector
<
std
::
size_t
>
();
auto
virtual_inputs
=
inputs
;
virtual_inputs
.
push_back
(
get_reduced_shape
(
inputs
.
front
(),
axes
));
virtual_inputs
.
push_back
(
get_output_shape
(
inputs
.
front
(),
axes
));
virtual_inputs
=
reduce_dims
(
virtual_inputs
);
auto
reduce_output_shape
=
virtual_inputs
.
back
();
virtual_inputs
.
pop_back
();
auto
reduction_shape
=
virtual_inputs
.
back
();
virtual_inputs
.
pop_back
();
hip_compile_options
options
;
options
.
inputs
=
inputs
;
options
.
output
=
inputs
.
back
();
options
.
virtual_inputs
=
virtual_inputs
;
auto
faxis
=
find_fast_axis
({
options
.
virtual_inputs
.
front
()});
vectorize
vec
{};
auto
nelements
=
reduce_output_shape
.
elements
();
auto
algo
=
v
.
get
(
"algo"
,
get_reduce_algo
(
options
.
virtual_inputs
,
reduction_shape
.
lens
()));
if
(
algo
==
"block"
)
{
{
v
[
"reduction"
]
=
"op::min{}"
;
// Vectorize if the axis is a reduction axis
v
[
"init"
]
=
"highest{}"
;
if
(
reduce_output_shape
.
lens
()[
faxis
]
==
1
)
vec
=
vectorize
::
elements
(
ctx
,
faxis
,
options
.
virtual_inputs
);
auto
relements
=
reduction_shape
.
elements
()
/
vec
.
size
;
auto
block_size
=
compute_block_size
(
relements
,
256
);
if
(
relements
>=
block_size
*
256
)
algo
=
"block_large"
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
nelements
*
block_size
,
256
),
block_size
);
}
}
else
if
(
op
.
name
()
==
"reduce_prod
"
)
else
if
(
algo
==
"lane
"
)
{
{
v
[
"reduction"
]
=
"op::product{}"
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
nelements
,
256
));
v
[
"init"
]
=
"1"
;
}
}
else
else
{
{
MIGRAPHX_THROW
(
"Un
supported reduce"
);
MIGRAPHX_THROW
(
"Un
known reduce algo: "
+
algo
);
}
}
options
.
kernel_name
=
v
.
get
(
"kernel"
,
"reduce_kernel"
);
auto
src
=
interpolate_string
(
fused_reduce_kernel
,
{{
"kernel"
,
options
.
kernel_name
},
{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"algo"
,
algo
},
{
"reduced"
,
"decltype("
+
generate_make_shape
(
reduce_output_shape
)
+
")"
},
{
"lambda"
,
v
.
at
(
"lambda"
).
to
<
std
::
string
>
()},
{
"transformers"
,
make_transformer_args
(
vec
)},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})}});
options
.
params
+=
"-Wno-float-equal"
;
return
compile_hip_code_object
(
src
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
assert
(
not
ins
->
module_inputs
().
empty
());
auto
v
=
op
.
to_value
();
auto
*
rm
=
ins
->
module_inputs
().
front
();
v
[
"preamble"
]
=
generate_reduce
(
*
rm
,
"fused_reduce_op"
);
v
[
"lambda"
]
=
"MIGRAPHX_LIFT(fused_reduce_op)"
;
v
[
"kernel"
]
=
generate_name_from_ops
(
*
rm
)
+
"_kernel"
;
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
v
));
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
v
));
}
}
};
};
...
...
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
View file @
fe493c28
...
@@ -204,6 +204,14 @@ constexpr auto compose(Fs... fs)
...
@@ -204,6 +204,14 @@ constexpr auto compose(Fs... fs)
})(
fs
...);
})(
fs
...);
}
}
template
<
class
F
>
constexpr
auto
partial
(
F
f
)
{
return
[
=
](
auto
...
xs
)
{
return
[
=
](
auto
&&
...
ys
)
{
return
f
(
xs
...,
static_cast
<
decltype
(
ys
)
>
(
ys
)...);
};
};
}
template
<
class
...
Ts
>
template
<
class
...
Ts
>
constexpr
auto
pack
(
Ts
...
xs
)
constexpr
auto
pack
(
Ts
...
xs
)
{
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
View file @
fe493c28
...
@@ -241,6 +241,12 @@ struct index
...
@@ -241,6 +241,12 @@ struct index
}
}
};
};
#ifdef MIGRAPHX_NLOCAL
#define MIGRAPHX_GLOBAL \
__global__ __attribute__((amdgpu_flat_work_group_size(MIGRAPHX_NLOCAL, MIGRAPHX_NLOCAL)))
#else
#define MIGRAPHX_GLOBAL __global__
#endif
inline
__device__
__attribute__
((
const
))
index
make_index
()
inline
__device__
__attribute__
((
const
))
index
make_index
()
{
{
return
index
{
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
,
threadIdx
.
x
,
blockIdx
.
x
};
// NOLINT
return
index
{
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
,
threadIdx
.
x
,
blockIdx
.
x
};
// NOLINT
...
...
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp
View file @
fe493c28
...
@@ -174,6 +174,25 @@ struct inner_storage_tag
...
@@ -174,6 +174,25 @@ struct inner_storage_tag
template
<
class
T
>
template
<
class
T
>
using
is_inner_storage
=
is_base_of
<
inner_storage_tag
,
remove_cv_t
<
remove_reference_t
<
T
>>>
;
using
is_inner_storage
=
is_base_of
<
inner_storage_tag
,
remove_cv_t
<
remove_reference_t
<
T
>>>
;
template
<
class
Size
,
class
F
>
struct
lazy_inner_storage
:
inner_storage_tag
{
using
type
=
remove_reference_t
<
decltype
(
declval
<
F
>
()(
0
,
_c
<
0
>
))
>
;
F
f
;
constexpr
Size
rsize
()
const
{
return
{};
}
template
<
class
U
,
class
V
>
constexpr
auto
operator
()(
U
j
,
V
d
)
const
{
return
f
(
j
,
d
);
}
};
template
<
class
Size
,
class
F
>
constexpr
lazy_inner_storage
<
Size
,
F
>
make_lazy_inner_storage
(
Size
,
F
f
)
{
return
{{},
f
};
}
template
<
class
R
,
class
F
>
template
<
class
R
,
class
F
>
struct
storage_access
:
F
struct
storage_access
:
F
{
{
...
@@ -278,6 +297,14 @@ struct reducer_base
...
@@ -278,6 +297,14 @@ struct reducer_base
});
});
}
}
template
<
class
F
>
__device__
auto
lazy_inner
(
F
f
)
const
{
return
this
->
inner_sliced
([
=
](
auto
n
,
auto
&&
...
xs
)
{
return
make_lazy_inner_storage
(
n
,
[
=
](
auto
j
,
auto
d
)
{
return
f
(
xs
(
j
,
d
)...);
});
});
}
template
<
class
Op
,
class
T
,
class
Read
>
template
<
class
Op
,
class
T
,
class
Read
>
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
__device__
auto
reduce
(
Op
op
,
T
init
,
Read
read
)
const
{
{
...
@@ -396,25 +423,6 @@ struct block_large
...
@@ -396,25 +423,6 @@ struct block_large
index
idx
;
index
idx
;
Slicer
slice
;
Slicer
slice
;
template
<
class
Size
,
class
F
>
struct
inner_storage
:
inner_storage_tag
{
using
type
=
remove_reference_t
<
decltype
(
declval
<
F
>
()(
0
,
_c
<
0
>
))
>
;
F
f
;
constexpr
Size
rsize
()
const
{
return
{};
}
template
<
class
U
,
class
V
>
constexpr
auto
operator
()(
U
j
,
V
d
)
const
{
return
f
(
j
,
d
);
}
};
template
<
class
Size
,
class
F
>
static
constexpr
inner_storage
<
Size
,
F
>
make_inner_storage
(
Size
,
F
f
)
{
return
{{},
{
f
}};
}
template
<
class
Op
,
class
T
,
class
Read
,
class
N
,
class
...
Ts
>
template
<
class
Op
,
class
T
,
class
Read
,
class
N
,
class
...
Ts
>
__device__
auto
reduce_impl
(
Op
op
,
T
init
,
Read
read
,
N
n
,
Ts
&&
...
xs
)
const
__device__
auto
reduce_impl
(
Op
op
,
T
init
,
Read
read
,
N
n
,
Ts
&&
...
xs
)
const
{
{
...
@@ -439,7 +447,7 @@ struct block_large
...
@@ -439,7 +447,7 @@ struct block_large
template
<
class
R
,
class
F
,
class
N
,
class
...
Ts
>
template
<
class
R
,
class
F
,
class
N
,
class
...
Ts
>
__device__
auto
inner_impl
(
F
f
,
N
n
,
Ts
&&
...
xs
)
const
__device__
auto
inner_impl
(
F
f
,
N
n
,
Ts
&&
...
xs
)
const
{
{
return
make_inner_storage
(
n
,
[
=
](
auto
j
,
auto
d
)
{
return
f
(
xs
(
j
,
d
)...);
});
return
make_
lazy_
inner_storage
(
n
,
[
=
](
auto
j
,
auto
d
)
{
return
f
(
xs
(
j
,
d
)...);
});
}
}
};
};
...
@@ -469,25 +477,6 @@ struct lane
...
@@ -469,25 +477,6 @@ struct lane
index
idx
;
index
idx
;
Slicer
slice
;
Slicer
slice
;
template
<
class
Size
,
class
F
>
struct
inner_storage
:
inner_storage_tag
{
using
type
=
remove_reference_t
<
decltype
(
declval
<
F
>
()(
0
,
_c
<
0
>
))
>
;
F
f
;
constexpr
Size
rsize
()
const
{
return
{};
}
template
<
class
U
,
class
V
>
constexpr
auto
operator
()(
U
j
,
V
d
)
const
{
return
f
(
j
,
d
);
}
};
template
<
class
Size
,
class
F
>
static
constexpr
inner_storage
<
Size
,
F
>
make_inner_storage
(
Size
,
F
f
)
{
return
{{},
{
f
}};
}
template
<
class
Op
,
class
T
,
class
Read
,
class
N
,
class
U
,
class
...
Us
>
template
<
class
Op
,
class
T
,
class
Read
,
class
N
,
class
U
,
class
...
Us
>
__device__
auto
reduce_impl
(
Op
op
,
T
init
,
Read
read
,
N
n
,
U
&&
x
,
Us
&&
...
xs
)
const
__device__
auto
reduce_impl
(
Op
op
,
T
init
,
Read
read
,
N
n
,
U
&&
x
,
Us
&&
...
xs
)
const
{
{
...
@@ -518,7 +507,7 @@ struct lane
...
@@ -518,7 +507,7 @@ struct lane
template
<
class
R
,
class
F
,
class
N
,
class
...
Ts
>
template
<
class
R
,
class
F
,
class
N
,
class
...
Ts
>
__device__
auto
inner_impl
(
F
f
,
N
n
,
Ts
&&
...
xs
)
const
__device__
auto
inner_impl
(
F
f
,
N
n
,
Ts
&&
...
xs
)
const
{
{
return
make_inner_storage
(
n
,
[
=
](
auto
j
,
auto
d
)
{
return
f
(
xs
(
j
,
d
)...);
});
return
make_
lazy_
inner_storage
(
n
,
[
=
](
auto
j
,
auto
d
)
{
return
f
(
xs
(
j
,
d
)...);
});
}
}
};
};
template
<
class
Slicer
>
template
<
class
Slicer
>
...
@@ -577,5 +566,21 @@ simple_reduce(Op op, T init, Input input, Output output, ReadInput read, WriteOu
...
@@ -577,5 +566,21 @@ simple_reduce(Op op, T init, Input input, Output output, ReadInput read, WriteOu
});
});
}
}
template
<
class
Algo
,
class
Reduced
,
class
Output
,
class
F
>
__device__
void
fused_reduce
(
Output
output
,
F
f
)
{
Algo
::
template
run
<
Reduced
>([
&
](
auto
out_idx
,
auto
r
)
{
auto
result
=
f
(
r
);
if
constexpr
(
reduce
::
is_inner_storage
<
decltype
(
result
)
>
{})
{
r
.
inner
([
&
](
auto
&
y
,
auto
x
)
{
y
=
x
;
})(
output
,
result
);
}
else
{
r
.
outer
([
&
]
{
output
[
out_idx
]
=
implicit_conversion
(
result
);
});
}
});
}
}
// namespace migraphx
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_REDUCE_HPP
#endif // MIGRAPHX_GUARD_KERNELS_REDUCE_HPP
src/targets/gpu/mlir.cpp
View file @
fe493c28
...
@@ -30,6 +30,7 @@
...
@@ -30,6 +30,7 @@
#include <mlir-c/BuiltinTypes.h>
#include <mlir-c/BuiltinTypes.h>
#include <mlir-c/Diagnostics.h>
#include <mlir-c/Diagnostics.h>
#include <mlir-c/Dialect/MIGraphX.h>
#include <mlir-c/Dialect/MIGraphX.h>
#include <mlir-c/Dialect/Rock.h>
#include <mlir-c/IntegerSet.h>
#include <mlir-c/IntegerSet.h>
#include <mlir-c/Pass.h>
#include <mlir-c/Pass.h>
#include <mutex>
#include <mutex>
...
@@ -55,12 +56,16 @@
...
@@ -55,12 +56,16 @@
#include <migraphx/permutation.hpp>
#include <migraphx/permutation.hpp>
#include <deque>
#include <deque>
#include <variant>
#include <variant>
#include <fstream>
#include <sstream>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_MLIR
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_TRACE_MLIR
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_MLIR_TUNING_DB
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_MLIR_TUNING_CFG
);
#ifdef MIGRAPHX_MLIR
#ifdef MIGRAPHX_MLIR
template
<
class
T
,
class
F
,
F
f
>
// NOLINT
template
<
class
T
,
class
F
,
F
f
>
// NOLINT
...
@@ -124,6 +129,8 @@ using mlir_op_printing_flags = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOpPrintingFlags,
...
@@ -124,6 +129,8 @@ using mlir_op_printing_flags = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOpPrintingFlags,
using
mlir_region
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRegion
,
mlirRegionDestroy
);
using
mlir_region
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRegion
,
mlirRegionDestroy
);
using
mlir_block
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirBlock
,
mlirBlockDestroy
);
using
mlir_block
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirBlock
,
mlirBlockDestroy
);
using
mlir_pass_manager
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirPassManager
,
mlirPassManagerDestroy
);
using
mlir_pass_manager
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirPassManager
,
mlirPassManagerDestroy
);
using
mlir_tuning_table
=
MIGRAPHX_MANAGE_MLIR_HANDLE
(
MlirRockTuningTable
,
mlirRockTuningTableDestroy
);
std
::
string_view
to_string_view
(
MlirStringRef
s
)
{
return
{
s
.
data
,
s
.
length
};
}
std
::
string_view
to_string_view
(
MlirStringRef
s
)
{
return
{
s
.
data
,
s
.
length
};
}
...
@@ -455,7 +462,7 @@ struct mlir_program
...
@@ -455,7 +462,7 @@ struct mlir_program
auto
ops
=
create_operation_state
(
"func.func"
);
auto
ops
=
create_operation_state
(
"func.func"
);
ops
.
add_attributes
({{
"function_type"
,
make_function_type
(
inputs
,
outputs
)},
ops
.
add_attributes
({{
"function_type"
,
make_function_type
(
inputs
,
outputs
)},
{
"sym_name"
,
s
td
::
string
(
"main"
)
},
{
"sym_name"
,
s
ym_name
},
{
"kernel"
,
std
::
string
(
"mixr"
)},
{
"kernel"
,
std
::
string
(
"mixr"
)},
{
"arch"
,
target_arch
}});
{
"arch"
,
target_arch
}});
ops
.
add_region
(
std
::
move
(
region
));
ops
.
add_region
(
std
::
move
(
region
));
...
@@ -498,11 +505,25 @@ struct mlir_program
...
@@ -498,11 +505,25 @@ struct mlir_program
return
ins
->
get_shape
();
return
ins
->
get_shape
();
}
}
static
std
::
string
get_symbol_name
(
const
module
&
m
)
{
for
(
auto
ins
:
iterator_for
(
m
))
{
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
{
return
"mlir_"
+
ins
->
name
();
}
}
return
"main"
;
}
void
parse
(
const
module
&
m
)
void
parse
(
const
module
&
m
)
{
{
sym_name
=
get_symbol_name
(
m
);
auto
mbody
=
mlirModuleGetBody
(
mmodule
.
get
());
auto
mbody
=
mlirModuleGetBody
(
mmodule
.
get
());
std
::
unordered_map
<
instruction_ref
,
MlirValue
>
ins_map
;
std
::
unordered_map
<
instruction_ref
,
MlirValue
>
ins_map
;
auto
fbody
=
insert
(
mbody
,
m
,
ins_map
);
auto
fbody
=
insert
(
mbody
,
m
,
ins_map
);
for
(
auto
ins
:
iterator_for
(
m
))
for
(
auto
ins
:
iterator_for
(
m
))
{
{
if
(
ins
->
name
()
==
"@param"
)
if
(
ins
->
name
()
==
"@param"
)
...
@@ -512,16 +533,13 @@ struct mlir_program
...
@@ -512,16 +533,13 @@ struct mlir_program
ops
.
add_attribute_value
(
get_operator_value
(
ins
->
get_operator
()));
ops
.
add_attribute_value
(
get_operator_value
(
ins
->
get_operator
()));
if
(
ins
->
name
()
!=
"@return"
)
if
(
ins
->
name
()
!=
"@return"
)
ops
.
add_results
({
get_shape
(
ins
)});
ops
.
add_results
({
get_shape
(
ins
)});
if
(
ins
->
name
()
==
"convolution"
)
if
(
ins
->
name
()
==
"convolution"
or
ins
->
name
()
==
"dot"
)
{
{
pp
=
pp
=
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
problem_params
{
ins
->
get_operator
(),
to_shapes
(
ins
->
inputs
()),
ins
->
get_shape
()};
// check if HW supports xdlops
// check if HW supports xdlops
auto
target_chip
=
trim
(
split_string
(
target_arch
,
':'
).
front
());
auto
target_chip
=
trim
(
split_string
(
target_arch
,
':'
).
front
());
bool
xdlops
=
contains
(
get_xdlops_archs
(),
target_chip
);
bool
xdlops
=
contains
(
get_xdlops_archs
(),
target_chip
);
std
::
string
tuned
=
get_tune_params
(
xdlops
);
if
(
not
tuned
.
empty
())
ops
.
add_attributes
({{
"perf_config"
,
tuned
}});
if
(
xdlops
)
if
(
xdlops
)
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
ops
.
add_attributes
({{
"xdlopsV2"
,
true
}});
}
}
...
@@ -542,15 +560,19 @@ struct mlir_program
...
@@ -542,15 +560,19 @@ struct mlir_program
code_object_op
compile
()
MIGRAPHX_TIDY_CONST
code_object_op
compile
()
MIGRAPHX_TIDY_CONST
{
{
mlir_pass_manager
pm
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlir_pass_manager
pm_front
{
mlirPassManagerCreate
(
ctx
.
get
())};
mlir_pass_manager
pm_back
{
mlirPassManagerCreate
(
ctx
.
get
())};
// 1st pipeline to call
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline
(
pm
.
get
());
mlirMIGraphXAddHighLevelPipeline
(
pm_front
.
get
());
mlirPassManagerRun
(
pm_front
.
get
(),
mmodule
.
get
());
// 2nd pipeline to call
// 2nd pipeline to call
mlirMIGraphXAddBackendPipeline
(
pm
.
get
(),
target_arch
.
c_str
());
get_module_tuned
();
mlirPassManagerRun
(
pm
.
get
(),
mmodule
.
get
());
mlirMIGraphXAddBackendPipeline
(
pm_back
.
get
(),
target_arch
.
c_str
());
mlirPassManagerRun
(
pm_back
.
get
(),
mmodule
.
get
());
code_object_op
op
{};
code_object_op
op
{};
op
.
symbol_name
=
"main"
;
op
.
symbol_name
=
sym_name
;
op
.
code_object
=
get_binary
();
op
.
code_object
=
get_binary
();
std
::
tie
(
op
.
global
,
op
.
local
)
=
get_launch_params
();
std
::
tie
(
op
.
global
,
op
.
local
)
=
get_launch_params
();
return
op
;
return
op
;
...
@@ -578,7 +600,74 @@ struct mlir_program
...
@@ -578,7 +600,74 @@ struct mlir_program
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
MIGRAPHX_THROW
(
"Failed to compile mlir program"
);
}
}
std
::
string
get_tune_params
(
bool
xdlops
)
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
std
::
string
get_tune_params
(
bool
xdlops
)
const
{
return
get_mlir_perf_for_conv
(
pp
,
xdlops
);
}
// This function appends to tuning cfg file that could be
// used with rocMLIR tuning scripts.
void
dump_tuning_cfg
(
const
char
*
prob_config
)
const
{
std
::
string
tuning_cfg_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_CFG
{});
if
(
!
tuning_cfg_path
.
empty
())
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
prob_config
,
'\t'
);
std
::
string
prob
=
tokens
[
1
];
if
(
starts_with
(
prob
,
"conv"
))
{
tuning_cfg_path
+=
".conv"
;
}
else
{
tuning_cfg_path
+=
".gemm"
;
}
std
::
ofstream
tuning_cfg
(
tuning_cfg_path
,
std
::
ios
::
app
);
tuning_cfg
<<
prob
<<
std
::
endl
;
}
}
static
mlir_tuning_table
create_tuning_table
()
{
mlir_tuning_table
tuning_table
{
mlirRockTuningTableCreate
()};
std
::
string
tuning_db_path
=
string_value_of
(
MIGRAPHX_MLIR_TUNING_DB
{});
if
(
!
tuning_db_path
.
empty
())
{
std
::
ifstream
tuning_db_tsv
(
tuning_db_path
);
if
(
tuning_db_tsv
)
{
std
::
string
line
;
while
(
std
::
getline
(
tuning_db_tsv
,
line
))
{
std
::
vector
<
std
::
string
>
tokens
=
split_string
(
line
,
'\t'
);
std
::
string
arch
=
tokens
[
0
];
std
::
string
prob
=
tokens
[
1
];
std
::
string
perf
=
tokens
[
2
];
std
::
string
key
=
arch
.
append
(
"
\t
"
).
append
(
prob
);
mlirRockTuningUpdateTable
(
tuning_table
.
get
(),
key
.
c_str
(),
perf
.
c_str
(),
1.0
);
}
}
}
else
{
std
::
cerr
<<
"WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for "
"optimal performance."
<<
std
::
endl
;
}
return
tuning_table
;
}
bool
get_module_tuned
()
const
{
static
mlir_tuning_table
tuning_table
=
create_tuning_table
();
if
(
!
mlirRockTuningSetFromTable
(
tuning_table
.
get
(),
mmodule
.
get
()))
{
const
char
*
prob_config
=
mlirRockTuningGetKey
(
tuning_table
.
get
(),
mmodule
.
get
());
std
::
stringstream
key
(
prob_config
);
std
::
cerr
<<
"fails to set param on"
<<
prob_config
<<
std
::
endl
;
dump_tuning_cfg
(
prob_config
);
return
false
;
}
return
true
;
}
mlir_context
ctx
;
mlir_context
ctx
;
MlirLocation
location
;
MlirLocation
location
;
...
@@ -586,6 +675,7 @@ struct mlir_program
...
@@ -586,6 +675,7 @@ struct mlir_program
problem_params
pp
;
problem_params
pp
;
std
::
deque
<
std
::
string
>
strings
{};
std
::
deque
<
std
::
string
>
strings
{};
std
::
string
target_arch
;
std
::
string
target_arch
;
std
::
string
sym_name
;
};
};
std
::
string
dump_mlir
(
const
module
&
m
)
std
::
string
dump_mlir
(
const
module
&
m
)
...
...
src/targets/gpu/target.cpp
View file @
fe493c28
...
@@ -26,13 +26,13 @@
...
@@ -26,13 +26,13 @@
#include <migraphx/check_context.hpp>
#include <migraphx/check_context.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/eliminate_concat.hpp>
#include <migraphx/eliminate_concat.hpp>
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/eliminate_data_type.hpp>
#include <migraphx/eliminate_data_type.hpp>
#include <migraphx/eliminate_identity.hpp>
#include <migraphx/eliminate_identity.hpp>
#include <migraphx/eliminate_pad.hpp>
#include <migraphx/eliminate_pad.hpp>
#include <migraphx/fuse_pointwise.hpp>
#include <migraphx/fuse_pointwise.hpp>
#include <migraphx/fuse_reduce.hpp>
#include <migraphx/inline_module.hpp>
#include <migraphx/inline_module.hpp>
#include <migraphx/insert_pad.hpp>
#include <migraphx/insert_pad.hpp>
#include <migraphx/layout_nhwc.hpp>
#include <migraphx/layout_nhwc.hpp>
...
@@ -40,7 +40,7 @@
...
@@ -40,7 +40,7 @@
#include <migraphx/normalize_ops.hpp>
#include <migraphx/normalize_ops.hpp>
#include <migraphx/optimize_module.hpp>
#include <migraphx/optimize_module.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/pro
pagate_constant
.hpp>
#include <migraphx/pro
mote_literals
.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_gelu.hpp>
#include <migraphx/rewrite_gelu.hpp>
...
@@ -48,9 +48,9 @@
...
@@ -48,9 +48,9 @@
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/schedule.hpp>
#include <migraphx/schedule.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/split_single_dyn_dim.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/compile_miopen.hpp>
#include <migraphx/gpu/compile_miopen.hpp>
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/compile_ops.hpp>
...
@@ -75,6 +75,7 @@ namespace gpu {
...
@@ -75,6 +75,7 @@ namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_REDUCE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_NHWC
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_ENABLE_NHWC
)
struct
id_pass
struct
id_pass
{
{
...
@@ -103,6 +104,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -103,6 +104,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
// clang-format off
// clang-format off
return
return
{
{
enable_pass
(
options
.
split_single_dyn_dim
,
split_single_dyn_dim
{}),
enable_pass
(
options
.
split_single_dyn_dim
,
dead_code_elimination
{}),
normalize_ops
{},
normalize_ops
{},
dead_code_elimination
{},
dead_code_elimination
{},
simplify_qdq
{},
simplify_qdq
{},
...
@@ -132,6 +135,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -132,6 +135,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
optimize_module
{},
optimize_module
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}),
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}),
dead_code_elimination
{},
dead_code_elimination
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_REDUCE_FUSION
{}),
fuse_reduce
{}),
dead_code_elimination
{},
fuse_mlir
{
&
ctx
},
fuse_mlir
{
&
ctx
},
dead_code_elimination
{},
dead_code_elimination
{},
fuse_ck
{
&
ctx
},
fuse_ck
{
&
ctx
},
...
@@ -153,6 +158,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
...
@@ -153,6 +158,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination
{},
dead_code_elimination
{},
compile_ops
{
&
ctx
},
compile_ops
{
&
ctx
},
dead_code_elimination
{},
dead_code_elimination
{},
promote_literals
{},
dead_code_elimination
{},
write_literals
{
&
ctx
},
write_literals
{
&
ctx
},
schedule
{
gpu
::
schedule_model
{
ctx
.
get_current_device
().
nstreams
()},
not
enabled
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
{})},
schedule
{
gpu
::
schedule_model
{
ctx
.
get_current_device
().
nstreams
()},
not
enabled
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
{})},
memory_coloring
{
"hip::allocate"
},
memory_coloring
{
"hip::allocate"
},
...
...
src/targets/ref/CMakeLists.txt
View file @
fe493c28
...
@@ -31,10 +31,9 @@ set_target_properties(migraphx_ref PROPERTIES EXPORT_NAME ref)
...
@@ -31,10 +31,9 @@ set_target_properties(migraphx_ref PROPERTIES EXPORT_NAME ref)
rocm_set_soversion
(
migraphx_ref
${
MIGRAPHX_SO_VERSION
}
)
rocm_set_soversion
(
migraphx_ref
${
MIGRAPHX_SO_VERSION
}
)
find_path
(
BLAZE_INCLUDE blaze/Blaze.h
)
find_path
(
BLAZE_INCLUDE blaze/Blaze.h
)
find_package
(
Threads
)
rocm_clang_tidy_check
(
migraphx_ref
)
rocm_clang_tidy_check
(
migraphx_ref
)
target_link_libraries
(
migraphx_ref migraphx
Threads::Threads
)
target_link_libraries
(
migraphx_ref
PUBLIC
migraphx
)
target_include_directories
(
migraphx_ref PRIVATE
${
BLAZE_INCLUDE
}
)
target_include_directories
(
migraphx_ref PRIVATE
${
BLAZE_INCLUDE
}
)
target_compile_definitions
(
migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS
)
target_compile_definitions
(
migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS
)
...
...
test/CMakeLists.txt
View file @
fe493c28
...
@@ -110,7 +110,7 @@ function(add_test_executable TEST_NAME)
...
@@ -110,7 +110,7 @@ function(add_test_executable TEST_NAME)
add_test_command
(
${
TEST_NAME
}
${
TEST_COMMAND
}
)
add_test_command
(
${
TEST_NAME
}
${
TEST_COMMAND
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx migraphx_onnx
)
target_link_libraries
(
${
TEST_NAME
}
migraphx migraphx_onnx
migraphx_ref
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
endfunction
(
add_test_executable
)
endfunction
(
add_test_executable
)
...
@@ -163,7 +163,7 @@ foreach(ONNX_TEST ${ONNX_TESTS})
...
@@ -163,7 +163,7 @@ foreach(ONNX_TEST ${ONNX_TESTS})
set
(
TEST_NAME test_
${
BASE_NAME
}
)
set
(
TEST_NAME test_
${
BASE_NAME
}
)
add_executable
(
${
TEST_NAME
}
${
ONNX_TEST
}
)
add_executable
(
${
TEST_NAME
}
${
ONNX_TEST
}
)
rocm_clang_tidy_check
(
${
TEST_NAME
}
)
rocm_clang_tidy_check
(
${
TEST_NAME
}
)
target_link_libraries
(
${
TEST_NAME
}
migraphx_onnx
)
target_link_libraries
(
${
TEST_NAME
}
migraphx_onnx
migraphx_ref
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
target_include_directories
(
${
TEST_NAME
}
PUBLIC include
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
> WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
> WORKING_DIRECTORY
${
TEST_ONNX_DIR
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
tests
${
TEST_NAME
}
)
...
@@ -218,3 +218,10 @@ test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/r
...
@@ -218,3 +218,10 @@ test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/r
if
(
MIGRAPHX_ENABLE_GPU
)
if
(
MIGRAPHX_ENABLE_GPU
)
test_headers
(
migraphx/gpu
${
CMAKE_SOURCE_DIR
}
/src/targets/gpu/include/migraphx/gpu/*.hpp
)
test_headers
(
migraphx/gpu
${
CMAKE_SOURCE_DIR
}
/src/targets/gpu/include/migraphx/gpu/*.hpp
)
endif
()
endif
()
if
(
MIGRAPHX_ENABLE_CPU
)
test_headers
(
migraphx/cpu
${
CMAKE_SOURCE_DIR
}
/src/targets/cpu/include/migraphx/cpu/*.hpp
)
endif
()
if
(
MIGRAPHX_ENABLE_FPGA
)
test_headers
(
migraphx/fpga
${
CMAKE_SOURCE_DIR
}
/src/targets/fpga/include/migraphx/fpga/*.hpp
)
endif
()
test/api/CMakeLists.txt
View file @
fe493c28
...
@@ -25,7 +25,7 @@ function(add_api_test TEST_NAME TEST_SRC TEST_DIR)
...
@@ -25,7 +25,7 @@ function(add_api_test TEST_NAME TEST_SRC TEST_DIR)
set
(
NAME test_api_
${
TEST_NAME
}
)
set
(
NAME test_api_
${
TEST_NAME
}
)
add_executable
(
${
NAME
}
EXCLUDE_FROM_ALL
${
TEST_SRC
}
)
add_executable
(
${
NAME
}
EXCLUDE_FROM_ALL
${
TEST_SRC
}
)
rocm_clang_tidy_check
(
${
NAME
}
)
rocm_clang_tidy_check
(
${
NAME
}
)
target_link_libraries
(
${
NAME
}
migraphx_c migraphx
)
target_link_libraries
(
${
NAME
}
migraphx_c migraphx
migraphx_all_targets
)
target_include_directories
(
${
NAME
}
PUBLIC ../include
)
target_include_directories
(
${
NAME
}
PUBLIC ../include
)
add_test
(
NAME
${
NAME
}
COMMAND $<TARGET_FILE:
${
NAME
}
> WORKING_DIRECTORY
${
TEST_DIR
}
)
add_test
(
NAME
${
NAME
}
COMMAND $<TARGET_FILE:
${
NAME
}
> WORKING_DIRECTORY
${
TEST_DIR
}
)
add_dependencies
(
tests
${
NAME
}
)
add_dependencies
(
tests
${
NAME
}
)
...
@@ -59,7 +59,7 @@ if(MIGRAPHX_ENABLE_GPU)
...
@@ -59,7 +59,7 @@ if(MIGRAPHX_ENABLE_GPU)
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
list
(
APPEND CMAKE_PREFIX_PATH /opt/rocm
)
find_package
(
hip
)
find_package
(
hip
)
add_api_test
(
gpu test_gpu.cpp
${
TEST_ONNX_DIR
}
)
add_api_test
(
gpu test_gpu.cpp
${
TEST_ONNX_DIR
}
)
target_link_libraries
(
test_api_gpu
hip::host
)
target_link_libraries
(
test_api_gpu
)
add_api_test
(
custom_op_gpu test_custom_op_gpu.cpp
${
TEST_ONNX_DIR
}
)
add_api_test
(
custom_op_gpu test_custom_op_gpu.cpp
${
TEST_ONNX_DIR
}
)
target_link_libraries
(
test_api_custom_op_gpu
hip::host
)
target_link_libraries
(
test_api_custom_op_gpu
)
endif
()
endif
()
test/check_shapes_test.cpp
View file @
fe493c28
...
@@ -36,7 +36,7 @@ bool create_shapes(bool dynamic_allowed)
...
@@ -36,7 +36,7 @@ bool create_shapes(bool dynamic_allowed)
try
try
{
{
shape
a
{
shape
::
int64_type
,
{
3
}};
shape
a
{
shape
::
int64_type
,
{
3
}};
shape
b
{
shape
::
float_type
,
{{
3
,
6
,
0
},
{
4
,
4
,
0
}}};
shape
b
{
shape
::
float_type
,
{{
3
,
6
},
{
4
,
4
}}};
auto
op
=
migraphx
::
make_op
(
"add"
);
auto
op
=
migraphx
::
make_op
(
"add"
);
migraphx
::
check_shapes
{{
a
,
b
},
op
,
dynamic_allowed
}.
has
(
2
);
migraphx
::
check_shapes
{{
a
,
b
},
op
,
dynamic_allowed
}.
has
(
2
);
return
true
;
return
true
;
...
...
test/fuse_reduce.cpp
0 → 100644
View file @
fe493c28
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/fuse_reduce.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/program.hpp>
#include <basic_ops.hpp>
#include <migraphx/make_op.hpp>
#include <test.hpp>
#include <pointwise.hpp>
void
run_pass
(
migraphx
::
program
&
p
)
{
migraphx
::
run_passes
(
p
,
{
migraphx
::
fuse_reduce
{},
migraphx
::
dead_code_elimination
{}});
}
bool
all_instructions_are_local
(
const
migraphx
::
module
&
m
)
{
return
std
::
all_of
(
m
.
begin
(),
m
.
end
(),
[
&
](
const
auto
&
ins
)
{
return
std
::
all_of
(
ins
.
inputs
().
begin
(),
ins
.
inputs
().
end
(),
[
&
](
auto
input
)
{
return
m
.
has_instruction
(
input
);
});
});
}
template
<
class
F
>
migraphx
::
instruction_ref
add_reduce
(
migraphx
::
program
&
p
,
const
std
::
string
&
name
,
std
::
vector
<
migraphx
::
instruction_ref
>
inputs
,
const
std
::
vector
<
int64_t
>&
axes
,
F
f
)
{
auto
*
rm
=
p
.
create_module
(
name
);
auto
*
mm
=
p
.
get_main_module
();
rm
->
set_bypass
();
std
::
vector
<
migraphx
::
instruction_ref
>
params
;
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
std
::
back_inserter
(
params
),
[
&
](
auto
input
)
{
return
rm
->
add_parameter
(
"x"
+
std
::
to_string
(
params
.
size
()),
migraphx
::
shape
{
input
->
get_shape
().
type
(),
input
->
get_shape
().
lens
()});
});
auto
r
=
f
(
rm
,
params
,
axes
);
rm
->
add_return
({
r
});
EXPECT
(
all_instructions_are_local
(
*
rm
));
return
mm
->
add_instruction
(
migraphx
::
make_op
(
"fused_reduce"
,
{{
"axes"
,
axes
}}),
inputs
,
{
rm
});
}
inline
auto
single_reduce
(
const
std
::
string
&
name
)
{
return
[
=
](
auto
*
rm
,
const
auto
&
inputs
,
const
auto
&
axes
)
{
return
rm
->
add_instruction
(
migraphx
::
make_op
(
name
,
{{
"axes"
,
axes
}}),
inputs
);
};
}
TEST_CASE
(
single
)
{
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
s
);
auto
rsum1
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
x
);
auto
rsum2
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
y
);
mm
->
add_return
({
rsum1
,
rsum2
});
}
run_pass
(
p1
);
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
s
);
auto
rsum1
=
add_reduce
(
p2
,
"main:reduce_sum0"
,
{
x
},
{
1
},
single_reduce
(
"reduce_sum"
));
auto
rsum2
=
add_reduce
(
p2
,
"main:reduce_sum1"
,
{
y
},
{
1
},
single_reduce
(
"reduce_sum"
));
mm
->
add_return
({
rsum1
,
rsum2
});
}
EXPECT
(
p1
==
p2
);
}
TEST_CASE
(
pointwise_reduce
)
{
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
s
);
auto
add
=
add_pointwise
(
p1
,
"main:pointwise0"
,
{
x
,
y
},
single_pointwise
(
"add"
));
auto
rsum
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
add
);
mm
->
add_return
({
rsum
});
}
run_pass
(
p1
);
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
s
);
auto
rsum
=
add_reduce
(
p2
,
"main:pointwise0:main:reduce_sum0"
,
{
x
,
y
},
{
1
},
[
&
](
auto
*
rm
,
const
auto
&
inputs
,
const
auto
&
axes
)
{
auto
add
=
add_pointwise
(
p2
,
rm
,
"main:pointwise0"
,
inputs
,
single_pointwise
(
"add"
));
return
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
add
);
});
mm
->
add_return
({
rsum
});
}
EXPECT
(
p1
==
p2
);
}
TEST_CASE
(
reduce_pointwise
)
{
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
s
);
auto
rsum
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
x
);
auto
rsumb
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
rsum
);
auto
add
=
add_pointwise
(
p1
,
"main:pointwise0"
,
{
rsumb
,
y
},
single_pointwise
(
"add"
));
mm
->
add_return
({
add
});
}
run_pass
(
p1
);
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
y
=
mm
->
add_parameter
(
"y"
,
s
);
auto
add
=
add_reduce
(
p2
,
"main:reduce_sum0:main:pointwise0"
,
{
x
,
y
},
{
1
},
[
&
](
auto
*
rm
,
const
auto
&
inputs
,
const
auto
&
axes
)
{
auto
rsum
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
inputs
[
0
]);
auto
rsumb
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
rsum
);
return
add_pointwise
(
p2
,
rm
,
"main:pointwise0"
,
{
rsumb
,
inputs
[
1
]},
single_pointwise
(
"add"
));
});
mm
->
add_return
({
add
});
}
EXPECT
(
p1
==
p2
);
}
TEST_CASE
(
reduce_reduce
)
{
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
rsum
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
x
);
auto
rsumb
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
rsum
);
auto
rsumdiff
=
add_pointwise
(
p1
,
"main:pointwise0"
,
{
rsumb
,
x
},
single_pointwise
(
"sub"
));
auto
rsum2
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
rsumdiff
);
auto
sqrt
=
add_pointwise
(
p1
,
"main:pointwise1"
,
{
rsum2
},
single_pointwise
(
"sqrt"
));
mm
->
add_return
({
sqrt
});
}
run_pass
(
p1
);
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
sqrt
=
add_reduce
(
p2
,
"main:reduce_sum1:main:reduce_sum0:main:pointwise0:main:pointwise1"
,
{
x
},
{
1
},
[
&
](
auto
*
rm
,
const
auto
&
inputs
,
const
auto
&
axes
)
{
auto
rsum
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
inputs
[
0
]);
auto
rsumb
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
rsum
);
auto
rsumdiff
=
add_pointwise
(
p2
,
rm
,
"main:pointwise0"
,
{
rsumb
,
inputs
[
0
]},
single_pointwise
(
"sub"
));
auto
rsum2
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
rsumdiff
);
return
add_pointwise
(
p2
,
rm
,
"main:pointwise1"
,
{
rsum2
},
single_pointwise
(
"sqrt"
));
});
mm
->
add_return
({
sqrt
});
}
EXPECT
(
p1
==
p2
);
}
TEST_CASE
(
reduce_reduce_mismatch_axis
)
{
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
4
,
2
,
3
}};
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
rsum1
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
x
);
auto
rsum2
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
2
}}}),
rsum1
);
mm
->
add_return
({
rsum2
});
}
run_pass
(
p1
);
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
rsum1
=
add_reduce
(
p2
,
"main:reduce_sum0"
,
{
x
},
{
1
},
single_reduce
(
"reduce_sum"
));
auto
rsum2
=
add_reduce
(
p2
,
"main:reduce_sum1"
,
{
rsum1
},
{
2
},
single_reduce
(
"reduce_sum"
));
mm
->
add_return
({
rsum2
});
}
EXPECT
(
p1
==
p2
);
}
TEST_CASE
(
pointwise_reduce_broadcast
)
{
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
rsum1
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
x
);
auto
sqrt
=
add_pointwise
(
p1
,
"main:pointwise0"
,
{
rsum1
},
single_pointwise
(
"sqrt"
));
auto
sqrtb
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
sqrt
);
auto
add1
=
add_pointwise
(
p1
,
"main:pointwise1"
,
{
sqrtb
,
x
},
single_pointwise
(
"add"
));
auto
rsum2
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
{
1
}}}),
add1
);
auto
add2
=
add_pointwise
(
p1
,
"main:pointwise2"
,
{
rsum2
,
rsum1
},
single_pointwise
(
"add"
));
mm
->
add_return
({
add2
});
}
run_pass
(
p1
);
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
add2
=
add_reduce
(
p2
,
"main:pointwise0:main:pointwise1:main:reduce_sum1:main:pointwise2:main:reduce_sum0"
,
{
x
},
{
1
},
[
&
](
auto
*
rm
,
const
auto
&
inputs
,
const
auto
&
axes
)
{
auto
rsum1
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
inputs
[
0
]);
auto
sqrt
=
add_pointwise
(
p2
,
rm
,
"main:pointwise0"
,
{
rsum1
},
single_pointwise
(
"sqrt"
));
auto
sqrtb
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
sqrt
);
auto
add1
=
add_pointwise
(
p2
,
rm
,
"main:pointwise1"
,
{
sqrtb
,
inputs
[
0
]},
single_pointwise
(
"add"
));
auto
rsum2
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
add1
);
return
add_pointwise
(
p2
,
rm
,
"main:pointwise2"
,
{
rsum2
,
rsum1
},
single_pointwise
(
"add"
));
});
mm
->
add_return
({
add2
});
}
EXPECT
(
p1
==
p2
);
}
TEST_CASE
(
reduce_reduce_broadcast
)
{
migraphx
::
shape
s
{
migraphx
::
shape
::
float_type
,
{
4
,
2
,
3
}};
migraphx
::
program
p1
;
{
auto
*
mm
=
p1
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
rsum1
=
add_reduce
(
p1
,
"test:reduce_sum0"
,
{
x
},
{
1
},
single_reduce
(
"reduce_sum"
));
auto
rsumb
=
mm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
rsum1
);
auto
add
=
add_reduce
(
p1
,
"test:reduce_sum1"
,
{
rsumb
,
x
},
{
1
},
[
&
](
auto
*
rm
,
const
auto
&
inputs
,
const
auto
&
axes
)
{
auto
add2
=
add_pointwise
(
p1
,
rm
,
"test:pointwise0"
,
inputs
,
single_pointwise
(
"add"
));
return
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
add2
);
});
mm
->
add_return
({
add
});
}
run_pass
(
p1
);
migraphx
::
program
p2
;
{
auto
*
mm
=
p2
.
get_main_module
();
auto
x
=
mm
->
add_parameter
(
"x"
,
s
);
auto
rsum
=
add_reduce
(
p2
,
"test:reduce_sum1:test:reduce_sum0"
,
{
x
},
{
1
},
[
&
](
auto
*
rm
,
const
auto
&
inputs
,
const
auto
&
axes
)
{
auto
rsum1
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
inputs
[
0
]);
auto
rsumb
=
rm
->
add_instruction
(
migraphx
::
make_op
(
"multibroadcast"
,
{{
"out_lens"
,
s
.
lens
()}}),
rsum1
);
auto
add
=
add_pointwise
(
p2
,
rm
,
"test:pointwise0"
,
{
rsumb
,
inputs
[
0
]},
single_pointwise
(
"add"
));
return
rm
->
add_instruction
(
migraphx
::
make_op
(
"reduce_sum"
,
{{
"axes"
,
axes
}}),
add
);
});
mm
->
add_return
({
rsum
});
}
EXPECT
(
p1
==
p2
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/gpu/hip.cpp
View file @
fe493c28
...
@@ -27,7 +27,7 @@
...
@@ -27,7 +27,7 @@
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/target.hpp>
TEST_CASE
(
tuple_
to_
from_gpu
)
TEST_CASE
(
tuple_from_gpu
)
{
{
migraphx
::
shape
s1
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
shape
s1
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int32_type
,
{
2
,
4
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int32_type
,
{
2
,
4
}};
...
@@ -47,4 +47,23 @@ TEST_CASE(tuple_to_from_gpu)
...
@@ -47,4 +47,23 @@ TEST_CASE(tuple_to_from_gpu)
EXPECT
(
result2
==
p2_data
);
EXPECT
(
result2
==
p2_data
);
}
}
TEST_CASE
(
tuple_to_gpu
)
{
migraphx
::
shape
s1
{
migraphx
::
shape
::
float_type
,
{
2
,
3
}};
migraphx
::
shape
s2
{
migraphx
::
shape
::
int32_type
,
{
2
,
4
}};
std
::
vector
<
float
>
p1_data
=
{
1.1
,
2.2
,
3.3
,
4.4
,
5.5
,
6.6
};
std
::
vector
<
int
>
p2_data
=
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
};
auto
p1
=
migraphx
::
argument
{
s1
,
p1_data
.
data
()};
auto
p2
=
migraphx
::
argument
{
s2
,
p2_data
.
data
()};
auto
p_gpu
=
migraphx
::
gpu
::
to_gpu
(
migraphx
::
argument
({
p1
,
p2
}));
auto
p_host
=
migraphx
::
gpu
::
from_gpu
(
p_gpu
);
std
::
vector
<
migraphx
::
argument
>
results
=
p_host
.
get_sub_objects
();
std
::
vector
<
float
>
result1
;
results
[
0
].
visit
([
&
](
auto
output
)
{
result1
.
assign
(
output
.
begin
(),
output
.
end
());
});
std
::
vector
<
int
>
result2
;
results
[
1
].
visit
([
&
](
auto
output
)
{
result2
.
assign
(
output
.
begin
(),
output
.
end
());
});
EXPECT
(
result1
==
p1_data
);
EXPECT
(
result2
==
p2_data
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/gpu/mlir.cpp
View file @
fe493c28
...
@@ -140,7 +140,7 @@ TEST_CASE(conv)
...
@@ -140,7 +140,7 @@ TEST_CASE(conv)
{
{
const
std
::
string
mlir_output
=
R"__migraphx__(
const
std
::
string
mlir_output
=
R"__migraphx__(
module {
module {
func.func @m
ai
n(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
func.func @m
lir_convolutio
n(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
%0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
%0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
return %0 : tensor<1x2x2x2xf32>
return %0 : tensor<1x2x2x2xf32>
}
}
...
@@ -163,7 +163,7 @@ TEST_CASE(conv_add_relu)
...
@@ -163,7 +163,7 @@ TEST_CASE(conv_add_relu)
{
{
const
std
::
string
mlir_output
=
R"__migraphx__(
const
std
::
string
mlir_output
=
R"__migraphx__(
module {
module {
func.func @m
ai
n(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
func.func @m
lir_convolutio
n(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
%0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
%0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
%1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
%1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
%2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
%2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
...
@@ -187,4 +187,30 @@ module {
...
@@ -187,4 +187,30 @@ module {
EXPECT
(
verify_mlir
(
m
));
EXPECT
(
verify_mlir
(
m
));
}
}
TEST_CASE
(
dot_add
)
{
const
std
::
string
mlir_output
=
R"__migraphx__(
module {
func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
%0 = migraphx.dot(%arg0, %arg1) : tensor<1x5x4xf32>, tensor<1x4x3xf32> -> tensor<1x5x3xf32>
%1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
return %1 : tensor<1x5x3xf32>
}
}
)__migraphx__"
;
migraphx
::
module
m
;
auto
arg0
=
m
.
add_parameter
(
"arg0"
,
{
migraphx
::
shape
::
float_type
,
{
1
,
5
,
4
}});
auto
arg1
=
m
.
add_parameter
(
"arg1"
,
{
migraphx
::
shape
::
float_type
,
{
1
,
4
,
3
}});
auto
arg2
=
m
.
add_parameter
(
"arg2"
,
{
migraphx
::
shape
::
float_type
,
{
1
,
5
,
3
}});
auto
conv
=
m
.
add_instruction
(
migraphx
::
make_op
(
"dot"
),
arg0
,
arg1
);
auto
add
=
m
.
add_instruction
(
migraphx
::
make_op
(
"add"
),
conv
,
arg2
);
m
.
add_return
({
add
});
auto
s
=
migraphx
::
gpu
::
dump_mlir
(
m
);
// Skip test if MLIR is not enabled
if
(
s
.
empty
())
return
;
CHECK
(
encode
(
s
)
==
encode
(
mlir_output
));
EXPECT
(
verify_mlir
(
m
));
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
int
main
(
int
argc
,
const
char
*
argv
[])
{
test
::
run
(
argc
,
argv
);
}
test/include/pointwise.hpp
View file @
fe493c28
...
@@ -30,12 +30,12 @@
...
@@ -30,12 +30,12 @@
template
<
class
F
>
template
<
class
F
>
migraphx
::
instruction_ref
add_pointwise
(
migraphx
::
program
&
p
,
migraphx
::
instruction_ref
add_pointwise
(
migraphx
::
program
&
p
,
migraphx
::
module_ref
mm
,
const
std
::
string
&
name
,
const
std
::
string
&
name
,
std
::
vector
<
migraphx
::
instruction_ref
>
inputs
,
std
::
vector
<
migraphx
::
instruction_ref
>
inputs
,
F
f
)
F
f
)
{
{
auto
*
pm
=
p
.
create_module
(
name
);
auto
*
pm
=
p
.
create_module
(
name
);
auto
*
mm
=
p
.
get_main_module
();
pm
->
set_bypass
();
pm
->
set_bypass
();
std
::
vector
<
migraphx
::
instruction_ref
>
params
;
std
::
vector
<
migraphx
::
instruction_ref
>
params
;
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
std
::
back_inserter
(
params
),
[
&
](
auto
input
)
{
std
::
transform
(
inputs
.
begin
(),
inputs
.
end
(),
std
::
back_inserter
(
params
),
[
&
](
auto
input
)
{
...
@@ -47,6 +47,15 @@ migraphx::instruction_ref add_pointwise(migraphx::program& p,
...
@@ -47,6 +47,15 @@ migraphx::instruction_ref add_pointwise(migraphx::program& p,
return
mm
->
add_instruction
(
migraphx
::
make_op
(
"pointwise"
),
inputs
,
{
pm
});
return
mm
->
add_instruction
(
migraphx
::
make_op
(
"pointwise"
),
inputs
,
{
pm
});
}
}
template
<
class
F
>
migraphx
::
instruction_ref
add_pointwise
(
migraphx
::
program
&
p
,
const
std
::
string
&
name
,
std
::
vector
<
migraphx
::
instruction_ref
>
inputs
,
F
f
)
{
return
add_pointwise
(
p
,
p
.
get_main_module
(),
name
,
inputs
,
f
);
}
inline
auto
single_pointwise
(
const
std
::
string
&
name
)
inline
auto
single_pointwise
(
const
std
::
string
&
name
)
{
{
return
[
=
](
auto
*
pm
,
const
auto
&
inputs
)
{
return
[
=
](
auto
*
pm
,
const
auto
&
inputs
)
{
...
...
test/onnx/onnx_test.cpp
View file @
fe493c28
This diff is collapsed.
Click to expand it.
test/op_shape_test.cpp
View file @
fe493c28
This diff is collapsed.
Click to expand it.
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment