gaoqiong / MIGraphX, commit 18cf0435

Merge branch 'develop' into blas_tuning

Authored by Umang Yadav on Apr 18, 2023; committed by GitHub on Apr 18, 2023.
Parents: 12258d8f, 3e8d7196
Changes: 81 files in total; this page shows 20 changed files with 509 additions and 104 deletions (+509 −104).
src/propagate_constant.cpp  +16 −0
src/py/migraphx_py.cpp  +6 −1
src/shape.cpp  +33 −0
src/simplify_algebra.cpp  +36 −3
src/split_single_dyn_dim.cpp  +33 −0
src/targets/cpu/target.cpp  +0 −1
src/targets/gpu/CMakeLists.txt  +5 −1
src/targets/gpu/compile_gen.cpp  +137 −4
src/targets/gpu/driver/CMakeLists.txt  +1 −0
src/targets/gpu/driver/include/migraphx/gpu/driver/action.hpp  +1 −1
src/targets/gpu/hip.cpp  +14 −2
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp  +17 −0
src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp  +2 −0
src/targets/gpu/include/migraphx/gpu/convolution.hpp  +2 −2
src/targets/gpu/jit/reduce.cpp  +134 −44
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp  +8 −0
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp  +6 −0
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp  +45 −40
src/targets/gpu/mlir.cpp  +9 −5
src/targets/gpu/target.cpp  +4 −0
src/propagate_constant.cpp (+16 −0)

@@ -27,11 +27,14 @@
 #include <migraphx/literal.hpp>
 #include <migraphx/functional.hpp>
 #include <migraphx/par_for.hpp>
+#include <migraphx/env.hpp>
 #include <unordered_set>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_PROPAGATE_CONSTANT)
+
 bool skip_propogate(instruction_ref ins)
 {
     if(ins->name() == "contiguous")

@@ -85,6 +88,19 @@ void propagate_constant::apply(module& m) const
     {
         if(not literals[i].empty())
         {
+            if(enabled(MIGRAPHX_TRACE_PROPAGATE_CONSTANT{}))
+            {
+                std::cout << "Constant replace: " << std::endl;
+                std::vector<instruction_ref> inss;
+                fix([&](auto self, auto ins) {
+                    if(contains(inss, ins))
+                        return;
+                    for(auto input : ins->inputs())
+                        self(input);
+                    inss.push_back(ins);
+                })(const_instrs_vec[i]);
+                m.debug_print(inss);
+            }
             assert(literals[i].get_shape() == const_instrs_vec[i]->get_shape());
             auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
             m.replace_instruction(const_instrs_vec[i], l);
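The trace block above uses migraphx's fix combinator to walk an instruction's transitive inputs with an anonymous recursive lambda. A minimal standalone sketch of the pattern (an illustrative reimplementation, not the migraphx header):

    #include <algorithm>
    #include <iostream>
    #include <utility>
    #include <vector>

    // fix feeds the lambda back to itself as its first argument, so a generic
    // lambda can recurse without having a name.
    template <class F>
    auto fix(F f)
    {
        return [=](auto&&... xs) { return f(fix(f), std::forward<decltype(xs)>(xs)...); };
    }

    int main()
    {
        // Depth-first walk of a tiny input graph, mirroring how the trace code
        // visits each instruction's inputs before the instruction itself.
        std::vector<std::vector<int>> inputs = {{1, 2}, {2}, {}};
        std::vector<int> order;
        fix([&](auto self, int n) {
            if(std::find(order.begin(), order.end(), n) != order.end())
                return; // same role as the contains(inss, ins) guard above
            for(int in : inputs[n])
                self(in);
            order.push_back(n);
        })(0);
        for(int n : order)
            std::cout << n << ' '; // prints: 2 1 0
    }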
src/py/migraphx_py.cpp (+6 −1)

@@ -62,6 +62,7 @@ namespace py = pybind11;
     PYBIND11_MODULE(__VA_ARGS__) \
     MIGRAPHX_POP_WARNING

+#define MIGRAPHX_PYTHON_GENERATE_SHAPE_ENUM(x, t) .value(#x, migraphx::shape::type_t::x)
+
 namespace migraphx {
 migraphx::value to_value(py::kwargs kwargs);

@@ -235,7 +236,8 @@ migraphx::shape to_shape(const py::buffer_info& info)
 MIGRAPHX_PYBIND11_MODULE(migraphx, m)
 {
-    py::class_<migraphx::shape>(m, "shape")
-        .def(py::init([](py::kwargs kwargs) {
+    py::class_<migraphx::shape> shape_cls(m, "shape");
+    shape_cls.def(py::init([](py::kwargs kwargs) {
             auto v = migraphx::to_value(kwargs);
             auto t = migraphx::shape::parse_type(v.get("type", "float"));

@@ -261,6 +263,9 @@ MIGRAPHX_PYBIND11_MODULE(migraphx, m)
         .def("__ne__", std::not_equal_to<migraphx::shape>{})
         .def("__repr__", [](const migraphx::shape& s) { return migraphx::to_string(s); });

+    py::enum_<migraphx::shape::type_t>(shape_cls, "type_t")
+        MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_PYTHON_GENERATE_SHAPE_ENUM);
+
     py::class_<migraphx::argument>(m, "argument", py::buffer_protocol())
         .def_buffer([](migraphx::argument& x) -> py::buffer_info { return to_buffer_info(x); })
         .def(py::init([](py::buffer b) {
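After expansion, MIGRAPHX_SHAPE_VISIT_TYPES(MIGRAPHX_PYTHON_GENERATE_SHAPE_ENUM) chains one .value(...) call per tensor type onto the enum_, so the binding is roughly equivalent to the sketch below (the specific type names shown are illustrative entries from shape's visit list):

    py::enum_<migraphx::shape::type_t>(shape_cls, "type_t")
        .value("float_type", migraphx::shape::type_t::float_type)
        .value("half_type", migraphx::shape::type_t::half_type)
        .value("int32_type", migraphx::shape::type_t::int32_type)
        /* ...one entry per type visited by MIGRAPHX_SHAPE_VISIT_TYPES... */;

Nesting the enum under shape_cls rather than the module is what makes it addressable as shape.type_t from Python.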
src/shape.cpp (+33 −0)

@@ -481,6 +481,15 @@ shape shape::with_type(type_t t) const
 shape shape::to_dynamic() const
 {
+    if(not sub_shapes().empty())
+    {
+        std::vector<shape> subs;
+        std::transform(sub_shapes().cbegin(),
+                       sub_shapes().cend(),
+                       std::back_inserter(subs),
+                       [](auto s) { return s.to_dynamic(); });
+        return {subs};
+    }
     if(this->dynamic())
     {
         return *this;

@@ -488,6 +497,30 @@ shape shape::to_dynamic() const
     return {type(), lens(), lens(), {}};
 }

+shape shape::to_static(std::size_t x) const
+{
+    if(not sub_shapes().empty())
+    {
+        std::vector<shape> subs;
+        std::transform(sub_shapes().cbegin(),
+                       sub_shapes().cend(),
+                       std::back_inserter(subs),
+                       [&](auto s) { return s.to_static(x); });
+        return {subs};
+    }
+    if(not this->dynamic())
+    {
+        return *this;
+    }
+    auto static_lens = this->max_lens();
+    std::transform(static_lens.begin(),
+                   static_lens.end(),
+                   this->dyn_dims().cbegin(),
+                   static_lens.begin(),
+                   [&](auto sl, auto dd) { return dd.is_fixed() ? sl : x; });
+    return {type(), static_lens};
+}
+
 std::size_t shape::element_space() const { return impl->element_space(); }

 std::string shape::type_string() const { return name(this->type()); }
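A usage sketch of the new to_static (assuming the dynamic_dimension list constructor, with each entry as a {min, max} pair): every non-fixed dimension collapses to the given value, while fixed dimensions keep their length.

    // Batch dimension ranges over 1..4; the rest are fixed.
    migraphx::shape dyn{migraphx::shape::float_type,
                        {{1, 4}, {3, 3}, {224, 224}, {224, 224}}};
    auto fixed = dyn.to_static(2);
    // fixed.lens() == {2, 3, 224, 224}: only the {1, 4} range became 2.

This gives passes that specialize dynamic graphs a one-call way to pick a concrete shape, recursing through tuple sub-shapes the same way to_dynamic now does.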
src/simplify_algebra.cpp (+36 −3)

@@ -52,8 +52,9 @@ auto op_lit_broadcast(std::string op, std::string x, std::string y)
 auto conv_const_weights()
 {
-    return match::name("convolution")(match::used_once(),
-                                      match::args(match::any(), match::is_constant().bind("w")));
+    return match::name("convolution")(
+        match::used_once(),
+        match::args(match::none_of(match::is_constant()), match::is_constant().bind("w")));
 }

 auto reduction() { return match::name_contains("reduce"); }

@@ -203,7 +204,12 @@ struct find_mul_slice_conv
     }
 };

+// ******************************
 // a * (x + b) => a * x + a * b
+// ******************************
+// When a * (x + b) is followed by another add of constant, then the
+// additional add can be const folded. Also, better fusions can be applied
+// when the add comes after.
 struct find_mul_add
 {
     auto matcher() const

@@ -268,6 +274,32 @@ struct find_dot_add
     }
 };

+struct find_conv_add
+{
+    auto matcher() const
+    {
+        auto add = match::name("add")(
+            match::either_arg(0, 1)(match::any().bind("x"),
+                                    match::any_of(match::is_constant()).bind("a")),
+            match::used_once());
+        return match::name("convolution")(match::used_once(),
+                                          match::args(add, match::is_constant().bind("w")));
+    }
+
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins   = r.result;
+        auto a_ins = r.instructions["a"];
+        auto x_ins = r.instructions["x"];
+        auto w_ins = r.instructions["w"];
+
+        auto conv1 = m.insert_instruction(ins, ins->get_operator(), a_ins, w_ins);
+        auto conv2 = m.insert_instruction(ins, ins->get_operator(), x_ins, w_ins);
+        m.replace_instruction(ins, make_op("add"), conv1, conv2);
+    }
+};
+
 struct find_add_lit_broadcast
 {
     auto matcher() const

@@ -1239,6 +1271,7 @@ void simplify_algebra::apply(module& m) const
             find_neg_unit_ops{},
             find_zero_ops{},
             find_dot_add{},
+            find_conv_add{},
             find_div_const{},
             find_sub_const{},
             find_rsqrt{},
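The new find_conv_add rewrite is justified by the linearity of convolution in its data input. For constant a and constant weights W,

    conv(x + a, W) = conv(x, W) + conv(a, W)

so the matched conv(add(x, a), W) becomes add(conv(a, W), conv(x, W)); conv(a, W) depends only on constants, so propagate_constant folds it into a literal and only one runtime convolution plus an add remains. This mirrors the reasoning in the find_mul_add comment above: pushing the add after the heavy op exposes it to constant folding and later fusions.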
src/split_single_dyn_dim.cpp (+33 −0)

@@ -28,6 +28,7 @@
 #include <migraphx/functional.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/ranges.hpp>
+#include <migraphx/matcher.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -67,6 +68,37 @@ has_one_dyn_dim(const std::unordered_map<std::string, shape>& param_shapes)
             dds_it->max};
 }

+namespace {
+struct find_static_2in_broadcasts
+{
+    // Convert 2 input static shape broadcast/multibroadcast into 1 input version.
+    // Some compiler passes (ex. simplify_algebra) only support the 1 input versions
+    // of the broadcasting operators.
+    auto matcher() const
+    {
+        return match::broadcast(match::nargs(2),
+                                match::arg(0)(match::static_shape()),
+                                match::arg(1)(match::static_shape()));
+    }
+
+    void apply(module& m, const match::matcher_result& mr) const
+    {
+        auto ins          = mr.result;
+        auto out_lens     = ins->get_shape().lens();
+        auto broadcast_op = ins->get_operator();
+        if(broadcast_op.name() == "broadcast")
+        {
+            broadcast_op.from_value({{"out_lens", out_lens}});
+        }
+        else
+        {
+            broadcast_op.from_value({{"out_lens", out_lens}, {"out_dyn_dims", {}}});
+        }
+        m.replace_instruction(ins, broadcast_op, ins->inputs().at(0));
+    }
+};
+} // namespace
+
 /**
  * Makes all the shapes in the dynamic_dimension range.
  * Probably won't work for `if` and `loop` instructions, depending on how the submodules for those

@@ -97,6 +129,7 @@ void split_single_dyn_dim::apply(module_pass_manager& mpm) const
                 dd_check->dyn_param_str,
                 migraphx::shape{dyn_param_shape.type(), static_lens});
             auto outputs = submod->add_instructions(mm, map_ins);
             submod->add_return({outputs});
+            match::find_matches(*submod, find_static_2in_broadcasts{});
             submodules.push_back(submod);
         }
         // redirect to select_module operator and return
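Schematically, inside each fixed-shape submodule the matcher rewrites the two-input form into the one-input form (dimensions illustrative):

    before:  y = multibroadcast(x, ref)                      // output dims taken from ref
    after:   y = multibroadcast[out_lens={4,3,224,224}](x)   // dims baked into the op

The second input only existed to carry the output shape while it was dynamic; once the submodule's shapes are static, the lengths can be baked into the operator's out_lens attribute, which is the form simplify_algebra and related passes know how to match.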
src/targets/cpu/target.cpp (+0 −1)

@@ -82,7 +82,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         dead_code_elimination{},
         simplify_algebra{},
         simplify_reshapes{},
-        layout_nhwc{},
         dead_code_elimination{},
         simplify_reshapes{},
         simplify_algebra{},
src/targets/gpu/CMakeLists.txt (+5 −1)

@@ -33,7 +33,11 @@ if(NOT TARGET MIOpen)
     message(SEND_ERROR "Cant find miopen")
 endif()

-set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
+if(BUILD_DEV)
+    set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
+else()
+    set(MIGRAPHX_USE_HIPRTC ON CACHE BOOL "Use hipRTC APIs")
+endif()

 include(Embed)
 file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
src/targets/gpu/compile_gen.cpp (+137 −4)

@@ -168,7 +168,7 @@ std::string make_transformer_args(std::vector<std::string> transformers)
     return join_strings(std::move(transformers), ", ");
 }

-std::string generate_pointwise(const module& pm, const std::string& name)
+void generate_pointwise(cpp_generator& gg, const module& pm, const std::string& name)
 {
     module m = pm;
     run_passes(m, {eliminate_common_subexpression{}, dead_code_elimination{}});

@@ -184,8 +184,131 @@ std::string generate_pointwise(const module& pm, const std::string& name)
     // Add explict conversions
     g.fresult(
         [](const shape& s) { return "migraphx::convert<" + shape::cpp_type(s.type()) + ">"; });
-    g.create_function(
-        g.generate_module(m).set_attributes({"__device__"}).set_generic_types(m).set_name(name));
+    gg.create_function(g.generate_module(m)
+                           .set_attributes({"__device__", "__attribute__((const))"})
+                           .set_generic_types(m)
+                           .set_name(name));
+}
+
+std::string generate_pointwise(const module& pm, const std::string& name)
+{
+    cpp_generator g;
+    generate_pointwise(g, pm, name);
+    return g.str();
+}
+
+std::string reduce_op::str() const
+{
+    return write + "(r.reduce(" + reduction + ", " + init + ", " + read + ")(" + input + "))";
+}
+
+void reduce_op::set(instruction_ref ins, const operation& op)
+{
+    if(op.name() == "reduce_sum")
+    {
+        reduction = "op::sum{}";
+    }
+    else if(op.name() == "reduce_mean")
+    {
+        auto s               = ins->inputs().front()->get_shape();
+        auto reduce_elements = s.elements() / ins->get_shape().elements();
+        auto reduce_type     = s.type();
+        reduction            = "op::sum{}";
+        std::string mean     = "op::mean<" + std::to_string(reduce_elements) + ">{}";
+        // Use float accumulator when reduction size is too large for half
+        if(reduce_type == shape::half_type and reduce_elements > 16384)
+            read = "compose(" + mean + ", op::convert_to<float>{})";
+        else if(contains({shape::float_type, shape::half_type, shape::double_type}, reduce_type))
+            read = mean;
+        else
+            write = mean;
+    }
+    else if(op.name() == "reduce_max")
+    {
+        reduction = "op::max{}";
+        init      = "lowest{}";
+    }
+    else if(op.name() == "reduce_min")
+    {
+        reduction = "op::min{}";
+        init      = "highest{}";
+    }
+    else if(op.name() == "reduce_prod")
+    {
+        reduction = "op::product{}";
+        init      = "1";
+    }
+    else
+    {
+        MIGRAPHX_THROW("Unsupported reduce");
+    }
+}
+
+std::string reduce_op::generate(instruction_ref ins, const std::string& x)
+{
+    reduce_op r{x};
+    r.set(ins, ins->get_operator());
+    return r.str();
+}
+
+static bool use_lazy_inner(instruction_ref ins)
+{
+    if(ins->outputs().size() != 1)
+        return false;
+    auto output = ins->outputs().front();
+    return contains(output->name(), "reduce") or output->name() == "@return";
+}
+
+std::string generate_reduce(const module& m, const std::string& name)
+{
+    cpp_generator g;
+    auto ilens    = m.get_parameter_shapes().begin()->second.lens();
+    std::size_t i = 0;
+    auto f        = g.generate_module(m, [&](instruction_ref ins, const auto& names) {
+        if(contains(ins->name(), "reduce"))
+        {
+            return reduce_op::generate(ins, names.at(ins->inputs().front()));
+        }
+        else if(ins->name() == "pointwise")
+        {
+            auto pointwise_name = "pointwise" + std::to_string(i);
+            i++;
+            generate_pointwise(g, *ins->module_inputs().front(), pointwise_name);
+            std::vector<instruction_ref> tensors;
+            std::copy_if(ins->inputs().begin(),
+                         ins->inputs().end(),
+                         std::back_inserter(tensors),
+                         [&](auto input) {
+                             return input->get_shape().lens() == ilens and
+                                    not input->get_shape().broadcasted();
+                         });
+            auto inner_names = names;
+            for(auto input : tensors)
+                inner_names[input] += "_lambda_param";
+            auto call_function =
+                pointwise_name + "(" +
+                join_strings(cpp_generator::to_args(ins->inputs(), inner_names), ", ") + ")";
+            if(tensors.empty())
+                return call_function;
+            const std::string inner_template =
+                "r.${inner}([=](${params}) { return ${call}; })(${args})";
+            std::string inner_name = use_lazy_inner(ins) ? "lazy_inner" : "inner";
+            auto args              = cpp_generator::to_args(tensors, names);
+            auto params            = cpp_generator::to_args(tensors, inner_names);
+            std::transform(params.begin(), params.end(), params.begin(), [](auto s) {
+                return "auto " + s;
+            });
+            return interpolate_string(inner_template,
+                                      {{"inner", inner_name},
+                                       {"params", join_strings(params, ", ")},
+                                       {"args", join_strings(args, ", ")},
+                                       {"call", call_function}});
+        }
+        else if(ins->name() == "multibroadcast")
+        {
+            return names.at(ins->inputs().front());
+        }
+        MIGRAPHX_THROW("Unknown operator: " + ins->name());
+    });
+    f.set_attributes({"__device__", "__attribute__((const))"}).set_generic_types(m).set_name(name);
+    f.add_generic_param("r");
+    g.create_function(f);
+    return g.str();
 }

@@ -196,7 +319,17 @@ static std::vector<std::string> get_op_names(const module& m)
     {
         if(starts_with(ins.name(), "@"))
             continue;
-        result.push_back(ins.name());
+        if(ins.name() == "multibroadcast")
+            continue;
+        if(ins.name() == "pointwise")
+        {
+            auto names = get_op_names(*ins.module_inputs().front());
+            result.insert(result.end(), names.begin(), names.end());
+        }
+        else
+        {
+            result.push_back(ins.name());
+        }
     }
     return result;
 }
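For a concrete sense of what reduce_op::str() emits: a reduce_mean over 32768 half elements whose input tensor is named x0 (the name is illustrative) sets write = "op::id{}", reduction = "op::sum{}", keeps init = "0", and routes the mean through a float accumulator in read, producing

    op::id{}(r.reduce(op::sum{}, 0, compose(op::mean<32768>{}, op::convert_to<float>{}))(x0))

which is the expression generate_reduce splices into the fused-reduce lambda.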
src/targets/gpu/driver/CMakeLists.txt (+1 −0)

@@ -26,5 +26,6 @@ file(GLOB GPU_DRIVER_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp
 add_executable(gpu-driver
     ${GPU_DRIVER_SRCS}
 )
+rocm_clang_tidy_check(gpu-driver)
 target_include_directories(gpu-driver PRIVATE include)
 target_link_libraries(gpu-driver PRIVATE migraphx_gpu)
src/targets/gpu/driver/include/migraphx/gpu/driver/action.hpp (+1 −1)

@@ -44,7 +44,7 @@ struct auto_register_action
 template <class T>
 static void apply()
 {
-    auto name = get_type_name<T>();
+    const auto& name = get_type_name<T>();
     register_action(name.substr(name.rfind("::") + 2),
                     [](auto&&... xs) { T::apply(std::forward<decltype(xs)>(xs)...); });
 }
src/targets/gpu/hip.cpp (+14 −2)

@@ -189,8 +189,20 @@ argument register_on_gpu(const argument& arg)
 argument to_gpu(const argument& arg, bool host)
 {
-    auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host);
-    return {arg.get_shape(), p};
+    argument result;
+    arg.visit(
+        [&](auto x) {
+            auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host);
+            result = {x.get_shape(), p};
+        },
+        [&](const auto& xs) {
+            std::vector<argument> args;
+            std::transform(xs.begin(), xs.end(), std::back_inserter(args), [&](auto x) {
+                return to_gpu(x, host);
+            });
+            result = argument{args};
+        });
+    return result;
 }

 argument from_gpu(const argument& arg)
src/targets/gpu/include/migraphx/gpu/compile_gen.hpp (+17 −0)

@@ -26,6 +26,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/module_ref.hpp>
+#include <migraphx/instruction_ref.hpp>
 #include <string>
 #include <unordered_map>
 #include <vector>

@@ -34,6 +35,7 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

 struct shape;
+struct operation;

 namespace gpu {

@@ -72,8 +74,23 @@ std::string make_transformer_args(Ts... xs)
 std::string generate_pointwise(const module& pm, const std::string& name);

+std::string generate_reduce(const module& m, const std::string& name);
+
 std::string generate_name_from_ops(const module& m);

+struct reduce_op
+{
+    std::string input     = "";
+    std::string reduction = "";
+    std::string init      = "0";
+    std::string read      = "op::id{}";
+    std::string write     = "op::id{}";
+    void set(instruction_ref ins, const operation& op);
+    std::string str() const;
+    static std::string generate(instruction_ref ins, const std::string& x);
+};
+
 } // namespace gen
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/compile_hip_code_object.hpp (+2 −0)

@@ -71,6 +71,8 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
 std::size_t compute_block_size(std::size_t n, std::size_t max_block_size = 1024);

+std::string generate_make_shape(const shape& s);
+
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
src/targets/gpu/include/migraphx/gpu/convolution.hpp (+2 −2)

@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#ifndef MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
-#define MIGRAPHX_GUARD_RTGLIB_CONVOLUTION_HPP
+#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_CONVOLUTION_HPP
+#define MIGRAPHX_GUARD_RTGLIB_GPU_CONVOLUTION_HPP
 #include <migraphx/shape.hpp>
 #include <migraphx/generate.hpp>
src/targets/gpu/jit/reduce.cpp (+134 −44)

@@ -60,15 +60,6 @@ __global__ void reduce_kernel(void* input_p, void* output_p)
 )__migraphx__";

-static std::size_t get_reduce_elements(const std::vector<shape>& inputs)
-{
-    return inputs.front().elements() / inputs.back().elements();
-}
-
-static std::size_t get_reduce_elements(const std::vector<instruction_ref>& inputs)
-{
-    return get_reduce_elements(to_shapes(inputs));
-}
-
 static std::vector<std::size_t> get_reduce_lens(const std::vector<std::size_t>& input_lens,
                                                 const std::vector<std::size_t>& output_lens)
 {

@@ -86,9 +77,28 @@ static std::vector<std::size_t> get_reduce_lens(const std::vector<std::size_t>&
     return reduce_lens;
 }

+template <class T>
+static shape get_reduced_shape(const shape& s, const std::vector<T>& axes)
+{
+    auto lens = s.lens();
+    std::fill(lens.begin(), lens.end(), 1);
+    for(const auto& axis : axes)
+        lens[axis] = s.lens()[axis];
+    return shape{s.type(), lens};
+}
+
+template <class T>
+static shape get_output_shape(const shape& s, const std::vector<T>& axes)
+{
+    auto lens = s.lens();
+    for(const auto& axis : axes)
+        lens[axis] = 1;
+    return shape{s.type(), lens};
+}
+
-static std::string get_reduce_algo(const std::vector<shape>& inputs)
+template <class ReduceLens>
+static std::string get_reduce_algo(const std::vector<shape>& inputs, ReduceLens rlens)
 {
-    auto rlens      = get_reduce_lens(inputs.front().lens(), inputs.back().lens());
     const auto init = std::numeric_limits<std::size_t>::max();
     // The minimum stride
     auto min_stride = std::inner_product(

@@ -103,11 +113,27 @@ static std::string get_reduce_algo(const std::vector<shape>& inputs)
     return "block";
 }

+static std::string get_reduce_algo(const std::vector<shape>& inputs)
+{
+    auto rlens = get_reduce_lens(inputs.front().lens(), inputs.back().lens());
+    return get_reduce_algo(inputs, rlens);
+}
+
-struct reduce_compiler : compiler<reduce_compiler>
+struct simple_reduce_compiler : compiler<simple_reduce_compiler>
 {
     std::vector<std::string> names() const
     {
-        return {"reduce", "reduce_sum", "reduce_mean", "reduce_max", "reduce_min", "reduce_prod"};
+        return {
+            "simple_reduce", "reduce_sum", "reduce_mean", "reduce_max", "reduce_min", "reduce_prod"};
     }
+
+    static std::size_t get_reduce_elements(const std::vector<shape>& inputs)
+    {
+        return inputs.front().elements() / inputs.back().elements();
+    }

     operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const

@@ -157,44 +183,108 @@ struct reduce_compiler : compiler<reduce_compiler>
     compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
     {
         value v = value::object{};
-        if(op.name() == "reduce_sum")
-        {
-            v["reduction"] = "op::sum{}";
-        }
-        else if(op.name() == "reduce_mean")
-        {
-            auto reduce_elements = get_reduce_elements(ins->inputs());
-            auto reduce_type     = ins->inputs().front()->get_shape().type();
-            v["reduction"]       = "op::sum{}";
-            std::string mean     = "op::mean<" + std::to_string(reduce_elements) + ">{}";
-            // Use float accumulator when reduction size is too large for half
-            if(reduce_type == shape::half_type and reduce_elements > 16384)
-                v["read"] = "compose(" + mean + ", op::convert_to<float>{})";
-            else if(contains({shape::float_type, shape::half_type, shape::double_type},
-                             reduce_type))
-                v["read"] = mean;
-            else
-                v["write"] = mean;
-        }
-        else if(op.name() == "reduce_max")
-        {
-            v["reduction"] = "op::max{}";
-            v["init"]      = "lowest{}";
-        }
-        else if(op.name() == "reduce_min")
-        {
-            v["reduction"] = "op::min{}";
-            v["init"]      = "highest{}";
-        }
-        else if(op.name() == "reduce_prod")
-        {
-            v["reduction"] = "op::product{}";
-            v["init"]      = "1";
-        }
-        else
-        {
-            MIGRAPHX_THROW("Unsupported reduce");
-        }
+        reduce_op r{};
+        r.set(ins, op);
+        v["reduction"] = r.reduction;
+        v["read"]      = r.read;
+        v["write"]     = r.write;
+        v["init"]      = r.init;
         return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
     }
 };

+static const char* const fused_reduce_kernel = R"__migraphx__(
+#include <migraphx/kernels/index.hpp>
+#include <migraphx/kernels/reduce.hpp>
+#include <migraphx/kernels/pointwise.hpp>
+#include <migraphx/kernels/vectorize.hpp>
+#include <args.hpp>
+
+namespace migraphx {
+
+${preamble}
+
+extern "C" {
+MIGRAPHX_GLOBAL void ${kernel}(${params})
+{
+    transform_args(make_tensors(), rotate_last(), ${transformers})(${args})([](auto y, auto... xs) {
+        fused_reduce<reduce::${algo}, ${reduced}>(y, partial(${lambda})(xs...));
+    });
+}
+}
+
+} // namespace migraphx
+)__migraphx__";
+
+struct fused_reduce_compiler : compiler<fused_reduce_compiler>
+{
+    std::vector<std::string> names() const { return {"fused_reduce"}; }
+
+    operation compile_op(context& ctx, const std::vector<shape>& inputs, const value& v) const
+    {
+        auto axes           = v.at("axes").to_vector<std::size_t>();
+        auto virtual_inputs = inputs;
+        virtual_inputs.push_back(get_reduced_shape(inputs.front(), axes));
+        virtual_inputs.push_back(get_output_shape(inputs.front(), axes));
+        virtual_inputs           = reduce_dims(virtual_inputs);
+        auto reduce_output_shape = virtual_inputs.back();
+        virtual_inputs.pop_back();
+        auto reduction_shape = virtual_inputs.back();
+        virtual_inputs.pop_back();
+        hip_compile_options options;
+        options.inputs         = inputs;
+        options.output         = inputs.back();
+        options.virtual_inputs = virtual_inputs;
+        auto faxis             = find_fast_axis({options.virtual_inputs.front()});
+        vectorize vec{};
+        auto nelements = reduce_output_shape.elements();
+        auto algo = v.get("algo", get_reduce_algo(options.virtual_inputs, reduction_shape.lens()));
+        if(algo == "block")
+        {
+            // Vectorize if the axis is a reduction axis
+            if(reduce_output_shape.lens()[faxis] == 1)
+                vec = vectorize::elements(ctx, faxis, options.virtual_inputs);
+            auto relements  = reduction_shape.elements() / vec.size;
+            auto block_size = compute_block_size(relements, 256);
+            if(relements >= block_size * 256)
+                algo = "block_large";
+            options.set_launch_params(
+                v, compute_global_for(ctx, nelements * block_size, 256), block_size);
+        }
+        else if(algo == "lane")
+        {
+            options.set_launch_params(v, compute_global_for(ctx, nelements, 256));
+        }
+        else
+        {
+            MIGRAPHX_THROW("Unknown reduce algo: " + algo);
+        }
+        options.kernel_name = v.get("kernel", "reduce_kernel");
+        auto src            = interpolate_string(
            fused_reduce_kernel,
            {{"kernel", options.kernel_name},
             {"params", enum_params(inputs.size(), "void * private_p")},
             {"args", enum_params(inputs.size(), "private_p")},
             {"algo", algo},
             {"reduced", "decltype(" + generate_make_shape(reduce_output_shape) + ")"},
             {"lambda", v.at("lambda").to<std::string>()},
             {"transformers", make_transformer_args(vec)},
             {"preamble", v.get("preamble", std::string{})}});
+        options.params += "-Wno-float-equal";
+        return compile_hip_code_object(src, options);
+    }
+
+    compiler_replace compile(context& ctx, instruction_ref ins, const operation& op) const
+    {
+        assert(not ins->module_inputs().empty());
+        auto v        = op.to_value();
+        auto* rm      = ins->module_inputs().front();
+        v["preamble"] = generate_reduce(*rm, "fused_reduce_op");
+        v["lambda"]   = "MIGRAPHX_LIFT(fused_reduce_op)";
+        v["kernel"]   = generate_name_from_ops(*rm) + "_kernel";
+        return replace(compile_op(ctx, to_shapes(ins->inputs()), v));
+    }
+};
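A worked pass through the launch-parameter arithmetic above, under the assumption that compute_block_size rounds the reduction size down to a power of two capped at max_block_size (only its declaration appears in compile_hip_code_object.hpp):

    #include <cstddef>

    // Hypothetical stand-in for compute_block_size, for illustration only.
    std::size_t block_size_for(std::size_t relements, std::size_t cap = 256)
    {
        std::size_t b = 64; // assume a wavefront-sized minimum
        while(b * 2 <= cap and b * 2 <= relements)
            b *= 2;
        return b;
    }

    // Mirrors the threshold check in fused_reduce_compiler::compile_op.
    bool needs_block_large(std::size_t relements)
    {
        return relements >= block_size_for(relements) * 256;
    }
    // needs_block_large(1 << 20) == true: 1048576 >= 256 * 256, so a single
    // 256-thread block cannot cover the reduction in one tile per thread and
    // the "block_large" variant is selected instead.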
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp (+8 −0)

@@ -195,6 +195,14 @@ constexpr auto compose(Fs... fs)
     })(fs...);
 }

+template <class F>
+constexpr auto partial(F f)
+{
+    return [=](auto... xs) {
+        return [=](auto&&... ys) { return f(xs..., static_cast<decltype(ys)>(ys)...); };
+    };
+}
+
 template <class... Ts>
 constexpr auto pack(Ts... xs)
 {
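partial() is what lets the fused-reduce kernel bind its tensor arguments first and receive the reducer later: partial(${lambda})(xs...) yields a closure that fused_reduce invokes with r. A minimal sketch of the behavior:

    constexpr auto add3 = [](int a, int b, int c) { return a + b + c; };
    static_assert(partial(add3)(1, 2)(3) == 6, "leading args bound now, the rest later");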
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp (+6 −0)

@@ -233,6 +233,12 @@ struct index
     }
 };

+#ifdef MIGRAPHX_NLOCAL
+#define MIGRAPHX_GLOBAL \
+    __global__ __attribute__((amdgpu_flat_work_group_size(MIGRAPHX_NLOCAL, MIGRAPHX_NLOCAL)))
+#else
+#define MIGRAPHX_GLOBAL __global__
+#endif
+
 inline __device__ __attribute__((const)) index make_index()
 {
     return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
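When the JIT knows the block size it can define MIGRAPHX_NLOCAL on the compiler command line (e.g. -DMIGRAPHX_NLOCAL=256, an assumption about how the option is passed), and MIGRAPHX_GLOBAL then pins the kernel's flat work-group size so the backend can allocate registers for exactly that launch width. A hypothetical kernel using it:

    // With MIGRAPHX_NLOCAL=256 this expands to
    // __global__ __attribute__((amdgpu_flat_work_group_size(256, 256))).
    MIGRAPHX_GLOBAL void zero_kernel(float* y)
    {
        auto idx      = migraphx::make_index();
        y[idx.global] = 0.0f; // idx.global == blockIdx.x * blockDim.x + threadIdx.x
    }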
src/targets/gpu/kernels/include/migraphx/kernels/reduce.hpp (+45 −40)

@@ -174,6 +174,25 @@ struct inner_storage_tag
 template <class T>
 using is_inner_storage = is_base_of<inner_storage_tag, remove_cv_t<remove_reference_t<T>>>;

+template <class Size, class F>
+struct lazy_inner_storage : inner_storage_tag
+{
+    using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
+    F f;
+    constexpr Size rsize() const { return {}; }
+    template <class U, class V>
+    constexpr auto operator()(U j, V d) const
+    {
+        return f(j, d);
+    }
+};
+
+template <class Size, class F>
+constexpr lazy_inner_storage<Size, F> make_lazy_inner_storage(Size, F f)
+{
+    return {{}, f};
+}
+
 template <class R, class F>
 struct storage_access : F
 {

@@ -278,6 +297,14 @@ struct reducer_base
         });
     }

+    template <class F>
+    __device__ auto lazy_inner(F f) const
+    {
+        return this->inner_sliced([=](auto n, auto&&... xs) {
+            return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
+        });
+    }
+
     template <class Op, class T, class Read>
     __device__ auto reduce(Op op, T init, Read read) const
     {

@@ -396,25 +423,6 @@ struct block_large
     index idx;
     Slicer slice;

-    template <class Size, class F>
-    struct inner_storage : inner_storage_tag
-    {
-        using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
-        F f;
-        constexpr Size rsize() const { return {}; }
-        template <class U, class V>
-        constexpr auto operator()(U j, V d) const
-        {
-            return f(j, d);
-        }
-    };
-
-    template <class Size, class F>
-    static constexpr inner_storage<Size, F> make_inner_storage(Size, F f)
-    {
-        return {{}, {f}};
-    }
-
     template <class Op, class T, class Read, class N, class... Ts>
     __device__ auto reduce_impl(Op op, T init, Read read, N n, Ts&&... xs) const
     {

@@ -439,7 +447,7 @@ struct block_large
     template <class R, class F, class N, class... Ts>
     __device__ auto inner_impl(F f, N n, Ts&&... xs) const
     {
-        return make_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
+        return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
     }
 };

@@ -469,25 +477,6 @@ struct lane
     index idx;
     Slicer slice;

-    template <class Size, class F>
-    struct inner_storage : inner_storage_tag
-    {
-        using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
-        F f;
-        constexpr Size rsize() const { return {}; }
-        template <class U, class V>
-        constexpr auto operator()(U j, V d) const
-        {
-            return f(j, d);
-        }
-    };
-
-    template <class Size, class F>
-    static constexpr inner_storage<Size, F> make_inner_storage(Size, F f)
-    {
-        return {{}, {f}};
-    }
-
     template <class Op, class T, class Read, class N, class U, class... Us>
     __device__ auto reduce_impl(Op op, T init, Read read, N n, U&& x, Us&&... xs) const
     {

@@ -518,7 +507,7 @@ struct lane
     template <class R, class F, class N, class... Ts>
     __device__ auto inner_impl(F f, N n, Ts&&... xs) const
     {
-        return make_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
+        return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
     }
 };

 template <class Slicer>

@@ -577,5 +566,21 @@ simple_reduce(Op op, T init, Input input, Output output, ReadInput read, WriteOu
     });
 }

+template <class Algo, class Reduced, class Output, class F>
+__device__ void fused_reduce(Output output, F f)
+{
+    Algo::template run<Reduced>([&](auto out_idx, auto r) {
+        auto result = f(r);
+        if constexpr(reduce::is_inner_storage<decltype(result)>{})
+        {
+            r.inner([&](auto& y, auto x) { y = x; })(output, result);
+        }
+        else
+        {
+            r.outer([&] { output[out_idx] = implicit_conversion(result); });
+        }
+    });
+}
+
 } // namespace migraphx
 #endif // MIGRAPHX_GUARD_KERNELS_REDUCE_HPP
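A sketch of why the lazy storage matters (a reading of the code, not a quote from it): an eager r.inner computes and stores the pointwise result for every element up front, while r.lazy_inner stores only a closure and computes each element when the consuming reduction reads it, so no intermediate buffer is materialized:

    // auto t = r.lazy_inner([](auto x) { return x * x; })(xs); // no work yet
    // auto s = r.reduce(op::sum{}, 0, op::id{})(t);            // squares computed here

This also explains the use_lazy_inner guard in compile_gen.cpp: laziness is only chosen when the sole consumer is a reduce or the return, since a lazy storage read more than once would recompute its elements.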
src/targets/gpu/mlir.cpp (+9 −5)

@@ -197,10 +197,14 @@ struct mlir_program
             result = mlirF64TypeGet(ctx.get());
         else if(as.is_integral())
         {
-            if(as.is_signed())
-                result = mlirIntegerTypeSignedGet(ctx.get(), as.size() * 8);
-            else
-                result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
+            // Note: rocMLIR use signless integer type for tensors types. This
+            // will translate to signed implementation for current supported
+            // operations.
+            if(as.is_unsigned())
+            {
+                MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));
+            }
+            result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
         }
         else
             MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));

@@ -483,7 +487,7 @@ struct mlir_program
     static value get_operator_value(const operation& op)
     {
         auto v = op.to_value();

-        if(op.name() == "convolution")
+        if(op.name() == "convolution" or op.name() == "quant_convolution")
         {
             // Adjust symetrical padding
             if(v.at("padding").size() == v.at("stride").size())
src/targets/gpu/target.cpp (+4 −0)

@@ -32,6 +32,7 @@
 #include <migraphx/eliminate_identity.hpp>
 #include <migraphx/eliminate_pad.hpp>
 #include <migraphx/fuse_pointwise.hpp>
+#include <migraphx/fuse_reduce.hpp>
 #include <migraphx/inline_module.hpp>
 #include <migraphx/insert_pad.hpp>
 #include <migraphx/layout_nhwc.hpp>

@@ -72,6 +73,7 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS)
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_REDUCE_FUSION)
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC)

 struct id_pass
 {

@@ -129,6 +131,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
         optimize_module{},
         enable_pass(not enabled(MIGRAPHX_DISABLE_POINTWISE_FUSION{}), fuse_pointwise{}),
         dead_code_elimination{},
+        enable_pass(not enabled(MIGRAPHX_DISABLE_REDUCE_FUSION{}), fuse_reduce{}),
+        dead_code_elimination{},
         fuse_mlir{&ctx},
         dead_code_elimination{},
         lowering{&ctx, options.offload_copy},
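Like the pointwise fusion before it, the new reduce fusion is gated by an environment variable, which is handy for bisecting fusion-related regressions. A hypothetical invocation (the model file and subcommand are illustrative):

    MIGRAPHX_DISABLE_REDUCE_FUSION=1 migraphx-driver perf model.onnx

With the variable set, enable_pass skips fuse_reduce and the graph compiles with the pre-fusion reduce lowering.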