Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
e2eb6036
Commit
e2eb6036
authored
Apr 13, 2022
by
Paul
Browse files
Merge
parents
298c93d5
1e0bbd78
Changes
267
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
715 additions
and
133 deletions
+715
-133
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp
+2
-3
src/targets/gpu/jit/pointwise.cpp
src/targets/gpu/jit/pointwise.cpp
+109
-0
src/targets/gpu/jit/roialign.cpp
src/targets/gpu/jit/roialign.cpp
+88
-0
src/targets/gpu/jit/scatternd.cpp
src/targets/gpu/jit/scatternd.cpp
+88
-0
src/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp
...argets/gpu/kernels/include/migraphx/kernels/algorithm.hpp
+10
-0
src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp
...pu/kernels/include/migraphx/kernels/integral_constant.hpp
+7
-1
src/targets/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp
...ts/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp
+145
-0
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
+3
-0
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
...argets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
+2
-4
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
+27
-10
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
...targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
+40
-42
src/targets/gpu/kernels/include/migraphx/kernels/scatternd.hpp
...argets/gpu/kernels/include/migraphx/kernels/scatternd.hpp
+64
-0
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
+27
-24
src/targets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp
...gets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp
+18
-4
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
...gets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
+43
-0
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp
+1
-0
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
+6
-3
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp
...argets/gpu/kernels/include/migraphx/kernels/vectorize.hpp
+2
-2
src/targets/gpu/lowering.cpp
src/targets/gpu/lowering.cpp
+31
-38
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+2
-2
No files found.
src/targets/gpu/include/migraphx/gpu/prefix_scan_sum.hpp
View file @
e2eb6036
...
...
@@ -40,9 +40,8 @@ struct hip_prefix_scan_sum : oper<hip_prefix_scan_sum>
argument
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
if
(
op
.
exclusive
or
op
.
reverse
)
MIGRAPHX_THROW
(
"Exclusive and reverse scan not supported"
);
device
::
prefix_scan_sum
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
],
op
.
axis
);
device
::
prefix_scan_sum
(
ctx
.
get_stream
().
get
(),
args
[
1
],
args
[
0
],
op
.
axis
,
op
.
exclusive
,
op
.
reverse
);
return
args
[
1
];
}
...
...
src/targets/gpu/jit/pointwise.cpp
0 → 100644
View file @
e2eb6036
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
static
const
char
*
const
pointwise_kernel
=
R"__migraphx__(
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/pointwise.hpp>
#include <args.hpp>
namespace migraphx {
${preamble}
extern "C" {
__global__ void kernel(${params})
{
pointwise(${lambda}, ${args});
}
}
} // namespace migraphx
)__migraphx__"
;
struct
pointwise_compiler
:
compiler
<
pointwise_compiler
>
{
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"pointwise"
};
}
static
std
::
size_t
oversubscribe
(
const
std
::
vector
<
shape
>&
inputs
)
{
if
(
std
::
any_of
(
inputs
.
begin
(),
inputs
.
end
(),
[](
const
auto
&
s
)
{
return
s
.
broadcasted
();
}))
return
1
;
else
return
4
;
}
static
std
::
size_t
vectorize_elements
(
const
std
::
vector
<
shape
>&
inputs
)
{
std
::
size_t
n
=
inputs
.
front
().
elements
();
if
(
std
::
all_of
(
inputs
.
begin
(),
inputs
.
end
(),
[](
const
auto
&
s
)
{
return
s
.
packed
()
or
s
.
broadcasted
();
}))
{
if
((
n
%
4
)
==
0
)
return
n
/
4
;
else
if
((
n
%
2
)
==
0
)
return
n
/
2
;
}
return
n
;
}
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
hip_compile_options
options
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
vectorize_elements
(
inputs
),
oversubscribe
(
inputs
)));
options
.
inputs
=
inputs
;
options
.
output
=
inputs
.
back
();
options
.
virtual_inputs
=
reduce_dims
(
inputs
);
options
.
params
=
"-Wno-float-equal"
;
auto
src
=
interpolate_string
(
pointwise_kernel
,
{{
"params"
,
enum_params
(
inputs
.
size
(),
"void * private_p"
)},
{
"args"
,
enum_params
(
inputs
.
size
(),
"private_p"
)},
{
"lambda"
,
v
.
at
(
"lambda"
).
to
<
std
::
string
>
()},
{
"preamble"
,
v
.
get
(
"preamble"
,
std
::
string
{})}});
return
compile_hip_code_object
(
src
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
)
const
{
assert
(
not
ins
->
module_inputs
().
empty
());
auto
*
pm
=
ins
->
module_inputs
().
front
();
run_passes
(
*
pm
,
{
eliminate_common_subexpression
{},
dead_code_elimination
{}});
cpp_generator
g
;
g
.
fmap
([](
const
std
::
string
&
fname
)
{
return
"migraphx::"
+
fname
;
});
g
.
add_point_op
(
"where"
,
"${function:where}(${0}, ${1}, ${2})"
);
g
.
add_point_op
(
"prelu"
,
"${function:where}(${0} < 0, ${0} * ${1}, ${0})"
);
g
.
add_point_op
(
"sign"
,
"${function:where}(${0} > 0, 1, ${function:where}(${0} < 0, -1, 0))"
);
g
.
add_point_op
(
"equal"
,
"migraphx::abs(${0} == ${1})"
);
g
.
add_point_op
(
"less"
,
"migraphx::abs(${0} < ${1})"
);
g
.
add_point_op
(
"greater"
,
"migraphx::abs(${0} > ${1})"
);
g
.
add_point_op
(
"not"
,
"migraphx::abs(not ${0})"
);
// Add explict conversions
g
.
fresult
(
[](
const
shape
&
s
)
{
return
"migraphx::convert<"
+
shape
::
cpp_type
(
s
.
type
())
+
">"
;
});
auto
name
=
g
.
create_function
(
g
.
generate_module
(
*
pm
).
set_attributes
({
"__device__"
}).
set_generic_types
(
*
pm
));
std
::
string
lambda
=
"MIGRAPHX_LIFT("
+
name
+
")"
;
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
{{
"lambda"
,
lambda
},
{
"preamble"
,
g
.
str
()}}));
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/
compile_
roialign.cpp
→
src/targets/gpu/
jit/
roialign.cpp
View file @
e2eb6036
#include <migraphx/gpu/compile
_roialign
.hpp>
#include <migraphx/gpu/compile
r
.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/cpp_generator.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -37,46 +43,46 @@ __global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y
} // namespace migraphx
int main() {}
)__migraphx__"
;
operation
compile_
roialign
(
co
ntext
&
,
const
std
::
vector
<
shape
>&
io_shapes
,
const
value
&
val
)
struct
roialign
_
co
mpiler
:
compiler
<
roialign_compiler
>
{
hip_compile_options
options
;
auto
out_s
=
io_shapes
.
back
();
options
.
local
=
128
;
options
.
global
=
compute_global
(
out_s
.
elements
(),
options
.
local
);
options
.
inputs
=
io_shapes
;
options
.
output
=
out_s
;
options
.
kernel_name
=
"roialign_kernel"
;
options
.
virtual_inputs
=
io_shapes
;
// sampling_ratio
assert
(
val
.
contains
(
"sampling_ratio"
));
auto
sampling_ratio
=
val
.
at
(
"sampling_ratio"
).
to
<
int64_t
>
();
options
.
params
+=
" -DSAMPLING_RATIO="
+
std
::
to_string
(
sampling_ratio
);
// pooling_mode
assert
(
val
.
contains
(
"mode"
));
auto
mode
=
val
.
at
(
"mode"
).
to
<
std
::
string
>
();
bool
is_avg_pooling
=
(
mode
==
"avg"
);
options
.
params
+=
" -DIS_AVG_POOLING="
+
std
::
to_string
(
static_cast
<
int
>
(
is_avg_pooling
));
// coord_trans_mode
assert
(
val
.
contains
(
"coordinate_transformation_mode"
));
auto
ctm
=
val
.
at
(
"coordinate_transformation_mode"
).
to
<
std
::
string
>
();
float
rois_offset
=
(
ctm
==
"output_half_pixel"
)
?
-
0.5
f
:
0.0
f
;
options
.
params
+=
" -DROIS_OFFSET="
+
std
::
to_string
(
rois_offset
);
// spatial_scale
assert
(
val
.
contains
(
"spatial_scale"
));
float
spatial_scale
=
val
.
at
(
"spatial_scale"
).
to
<
float
>
();
options
.
params
+=
" -DSPATIAL_SCALE="
+
std
::
to_string
(
spatial_scale
);
return
compile_hip_code_object
(
roialign_kernel
,
options
);
}
}
// namespace gpu
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"roialign"
};
}
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
hip_compile_options
options
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
inputs
.
back
().
elements
()),
128
);
options
.
output
=
inputs
.
back
();
options
.
inputs
=
inputs
;
options
.
kernel_name
=
"roialign_kernel"
;
// sampling_ratio
options
.
params
+=
" -DSAMPLING_RATIO="
+
v
.
at
(
"sampling_ratio"
).
to
<
std
::
string
>
();
// pooling_mode
auto
mode
=
v
.
at
(
"mode"
).
to
<
migraphx
::
op
::
pooling_mode
>
();
std
::
string
is_avg_pooling
=
(
mode
==
migraphx
::
op
::
pooling_mode
::
average
)
?
"true"
:
"false"
;
options
.
params
+=
" -DIS_AVG_POOLING="
+
is_avg_pooling
;
// coord_trans_mode
auto
ctm
=
v
.
at
(
"coordinate_transformation_mode"
).
to
<
std
::
string
>
();
float
rois_offset
=
(
ctm
==
"output_half_pixel"
)
?
-
0.5
f
:
0.0
f
;
options
.
params
+=
" -DROIS_OFFSET="
+
std
::
to_string
(
rois_offset
);
// spatial_scale
options
.
params
+=
" -DSPATIAL_SCALE="
+
v
.
at
(
"spatial_scale"
).
to
<
std
::
string
>
();
return
compile_hip_code_object
(
roialign_kernel
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
return
replace
(
compile_op
(
ctx
,
to_shapes
(
ins
->
inputs
()),
op
.
to_value
()));
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/jit/scatternd.cpp
0 → 100644
View file @
e2eb6036
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/reduce_dims.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass_manager.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
// NOLINTNEXTLINE
static
const
char
*
const
scatternd_kernel
=
R"__migraphx__(
#include <migraphx/kernels/scatternd.hpp>
#include <migraphx/kernels/basic_ops.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <args.hpp>
namespace migraphx {
extern "C" {
__global__ void scatternd_kernel(void* in_indices, void* in_updates, void* output)
{
make_tensors()(in_indices, in_updates, output)([](auto&&... xs) {
scatternd(xs..., ${reduction}{});
});
}
}
} // namespace migraphx
)__migraphx__"
;
struct
scatternd_compiler
:
compiler
<
scatternd_compiler
>
{
std
::
vector
<
std
::
string
>
names
()
const
{
return
{
"scatternd_none"
,
"scatternd_add"
,
"scatternd_mul"
};
}
operation
compile_op
(
context
&
ctx
,
const
std
::
vector
<
shape
>&
inputs
,
const
value
&
v
)
const
{
hip_compile_options
options
;
options
.
set_launch_params
(
v
,
compute_global_for
(
ctx
,
inputs
.
at
(
1
).
elements
()));
auto
out_s
=
inputs
.
back
();
options
.
inputs
=
inputs
;
options
.
output
=
out_s
;
options
.
kernel_name
=
"scatternd_kernel"
;
options
.
virtual_inputs
=
inputs
;
auto
reduction
=
"assign_"
+
v
.
get
(
"reduction"
,
std
::
string
{
"none"
});
auto
src
=
interpolate_string
(
scatternd_kernel
,
{{
"reduction"
,
reduction
}});
return
compile_hip_code_object
(
src
,
options
);
}
compiler_replace
compile
(
context
&
ctx
,
instruction_ref
ins
,
const
operation
&
op
)
const
{
assert
(
starts_with
(
op
.
name
(),
"scatternd_"
));
auto
reduction
=
op
.
name
().
substr
(
10
);
return
insert
(
compile_op
(
ctx
,
to_shapes
({
ins
->
inputs
().
begin
()
+
1
,
ins
->
inputs
().
end
()}),
{{
"reduction"
,
reduction
}}));
}
compiler_replace
insert
(
const
operation
&
op
)
const
{
return
[
=
](
module
&
m
,
instruction_ref
ins
)
{
auto
args
=
ins
->
inputs
();
args
.
back
()
=
m
.
insert_instruction
(
ins
,
make_op
(
"hip::copy"
),
args
.
front
(),
args
.
back
());
args
.
erase
(
args
.
begin
());
return
m
.
replace_instruction
(
ins
,
op
,
args
);
};
}
};
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/kernels/include/migraphx/kernels/algorithm.hpp
View file @
e2eb6036
...
...
@@ -21,6 +21,16 @@ struct greater
}
};
template
<
class
InputIt
,
class
OutputIt
>
constexpr
OutputIt
copy
(
InputIt
first
,
InputIt
last
,
OutputIt
d_first
)
{
while
(
first
!=
last
)
{
*
d_first
++
=
*
first
++
;
}
return
d_first
;
}
template
<
class
Iterator
,
class
Compare
>
constexpr
Iterator
is_sorted_until
(
Iterator
first
,
Iterator
last
,
Compare
comp
)
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/integral_constant.hpp
View file @
e2eb6036
...
...
@@ -48,7 +48,7 @@ MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(>=)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
==
)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
!=
)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
&
)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
^
)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
^
)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
|
)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
&&
)
MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP
(
||
)
...
...
@@ -70,5 +70,11 @@ using index_constant = integral_constant<index_int, N>;
template
<
auto
V
>
static
constexpr
auto
_c
=
integral_constant
<
decltype
(
V
),
V
>
{};
// NOLINT
template
<
class
F
>
constexpr
auto
return_c
(
F
f
)
{
return
_c
<
f
()
>
;
}
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP
src/targets/gpu/kernels/include/migraphx/kernels/iota_iterator.hpp
0 → 100644
View file @
e2eb6036
#ifndef MIGRAPHX_GUARD_KERNELS_IOTA_ITERATOR_HPP
#define MIGRAPHX_GUARD_KERNELS_IOTA_ITERATOR_HPP
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/type_traits.hpp>
namespace
migraphx
{
template
<
class
F
,
class
Iterator
=
diff_int
>
struct
basic_iota_iterator
{
Iterator
index
;
F
f
;
using
difference_type
=
diff_int
;
using
reference
=
decltype
(
f
(
std
::
declval
<
Iterator
>
()));
using
value_type
=
remove_reference_t
<
reference
>
;
using
pointer
=
add_pointer_t
<
value_type
>
;
constexpr
basic_iota_iterator
&
operator
+=
(
diff_int
n
)
{
index
+=
n
;
return
*
this
;
}
constexpr
basic_iota_iterator
&
operator
-=
(
diff_int
n
)
{
index
-=
n
;
return
*
this
;
}
constexpr
basic_iota_iterator
&
operator
++
()
{
index
++
;
return
*
this
;
}
constexpr
basic_iota_iterator
&
operator
--
()
{
index
--
;
return
*
this
;
}
constexpr
basic_iota_iterator
operator
++
(
int
)
// NOLINT
{
basic_iota_iterator
it
=
*
this
;
index
++
;
return
it
;
}
constexpr
basic_iota_iterator
operator
--
(
int
)
// NOLINT
{
basic_iota_iterator
it
=
*
this
;
index
--
;
return
it
;
}
// TODO: operator->
constexpr
reference
operator
*
()
const
{
return
f
(
index
);
}
template
<
class
T
>
constexpr
reference
operator
[](
T
x
)
const
{
return
f
(
index
+
x
);
}
};
template
<
class
T
,
class
F
>
constexpr
basic_iota_iterator
<
F
,
T
>
make_basic_iota_iterator
(
T
x
,
F
f
)
{
return
basic_iota_iterator
<
F
,
T
>
{
x
,
f
};
}
template
<
class
F
,
class
Iterator
>
constexpr
basic_iota_iterator
<
F
,
Iterator
>
operator
+
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
diff_int
y
)
{
return
x
+=
y
;
}
template
<
class
F
,
class
Iterator
>
constexpr
basic_iota_iterator
<
F
,
Iterator
>
operator
+
(
diff_int
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
y
+
x
;
}
template
<
class
F
,
class
Iterator
>
constexpr
diff_int
operator
-
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
x
.
index
-
y
.
index
;
}
template
<
class
F
,
class
Iterator
>
constexpr
basic_iota_iterator
<
F
,
Iterator
>
operator
-
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
diff_int
y
)
{
return
x
-=
y
;
}
template
<
class
F
,
class
Iterator
>
constexpr
bool
operator
==
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
x
.
index
==
y
.
index
;
}
template
<
class
F
,
class
Iterator
>
constexpr
bool
operator
!=
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
x
.
index
!=
y
.
index
;
}
template
<
class
F
,
class
Iterator
>
constexpr
bool
operator
<
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
x
.
index
<
y
.
index
;
}
template
<
class
F
,
class
Iterator
>
constexpr
bool
operator
>
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
x
.
index
>
y
.
index
;
}
template
<
class
F
,
class
Iterator
>
constexpr
bool
operator
>=
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
x
.
index
>=
y
.
index
;
}
template
<
class
F
,
class
Iterator
>
constexpr
bool
operator
<=
(
basic_iota_iterator
<
F
,
Iterator
>
x
,
basic_iota_iterator
<
F
,
Iterator
>
y
)
{
return
x
.
index
<=
y
.
index
;
}
struct
defaul_iota_iterator
{
template
<
class
T
>
constexpr
auto
operator
()(
T
x
)
const
{
return
x
;
}
};
using
iota_iterator
=
basic_iota_iterator
<
defaul_iota_iterator
>
;
}
// namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_IOTA_ITERATOR_HPP
src/targets/gpu/kernels/include/migraphx/kernels/math.hpp
View file @
e2eb6036
...
...
@@ -59,6 +59,7 @@ MIGRAPHX_DEVICE_MATH(cosh, ::cosh)
MIGRAPHX_DEVICE_MATH
(
erf
,
::
erf
)
MIGRAPHX_DEVICE_MATH
(
exp
,
::
exp
)
MIGRAPHX_DEVICE_MATH
(
floor
,
::
floor
)
MIGRAPHX_DEVICE_MATH
(
isnan
,
::
isnan
)
MIGRAPHX_DEVICE_MATH
(
log
,
::
log
)
MIGRAPHX_DEVICE_MATH
(
pow
,
::
pow
)
MIGRAPHX_DEVICE_MATH
(
round
,
::
round
)
...
...
@@ -103,6 +104,7 @@ MIGRAPHX_DEVICE_MATH_HALF(cos, ::cos)
MIGRAPHX_DEVICE_MATH_HALF
(
cosh
,
::
cosh
)
MIGRAPHX_DEVICE_MATH_HALF
(
erf
,
::
erf
)
MIGRAPHX_DEVICE_MATH_HALF
(
floor
,
::
floor
)
MIGRAPHX_DEVICE_MATH_HALF
(
isnan
,
::
isnan
)
MIGRAPHX_DEVICE_MATH_HALF
(
pow
,
::
pow
)
MIGRAPHX_DEVICE_MATH_HALF
(
round
,
::
round
)
MIGRAPHX_DEVICE_MATH_HALF
(
sin
,
::
sin
)
...
...
@@ -129,6 +131,7 @@ MIGRAPHX_DEVICE_MATH_VEC(cosh)
MIGRAPHX_DEVICE_MATH_VEC
(
erf
)
MIGRAPHX_DEVICE_MATH_VEC
(
exp
)
MIGRAPHX_DEVICE_MATH_VEC
(
floor
)
MIGRAPHX_DEVICE_MATH_VEC
(
isnan
)
MIGRAPHX_DEVICE_MATH_VEC
(
log
)
MIGRAPHX_DEVICE_MATH_VEC
(
pow
)
MIGRAPHX_DEVICE_MATH_VEC
(
round
)
...
...
src/targets/gpu/kernels/include/migraphx/kernels/pointwise.hpp
View file @
e2eb6036
...
...
@@ -39,10 +39,8 @@ template <class F, class T, class... Ts>
__device__
void
pointwise_tensor
(
index
idx
,
F
f
,
T
out
,
Ts
...
xs
)
{
preload
<
typename
T
::
type
>
(
idx
,
xs
...)([
&
](
auto
...
ps
)
{
idx
.
global_stride
(
out
.
get_shape
().
elements
(),
[
&
](
auto
i
)
{
auto
multi_idx
=
out
.
get_shape
().
multi
(
i
);
out
[
multi_idx
]
=
implicit_conversion
(
f
(
ps
[
multi_idx
]...));
});
idx
.
global_stride
(
out
.
get_shape
().
elements
(),
[
&
](
auto
i
)
{
out
[
i
]
=
implicit_conversion
(
f
(
ps
[
i
]...));
});
});
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
View file @
e2eb6036
...
...
@@ -6,15 +6,32 @@
namespace
migraphx
{
template
<
class
T
>
struct
remove_vec_impl
{
using
type
=
T
;
};
template
<
class
T
,
index_int
N
>
struct
remove_vec_impl
<
vec
<
T
,
N
>>
{
using
type
=
T
;
};
template
<
class
T
>
using
remove_vec
=
typename
remove_vec_impl
<
T
>::
type
;
template
<
class
T
,
class
...
Shapes
>
constexpr
auto
traverse_preload
(
Shapes
...
ss
)
{
return
[
=
](
auto
f
,
auto
...
g
)
{
index_int
offset
=
0
;
auto
each
=
[
&
](
auto
x
)
{
using
type
=
remove_vec
<
typename
decltype
(
x
)
::
type
>
;
constexpr
auto
s
=
decltype
(
x
.
get_shape
()){};
constexpr
auto
size
=
_c
<
s
.
element_space
()
>
;
if
constexpr
(
not
s
.
broadcasted
()
or
(
s
.
elements
()
-
size
)
<
64
)
constexpr
auto
size
=
s
.
element_space
();
if
constexpr
(
not
s
.
broadcasted
()
or
(
s
.
elements
()
-
size
)
<
64
or
not
is_same
<
T
,
type
>
{})
return
f
(
x
,
offset
,
false_type
{});
else
{
...
...
@@ -78,23 +95,23 @@ __device__ auto preload_copy(index idx, F f, __shared__ T* buffer, Ts... xs)
invoke
);
}
template
<
class
T
>
struct
remove_vec
template
<
class
T
,
class
Shape
>
struct
shape_type
:
Shape
{
using
type
=
T
;
};
template
<
class
T
,
index_int
N
>
struct
remove_vec
<
vec
<
T
,
N
>>
template
<
class
T
>
constexpr
auto
make_shape_type
(
T
)
{
using
type
=
T
;
}
;
return
shape_type
<
typename
T
::
type
,
typename
T
::
shape_type
>
{}
;
}
template
<
class
T
,
class
...
Ts
>
__device__
auto
preload
(
index
idx
,
Ts
...
xs
)
{
using
type
=
typename
remove_vec
<
T
>
::
type
;
constexpr
auto
size
=
decltype
(
compute_preload_size
<
type
>
(
xs
.
get_shape
(
)...)){};
using
type
=
remove_vec
<
T
>
;
constexpr
auto
size
=
decltype
(
compute_preload_size
<
type
>
(
make_shape_type
(
xs
)...)){};
const
index_int
max_size
=
512
*
sizeof
(
type
);
return
[
=
](
auto
f
)
{
if
constexpr
(
size
>
0
and
size
<
max_size
)
...
...
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
View file @
e2eb6036
...
...
@@ -19,7 +19,7 @@ struct max_pool
}
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
T
final
(
T
x
,
std
::
size_
t
)
MIGRAPHX_DEVICE_CONSTEXPR
T
final
(
T
x
,
index_in
t
)
{
return
(
x
);
}
...
...
@@ -36,21 +36,19 @@ struct avg_pool
}
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
T
final
(
T
x
,
std
::
size_
t
y
)
MIGRAPHX_DEVICE_CONSTEXPR
T
final
(
T
x
,
index_in
t
y
)
{
return
(
y
==
0
)
?
0.0
:
(
x
/
y
);
}
};
template
<
class
T
,
class
Op
>
MIGRAPHX_DEVICE_CONSTEXPR
T
bilinear_interpolate
(
const
T
*
data
,
const
array
<
std
::
size_t
,
2
>&
dims
,
array
<
float
,
2
>
xy
,
Op
pooling
)
template
<
class
Iterator
,
class
Op
>
MIGRAPHX_DEVICE_CONSTEXPR
typename
Iterator
::
value_type
bilinear_interpolate
(
const
Iterator
data
,
const
array
<
index_int
,
2
>&
dims
,
array
<
float
,
2
>
xy
,
Op
pooling
)
{
array
<
int
,
2
>
low
{};
array
<
int
,
2
>
high
{};
for
(
std
::
size_
t
ii
=
0
;
ii
<
xy
.
size
();
++
ii
)
for
(
index_in
t
ii
=
0
;
ii
<
xy
.
size
();
++
ii
)
{
if
(
xy
[
ii
]
<
-
1.0
f
or
xy
[
ii
]
>
dims
[
ii
])
{
...
...
@@ -65,36 +63,36 @@ MIGRAPHX_DEVICE_CONSTEXPR T bilinear_interpolate(const T* data,
xy
[
ii
]
=
high
[
ii
]
=
low
[
ii
]
=
dims
[
ii
]
-
1
;
}
}
array
<
std
::
size_
t
,
4
>
locs
=
{
low
[
0
]
*
dims
[
1
]
+
low
[
1
],
low
[
0
]
*
dims
[
1
]
+
high
[
1
],
high
[
0
]
*
dims
[
1
]
+
low
[
1
],
high
[
0
]
*
dims
[
1
]
+
high
[
1
]};
array
<
index_in
t
,
4
>
locs
=
{
low
[
0
]
*
dims
[
1
]
+
low
[
1
],
low
[
0
]
*
dims
[
1
]
+
high
[
1
],
high
[
0
]
*
dims
[
1
]
+
low
[
1
],
high
[
0
]
*
dims
[
1
]
+
high
[
1
]};
float
ly
=
xy
[
0
]
-
low
[
0
];
float
lx
=
xy
[
1
]
-
low
[
1
];
float
hy
=
1.0
f
-
ly
;
float
hx
=
1.0
f
-
lx
;
array
<
T
,
4
>
ws
=
{
hy
*
hx
,
hy
*
lx
,
ly
*
hx
,
ly
*
lx
};
float
ly
=
xy
[
0
]
-
low
[
0
];
float
lx
=
xy
[
1
]
-
low
[
1
];
float
hy
=
1.0
f
-
ly
;
float
hx
=
1.0
f
-
lx
;
array
<
typename
Iterator
::
value_type
,
4
>
ws
=
{
hy
*
hx
,
hy
*
lx
,
ly
*
hx
,
ly
*
lx
};
auto
v01
=
pooling
(
data
[
locs
[
0
]]
*
ws
[
0
],
data
[
locs
[
1
]]
*
ws
[
1
]);
auto
v23
=
pooling
(
data
[
locs
[
2
]]
*
ws
[
2
],
data
[
locs
[
3
]]
*
ws
[
3
]);
return
pooling
(
v01
,
v23
);
}
template
<
class
T
,
class
Op
>
MIGRAPHX_DEVICE_CONSTEXPR
T
calc_pooling
(
const
T
*
&
data
,
const
array
<
float
,
2
>&
roi_starts
,
const
array
<
float
,
2
>&
bin_size
,
const
array
<
int
,
2
>&
idx
,
const
array
<
std
::
size_
t
,
2
>&
bin_grid_size
,
const
array
<
std
::
size_
t
,
2
>&
dims
,
float
roi_offset
,
Op
op
)
template
<
class
Iterator
,
class
Op
>
MIGRAPHX_DEVICE_CONSTEXPR
auto
calc_pooling
(
const
Iterator
&
data
,
const
array
<
float
,
2
>&
roi_starts
,
const
array
<
float
,
2
>&
bin_size
,
const
array
<
int
,
2
>&
idx
,
const
array
<
index_in
t
,
2
>&
bin_grid_size
,
const
array
<
index_in
t
,
2
>&
dims
,
float
roi_offset
,
Op
op
)
{
T
output_val
=
op
.
init
();
const
int64_t
count
=
bin_grid_size
[
0
]
*
bin_grid_size
[
1
];
typename
Iterator
::
value_type
output_val
=
op
.
init
();
const
int64_t
count
=
bin_grid_size
[
0
]
*
bin_grid_size
[
1
];
dfor
(
bin_grid_size
[
0
],
bin_grid_size
[
1
])([
&
](
auto
iy
,
auto
ix
)
{
array
<
std
::
size_
t
,
2
>
id
=
{
iy
,
ix
};
array
<
index_in
t
,
2
>
id
=
{
iy
,
ix
};
array
<
float
,
2
>
locs
=
roi_starts
+
idx
*
bin_size
+
bin_size
*
(
id
+
0.5
f
)
/
bin_grid_size
+
roi_offset
;
...
...
@@ -122,19 +120,19 @@ constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
template
<
class
T
,
class
U
,
class
V
,
class
W
,
class
Settings
>
__device__
void
roialign
(
const
T
&
x_t
,
const
U
&
rois_t
,
const
V
&
ind_t
,
const
W
&
y_t
,
Settings
s
)
{
auto
index
=
make_index
();
const
auto
*
x
=
x_t
.
data
();
const
auto
*
rois
=
rois_t
.
data
();
const
auto
*
ind
=
ind_t
.
data
();
auto
index
=
make_index
();
const
auto
x
=
x_t
.
begin
();
const
auto
rois
=
rois_t
.
begin
();
const
auto
ind
=
ind_t
.
begin
();
auto
*
out_ptr
=
y_t
.
data
();
auto
out_ptr
=
y_t
.
begin
();
// input shape
auto
x_lens
=
x_t
.
get_shape
().
lens
;
auto
channel_num
=
x_lens
[
1
];
// input dims of height and width, in all 2-dim arrays, the first dim
// is for height and second dim is for width
array
<
std
::
size_
t
,
2
>
in_dims
=
{
x_lens
[
2
],
x_lens
[
3
]};
array
<
index_in
t
,
2
>
in_dims
=
{
x_lens
[
2
],
x_lens
[
3
]};
const
auto
stride
=
index
.
nglobal
();
auto
out_s
=
y_t
.
get_shape
();
...
...
@@ -142,8 +140,8 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
// output dims of height and width, in all 2-dim arrays, the first dim
// is for height and second dim is for width
const
auto
&
out_lens
=
out_s
.
lens
;
array
<
std
::
size_
t
,
2
>
out_dims
=
{
out_lens
[
2
],
out_lens
[
3
]};
const
auto
&
out_lens
=
out_s
.
lens
;
array
<
index_in
t
,
2
>
out_dims
=
{
out_lens
[
2
],
out_lens
[
3
]};
for
(
index_int
i
=
index
.
global
;
i
<
out_s
.
elements
();
i
+=
stride
)
{
...
...
@@ -153,8 +151,8 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
int
ph
=
idx
[
2
];
int
pw
=
idx
[
3
];
const
auto
*
offset_rois
=
rois
+
(
n
*
roi_column_num
);
const
int
batch_ind
=
ind
[
n
];
const
auto
offset_rois
=
rois
+
(
n
*
roi_column_num
);
const
int
batch_ind
=
ind
[
n
];
array
<
float
,
2
>
roi_starts
=
{
offset_rois
[
1
]
*
s
.
spatial_scale
,
offset_rois
[
0
]
*
s
.
spatial_scale
};
...
...
@@ -163,9 +161,9 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
array
<
float
,
2
>
roi_size
{};
array
<
float
,
2
>
bin_size
{};
array
<
std
::
size_
t
,
2
>
bin_grid_size
{};
array
<
index_in
t
,
2
>
bin_grid_size
{};
for
(
std
::
size_
t
ii
=
0
;
ii
<
roi_size
.
size
();
++
ii
)
for
(
index_in
t
ii
=
0
;
ii
<
roi_size
.
size
();
++
ii
)
{
roi_size
[
ii
]
=
roi_ends
[
ii
]
-
roi_starts
[
ii
];
roi_size
[
ii
]
=
max
(
roi_size
[
ii
],
1.0
f
);
...
...
@@ -175,7 +173,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
(
s
.
sampling_ratio
>
0
)
?
s
.
sampling_ratio
:
std
::
ceil
(
roi_size
[
ii
]
/
out_dims
[
ii
]);
}
const
auto
*
offset_x
=
x
+
((
batch_ind
*
channel_num
+
c
)
*
in_dims
[
0
]
*
in_dims
[
1
]);
const
auto
offset_x
=
x
+
((
batch_ind
*
channel_num
+
c
)
*
in_dims
[
0
]
*
in_dims
[
1
]);
if
constexpr
(
s
.
is_avg_pooling
)
{
out_ptr
[
i
]
=
calc_pooling
(
offset_x
,
...
...
src/targets/gpu/kernels/include/migraphx/kernels/scatternd.hpp
0 → 100644
View file @
e2eb6036
#ifndef MIGRAPHX_GUARD_KERNELS_SCATTERND_HPP
#define MIGRAPHX_GUARD_KERNELS_SCATTERND_HPP
#include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/algorithm.hpp>
namespace
migraphx
{
struct
assign_none
{
template
<
class
T
,
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
void
operator
()(
T
&
x
,
U
y
)
const
{
x
=
y
;
}
};
struct
assign_add
{
template
<
class
T
,
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
void
operator
()(
T
&
x
,
U
y
)
const
{
x
+=
y
;
}
};
struct
assign_mul
{
template
<
class
T
,
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
void
operator
()(
T
&
x
,
U
y
)
const
{
x
*=
y
;
}
};
template
<
class
T
,
class
U
,
class
V
,
class
F
>
__device__
void
scatternd
(
const
T
&
indices_t
,
const
U
&
updates_t
,
const
V
&
output_t
,
F
f
)
{
auto
index
=
make_index
();
auto
updates_shape
=
updates_t
.
get_shape
();
index
.
global_stride
(
updates_shape
.
elements
(),
[
&
](
auto
i
)
{
auto
output_shape
=
output_t
.
get_shape
();
auto
indices_shape
=
indices_t
.
get_shape
();
auto
k
=
indices_shape
.
lens
.
back
();
auto
q
=
indices_shape
.
lens
.
size
();
auto
updates_idx
=
updates_shape
.
multi
(
i
);
auto
indices_idx
=
indices_shape
.
multi
(
0
);
copy
(
updates_idx
.
begin
(),
updates_idx
.
begin
()
+
q
-
1
,
indices_idx
.
begin
());
auto
index_start
=
indices_t
.
begin
()
+
indices_shape
.
index
(
indices_idx
);
auto
index_end
=
index_start
+
k
;
auto
out_idx
=
output_shape
.
multi
(
0
);
copy
(
index_start
,
index_end
,
out_idx
.
begin
());
copy
(
updates_idx
.
begin
()
+
q
-
1
,
updates_idx
.
end
(),
out_idx
.
begin
()
+
k
);
f
(
output_t
[
out_idx
],
updates_t
[
i
]);
});
}
}
// namespace migraphx
#endif
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
View file @
e2eb6036
...
...
@@ -17,35 +17,38 @@ struct shape
constexpr
shape
(
Lens
l
,
Strides
s
)
:
lens
(
l
),
strides
(
s
)
{}
constexpr
index_int
elements
()
const
{
return
l
ens
.
product
();
}
constexpr
auto
elements
()
const
{
return
_c
<
L
ens
{}
.
product
()
>
;
}
constexpr
index_int
element_space
()
const
{
return
s
trides
.
dot
(
l
ens
-
1
)
+
1
;
}
constexpr
auto
element_space
()
const
{
return
_c
<
S
trides
{}
.
dot
(
L
ens
{}
-
1
)
+
1
>
;
}
constexpr
bool
packed
()
const
{
return
elements
()
==
element_space
();
}
constexpr
bool
broadcasted
()
const
{
return
s
trides
.
product
()
==
0
;
}
constexpr
bool
transposed
()
const
constexpr
auto
packed
()
const
{
return
elements
()
==
element_space
();
}
constexpr
auto
broadcasted
()
const
{
return
_c
<
S
trides
{}
.
product
()
==
0
>
;
}
constexpr
auto
transposed
()
const
{
if
(
broadcasted
())
{
index_array
s
;
index_int
j
=
0
;
for
(
index_int
i
=
0
;
i
<
s
.
size
();
i
++
)
return
return_c
([]
{
auto
lstrides
=
Strides
{};
if
(
shape
{}.
broadcasted
())
{
if
(
strides
[
i
]
!=
0
)
index_array
s
{};
index_int
j
=
0
;
for
(
index_int
i
=
0
;
i
<
s
.
size
();
i
++
)
{
s
[
j
]
=
strides
[
i
];
j
++
;
if
(
lstrides
[
i
]
!=
0
)
{
s
[
j
]
=
lstrides
[
i
];
j
++
;
}
}
return
not
is_sorted
(
s
.
begin
(),
s
.
begin
()
+
j
,
greater
{});
}
return
not
is_sorted
(
s
.
begin
(),
s
.
begin
()
+
j
,
greater
{});
}
else
{
return
not
is_sorted
(
strides
.
begin
(),
strides
.
end
(),
greater
{});
}
else
{
return
not
is_sorted
(
lstrides
.
begin
(),
lstrides
.
end
(),
greater
{});
}
});
}
constexpr
bool
standard
()
const
{
return
packed
()
and
not
transposed
();
}
constexpr
auto
standard
()
const
{
return
packed
()
and
not
transposed
();
}
constexpr
index_int
index
(
index_array
x
)
const
{
return
x
.
dot
(
strides
);
}
...
...
@@ -63,10 +66,10 @@ struct shape
return
i
;
else
{
const
index_int
rank
=
this
->
lens
.
size
();
index_int
s
=
1
;
index_int
result
=
0
;
for
(
index_int
j
=
0
;
j
<
this
->
lens
.
size
()
;
j
++
)
const
auto
rank
=
this
->
lens
.
size
();
index_int
s
=
1
;
index_int
result
=
0
;
for
(
index_int
j
=
0
;
j
<
rank
;
j
++
)
{
const
index_int
k
=
rank
-
j
-
1
;
const
index_int
stride
=
this
->
strides
[
k
];
...
...
src/targets/gpu/kernels/include/migraphx/kernels/tensor_view.hpp
100755 → 100644
View file @
e2eb6036
...
...
@@ -3,16 +3,30 @@
#include <migraphx/kernels/shape.hpp>
#include <migraphx/kernels/debug.hpp>
#include <migraphx/kernels/iota_iterator.hpp>
namespace
migraphx
{
template
<
class
T
>
struct
tensor_view_iterator_read
{
T
*
view
;
constexpr
auto
&
operator
()(
std
::
size_t
n
)
const
{
MIGRAPHX_ASSERT
(
view
!=
nullptr
);
return
(
*
view
)[
n
];
}
};
template
<
class
T
,
class
Shape
>
struct
tensor_view
{
using
type
=
T
;
using
type
=
T
;
using
shape_type
=
Shape
;
using
iterator
=
basic_iota_iterator
<
tensor_view_iterator_read
<
const
tensor_view
>
,
index_int
>
;
constexpr
Shape
get_shape
()
const
{
return
Shape
{};
}
constexpr
index_int
size
()
const
{
return
get_shape
().
elements
();
}
constexpr
auto
size
()
const
{
return
get_shape
().
elements
();
}
template
<
class
U
>
constexpr
T
&
operator
[](
U
i
)
const
...
...
@@ -23,8 +37,8 @@ struct tensor_view
constexpr
T
*
data
()
const
{
return
x
;
}
constexpr
T
*
begin
()
const
{
return
data
()
;
}
constexpr
T
*
end
()
const
{
return
data
()
+
size
()
;
}
constexpr
auto
begin
()
const
{
return
iterator
{
0
,
{
this
}}
;
}
constexpr
auto
end
()
const
{
return
iterator
{
this
->
size
(),
{
this
}}
;
}
template
<
class
U
>
constexpr
tensor_view
<
U
,
Shape
>
with
(
U
*
y
)
const
...
...
src/targets/gpu/kernels/include/migraphx/kernels/type_traits.hpp
View file @
e2eb6036
...
...
@@ -6,6 +6,12 @@
namespace
migraphx
{
template
<
class
T
>
struct
type_identity
{
using
type
=
T
;
};
template
<
bool
B
,
class
T
=
void
>
struct
enable_if
{
...
...
@@ -25,6 +31,43 @@ struct is_convertible : bool_constant<__is_convertible(From, To)>
{
};
template
<
class
T
,
class
U
>
struct
is_same
:
false_type
{
};
template
<
class
T
>
struct
is_same
<
T
,
T
>
:
true_type
{
};
template
<
class
T
>
struct
remove_reference
{
using
type
=
T
;
};
template
<
class
T
>
struct
remove_reference
<
T
&>
{
using
type
=
T
;
};
template
<
class
T
>
struct
remove_reference
<
T
&&>
{
using
type
=
T
;
};
template
<
class
T
>
using
remove_reference_t
=
typename
remove_reference
<
T
>::
type
;
template
<
class
T
>
struct
add_pointer
:
type_identity
<
typename
remove_reference
<
T
>::
type
*>
{
};
template
<
class
T
>
using
add_pointer_t
=
typename
add_pointer
<
T
>::
type
;
#define MIGRAPHX_REQUIRES(...) class = enable_if_t<__VA_ARGS__>
}
// namespace migraphx
...
...
src/targets/gpu/kernels/include/migraphx/kernels/types.hpp
100755 → 100644
View file @
e2eb6036
...
...
@@ -6,6 +6,7 @@
namespace
migraphx
{
using
index_int
=
std
::
uint32_t
;
using
diff_int
=
std
::
int32_t
;
#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT
...
...
src/targets/gpu/kernels/include/migraphx/kernels/vec.hpp
View file @
e2eb6036
...
...
@@ -66,15 +66,18 @@ __device__ __host__ auto as_vec(T* x)
return
reinterpret_cast
<
vec
<
T
,
N
>*>
(
x
);
}
template
<
class
T
,
index_int
N
>
using
safe_vec
=
vec
<
std
::
conditional_t
<
std
::
is_same
<
T
,
bool
>
{},
uint8_t
,
T
>
,
N
>
;
template
<
class
...
Ts
>
constexpr
auto
vec_transform
(
Ts
...
xs
)
{
return
[
=
](
auto
f
)
{
if
constexpr
(
is_any_vec
<
Ts
...
>
())
{
using
type
=
decltype
(
f
(
vec_at
(
xs
,
0
)...));
constexpr
auto
size
=
common_vec_size
<
Ts
...
>
();
vec
<
type
,
size
>
result
=
{
0
};
using
type
=
decltype
(
f
(
vec_at
(
xs
,
0
)...));
constexpr
auto
size
=
common_vec_size
<
Ts
...
>
();
safe_
vec
<
type
,
size
>
result
=
{
0
};
for
(
int
i
=
0
;
i
<
size
;
i
++
)
result
[
i
]
=
f
(
vec_at
(
xs
,
i
)...);
return
result
;
...
...
src/targets/gpu/kernels/include/migraphx/kernels/vectorize.hpp
View file @
e2eb6036
...
...
@@ -50,14 +50,14 @@ constexpr auto shape_step(Shape s, Axis)
});
}
// Bools can not be used as a vector type so convert it to int8
// Bools can not be used as a vector type so convert it to
u
int8
template
<
class
T
>
__device__
__host__
T
*
remove_bool
(
T
*
x
)
{
return
x
;
}
inline
__device__
__host__
int8_t
*
remove_bool
(
bool
*
x
)
{
return
reinterpret_cast
<
int8_t
*>
(
x
);
}
inline
__device__
__host__
u
int8_t
*
remove_bool
(
bool
*
x
)
{
return
reinterpret_cast
<
u
int8_t
*>
(
x
);
}
template
<
index_int
N
,
class
T
,
class
Axis
>
__device__
__host__
auto
as_vec
(
T
x
,
Axis
axis
)
...
...
src/targets/gpu/lowering.cpp
View file @
e2eb6036
...
...
@@ -20,10 +20,10 @@
#include <migraphx/gpu/abs.hpp>
#include <migraphx/gpu/batch_norm_inference.hpp>
#include <migraphx/gpu/compile_roialign.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/convolution.hpp>
#include <migraphx/gpu/deconvolution.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/gpu/elu.hpp>
#include <migraphx/gpu/equal.hpp>
#include <migraphx/gpu/gemm.hpp>
...
...
@@ -40,6 +40,7 @@
#include <migraphx/gpu/rocblas.hpp>
#include <migraphx/gpu/unary_not.hpp>
#include <migraphx/gpu/where.hpp>
#include <migraphx/gpu/compiler.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/program.hpp>
#include <utility>
...
...
@@ -60,6 +61,7 @@ struct miopen_apply
std
::
unordered_map
<
instruction_ref
,
std
::
string
>
prog_output_names
{};
bool
offload_copy
=
false
;
bool
int8_x4_format
=
true
;
bool
compute_fp32
=
false
;
context
&
get_context
()
const
{
...
...
@@ -96,13 +98,22 @@ struct miopen_apply
}
}
const
std
::
unordered_set
<
std
::
string
>&
get_rocblas_fp32_archs
()
{
static
std
::
unordered_set
<
std
::
string
>
supported_archs
{
"gfx908"
,
"gfx90a"
};
return
supported_archs
;
}
void
init
()
{
assert
(
mod
!=
nullptr
);
assert
(
pass
!=
nullptr
);
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
auto
&
ctx
=
get_context
();
auto
&
ctx
=
get_context
();
const
auto
device_name
=
trim
(
split_string
(
get_device_name
(),
':'
).
front
());
if
(
contains
(
get_rocblas_fp32_archs
(),
device_name
))
compute_fp32
=
true
;
rocblas_gemm_flags
flag
;
rocblas_query_int8_layout_flag
(
ctx
.
get_stream
().
get_rocblas
(),
&
flag
);
int8_x4_format
=
(
flag
==
rocblas_gemm_flags_pack_int8x4
);
...
...
@@ -183,8 +194,6 @@ struct miopen_apply
add_extend_op
(
"softmax"
);
add_extend_op
(
"topk"
);
add_precompile_op
(
"pointwise"
);
add_batch_norm_inference_op
();
add_convolution_op
();
add_deconvolution_op
();
...
...
@@ -195,7 +204,6 @@ struct miopen_apply
add_neg_op
();
add_nms_op
();
add_quant_convolution_op
();
add_roialign
();
}
void
copy_params
()
...
...
@@ -249,11 +257,28 @@ struct miopen_apply
{
check_shape
(
s
,
apply_map
.
at
(
it
->
name
())(
it
));
}
else
if
(
has_compiler_for
(
it
->
name
()))
{
check_shape
(
s
,
insert_precompile_op
(
it
));
}
}
copy_params
();
}
instruction_ref
insert_precompile_op
(
instruction_ref
ins
)
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
std
::
vector
<
instruction_ref
>
refs
=
ins
->
inputs
();
refs
.
push_back
(
output
);
return
mod
->
replace_instruction
(
ins
,
make_op
(
"gpu::precompile_op"
,
{{
"op"
,
to_value
(
ins
->
get_operator
())}}),
refs
,
ins
->
module_inputs
());
}
instruction_ref
insert_allocation
(
instruction_ref
ins
,
const
shape
&
s
,
std
::
string
tag
=
""
)
{
// Instruction's output is an input of the ret instruction
...
...
@@ -337,7 +362,7 @@ struct miopen_apply
}
}
return
mod
->
replace_instruction
(
ins
,
rocblas_gemm
<
Op
>
{
Op
{},
1
,
0
,
int8_x4_format
},
refs
);
ins
,
rocblas_gemm
<
Op
>
{
Op
{},
1
,
0
,
int8_x4_format
,
compute_fp32
},
refs
);
});
}
...
...
@@ -383,21 +408,6 @@ struct miopen_apply
});
}
void
add_precompile_op
(
const
std
::
string
&
name
)
{
apply_map
.
emplace
(
name
,
[
=
](
instruction_ref
ins
)
{
auto
output
=
insert_allocation
(
ins
,
ins
->
get_shape
());
std
::
vector
<
instruction_ref
>
refs
=
ins
->
inputs
();
refs
.
push_back
(
output
);
return
mod
->
replace_instruction
(
ins
,
make_op
(
"gpu::precompile_op"
,
{{
"op"
,
to_value
(
ins
->
get_operator
())}}),
refs
,
ins
->
module_inputs
());
});
}
void
add_batch_norm_inference_op
()
{
apply_map
.
emplace
(
"batch_norm_inference"
,
[
=
](
instruction_ref
ins
)
{
...
...
@@ -432,7 +442,6 @@ struct miopen_apply
reshapes
[
2
],
reshapes
[
3
],
output
);
});
}
...
...
@@ -489,22 +498,6 @@ struct miopen_apply
});
}
void
add_roialign
()
{
apply_map
.
emplace
(
"roialign"
,
[
=
](
instruction_ref
ins
)
{
auto
s
=
ins
->
get_shape
();
auto
op_val
=
ins
->
get_operator
().
to_value
();
auto
output
=
insert_allocation
(
ins
,
s
);
auto
args
=
ins
->
inputs
();
args
.
push_back
(
output
);
auto
io_shapes
=
to_shapes
(
args
);
auto
co
=
compile_roialign
(
get_context
(),
io_shapes
,
op_val
);
return
mod
->
replace_instruction
(
ins
,
co
,
args
);
});
}
// replace the loop operator with gpu_loop operator
void
add_loop_op
()
{
...
...
src/targets/gpu/target.cpp
View file @
e2eb6036
...
...
@@ -45,7 +45,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_DISABLE_SCHEDULE_PASS
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_
EN
ABLE_POINTWISE_FUSION
)
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_
DIS
ABLE_POINTWISE_FUSION
)
struct
id_pass
{
...
...
@@ -101,7 +101,7 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
simplify_reshapes
{},
propagate_constant
{},
dead_code_elimination
{},
fuse_pointwise
{},
enable_pass
(
not
enabled
(
MIGRAPHX_DISABLE_POINTWISE_FUSION
{}),
fuse_pointwise
{}
)
,
dead_code_elimination
{},
fuse_mlir
{
&
ctx
},
dead_code_elimination
{},
...
...
Prev
1
…
4
5
6
7
8
9
10
11
12
…
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment