Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
511c8d8f
Commit
511c8d8f
authored
Jun 25, 2019
by
Paul
Browse files
Merge from develop
parents
9b7c44ab
2a2c146c
Changes
62
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
612 additions
and
626 deletions
+612
-626
src/include/migraphx/ranges.hpp
src/include/migraphx/ranges.hpp
+22
-0
src/include/migraphx/raw_data.hpp
src/include/migraphx/raw_data.hpp
+19
-0
src/include/migraphx/requires.hpp
src/include/migraphx/requires.hpp
+7
-24
src/include/migraphx/streamutils.hpp
src/include/migraphx/streamutils.hpp
+2
-0
src/include/migraphx/stringutils.hpp
src/include/migraphx/stringutils.hpp
+2
-0
src/onnx/CMakeLists.txt
src/onnx/CMakeLists.txt
+1
-1
src/onnx/onnx.cpp
src/onnx/onnx.cpp
+12
-4
src/py/CMakeLists.txt
src/py/CMakeLists.txt
+1
-6
src/py/migraphx_py.cpp
src/py/migraphx_py.cpp
+7
-7
src/rewrite_rnn.cpp
src/rewrite_rnn.cpp
+94
-173
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+54
-48
src/targets/gpu/CMakeLists.txt
src/targets/gpu/CMakeLists.txt
+1
-0
src/targets/gpu/device/concat.cpp
src/targets/gpu/device/concat.cpp
+10
-12
src/targets/gpu/device/gather.cpp
src/targets/gpu/device/gather.cpp
+18
-23
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
+60
-0
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
+95
-271
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
+89
-0
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+13
-54
src/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
...ts/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
+59
-0
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
+46
-3
No files found.
src/include/migraphx/ranges.hpp
View file @
511c8d8f
...
...
@@ -33,6 +33,10 @@ auto generic_find_impl(rank<0>, C&& c, const T& x)
return
std
::
find
(
c
.
begin
(),
c
.
end
(),
x
);
}
// Empty tag type. The all_of/any_of/none_of overloads in this header that take
// detail::empty ignore their predicate and return a fixed result.
struct
empty
{
};
}
// namespace detail
template
<
class
C
,
class
T
>
...
...
@@ -71,6 +75,12 @@ bool all_of(const std::initializer_list<T>& c, const Predicate& p)
return
std
::
all_of
(
c
.
begin
(),
c
.
end
(),
p
);
}
/// all_of overload selected for the detail::empty tag.
/// The predicate is never invoked; the result is always true.
template <class Predicate>
bool all_of(detail::empty /*no_elements*/, const Predicate& /*unused_predicate*/)
{
    return true;
}
template
<
class
C
,
class
Predicate
>
bool
any_of
(
const
C
&
c
,
const
Predicate
&
p
)
{
...
...
@@ -83,6 +93,12 @@ bool any_of(const std::initializer_list<T>& c, const Predicate& p)
return
std
::
any_of
(
c
.
begin
(),
c
.
end
(),
p
);
}
/// any_of overload selected for the detail::empty tag.
/// The predicate is never invoked; the result is always false.
template <class Predicate>
bool any_of(detail::empty /*no_elements*/, const Predicate& /*unused_predicate*/)
{
    return false;
}
template
<
class
C
,
class
Predicate
>
bool
none_of
(
const
C
&
c
,
const
Predicate
&
p
)
{
...
...
@@ -95,6 +111,12 @@ bool none_of(const std::initializer_list<T>& c, const Predicate& p)
return
std
::
none_of
(
c
.
begin
(),
c
.
end
(),
p
);
}
/// none_of overload selected for the detail::empty tag.
/// The predicate is never invoked; the result is always true.
template <class Predicate>
bool none_of(detail::empty /*no_elements*/, const Predicate& /*unused_predicate*/)
{
    return true;
}
template
<
class
Range
,
class
Iterator
>
void
copy
(
Range
&&
r
,
Iterator
it
)
{
...
...
src/include/migraphx/raw_data.hpp
View file @
511c8d8f
...
...
@@ -212,6 +212,25 @@ auto visit_all(T&& x, Ts&&... xs)
};
}
template
<
class
T
>
auto
visit_all
(
const
std
::
vector
<
T
>&
x
)
{
auto
&&
s
=
x
.
front
().
get_shape
();
if
(
!
std
::
all_of
(
x
.
begin
(),
x
.
end
(),
[
&
](
const
T
&
y
)
{
return
y
.
get_shape
().
type
()
==
s
.
type
();
}))
MIGRAPHX_THROW
(
"Types must be the same"
);
return
[
&
](
auto
v
)
{
s
.
visit_type
([
&
](
auto
as
)
{
using
type
=
typename
decltype
(
as
)
::
type
;
std
::
vector
<
tensor_view
<
type
>>
result
;
std
::
transform
(
x
.
begin
(),
x
.
end
(),
std
::
back_inserter
(
result
),
[
&
](
const
auto
&
y
)
{
return
make_view
(
y
.
get_shape
(),
as
.
from
(
y
.
data
()));
});
v
(
result
);
});
};
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
...
...
src/include/migraphx/requires.hpp
View file @
511c8d8f
...
...
@@ -15,35 +15,18 @@ struct and_ : std::is_same<and_<Bs...>, and_<(Bs || true)...>> // NOLINT
template
<
bool
B
>
using
bool_c
=
std
::
integral_constant
<
bool
,
B
>
;
template
<
int
N
>
struct
requires_enum
{
enum
e
{
a
=
0
};
};
#define MIGRAPHX_REQUIRES_PRIMITIVE_CAT(x, y) x##y
#define MIGRAPHX_REQUIRES_CAT(x, y) MIGRAPHX_REQUIRES_PRIMITIVE_CAT(x, y)
#define MIGRAPHX_REQUIRES_
CAT(x, y) x##y
#define MIGRAPHX_REQUIRES_
VAR() MIGRAPHX_REQUIRES_CAT(PrivateRequires, __LINE__)
#ifdef CPPCHECK
#define MIGRAPHX_REQUIRES(...) class = void
#else
#if 0
// TODO: This currently crashed on clang
#define MIGRAPHX_REQUIRES(...) \
typename migraphx::requires_enum<__LINE__>::e MIGRAPHX_REQUIRES_CAT( \
PrivateRequires, \
__LINE__) = migraphx::requires_enum<__LINE__>::a, \
class = typename std::enable_if<and_<__VA_ARGS__, \
MIGRAPHX_REQUIRES_CAT(PrivateRequires, __LINE__) == \
migraphx::requires_enum<__LINE__>::a>{}>::type
#else
#define MIGRAPHX_REQUIRES(...) \
typename migraphx::requires_enum<__LINE__>::e MIGRAPHX_REQUIRES_CAT( \
PrivateRequires, __LINE__) = migraphx::requires_enum<__LINE__>::a, \
class = typename std::enable_if<and_<__VA_ARGS__>{}>::type
#endif
#define MIGRAPHX_REQUIRES(...) \
bool MIGRAPHX_REQUIRES_VAR() = true, \
typename std::enable_if<(MIGRAPHX_REQUIRES_VAR() && (migraphx::and_<__VA_ARGS__>{})), \
int>::type = 0
#endif
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/include/migraphx/streamutils.hpp
View file @
511c8d8f
...
...
@@ -42,7 +42,9 @@ template <class Range>
auto
stream_write_value_impl
(
rank
<
1
>
,
std
::
ostream
&
os
,
const
Range
&
r
)
->
decltype
(
r
.
begin
(),
r
.
end
(),
void
())
{
os
<<
"{"
;
os
<<
stream_range
(
r
);
os
<<
"}"
;
}
template
<
class
T
>
...
...
src/include/migraphx/stringutils.hpp
View file @
511c8d8f
...
...
@@ -52,6 +52,8 @@ inline std::string transform_string(std::string s, F f)
/// Returns `s` after passing it through transform_string with an upper-casing functor.
///
/// ::toupper has undefined behaviour when given a value that is neither EOF nor
/// representable as unsigned char, which happens for negative plain `char` values
/// (e.g. bytes >= 0x80 on platforms where char is signed). Each character is therefore
/// taken as unsigned char before the call and narrowed back to char afterwards.
inline std::string to_upper(std::string s)
{
    return transform_string(std::move(s),
                            [](unsigned char c) { return static_cast<char>(::toupper(c)); });
}
/// Returns `s` after passing it through transform_string with a lower-casing functor.
///
/// ::tolower has undefined behaviour when given a value that is neither EOF nor
/// representable as unsigned char, which happens for negative plain `char` values
/// (e.g. bytes >= 0x80 on platforms where char is signed). Each character is therefore
/// taken as unsigned char before the call and narrowed back to char afterwards.
inline std::string to_lower(std::string s)
{
    return transform_string(std::move(s),
                            [](unsigned char c) { return static_cast<char>(::tolower(c)); });
}
inline
bool
starts_with
(
const
std
::
string
&
value
,
const
std
::
string
&
prefix
)
{
if
(
prefix
.
size
()
>
value
.
size
())
...
...
src/onnx/CMakeLists.txt
View file @
511c8d8f
...
...
@@ -19,7 +19,7 @@ rocm_install_targets(
add_executable
(
read_onnx read_onnx.cpp
)
rocm_clang_tidy_check
(
read_onnx
)
target_link_libraries
(
read_onnx migraphx_onnx
)
target_link_libraries
(
read_onnx
migraphx_cpu
migraphx_onnx
)
if
(
MIGRAPHX_ENABLE_GPU
)
...
...
src/onnx/onnx.cpp
View file @
511c8d8f
...
...
@@ -100,6 +100,7 @@ struct onnx_parser
void
init_actv_func
()
{
// Support name format of all lower case or the first letter capital
map_actv_funcs
.
insert
(
std
::
make_pair
(
"tanh"
,
op
::
tanh
{}));
map_actv_funcs
.
insert
(
std
::
make_pair
(
"relu"
,
op
::
relu
{}));
map_actv_funcs
.
insert
(
std
::
make_pair
(
"sigmoid"
,
op
::
sigmoid
{}));
...
...
@@ -352,7 +353,8 @@ struct onnx_parser
{
// insert zeros for pad op (args[0] has 4 dims)
padding
=
{
0
,
0
,
padding
[
0
],
padding
[
1
],
0
,
0
,
padding
[
2
],
padding
[
3
]};
l0
=
prog
.
add_instruction
(
op
::
pad
{
padding
},
l0
);
l0
=
prog
.
add_instruction
(
op
::
pad
{
padding
,
std
::
numeric_limits
<
float
>::
lowest
()},
l0
);
}
else
{
...
...
@@ -870,7 +872,9 @@ struct onnx_parser
auto
names
=
attributes
.
at
(
"activations"
).
strings
();
vec_names
.
clear
();
vec_names
.
resize
(
names
.
size
());
std
::
copy
(
names
.
begin
(),
names
.
end
(),
vec_names
.
begin
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
vec_names
.
begin
(),
[](
auto
name
)
{
return
to_lower
(
name
);
});
}
auto
name_it
=
std
::
find_if
(
vec_names
.
begin
(),
vec_names
.
end
(),
[
&
](
auto
&
name
)
{
...
...
@@ -961,7 +965,9 @@ struct onnx_parser
auto
names
=
attributes
.
at
(
"activations"
).
strings
();
vec_names
.
clear
();
vec_names
.
resize
(
names
.
size
());
std
::
copy
(
names
.
begin
(),
names
.
end
(),
vec_names
.
begin
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
vec_names
.
begin
(),
[](
auto
name
)
{
return
to_lower
(
name
);
});
}
// need 4 activation functions
...
...
@@ -1088,7 +1094,9 @@ struct onnx_parser
auto
names
=
attributes
.
at
(
"activations"
).
strings
();
vec_names
.
clear
();
vec_names
.
resize
(
names
.
size
());
std
::
copy
(
names
.
begin
(),
names
.
end
(),
vec_names
.
begin
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
vec_names
.
begin
(),
[](
auto
name
)
{
return
to_lower
(
name
);
});
}
// need 6 activation functions for bidirectional directions
...
...
src/py/CMakeLists.txt
View file @
511c8d8f
...
...
@@ -12,12 +12,7 @@ if(MIGRAPHX_ENABLE_PYTHON)
C_VISIBILITY_PRESET hidden
CXX_VISIBILITY_PRESET hidden
)
if
(
MIGRAPHX_ENABLE_TF
)
target_link_libraries
(
migraphx_py PRIVATE migraphx migraphx_tf migraphx_cpu
)
target_compile_definitions
(
migraphx_py PRIVATE -DENABLE_TF
)
else
()
target_link_libraries
(
migraphx_py PRIVATE migraphx migraphx_onnx migraphx_cpu
)
endif
()
target_link_libraries
(
migraphx_py PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_cpu
)
if
(
MIGRAPHX_ENABLE_GPU
)
target_link_libraries
(
migraphx_py PRIVATE migraphx_gpu
)
target_compile_definitions
(
migraphx_py PRIVATE -DHAVE_GPU
)
...
...
src/py/migraphx_py.cpp
View file @
511c8d8f
...
...
@@ -6,11 +6,9 @@
#include <migraphx/generate.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/stringutils.hpp>
#ifdef ENABLE_TF
#include <migraphx/tf.hpp>
#else
#include <migraphx/onnx.hpp>
#
endif
#
include <migraphx/type_name.hpp>
#ifdef HAVE_GPU
#include <migraphx/gpu/target.hpp>
...
...
@@ -104,8 +102,13 @@ migraphx::shape to_shape(const py::buffer_info& info)
t
=
as
.
type_enum
();
n
=
sizeof
(
as
());
}
});
if
(
n
==
0
)
{
MIGRAPHX_THROW
(
"MIGRAPHX PYTHON: Unsupported data type"
+
info
.
format
);
}
auto
strides
=
info
.
strides
;
std
::
transform
(
strides
.
begin
(),
strides
.
end
(),
strides
.
begin
(),
[
&
](
auto
i
)
->
std
::
size_t
{
return
n
>
0
?
i
/
n
:
0
;
...
...
@@ -161,16 +164,13 @@ PYBIND11_MODULE(migraphx, m)
.
def
(
"__ne__"
,
std
::
not_equal_to
<
migraphx
::
program
>
{})
.
def
(
"__repr__"
,
[](
const
migraphx
::
program
&
p
)
{
return
migraphx
::
to_string
(
p
);
});
#ifdef ENABLE_TF
m
.
def
(
"parse_tf"
,
&
migraphx
::
parse_tf
,
"Parse tf protobuf (default format is nhwc)"
,
py
::
arg
(
"filename"
),
py
::
arg
(
"is_nhwc"
)
=
true
);
#else
m
.
def
(
"parse_onnx"
,
&
migraphx
::
parse_onnx
);
#endif
m
.
def
(
"get_target"
,
[](
const
std
::
string
&
name
)
->
migraphx
::
target
{
if
(
name
==
"cpu"
)
return
migraphx
::
cpu
::
target
{};
...
...
src/rewrite_rnn.cpp
View file @
511c8d8f
This diff is collapsed.
Click to expand it.
src/targets/cpu/lowering.cpp
View file @
511c8d8f
...
...
@@ -517,40 +517,60 @@ struct cpu_unary
}
};
struct
softmax
2d
struct
cpu_
softmax
{
std
::
string
name
()
const
{
return
"cpu::softmax2d"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
inputs
.
front
();
}
op
::
softmax
op
;
template
<
class
Self
,
class
F
>
static
auto
reflect
(
Self
&
self
,
F
f
)
{
return
migraphx
::
reflect
(
self
.
op
,
f
);
}
std
::
string
name
()
const
{
return
"cpu::softmax"
;
}
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
template
<
typename
T
>
std
::
size_t
compute_batch_index
(
T
idx
,
shape
&
batch_shape
,
int
axis
)
const
{
idx
[
axis
]
=
0
;
return
batch_shape
.
index
(
idx
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
auto
batch_lens
=
output_shape
.
lens
();
batch_lens
[
op
.
axis
]
=
1
;
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
visit_all
(
result
,
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
using
value_type
=
typename
decltype
(
input
)
::
value_type
;
auto
nb
=
input
.
get_shape
().
lens
()[
0
];
auto
nc
=
input
.
get_shape
().
lens
()[
1
];
auto
nh
=
input
.
get_shape
().
lens
()[
2
];
auto
nw
=
input
.
get_shape
().
lens
()[
3
];
dfor
(
nb
,
nh
,
nw
)([
&
](
std
::
size_t
b
,
std
::
size_t
i
,
std
::
size_t
j
)
{
value_type
cmax
=
std
::
numeric_limits
<
value_type
>::
lowest
();
for
(
std
::
size_t
c
=
0
;
c
<
nc
;
c
++
)
{
cmax
=
std
::
max
(
cmax
,
input
(
b
,
c
,
i
,
j
));
}
for
(
std
::
size_t
c
=
0
;
c
<
nc
;
c
++
)
{
output
(
b
,
c
,
i
,
j
)
=
std
::
exp
(
input
(
b
,
c
,
i
,
j
)
-
cmax
);
}
value_type
sum
=
value_type
(
0
);
for
(
std
::
size_t
c
=
0
;
c
<
nc
;
c
++
)
{
sum
+=
output
(
b
,
c
,
i
,
j
);
}
for
(
std
::
size_t
c
=
0
;
c
<
nc
;
c
++
)
{
output
(
b
,
c
,
i
,
j
)
=
output
(
b
,
c
,
i
,
j
)
/
sum
;
}
std
::
vector
<
value_type
>
batch_max
(
batch_shape
.
elements
(),
std
::
numeric_limits
<
value_type
>::
lowest
());
shape_for_each
(
output_shape
,
[
&
](
auto
idx
)
{
auto
index
=
this
->
compute_batch_index
(
idx
,
batch_shape
,
op
.
axis
);
batch_max
[
index
]
=
std
::
max
(
batch_max
[
index
],
input
(
idx
.
begin
(),
idx
.
end
()));
});
shape_for_each
(
output_shape
,
[
&
](
auto
idx
)
{
auto
index
=
this
->
compute_batch_index
(
idx
,
batch_shape
,
op
.
axis
);
output
(
idx
.
begin
(),
idx
.
end
())
=
std
::
exp
(
input
(
idx
.
begin
(),
idx
.
end
())
-
batch_max
[
index
]);
});
std
::
vector
<
value_type
>
batch_sum
(
batch_shape
.
elements
(),
value_type
(
0
));
shape_for_each
(
output_shape
,
[
&
](
auto
idx
)
{
auto
index
=
this
->
compute_batch_index
(
idx
,
batch_shape
,
op
.
axis
);
batch_sum
[
index
]
+=
output
(
idx
.
begin
(),
idx
.
end
());
});
shape_for_each
(
output_shape
,
[
&
](
auto
idx
)
{
auto
index
=
this
->
compute_batch_index
(
idx
,
batch_shape
,
op
.
axis
);
output
(
idx
.
begin
(),
idx
.
end
())
/=
batch_sum
[
index
];
});
});
return
result
;
}
};
...
...
@@ -569,33 +589,19 @@ struct cpu_logsoftmax
shape
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
return
op
.
compute_shape
(
inputs
);
}
template
<
typename
T
>
std
::
size_t
compute_batch_index
(
const
T
&
idx
,
shape
&
batch_shape
,
int
axis
)
const
std
::
size_t
compute_batch_index
(
T
idx
,
const
shape
&
batch_shape
,
int
axis
)
const
{
if
(
axis
==
0
)
{
return
0
;
}
else
{
std
::
vector
<
std
::
size_t
>
batch_idx
(
idx
.
begin
(),
idx
.
begin
()
+
axis
);
return
batch_shape
.
index
(
batch_idx
.
begin
(),
batch_idx
.
end
());
}
idx
[
axis
]
=
0
;
return
batch_shape
.
index
(
idx
);
}
argument
compute
(
context
&
,
const
shape
&
output_shape
,
std
::
vector
<
argument
>
args
)
const
{
argument
result
{
output_shape
};
auto
lens
=
output_shape
.
lens
();
std
::
vector
<
std
::
size_t
>
batch_lens
{};
if
(
op
.
axis
==
0
)
{
batch_lens
.
push_back
(
1
);
}
else
{
batch_lens
.
insert
(
batch_lens
.
begin
(),
lens
.
begin
(),
lens
.
begin
()
+
op
.
axis
);
}
shape
batch_shape
{
migraphx
::
shape
::
uint32_type
,
batch_lens
};
auto
batch_lens
=
output_shape
.
lens
();
batch_lens
[
op
.
axis
]
=
1
;
shape
batch_shape
{
shape
::
int32_type
,
batch_lens
};
visit_all
(
result
,
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
using
value_type
=
typename
decltype
(
input
)
::
value_type
;
std
::
vector
<
value_type
>
batch_max
(
batch_shape
.
elements
(),
...
...
@@ -660,7 +666,7 @@ struct cpu_apply
apply_map
[
"logsoftmax"
]
=
extend_op
<
cpu_logsoftmax
,
op
::
logsoftmax
>
();
apply_map
[
"lrn"
]
=
extend_op
<
cpu_lrn
,
op
::
lrn
>
();
apply_map
[
"pad"
]
=
extend_op
<
cpu_pad
,
op
::
pad
>
();
apply_map
[
"softmax"
]
=
simple_op
<
softmax2d
>
();
apply_map
[
"softmax"
]
=
extend_op
<
cpu_softmax
,
op
::
softmax
>
();
}
void
apply
()
...
...
src/targets/gpu/CMakeLists.txt
View file @
511c8d8f
...
...
@@ -27,6 +27,7 @@ add_library(migraphx_device
device/add_relu.cpp
device/contiguous.cpp
device/logsoftmax.cpp
device/softmax.cpp
device/convert.cpp
device/mul.cpp
device/concat.cpp
...
...
src/targets/gpu/device/concat.cpp
View file @
511c8d8f
...
...
@@ -10,22 +10,20 @@ namespace gpu {
namespace
device
{
argument
concat
(
hipStream_t
stream
,
const
migraphx
::
shape
&
output_shape
,
const
migraphx
::
shape
&
,
std
::
vector
<
migraphx
::
argument
>
args
,
std
::
vector
<
std
::
size_t
>
offsets
)
{
for
(
std
::
size_t
l
=
0
;
l
<
args
.
size
()
-
1
;
l
++
)
auto
ninputs
=
args
.
size
()
-
1
;
for
(
std
::
size_t
j
=
0
;
j
<
ninputs
;
j
++
)
{
auto
argl
=
args
[
l
];
std
::
size_t
nelements
=
argl
.
get_shape
().
elements
();
visit_all
(
args
.
back
(),
argl
)([
&
](
auto
output
,
auto
input
)
{
visit_tensor_size
(
output_shape
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
auto
*
outptr
=
output
.
data
()
+
offsets
[
l
];
const
auto
*
inptr
=
input
.
data
();
hip_tensor_descriptor
<
ndim
>
desc_input
(
input
.
get_shape
());
hip_tensor_descriptor
<
ndim
>
desc_output
(
output
.
get_shape
());
gs_launch
(
stream
,
nelements
)(
[
=
](
auto
i
)
{
outptr
[
desc_output
.
linear
(
desc_input
.
multi
(
i
))]
=
inptr
[
i
];
});
auto
&&
arg
=
args
[
j
];
std
::
size_t
nelements
=
arg
.
get_shape
().
elements
();
auto
offset
=
offsets
[
j
];
hip_visit_all
(
args
.
back
(),
arg
)([
&
](
auto
output
,
auto
input
)
{
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
auto
idx
=
output
.
get_shape
().
index
(
input
.
get_shape
().
multi
(
i
));
output
.
data
()[
idx
+
offset
]
=
input
.
data
()[
i
];
});
});
}
...
...
src/targets/gpu/device/gather.cpp
View file @
511c8d8f
...
...
@@ -11,35 +11,30 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
device
{
argument
gather
(
hipStream_t
stream
,
const
migraphx
::
shape
&
output_shape
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
argument
gather
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
argument
arg2
,
int
axis
)
{
auto
axis_index
=
(
axis
<
0
)
?
(
axis
+
args
[
0
].
get_shape
().
lens
().
size
())
:
axis
;
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
std
::
size_t
nelements
=
output_shape
.
elements
();
args
[
1
].
visit
([
&
](
auto
indices
)
{
const
auto
*
indices_ptr
=
device_cast
(
indices
.
data
());
auto
*
out_ptr
=
device_cast
(
output
.
data
());
const
auto
*
in_ptr
=
device_cast
(
input
.
data
());
auto
&
input_shape
=
args
[
0
].
get_shape
();
auto
lens
=
input_shape
.
lens
();
lens
[
axis_index
]
=
args
[
1
].
get_shape
().
elements
();
migraphx
::
shape
out_comp_shape
{
output_shape
.
type
(),
lens
};
visit_tensor_size
(
out_comp_shape
.
lens
().
size
(),
[
&
](
auto
n_out_dim
)
{
hip_tensor_descriptor
<
n_out_dim
>
desc_input
(
input_shape
);
hip_tensor_descriptor
<
n_out_dim
>
desc_output
(
out_comp_shape
);
gs_launch
(
stream
,
nelements
)([
=
](
auto
ii
)
{
auto
in_idx
=
desc_output
.
multi
(
ii
);
in_idx
[
axis_index
]
=
indices_ptr
[
in_idx
[
axis_index
]];
out_ptr
[
ii
]
=
in_ptr
[
desc_input
.
linear
(
in_idx
)];
auto
axis_index
=
(
axis
<
0
)
?
(
axis
+
arg1
.
get_shape
().
lens
().
size
())
:
axis
;
auto
&
input_shape
=
arg1
.
get_shape
();
auto
lens
=
input_shape
.
lens
();
lens
[
axis_index
]
=
arg2
.
get_shape
().
elements
();
shape
out_comp_shape
{
result
.
get_shape
().
type
(),
lens
};
std
::
size_t
nelements
=
result
.
get_shape
().
elements
();
visit_all
(
result
,
arg1
)([
&
](
auto
output
,
auto
input_v
)
{
hip_visit_views
(
input_v
,
out_comp_shape
)([
&
](
auto
input
,
auto
out_comp
)
{
arg2
.
visit
([
&
](
auto
indices
)
{
const
auto
*
indices_ptr
=
device_cast
(
indices
.
data
());
auto
*
output_ptr
=
device_cast
(
output
.
data
());
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
auto
idx
=
out_comp
.
multi
(
i
);
idx
[
axis_index
]
=
indices_ptr
[
idx
[
axis_index
]];
output_ptr
[
i
]
=
input
[
idx
];
});
});
});
});
return
args
.
back
()
;
return
result
;
}
}
// namespace device
...
...
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
0 → 100644
View file @
511c8d8f
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
/// Fixed-size aggregate holding N elements of type T.
///
/// Every member function is declared MIGRAPHX_DEVICE_CONSTEXPR. The type has no
/// user-declared constructors, so it stays an aggregate and can be brace-initialised.
template <class T, std::size_t N>
struct hip_array
{
    T d[N];

    /// Unchecked element access.
    MIGRAPHX_DEVICE_CONSTEXPR T& operator[](std::size_t i) { return d[i]; }
    MIGRAPHX_DEVICE_CONSTEXPR const T& operator[](std::size_t i) const { return d[i]; }

    /// Pointer to the first element.
    MIGRAPHX_DEVICE_CONSTEXPR T* data() { return d; }
    MIGRAPHX_DEVICE_CONSTEXPR const T* data() const { return d; }

    /// Element count, returned as an integral_constant so it is usable at compile time.
    MIGRAPHX_DEVICE_CONSTEXPR std::integral_constant<std::size_t, N> size() const { return {}; }

    MIGRAPHX_DEVICE_CONSTEXPR T* begin() { return d; }
    MIGRAPHX_DEVICE_CONSTEXPR const T* begin() const { return d; }

    MIGRAPHX_DEVICE_CONSTEXPR T* end() { return d + size(); }
    MIGRAPHX_DEVICE_CONSTEXPR const T* end() const { return d + size(); }

    /// Sum over i of x[i] * (*this)[i].
    MIGRAPHX_DEVICE_CONSTEXPR T dot(const hip_array& x) const
    {
        T result = 0;
        for(std::size_t i = 0; i < N; i++)
            result += x[i] * d[i];
        return result;
    }

    /// Product of all elements (1 when N == 0).
    MIGRAPHX_DEVICE_CONSTEXPR T product() const
    {
        T result = 1;
        for(std::size_t i = 0; i < N; i++)
            result *= d[i];
        return result;
    }

    /// Element-wise product of two arrays.
    friend MIGRAPHX_DEVICE_CONSTEXPR hip_array operator*(const hip_array& x, const hip_array& y)
    {
        // Value-initialise: before C++20 a constexpr function may not define a variable
        // without initialisation, so `hip_array result;` prevented use in constant expressions.
        hip_array result{};
        for(std::size_t i = 0; i < N; i++)
            result[i] = x[i] * y[i];
        return result;
    }
};
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
View file @
511c8d8f
This diff is collapsed.
Click to expand it.
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
0 → 100644
View file @
511c8d8f
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP
#include <migraphx/gpu/device/array.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
/// Rank-N shape (lengths, strides and a `standard` flag) stored in hip_array members.
///
/// It is built on the host from a migraphx `shape`; the query functions are declared
/// MIGRAPHX_DEVICE_CONSTEXPR.
template <std::size_t N>
struct hip_shape
{
    using hip_index = hip_array<std::size_t, N>;

    hip_array<std::size_t, N> lens    = {};
    hip_array<std::size_t, N> strides = {};
    bool standard                     = false;

    __device__ __host__ hip_shape() = default;

    /// Copies lens, strides and the standard flag from `s`.
    /// Intentionally implicit: make_hip_shape relies on the conversion.
    /// The rank of `s` must equal N (checked with assert only).
    hip_shape(const shape& s) : standard(s.standard())
    {
        assert(s.lens().size() == N);
        assert(s.strides().size() == N);
        std::copy(s.lens().begin(), s.lens().end(), lens.begin());
        std::copy(s.strides().begin(), s.strides().end(), strides.begin());
    }

    /// Product of all lengths.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t elements() const { return lens.product(); }

    /// Offset of a multi-index: dot product of `x` with the strides.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t index(hip_index x) const { return x.dot(strides); }

    /// Offset of a multi-index given as a list; only the first x.size() strides are used.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t index(std::initializer_list<std::size_t> x) const
    {
        std::size_t idx = 0;
        for(std::size_t i = 0; i < x.size(); i++)
            idx += *(x.begin() + i) * strides[i];
        return idx;
    }

    /// Maps a linear element number to an offset.
    /// For a standard shape the element number is returned unchanged; otherwise it is
    /// decomposed by lengths, starting from the last dimension, and recombined with strides.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t index(std::size_t i) const
    {
        if(this->standard)
            return i;

        const std::size_t rank = this->lens.size();
        std::size_t s          = 1; // product of the lengths already consumed
        std::size_t result     = 0;
        for(std::size_t j = 0; j < rank; j++)
        {
            const std::size_t k      = rank - j - 1;
            const std::size_t stride = this->strides[k];
            const std::size_t len    = this->lens[k];
            const std::size_t slen   = s * len;
            const std::size_t idx    = (i % slen) / s;
            result += stride * idx;
            s = slen;
        }
        return result;
    }

    /// Splits `idx` into a multi-index by successive division by each stride.
    /// NOTE(review): this looks correct only when the strides are in decreasing order and
    /// non-zero - confirm against callers.
    MIGRAPHX_DEVICE_CONSTEXPR hip_index multi(std::size_t idx) const
    {
        // Value-initialise: before C++20 a constexpr function may not define a variable
        // without initialisation.
        hip_index result{};
        std::size_t tidx = idx;
        for(std::size_t is = 0; is < result.size(); is++)
        {
            result[is] = tidx / strides[is];
            tidx       = tidx % strides[is];
        }
        return result;
    }
};
/// Converts a migraphx `shape` into a hip_shape of rank N, which the caller supplies
/// explicitly as the template argument.
template <std::size_t N>
hip_shape<N> make_hip_shape(const shape& x)
{
    return hip_shape<N>(x);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
511c8d8f
#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#include <hip/hip_runtime.h>
#include <migraphx/functional.hpp>
#include <migraphx/config.hpp>
#include <migraphx/gpu/device/visit.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
template
<
class
F
>
void
visit_tensor_size
(
std
::
size_t
n
,
F
f
)
{
switch
(
n
)
{
case
1
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
1
>
{});
break
;
}
case
2
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
2
>
{});
break
;
}
case
3
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
3
>
{});
break
;
}
case
4
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
4
>
{});
break
;
}
case
5
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
5
>
{});
break
;
}
default:
throw
std
::
runtime_error
(
"Unknown tensor size"
);
}
}
template
<
size_t
NDim
>
struct
hip_index
{
size_t
d
[
NDim
];
__device__
__host__
size_t
&
operator
[](
size_t
i
)
{
return
d
[
i
];
}
__device__
__host__
size_t
operator
[](
size_t
i
)
const
{
return
d
[
i
];
}
};
template
<
std
::
size_t
NDim
>
using
hip_tensor_index
=
hip_array
<
std
::
size_t
,
NDim
>
;
template
<
size_t
NDim
>
template
<
std
::
size_t
NDim
>
struct
hip_tensor_descriptor
{
__device__
__host__
hip_tensor_descriptor
()
=
default
;
...
...
@@ -63,26 +22,26 @@ struct hip_tensor_descriptor
std
::
copy
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
strides
);
}
__device__
__host__
hip_index
<
NDim
>
multi
(
size_t
idx
)
const
__device__
__host__
hip_
tensor_
index
<
NDim
>
multi
(
std
::
size_t
idx
)
const
{
hip_index
<
NDim
>
result
{};
size_t
tidx
=
idx
;
for
(
size_t
is
=
0
;
is
<
NDim
;
is
++
)
hip_
tensor_
index
<
NDim
>
result
{};
std
::
size_t
tidx
=
idx
;
for
(
std
::
size_t
is
=
0
;
is
<
NDim
;
is
++
)
{
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
}
return
result
;
}
__device__
__host__
size_t
linear
(
hip_index
<
NDim
>
s
)
const
__device__
__host__
std
::
size_t
linear
(
hip_
tensor_
index
<
NDim
>
s
)
const
{
size_t
idx
=
0
;
for
(
size_t
i
=
0
;
i
<
NDim
;
i
++
)
std
::
size_t
idx
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
NDim
;
i
++
)
idx
+=
s
[
i
]
*
strides
[
i
];
return
idx
;
}
size_t
lens
[
NDim
]
=
{};
size_t
strides
[
NDim
]
=
{};
std
::
size_t
lens
[
NDim
]
=
{};
std
::
size_t
strides
[
NDim
]
=
{};
};
}
// namespace device
...
...
src/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
0 → 100644
View file @
511c8d8f
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP
#include <migraphx/gpu/device/shape.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
/// Non-owning view that pairs a raw element pointer with a hip_shape<N>.
///
/// It is constructed on the host from a tensor_view<T> or from a pointer plus `shape`;
/// the accessors are declared MIGRAPHX_DEVICE_CONSTEXPR. All accessors are const yet
/// return mutable pointers/references, i.e. constness of the view does not propagate to
/// the elements.
template <class T, std::size_t N>
struct hip_tensor_view
{
    using value_type = T;
    using hip_index  = typename hip_shape<N>::hip_index;

    __device__ __host__ hip_tensor_view() = default;

    // Implicit on purpose: the make_hip_view helpers return braced lists converted here.
    __host__ hip_tensor_view(tensor_view<T> x) : ptr(x.data()), dims(x.get_shape()) {}
    __host__ hip_tensor_view(T* x, const shape& ss) : ptr(x), dims(ss) {}

    /// Shape describing this view.
    MIGRAPHX_DEVICE_CONSTEXPR const hip_shape<N>& get_shape() const { return dims; }

    /// Number of elements according to the shape.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t size() const { return dims.elements(); }

    /// Underlying pointer.
    MIGRAPHX_DEVICE_CONSTEXPR value_type* data() const { return ptr; }

    /// Element access; `i` is forwarded to hip_shape::index to obtain the offset.
    template <class U>
    MIGRAPHX_DEVICE_CONSTEXPR value_type& operator[](U i) const
    {
        return ptr[dims.index(i)];
    }

    MIGRAPHX_DEVICE_CONSTEXPR value_type* begin() const { return ptr; }

    MIGRAPHX_DEVICE_CONSTEXPR value_type* end() const { return ptr + size(); }

    private:
    value_type* ptr = nullptr;
    hip_shape<N> dims{};
};
/// Creates a rank-N hip_tensor_view over the pointer `x` described by shape `s`.
/// N is supplied explicitly; T is deduced from the pointer.
template <std::size_t N, class T>
hip_tensor_view<T, N> make_hip_view(const shape& s, T* x)
{
    return hip_tensor_view<T, N>(x, s);
}
/// Creates a rank-N hip_tensor_view from an existing tensor_view.
/// N is supplied explicitly; T is deduced from the view.
template <std::size_t N, class T>
hip_tensor_view<T, N> make_hip_view(tensor_view<T> x)
{
    return hip_tensor_view<T, N>(x);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
View file @
511c8d8f
...
...
@@ -8,14 +8,45 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#include <hip/hip_runtime.h>
#include <migraphx/half.hpp>
#include <migraphx/config.hpp>
#include <migraphx/tensor_view.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT
// vec<T, N>: alias for T carrying the compiler's ext_vector_type(N) attribute
// (a compiler vector extension), i.e. a vector of N elements of type T.
template
<
class
T
,
std
::
size_t
N
>
using
vec
=
T
__attribute__
((
ext_vector_type
(
N
)));
/// Reinterprets a pointer to vec<T, N> as a pointer to its scalar element type T.
template <std::size_t N, class T>
__device__ __host__ T* as_pointer(vec<T, N>* x)
{
    T* scalars = reinterpret_cast<T*>(x);
    return scalars;
}
/// Reinterprets a pointer to scalar T as a pointer to vec<T, N>.
/// NOTE(review): no alignment or size check is made here - presumably callers guarantee
/// the buffer is suitable for vector access; confirm.
template <std::size_t N, class T>
__device__ __host__ vec<T, N>* as_vec(T* x)
{
    using vector_ptr = vec<T, N>*;
    return reinterpret_cast<vector_ptr>(x);
}
/// Re-types a tensor_view<T> as a tensor_view of vec<T, N>.
/// The shape is passed through unchanged; only the data pointer is reinterpreted.
template <std::size_t N, class T>
tensor_view<vec<T, N>> as_vec(tensor_view<T> x)
{
    using vector_view = tensor_view<vec<T, N>>;
    return vector_view{x.get_shape(), as_vec<N>(x.data())};
}
/// Captures the arguments `xs` by copy and returns a callable taking `(f, n)`.
/// That callable converts each captured argument with as_vec<N>, takes element `n` of each
/// result, and passes those elements to `f`, returning whatever `f` returns.
template <std::size_t N, class... Ts>
auto pack_vec(Ts... xs)
{
    return [=](auto fn, std::size_t pos) {
        return fn(as_vec<N>(xs)[pos]...);
    };
}
using
gpu_half
=
__fp16
;
namespace
detail
{
...
...
@@ -25,6 +56,12 @@ struct device_type
using
type
=
T
;
};
// Partial specialisation for vector types: the device type of vec<T, N> is a vec of
// the same width N whose element type is device_type<T>::type.
template
<
class
T
,
std
::
size_t
N
>
struct
device_type
<
vec
<
T
,
N
>>
{
using
type
=
vec
<
typename
device_type
<
T
>::
type
,
N
>
;
};
template
<
>
struct
device_type
<
half
>
{
...
...
@@ -38,7 +75,7 @@ struct host_type
};
template
<
>
struct
device
_type
<
gpu_half
>
struct
host
_type
<
gpu_half
>
{
using
type
=
half
;
};
...
...
@@ -64,9 +101,9 @@ host_type<T>* host_cast(T* x)
}
template
<
class
T
>
device_type
<
T
>
device_cast
(
T
x
)
device_type
<
T
>
device_cast
(
const
T
&
x
)
{
return
reinterpret_cast
<
device_type
<
T
>>
(
x
);
return
reinterpret_cast
<
const
device_type
<
T
>
&
>
(
x
);
}
template
<
class
T
>
...
...
@@ -75,6 +112,12 @@ device_type<T>* device_cast(T* x)
return
reinterpret_cast
<
device_type
<
T
>*>
(
x
);
}
/// Re-types a tensor_view<T> as a tensor_view of device_type<T>.
/// The shape is passed through unchanged; only the data pointer is reinterpreted.
template <class T>
tensor_view<device_type<T>> device_cast(tensor_view<T> x)
{
    using device_view = tensor_view<device_type<T>>;
    return device_view{x.get_shape(), reinterpret_cast<device_type<T>*>(x.data())};
}
template
<
class
T
>
T
to_hip_type
(
T
x
)
{
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment