gaoqiong / MIGraphX · Commits

Commit 57444235
authored Oct 29, 2018 by Khalique

    fix merge conflict

Parents: a0ea12f6, d8bf45cf

Changes: 49 · Showing 20 changed files with 320 additions and 78 deletions (+320 −78)
src/targets/gpu/device/add_relu.cpp                            +9   −4
src/targets/gpu/device/concat.cpp                              +36  −0
src/targets/gpu/device/contiguous.cpp                          +2   −2
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp   +4   −4
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp     +48  −37
src/targets/gpu/fuse_ops.cpp                                   +10  −9
src/targets/gpu/gemm.cpp                                       +1   −1
src/targets/gpu/hip.cpp                                        +15  −8
src/targets/gpu/include/migraph/gpu/concat.hpp                 +37  −0
src/targets/gpu/include/migraph/gpu/context.hpp                +100 −3
src/targets/gpu/include/migraph/gpu/device/add.hpp             +7   −2
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp        +7   −2
src/targets/gpu/include/migraph/gpu/device/concat.hpp          +20  −0
src/targets/gpu/include/migraph/gpu/device/contiguous.hpp      +2   −1
src/targets/gpu/include/migraph/gpu/gemm.hpp                   +1   −1
src/targets/gpu/include/migraph/gpu/hip.hpp                    +2   −0
src/targets/gpu/include/migraph/gpu/rocblas.hpp                +1   −0
src/targets/gpu/leaky_relu.cpp                                 +1   −1
src/targets/gpu/lowering.cpp                                   +16  −2
src/targets/gpu/pooling.cpp                                    +1   −1
src/targets/gpu/device/add_relu.cpp

@@ -5,17 +5,22 @@ namespace migraph {
 namespace gpu {
 namespace device {
 
-void add_relu(const argument& result, const argument& arg1, const argument& arg2)
+void add_relu(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
-    nary(result, arg1, arg2)([](auto x, auto y) { return std::max<decltype(x + y)>(0, x + y); });
+    nary(stream, result, arg1, arg2)(
+        [](auto x, auto y) { return std::max<decltype(x + y)>(0, x + y); });
 }
 
-void add_relu(const argument& result,
+void add_relu(hipStream_t stream,
+              const argument& result,
               const argument& arg1,
               const argument& arg2,
               const argument& arg3)
 {
-    nary(result, arg1, arg2, arg3)(
+    nary(stream, result, arg1, arg2, arg3)(
         [](auto x, auto y, auto z) { return std::max<decltype(x + y + z)>(0, x + y + z); });
 }
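Every device entry point in this commit gains the same leading parameter: the kernel is enqueued on a caller-supplied hipStream_t instead of the default (null) stream. A minimal sketch of driving the new overload directly, where r, a, and b stand for GPU-backed migraph::argument values (placeholders, not code from this commit):

    hipStream_t stream = nullptr;
    hipStreamCreate(&stream);
    migraph::gpu::device::add_relu(stream, r, a, b); // enqueues the fused kernel, returns immediately
    hipStreamSynchronize(stream);                    // wait for the kernel before reading r
    hipStreamDestroy(stream);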
src/targets/gpu/device/concat.cpp (new file, 0 → 100644)

#include <migraph/shape.hpp>
#include <migraph/argument.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraph/gpu/device/tensor.hpp>
#include <migraph/gpu/device/launch.hpp>

namespace migraph {
namespace gpu {
namespace device {

argument concat(hipStream_t stream,
                const migraph::shape& output_shape,
                std::vector<migraph::argument> args,
                std::vector<std::size_t> offsets)
{
    for(std::size_t l = 0; l < args.size() - 1; l++)
    {
        auto argl             = args[l];
        std::size_t nelements = argl.get_shape().elements();
        visit_all(args.back(), argl)([&](auto output, auto input) {
            visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
                auto* outptr      = output.data() + offsets[l];
                const auto* inptr = input.data();
                hip_tensor_descriptor<ndim> desc_input(input.get_shape());
                hip_tensor_descriptor<ndim> desc_output(output.get_shape());
                gs_launch(stream, nelements)(
                    [=](auto i) { outptr[desc_output.linear(desc_input.multi(i))] = inptr[i]; });
            });
        });
    }
    return args.back();
}

} // namespace device
} // namespace gpu
} // namespace migraph
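Each input element i is mapped through desc_input.multi(i) to its multi-dimensional index, then through desc_output.linear(...) to an offset in the strided output. A plain-C++ model of that pair of mappings, assuming row-major semantics (illustrative only, not the actual hip_tensor_descriptor implementation):

    #include <array>
    #include <cstddef>

    // multi(): decompose a flat index into a multi-dimensional index given lens.
    template <std::size_t N>
    std::array<std::size_t, N> multi(std::size_t i, const std::array<std::size_t, N>& lens)
    {
        std::array<std::size_t, N> idx{};
        for(std::size_t k = N; k > 0; k--)
        {
            idx[k - 1] = i % lens[k - 1];
            i /= lens[k - 1];
        }
        return idx;
    }

    // linear(): recombine a multi-dimensional index into a flat offset given strides.
    template <std::size_t N>
    std::size_t linear(const std::array<std::size_t, N>& idx,
                       const std::array<std::size_t, N>& strides)
    {
        std::size_t r = 0;
        for(std::size_t k = 0; k < N; k++)
            r += idx[k] * strides[k];
        return r;
    }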
src/targets/gpu/device/contiguous.cpp

@@ -6,9 +6,9 @@ namespace migraph {
 namespace gpu {
 namespace device {
 
-void contiguous(argument result, argument arg)
+void contiguous(hipStream_t stream, argument result, argument arg)
 {
-    nary_nonstandard(std::move(result), std::move(arg))([](auto x) { return x; });
+    nary_nonstandard(stream, std::move(result), std::move(arg))([](auto x) { return x; });
 }
 
 } // namespace device
src/targets/gpu/device/include/migraph/gpu/device/launch.hpp

@@ -21,7 +21,7 @@ __global__ void launcher(F f)
     f(idx);
 }
 
-inline auto launch(std::size_t global, std::size_t local)
+inline auto launch(hipStream_t stream, std::size_t global, std::size_t local)
 {
     return [=](auto f) {
         assert(local > 0);

@@ -29,17 +29,17 @@ inline auto launch(std::size_t global, std::size_t local)
         using f_type = decltype(f);
         dim3 nblocks(global / local);
         dim3 nthreads(local);
-        hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, nullptr, f);
+        hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, stream, f);
     };
 }
 
-inline auto gs_launch(std::size_t n, std::size_t local = 1024)
+inline auto gs_launch(hipStream_t stream, std::size_t n, std::size_t local = 1024)
 {
     std::size_t groups  = 1 + n / local;
     std::size_t nglobal = std::min<std::size_t>(256, groups) * local;
     return [=](auto f) {
-        launch(nglobal, local)([=](auto idx) {
+        launch(stream, nglobal, local)([=](auto idx) {
             for(size_t i = idx.global; i < n; i += nglobal)
             {
                 f(i);
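gs_launch caps the grid at 256 groups and has each thread walk the range with a grid-sized stride, so one fixed-size launch covers any n. The same grid-stride pattern as a standalone HIP kernel (a hypothetical saxpy, not from this commit):

    #include <hip/hip_runtime.h>

    // Grid-stride loop: a fixed-size grid processes an arbitrary-length range.
    __global__ void saxpy(float a, const float* x, float* y, size_t n)
    {
        size_t nglobal = static_cast<size_t>(blockDim.x) * gridDim.x;
        for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += nglobal)
            y[i] = a * x[i] + y[i];
    }

    // Enqueued on an explicit stream, mirroring launch(stream, nglobal, nlocal):
    // hipLaunchKernelGGL(saxpy, dim3(256), dim3(1024), 0, stream, a, x, y, n);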
src/targets/gpu/device/include/migraph/gpu/device/nary.hpp

@@ -32,7 +32,7 @@ auto pack_vec4(Ts... xs)
 }
 
 template <class F, class... Arguments>
-auto nary_nonstandard_impl(F f, argument result, Arguments... args)
+auto nary_nonstandard_impl(hipStream_t stream, F f, argument result, Arguments... args)
 {
     const auto& output_shape = result.get_shape();
     visit_all(result, args...)([&](auto output, auto... inputs) {

@@ -41,7 +41,7 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
             std::make_pair(hip_tensor_descriptor<ndim>{inputs.get_shape()}, inputs.data())...);
         hip_tensor_descriptor<ndim> out_desc(output_shape);
         auto* outp = output.data();
-        gs_launch(output_shape.elements())([=](auto i) {
+        gs_launch(stream, output_shape.elements())([=](auto i) {
             data([&](auto&&... ps) {
                 auto outidx = out_desc.multi(i);
                 outp[i]     = f(ps.second[ps.first.linear(outidx)]...);

@@ -52,8 +52,12 @@ auto nary_nonstandard_impl(F f, argument result, Arguments... args)
 }
 
 template <class F>
-void trinary_broadcast_vec_impl(
-    F f, const argument& result, const argument& arg1, const argument& arg2, const argument& arg3)
+void trinary_broadcast_vec_impl(hipStream_t stream,
+                                F f,
+                                const argument& result,
+                                const argument& arg1,
+                                const argument& arg2,
+                                const argument& arg3)
 {
     const auto& output_shape = result.get_shape();
     const auto& b_shape      = arg3.get_shape();

@@ -79,7 +83,7 @@ void trinary_broadcast_vec_impl(
     const std::size_t n            = output.size() / vec_size;
     const std::size_t bdim_vec_len = bdim_len / vec_size;
-    launch(nglobal, nlocal)([=](auto idx) __device__ {
+    launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
         MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
         // Load bias into LDS
         for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)

@@ -107,8 +111,12 @@ void trinary_broadcast_vec_impl(
 }
 
 template <class F>
-void trinary_broadcast_impl(
-    F f, const argument& result, const argument& arg1, const argument& arg2, const argument& arg3)
+void trinary_broadcast_impl(hipStream_t stream,
+                            F f,
+                            const argument& result,
+                            const argument& arg1,
+                            const argument& arg2,
+                            const argument& arg3)
 {
     const auto& output_shape = result.get_shape();
     const auto& b_shape      = arg3.get_shape();

@@ -132,7 +140,7 @@ void trinary_broadcast_impl(
     const std::size_t nglobal = 256 * nlocal;
     const std::size_t n       = output.size();
-    launch(nglobal, nlocal)([=](auto idx) __device__ {
+    launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
         MIGRAPH_DEVICE_SHARED type buffer[2048];
         // Load bias into LDS
         for(size_t i = idx.local; i < bdim_len; i += nlocal)

@@ -154,10 +162,8 @@ void trinary_broadcast_impl(
 }
 
 template <class F>
-void binary_broadcast_vec_impl(F f,
-                               const argument& result,
+void binary_broadcast_vec_impl(hipStream_t stream,
+                               F f,
+                               const argument& result,
                                const argument& arg1,
                                const argument& arg2)
 {
     const auto& output_shape = result.get_shape();
     const auto& b_shape      = arg2.get_shape();

@@ -182,7 +188,7 @@ void binary_broadcast_vec_impl(F f,
     const std::size_t n            = output.size() / vec_size;
     const std::size_t bdim_vec_len = bdim_len / vec_size;
-    launch(nglobal, nlocal)([=](auto idx) __device__ {
+    launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
         MIGRAPH_DEVICE_SHARED vec4<type> buffer[2048 / vec_size];
         // Load bias into LDS
         for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)

@@ -209,7 +215,8 @@ void binary_broadcast_vec_impl(F f,
 }
 
 template <class F>
-void binary_broadcast_impl(F f, const argument& result, const argument& arg1, const argument& arg2)
+void binary_broadcast_impl(
+    hipStream_t stream, F f, const argument& result, const argument& arg1, const argument& arg2)
 {
     const auto& output_shape = result.get_shape();
     const auto& b_shape      = arg2.get_shape();

@@ -232,7 +239,7 @@ void binary_broadcast_impl(F f, const argument& result, const argument& arg1, co
     const std::size_t nglobal = 256 * nlocal;
     const std::size_t n       = output.size();
-    launch(nglobal, nlocal)([=](auto idx) __device__ {
+    launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
         MIGRAPH_DEVICE_SHARED type buffer[2048];
         // Load bias into LDS
         for(size_t i = idx.local; i < bdim_len; i += nlocal)

@@ -253,7 +260,7 @@ void binary_broadcast_impl(F f, const argument& result, const argument& arg1, co
 }
 
 template <class F, class... Arguments>
-void nary_standard_vec_impl(F f, argument result, Arguments... args)
+void nary_standard_vec_impl(hipStream_t stream, F f, argument result, Arguments... args)
 {
     // assert(x.get_shape().elements() == y.get_shape().elements());
     const auto& output_shape = result.get_shape();

@@ -262,7 +269,7 @@ void nary_standard_vec_impl(F f, argument result, Arguments... args)
     const std::size_t vec_size = 4;
     auto data  = pack_vec4(inputs.data()...);
     auto* outp = as_vec4(output.data());
-    gs_launch(output_shape.elements() / vec_size)([=](auto i) {
+    gs_launch(stream, output_shape.elements() / vec_size)([=](auto i) {
         vec4<type> out = outp[i];
         data(
             [&](auto... xs) {

@@ -278,50 +285,51 @@ void nary_standard_vec_impl(F f, argument result, Arguments... args)
 }
 
 template <class F, class... Arguments>
-void nary_standard_impl(F f, argument result, Arguments... args)
+void nary_standard_impl(hipStream_t stream, F f, argument result, Arguments... args)
 {
     // assert(x.get_shape().elements() == y.get_shape().elements());
     const auto& output_shape = result.get_shape();
     visit_all(result, args...)([&](auto output, auto... inputs) {
         auto data  = pack(inputs.data()...);
         auto* outp = output.data();
-        gs_launch(output_shape.elements())(
+        gs_launch(stream, output_shape.elements())(
             [=](auto i) { data([&](auto... xps) { outp[i] = f(xps[i]...); }); });
     });
 }
 
 template <class F, class... Arguments>
-void nary_impl(F f, argument result, Arguments... args)
+void nary_impl(hipStream_t stream, F f, argument result, Arguments... args)
 {
     bool standard = all_of({args.get_shape()...}, [](const shape& s) { return s.standard(); });
     bool packed   = all_of({args.get_shape()...}, [](const shape& s) { return s.packed(); });
     bool same_shapes =
         all_of({args.get_shape()...}, [&](const shape& s) { return s == result.get_shape(); });
     if(standard or (packed and same_shapes))
-        nary_standard_impl(f, result, args...);
+        nary_standard_impl(stream, f, result, args...);
     else
-        nary_nonstandard_impl(f, result, args...);
+        nary_nonstandard_impl(stream, f, result, args...);
 }
 
 template <class... Arguments>
-auto nary_nonstandard(argument result, Arguments... args)
+auto nary_nonstandard(hipStream_t stream, argument result, Arguments... args)
 {
-    return [=](auto f) { nary_nonstandard_impl(f, result, args...); };
+    return [=](auto f) { nary_nonstandard_impl(stream, f, result, args...); };
 }
 
 template <class... Arguments>
-auto nary_standard(argument result, Arguments... args)
+auto nary_standard(hipStream_t stream, argument result, Arguments... args)
 {
-    return [=](auto f) { nary_standard_impl(f, result, args...); };
+    return [=](auto f) { nary_standard_impl(stream, f, result, args...); };
 }
 
 template <class... Arguments>
-auto nary(argument result, Arguments... args)
+auto nary(hipStream_t stream, argument result, Arguments... args)
 {
-    return [=](auto f) { nary_impl(f, result, args...); };
+    return [=](auto f) { nary_impl(stream, f, result, args...); };
 }
 
-inline auto nary(const argument& result, const argument& arg1, const argument& arg2)
+inline auto nary(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
 {
     return [=](auto f) {
         // TODO: Check result and arg1 shape is the same

@@ -339,18 +347,21 @@ inline auto nary(const argument& result, const argument& arg1, const argument& a
             const bool divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
                                         (arg1.get_shape().elements() % 4 == 0);
             if(divisible_by_4)
-                binary_broadcast_vec_impl(f, result, arg1, arg2);
+                binary_broadcast_vec_impl(stream, f, result, arg1, arg2);
             else
-                binary_broadcast_impl(f, result, arg1, arg2);
+                binary_broadcast_impl(stream, f, result, arg1, arg2);
             return;
         }
     }
-        nary_impl(f, result, arg1, arg2);
+        nary_impl(stream, f, result, arg1, arg2);
     };
 }
 
-inline auto
-nary(const argument& result, const argument& arg1, const argument& arg2, const argument& arg3)
+inline auto nary(hipStream_t stream,
+                 const argument& result,
+                 const argument& arg1,
+                 const argument& arg2,
+                 const argument& arg3)
 {
     return [=](auto f) {
         // TODO: Check result and arg1 shape is the same

@@ -369,13 +380,13 @@ nary(const argument& result, const argument& arg1, const argument& arg2, const a
             const bool divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
                                         (arg1.get_shape().elements() % 4 == 0);
             if(divisible_by_4)
-                trinary_broadcast_vec_impl(f, result, arg1, arg2, arg3);
+                trinary_broadcast_vec_impl(stream, f, result, arg1, arg2, arg3);
             else
-                trinary_broadcast_impl(f, result, arg1, arg2, arg3);
+                trinary_broadcast_impl(stream, f, result, arg1, arg2, arg3);
             return;
         }
     }
-        nary_impl(f, result, arg1, arg2, arg3);
+        nary_impl(stream, f, result, arg1, arg2, arg3);
     };
 }
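nary() is the dispatch point: standard (or packed, same-shape) arguments take the flat elementwise path, everything else the descriptor-based path, and the two- and three-argument overloads add a vectorized broadcast fast path on top. A new elementwise op is then a one-liner; a sketch modeled on add_relu above (mul is illustrative, not part of this commit):

    #include <migraph/gpu/device/nary.hpp>

    namespace migraph {
    namespace gpu {
    namespace device {

    // Hypothetical elementwise multiply built on the stream-aware nary().
    void mul(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
    {
        nary(stream, result, arg1, arg2)([](auto x, auto y) { return x * y; });
    }

    } // namespace device
    } // namespace gpu
    } // namespace migraph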
src/targets/gpu/fuse_ops.cpp

@@ -82,13 +82,14 @@ struct fusion
         // int algo_count = 1;
         // miopenConvFwdAlgorithm_t algo;
         // miopenFusionPlanConvolutionGetAlgo(fp.get(), 1, &algo_count, &algo);
-        // miopenFusionPlanGetWorkSpaceSize(ctx.handle.get(), fp.get(), &ws_size, algo);
+        // miopenFusionPlanGetWorkSpaceSize(ctx.get_stream().get_miopen(), fp.get(), &ws_size,
+        // algo);
         return shape{shape::int8_type, {ws_size}};
     }
 
     void compile(context& ctx)
     {
-        auto status = miopenCompileFusionPlan(ctx.handle.get(), fp.get());
+        auto status = miopenCompileFusionPlan(ctx.get_stream().get_miopen(), fp.get());
         if(status != miopenStatusSuccess)
             MIGRAPH_THROW("Compiling fusion plan failed");
     }

@@ -100,7 +101,7 @@ struct fusion
     {
         auto x_td   = make_tensor(x.get_shape());
         auto y_td   = make_tensor(y.get_shape());
-        auto status = miopenExecuteFusionPlan(ctx.handle.get(),
+        auto status = miopenExecuteFusionPlan(ctx.get_stream().get_miopen(),
                                               fp.get(),
                                               x_td.get(),
                                               x.implicit(),

@@ -152,9 +153,9 @@ struct hip_triadd
         check_shapes{inputs, *this}.has(4);
         return inputs.front();
     }
-    argument compute(context&, const shape&, const std::vector<argument>& args) const
+    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
     {
-        device::add(args.at(3), args.at(0), args.at(1), args.at(2));
+        device::add(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
         return args.at(3);
     }
 };

@@ -167,9 +168,9 @@ struct hip_triadd_relu
         check_shapes{inputs, *this}.has(4);
         return inputs.front();
     }
-    argument compute(context&, const shape&, const std::vector<argument>& args) const
+    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
     {
-        device::add_relu(args.at(3), args.at(0), args.at(1), args.at(2));
+        device::add_relu(ctx.get_stream().get(), args.at(3), args.at(0), args.at(1), args.at(2));
         return args.at(3);
     }
 };

@@ -182,9 +183,9 @@ struct hip_add_relu
         check_shapes{inputs, *this}.has(3);
         return inputs.front();
     }
-    argument compute(context&, const shape&, const std::vector<argument>& args) const
+    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
    {
-        device::add_relu(args.at(2), args.at(0), args.at(1));
+        device::add_relu(ctx.get_stream().get(), args.at(2), args.at(0), args.at(1));
         return args.at(2);
     }
 };
src/targets/gpu/gemm.cpp

@@ -26,7 +26,7 @@ argument miopen_gemm::compute(context& ctx,
     rocblas_int m = output_shape.lens()[0];
     rocblas_int n = output_shape.lens()[1];
     rocblas_int k = args[0].get_shape().lens()[1];
-    rocblas_sgemm(ctx.rbhandle.get(),
+    rocblas_sgemm(ctx.get_stream().get_rocblas(),
                   transb ? rocblas_operation_transpose : rocblas_operation_none,
                   transa ? rocblas_operation_transpose : rocblas_operation_none,
                   n,
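The argument order in this call (transb before transa, n before m) is untouched by the commit; it is the usual trick for computing a row-major product with a column-major BLAS. For reference, it relies on:

    C = A·B  (row-major)   is equivalent to   Cᵀ = Bᵀ·Aᵀ  (column-major view of the same buffers)

so only the handle changes here: it now comes from the stream object, which (per context.hpp below) binds rocBLAS to the same hipStream_t used by the handwritten kernels.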
src/targets/gpu/hip.cpp

@@ -38,14 +38,6 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
     return hip_ptr{result};
 }
 
-template <class T>
-hip_ptr write_to_gpu(const T& x)
-{
-    using type = typename T::value_type;
-    auto size  = x.size() * sizeof(type);
-    return write_to_gpu(x.data(), size);
-}
-
 template <class T>
 std::vector<T> read_from_gpu(const void* x, std::size_t sz)
 {

@@ -65,6 +57,14 @@ hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
     return result;
 }
 
+template <class T>
+hip_ptr write_to_gpu(const T& x)
+{
+    using type = typename T::value_type;
+    auto size  = x.size() * sizeof(type);
+    return write_to_gpu(x.data(), size);
+}
+
 argument allocate_gpu(const shape& s, bool host)
 {
     auto p = share(allocate_gpu(s.bytes() + 1, host));

@@ -88,6 +88,13 @@ argument from_gpu(argument arg)
     return result;
 }
 
+void set_device(std::size_t id)
+{
+    auto status = hipSetDevice(id);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Error setting device");
+}
+
 void gpu_sync() { hipDeviceSynchronize(); }
 
 void copy_to_gpu(argument src, argument dst)
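Note that the templated write_to_gpu(const T&) is not deleted, only moved below the write_to_gpu(const void*, std::size_t, bool) definition. A plausible reason (an assumption, not stated in the commit): its inner call has only built-in argument types, so it gets no argument-dependent lookup and the non-template overload must already be visible at the template's point of definition.

    // Assumption about why the move matters: with only built-in argument types
    // (const void*, std::size_t), ordinary lookup at the template's definition
    // point must find the callee; ADL cannot rescue it at instantiation time.
    hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false);

    template <class T>
    hip_ptr write_to_gpu(const T& x)
    {
        using type = typename T::value_type;
        auto size  = x.size() * sizeof(type);
        return write_to_gpu(x.data(), size); // resolves via the declaration above
    }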
src/targets/gpu/include/migraph/gpu/concat.hpp (new file, 0 → 100644)

#ifndef MIGRAPH_GUARD_RTGLIB_CONCAT_HPP
#define MIGRAPH_GUARD_RTGLIB_CONCAT_HPP

#include <migraph/gpu/lowering.hpp>
#include <migraph/manage_ptr.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/generate.hpp>
#include <migraph/shape_for_each.hpp>
#include <migraph/gpu/miopen.hpp>
#include <migraph/gpu/hip.hpp>
#include <migraph/dfor.hpp>
#include <migraph/gpu/device/concat.hpp>
#include <migraph/gpu/device/add.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/gpu/rocblas.hpp>
#include <migraph/gpu/context.hpp>
#include <utility>

namespace migraph {
namespace gpu {

struct hip_concat
{
    op::concat op;
    std::string name() const { return "gpu::concat"; }
    shape compute_shape(std::vector<shape> inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
};

} // namespace gpu
} // namespace migraph

#endif
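The matching compute() definition lives in a .cpp that is not among the 20 files shown on this page. A hedged sketch of what it plausibly does, derived from the device::concat signature above (the offset arithmetic is an assumption, not shown in this commit page):

    // Sketch only: per-input element offsets along op.axis, using output strides.
    argument hip_concat::compute(context& ctx,
                                 const shape& output_shape,
                                 const std::vector<argument>& args) const
    {
        std::vector<std::size_t> offsets;
        std::size_t pos = 0;
        for(std::size_t i = 0; i < args.size() - 1; i++) // args.back() is the output buffer
        {
            offsets.push_back(pos * output_shape.strides()[op.axis]);
            pos += args[i].get_shape().lens()[op.axis];
        }
        return device::concat(ctx.get_stream().get(), output_shape, args, offsets);
    }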
src/targets/gpu/include/migraph/gpu/context.hpp

@@ -4,17 +4,114 @@
 #include <migraph/gpu/miopen.hpp>
 #include <migraph/gpu/rocblas.hpp>
 #include <migraph/gpu/hip.hpp>
+#include <migraph/env.hpp>
 
 namespace migraph {
 namespace gpu {
 
+MIGRAPH_DECLARE_ENV_VAR(MIGRAPH_DISABLE_NULL_STREAM)
+
+struct hip_device
+{
+    hip_device() { add_stream(); }
+
+    hip_device(std::size_t id) : device_id(id) { add_stream(); }
+
+    struct stream
+    {
+        using hip_stream_ptr = MIGRAPH_MANAGE_PTR(hipStream_t, hipStreamDestroy);
+
+        stream() {}
+
+        stream(std::size_t device_number) : id(device_number) {}
+
+        void setup() { set_device(id); }
+
+        static hip_stream_ptr create_stream()
+        {
+            hipStream_t result = nullptr;
+            auto status        = hipStreamCreate(&result);
+            if(status != hipSuccess)
+                MIGRAPH_THROW("Failed to allocate stream");
+            return hip_stream_ptr{result};
+        }
+
+        hipStream_t get()
+        {
+            if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
+            {
+                setup();
+                if(s == nullptr)
+                    s = create_stream();
+                assert(s.get() != nullptr);
+                return s.get();
+            }
+            return nullptr;
+        }
+
+        auto create_miopen_handle()
+        {
+            if(enabled(MIGRAPH_DISABLE_NULL_STREAM{}))
+                return make_obj<miopen_handle>(&miopenCreateWithStream, get());
+            else
+                return make_obj<miopen_handle>(&miopenCreate);
+        }
+
+        auto get_miopen()
+        {
+            setup();
+            if(mihandle == nullptr)
+                mihandle = create_miopen_handle();
+            assert(mihandle.get() != nullptr);
+            return mihandle.get();
+        }
+
+        auto get_rocblas()
+        {
+            setup();
+            if(rbhandle == nullptr)
+                rbhandle = create_rocblas_handle_ptr(get());
+            assert(rbhandle.get() != nullptr);
+            return rbhandle.get();
+        }
+
+        private:
+        std::size_t id                      = 0;
+        shared<hip_stream_ptr> s            = nullptr;
+        shared<miopen_handle> mihandle      = nullptr;
+        shared<rocblas_handle_ptr> rbhandle = nullptr;
+    };
+
+    void add_stream() { streams.emplace_back(device_id); }
+
+    stream& get_stream() { return streams.at(current_stream); }
+
+    void set_stream(std::size_t n) { current_stream = n; }
+
+    private:
+    std::size_t device_id      = 0;
+    std::size_t current_stream = 0;
+    std::vector<stream> streams;
+};
+
 struct context
 {
-    shared<miopen_handle> handle;
-    shared<rocblas_handle_ptr> rbhandle;
+    context(std::size_t n = 0) : current_device(std::make_shared<hip_device>(n)) {}
+
     argument scratch;
+
+    hip_device& get_current_device()
+    {
+        assert(current_device != nullptr);
+        return *current_device;
+    }
+
+    hip_device::stream& get_stream() { return get_current_device().get_stream(); }
+
     std::vector<argument> literals{};
     void finish() const { gpu_sync(); }
+
+    private:
+    // TODO: Make this a vector to support multiple devices
+    std::shared_ptr<hip_device> current_device;
 };
 
 } // namespace gpu
 } // namespace migraph
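This is the heart of the commit: the bare miopen/rocblas handle members on context are replaced by a per-device stream object that owns the HIP stream and lazily creates library handles bound to it. A sketch of what an op's compute() gains, modeled on hip_triadd in fuse_ops.cpp above (not code from this commit):

    // Inside some gpu op's compute():
    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const
    {
        auto& s = ctx.get_stream();
        // s.get() is the raw hipStream_t: the null stream by default, or a real
        // stream when MIGRAPH_DISABLE_NULL_STREAM is set in the environment.
        device::add(s.get(), args.at(2), args.at(0), args.at(1));
        // s.get_miopen() / s.get_rocblas() lazily create library handles bound
        // to that same stream, so all three paths serialize correctly.
        return args.at(2);
    }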
src/targets/gpu/include/migraph/gpu/device/add.hpp

@@ -3,14 +3,19 @@
 #define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_HPP
 
 #include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-void add(const argument& result, const argument& arg1, const argument& arg2);
+void add(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
 
-void add(const argument& result, const argument& arg1, const argument& arg2, const argument& arg3);
+void add(hipStream_t stream,
+         const argument& result,
+         const argument& arg1,
+         const argument& arg2,
+         const argument& arg3);
 
 } // namespace device
 } // namespace gpu
src/targets/gpu/include/migraph/gpu/device/add_relu.hpp

@@ -3,14 +3,19 @@
 #define MIGRAPH_GUARD_RTGLIB_DEVICE_ADD_RELU_HPP
 
 #include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-void add_relu(const argument& result, const argument& arg1, const argument& arg2);
+void add_relu(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2);
 
-void add_relu(const argument& result,
+void add_relu(hipStream_t stream,
+              const argument& result,
               const argument& arg1,
               const argument& arg2,
               const argument& arg3);
src/targets/gpu/include/migraph/gpu/device/concat.hpp (new file, 0 → 100644)

#ifndef MIGRAPH_GUARD_RTGLIB_DEVICE_CONCAT_HPP
#define MIGRAPH_GUARD_RTGLIB_DEVICE_CONCAT_HPP

#include <migraph/argument.hpp>
#include <hip/hip_runtime_api.h>

namespace migraph {
namespace gpu {
namespace device {

argument concat(hipStream_t stream,
                const shape& output_shape,
                std::vector<argument> args,
                std::vector<std::size_t> offsets);

} // namespace device
} // namespace gpu
} // namespace migraph

#endif
src/targets/gpu/include/migraph/gpu/device/contiguous.hpp

@@ -2,12 +2,13 @@
 #define MIGRAPH_GUARD_MIGRAPHLIB_KERNELS_HPP
 
 #include <migraph/argument.hpp>
+#include <hip/hip_runtime_api.h>
 
 namespace migraph {
 namespace gpu {
 namespace device {
 
-void contiguous(argument result, argument arg);
+void contiguous(hipStream_t stream, argument result, argument arg);
 
 } // namespace device
 } // namespace gpu
src/targets/gpu/include/migraph/gpu/gemm.hpp

@@ -22,7 +22,7 @@ namespace gpu {
 struct miopen_gemm
 {
-    op::gemm op;
+    op::dot op;
     std::string name() const { return "gpu::gemm"; }
     shape compute_shape(const std::vector<shape>& inputs) const;
     argument
View file @
57444235
...
@@ -13,6 +13,8 @@ migraph::argument to_gpu(migraph::argument arg, bool host = false);
...
@@ -13,6 +13,8 @@ migraph::argument to_gpu(migraph::argument arg, bool host = false);
migraph
::
argument
from_gpu
(
migraph
::
argument
arg
);
migraph
::
argument
from_gpu
(
migraph
::
argument
arg
);
void
set_device
(
std
::
size_t
id
);
void
gpu_sync
();
void
gpu_sync
();
void
copy_to_gpu
(
argument
src
,
argument
dst
);
void
copy_to_gpu
(
argument
src
,
argument
dst
);
...
...
src/targets/gpu/include/migraph/gpu/rocblas.hpp

@@ -11,6 +11,7 @@ namespace gpu {
 using rocblas_handle_ptr = MIGRAPH_MANAGE_PTR(rocblas_handle, rocblas_destroy_handle);
 
 rocblas_handle_ptr create_rocblas_handle_ptr();
+rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s);
 
 } // namespace gpu
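Only the declaration of the stream-taking overload is on this page. A plausible definition, assuming rocblas_set_stream (the public rocBLAS call for binding a handle to a HIP stream):

    // Sketch: create a handle, then bind it to the given stream.
    rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
    {
        rocblas_handle_ptr rb = create_rocblas_handle_ptr();
        rocblas_set_stream(rb.get(), s); // subsequent BLAS calls enqueue on s
        return rb;
    }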
src/targets/gpu/leaky_relu.cpp

@@ -20,7 +20,7 @@ argument miopen_leaky_relu::compute(context& ctx,
     float alpha = 1, beta = 0;
     auto x_desc = make_tensor(args[0].get_shape());
     auto y_desc = make_tensor(output_shape);
-    miopenActivationForward(ctx.handle.get(),
+    miopenActivationForward(ctx.get_stream().get_miopen(),
                             ad.get(),
                             &alpha,
                             x_desc.get(),
src/targets/gpu/lowering.cpp

@@ -23,6 +23,7 @@
 #include <migraph/gpu/batchnorm.hpp>
 #include <migraph/gpu/pooling.hpp>
 #include <migraph/gpu/gemm.hpp>
+#include <migraph/gpu/concat.hpp>
 #include <utility>
 
 namespace migraph {

@@ -69,7 +70,7 @@ struct miopen_apply
         {
             check_shape(s, apply_mul(it));
         }
-        else if(it->name() == "gemm")
+        else if(it->name() == "dot")
         {
             check_shape(s, apply_gemm(it));
         }

@@ -77,6 +78,10 @@ struct miopen_apply
         {
             check_shape(s, apply_contiguous(it));
         }
+        else if(it->name() == "concat")
+        {
+            check_shape(s, apply_concat(it));
+        }
         else if(it->name() == "batch_norm_inference")
         {
             check_shape(s, apply_batch_norm_inference(it));

@@ -172,7 +177,7 @@ struct miopen_apply
     instruction_ref apply_gemm(instruction_ref ins)
     {
-        auto&& op   = any_cast<op::gemm>(ins->get_operator());
+        auto&& op   = any_cast<op::dot>(ins->get_operator());
         auto output = insert_allocation(ins, ins->get_shape());
         return prog->replace_instruction(
             ins, miopen_gemm{op}, ins->inputs().at(0), ins->inputs().at(1), output);

@@ -185,6 +190,15 @@ struct miopen_apply
         return prog->replace_instruction(ins, miopen_contiguous{op}, ins->inputs().at(0), output);
     }
 
+    instruction_ref apply_concat(instruction_ref ins)
+    {
+        auto&& op   = any_cast<op::concat>(ins->get_operator());
+        auto output = insert_allocation(ins, ins->get_shape());
+        std::vector<instruction_ref> refs = ins->inputs();
+        refs.push_back(output);
+        return prog->replace_instruction(ins, hip_concat{op}, refs);
+    }
+
     instruction_ref apply_batch_norm_inference(instruction_ref ins)
     {
         auto&& op = any_cast<op::batch_norm_inference>(ins->get_operator());
src/targets/gpu/pooling.cpp

@@ -21,7 +21,7 @@ argument miopen_pooling::compute(context& ctx,
     float alpha = 1, beta = 0;
-    miopenPoolingForward(ctx.handle.get(),
+    miopenPoolingForward(ctx.get_stream().get_miopen(),
                          pd.get(),
                          &alpha,
                          x_desc.get(),