Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
55422f0e
Commit
55422f0e
authored
Jun 18, 2019
by
Paul
Browse files
Add vectorized nary broadcast
parent
8ec57ece
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
106 additions
and
32 deletions
+106
-32
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
+60
-30
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+17
-1
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
+29
-1
No files found.
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
View file @
55422f0e
...
@@ -14,28 +14,10 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -14,28 +14,10 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
// template <class T>
template
<
class
...
Ts
>
// using vec4 = T __attribute__((ext_vector_type(4)));
auto
pack
(
Ts
...
xs
)
__device__
template
<
class
T
,
std
::
size_t
N
>
using
vec
=
T
__attribute__
((
ext_vector_type
(
N
)));
template
<
std
::
size_t
N
,
class
T
>
__device__
__host__
vec
<
T
,
N
>*
as_vec
(
T
*
x
)
{
{
return
reinterpret_cast
<
vec
<
T
,
N
>*>
(
x
);
return
[
=
](
auto
f
)
{
return
f
(
xs
...);
};
}
template
<
std
::
size_t
N
,
class
T
>
__device__
__host__
T
*
as_pointer
(
vec
<
T
,
N
>*
x
)
{
return
reinterpret_cast
<
T
*>
(
x
);
}
template
<
std
::
size_t
N
,
class
...
Ts
>
auto
pack_vec
(
Ts
...
xs
)
{
return
[
=
](
auto
f
,
std
::
size_t
n
)
{
return
f
(
as_vec
<
4
>
(
xs
)[
n
]...);
};
}
}
template
<
class
F
,
class
...
Arguments
>
template
<
class
F
,
class
...
Arguments
>
...
@@ -258,6 +240,55 @@ void binary_broadcast_impl(
...
@@ -258,6 +240,55 @@ void binary_broadcast_impl(
});
});
}
}
// Vectorized n-ary kernel for the case where one argument is broadcast along
// a single dimension: the broadcast input is staged once per workgroup into
// LDS (shared memory), and the remaining inputs are processed vec_size
// elements at a time through clang extended-vector types.
//
// Preconditions (established by the caller before dispatching here — TODO
// confirm against the dispatch in nary()):
//   - the broadcast dimension fits in the LDS buffer (<= 2048 scalars)
//   - element counts and the broadcast stride are divisible by vec_size,
//     so all vec_size lanes of a vector share one broadcast value
template <class F, class... Arguments>
void nary_broadcast_vec_impl(
    hipStream_t stream, F f, argument result, argument barg, Arguments... args)
{
    const auto& output_shape = result.get_shape();
    const auto& b_shape      = barg.get_shape();
    // The broadcast dimension is the first axis of barg with a non-zero stride.
    auto bdim = std::distance(b_shape.strides().begin(),
                              std::find_if(b_shape.strides().begin(),
                                           b_shape.strides().end(),
                                           [](auto x) { return x != 0; }));
    auto bdim_len         = output_shape.lens()[bdim];
    auto bdim_stride      = output_shape.strides()[bdim];
    auto bdim_next_stride = bdim_stride * bdim_len;

    const std::size_t vec_size     = 4;
    const std::size_t nlocal       = 1024;
    const std::size_t nglobal      = 256 * nlocal;
    const std::size_t bdim_vec_len = bdim_len / vec_size;
    hip_vec_visit_all<vec_size>(result, barg, args...)(
        [&](auto output, auto binput, auto... inputs) {
            using type                  = typename decltype(output)::value_type;
            const std::size_t nelements = output.size() / vec_size;
            launch(stream, nglobal, nlocal)([=](auto idx) __device__ {
                MIGRAPHX_DEVICE_SHARED type buffer[2048 / vec_size];
                // Load the broadcast input into LDS, one vector per thread
                for(size_t i = idx.local; i < bdim_vec_len; i += nlocal)
                {
                    buffer[i] = binput.data()[i];
                }
                __syncthreads();
                // View the staged vectors as scalars for per-element lookup
                auto* bp = as_pointer(buffer);
                // Process the data
                for(size_t i = idx.global; i < nelements; i += nglobal)
                {
                    // Scalar index of the broadcast value shared by every
                    // lane of vector i (valid because strides are multiples
                    // of vec_size)
                    auto bidx = ((i * vec_size) % bdim_next_stride) / bdim_stride;
                    auto b    = bp[bidx];
                    // Compute into a local copy and store the whole vector
                    // once. The previous code wrote output.data()[i][j]
                    // lane-by-lane and then stored the stale copy with
                    // output.data()[i] = out, clobbering every result.
                    auto out = output.data()[i];
                    pack(inputs.data()[i]...)([&](auto... xs) __device__ {
                        for(std::size_t j = 0; j < vec_size; j++)
                        {
                            out[j] = f(xs[j]..., b);
                        }
                    });
                    output.data()[i] = out;
                }
            });
        });
}
template
<
class
F
,
class
...
Arguments
>
template
<
class
F
,
class
...
Arguments
>
void
nary_broadcast_impl
(
hipStream_t
stream
,
F
f
,
argument
result
,
argument
barg
,
Arguments
...
args
)
void
nary_broadcast_impl
(
hipStream_t
stream
,
F
f
,
argument
result
,
argument
barg
,
Arguments
...
args
)
{
{
...
@@ -385,15 +416,14 @@ auto nary(hipStream_t stream, argument result, Arguments... args)
...
@@ -385,15 +416,14 @@ auto nary(hipStream_t stream, argument result, Arguments... args)
assert
(
bshape
.
lens
()[
b_idx
]
==
b_len
);
assert
(
bshape
.
lens
()[
b_idx
]
==
b_len
);
if
(
b_len
<=
2048
and
std
::
none_of
(
std
::
next
(
b_it
),
strides
.
end
(),
not_zero
))
if
(
b_len
<=
2048
and
std
::
none_of
(
std
::
next
(
b_it
),
strides
.
end
(),
not_zero
))
{
{
nary_broadcast_impl
(
stream
,
f
,
result
,
barg
,
args2
...);
//
const bool divisible_by_4 = (b_len % 4 == 0) and (b_stride % 4 == 0) and
const
bool
divisible_by_4
=
(
b_len
%
4
==
0
)
and
(
b_stride
%
4
==
0
)
and
//
(
arg1
.get_shape().elements() % 4 == 0);
(
front_args
(
args
...)
.
get_shape
().
elements
()
%
4
==
0
);
//
if(divisible_by_4)
if
(
divisible_by_4
)
//
bi
nary_broadcast_vec_impl(stream, f, result, arg
1
, arg);
nary_broadcast_vec_impl
(
stream
,
f
,
result
,
b
arg
,
arg
s2
...
);
//
else
else
//
bi
nary_broadcast_impl(stream, f, result, arg
1
, arg);
nary_broadcast_impl
(
stream
,
f
,
result
,
b
arg
,
arg
s2
...
);
//
return;
return
;
}
}
}
}
});
});
...
...
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
55422f0e
...
@@ -214,13 +214,13 @@ struct hip_shape
...
@@ -214,13 +214,13 @@ struct hip_shape
return
result
;
return
result
;
}
}
};
};
template
<
class
T
,
std
::
size_t
N
>
template
<
class
T
,
std
::
size_t
N
>
struct
hip_tensor_view
struct
hip_tensor_view
{
{
using
value_type
=
device_type
<
T
>
;
using
value_type
=
device_type
<
T
>
;
__device__
__host__
hip_tensor_view
()
=
default
;
__device__
__host__
hip_tensor_view
()
=
default
;
__host__
hip_tensor_view
(
tensor_view
<
T
>
x
)
:
d
(
device_cast
(
x
.
data
())),
s
(
x
.
get_shape
())
{}
__host__
hip_tensor_view
(
tensor_view
<
T
>
x
)
:
d
(
device_cast
(
x
.
data
())),
s
(
x
.
get_shape
())
{}
__host__
hip_tensor_view
(
T
*
x
,
const
shape
&
ss
)
:
d
(
x
),
s
(
ss
)
{}
MIGRAPHX_DEVICE_CONSTEXPR
const
hip_shape
<
N
>&
get_shape
()
const
{
return
s
;
}
MIGRAPHX_DEVICE_CONSTEXPR
const
hip_shape
<
N
>&
get_shape
()
const
{
return
s
;
}
...
@@ -249,6 +249,12 @@ hip_tensor_view<T, N> make_hip_tensor_view(tensor_view<T> x)
...
@@ -249,6 +249,12 @@ hip_tensor_view<T, N> make_hip_tensor_view(tensor_view<T> x)
return
x
;
return
x
;
}
}
// Build a device tensor view whose element type is a vector of M scalars,
// reinterpreting the underlying buffer via as_vec<M>. The shape is passed
// through unchanged, so callers index in vector units — the data size must be
// a multiple of M (caller's responsibility).
template <std::size_t N, std::size_t M, class T>
hip_tensor_view<vec<device_type<T>, M>, N> make_hip_vec_tensor_view(tensor_view<T> x)
{
    return {as_vec<M>(device_cast(x.data())), x.get_shape()};
}
template
<
std
::
size_t
N
,
std
::
size_t
M
,
class
T
>
template
<
std
::
size_t
N
,
std
::
size_t
M
,
class
T
>
hip_vector
<
hip_tensor_view
<
T
,
N
>
,
M
>
make_hip_tensor_views
(
const
std
::
vector
<
tensor_view
<
T
>>&
x
)
hip_vector
<
hip_tensor_view
<
T
,
N
>
,
M
>
make_hip_tensor_views
(
const
std
::
vector
<
tensor_view
<
T
>>&
x
)
{
{
...
@@ -268,6 +274,16 @@ auto hip_visit_all(T&& x, Ts&&... xs)
...
@@ -268,6 +274,16 @@ auto hip_visit_all(T&& x, Ts&&... xs)
};
};
}
}
// Vectorized counterpart of hip_visit_all: visits x and xs... and hands the
// callback hip_tensor_views whose element type is a vector of N scalars.
// NOTE(review): the returned lambda captures its arguments by reference, so
// it must be invoked before x/xs go out of scope — same contract as
// hip_visit_all, presumably always called immediately; verify at call sites.
template <std::size_t N, class T, class... Ts>
auto hip_vec_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        // Dispatch on the rank of x so the view's dimension count is a
        // compile-time constant.
        visit_tensor_size(x.get_shape().lens().size(), [&](auto dim) {
            visit_all(x, xs...)([&](auto... vs) {
                f(make_hip_vec_tensor_view<dim, N>(vs)...);
            });
        });
    };
}
template
<
std
::
size_t
N
,
class
T
>
template
<
std
::
size_t
N
,
class
T
>
auto
hip_visit_all
(
const
std
::
vector
<
T
>&
x
)
auto
hip_visit_all
(
const
std
::
vector
<
T
>&
x
)
{
{
...
...
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
View file @
55422f0e
...
@@ -16,6 +16,27 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -16,6 +16,27 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
// Clang extended-vector alias: vec<T, N> is a SIMD vector of N elements of T.
template <class T, std::size_t N>
using vec = T __attribute__((ext_vector_type(N)));

// Reinterpret a scalar pointer as a pointer to vectors of N scalars.
// The caller is responsible for alignment and for the element count being a
// multiple of N.
template <std::size_t N, class T>
__device__ __host__ vec<T, N>* as_vec(T* x)
{
    return reinterpret_cast<vec<T, N>*>(x);
}

// Inverse of as_vec: view a vector pointer as a pointer to its scalars.
template <std::size_t N, class T>
__device__ __host__ T* as_pointer(vec<T, N>* x)
{
    return reinterpret_cast<T*>(x);
}

// Capture the pointers xs and expose them vectorized: the returned callable
// invokes f with the n-th vec<_, N> chunk of every captured pointer.
template <std::size_t N, class... Ts>
auto pack_vec(Ts... xs)
{
    return [=](auto f, std::size_t n) { return f(as_vec<N>(xs)[n]...); };
}
using
gpu_half
=
__fp16
;
using
gpu_half
=
__fp16
;
namespace
detail
{
namespace
detail
{
...
@@ -25,12 +46,19 @@ struct device_type
...
@@ -25,12 +46,19 @@ struct device_type
using
type
=
T
;
using
type
=
T
;
};
};
// Map device_type through extended-vector types element-wise, so that
// device_type<vec<T, N>> yields vec<device_type<T>::type, N>
// (e.g. a vector of half becomes a vector of the device half type).
template <class T, std::size_t N>
struct device_type<T __attribute__((ext_vector_type(N)))>
{
    using type = typename device_type<T>::type __attribute__((ext_vector_type(N)));
};
template
<
>
template
<
>
struct
device_type
<
half
>
struct
device_type
<
half
>
{
{
using
type
=
gpu_half
;
using
type
=
gpu_half
;
};
};
template
<
class
T
>
template
<
class
T
>
struct
host_type
struct
host_type
{
{
...
@@ -38,7 +66,7 @@ struct host_type
...
@@ -38,7 +66,7 @@ struct host_type
};
};
template
<
>
template
<
>
struct
device
_type
<
gpu_half
>
struct
host
_type
<
gpu_half
>
{
{
using
type
=
half
;
using
type
=
half
;
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment