Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
246c4236
Commit
246c4236
authored
Jan 18, 2019
by
Paul
Browse files
Run some cpu ops in parallel
parent
426e28ac
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
138 additions
and
4 deletions
+138
-4
src/include/migraphx/functional.hpp
src/include/migraphx/functional.hpp
+8
-0
src/include/migraphx/par_dfor.hpp
src/include/migraphx/par_dfor.hpp
+45
-0
src/include/migraphx/par_for.hpp
src/include/migraphx/par_for.hpp
+80
-0
src/targets/cpu/lowering.cpp
src/targets/cpu/lowering.cpp
+5
-4
No files found.
src/include/migraphx/functional.hpp
View file @
246c4236
...
@@ -94,6 +94,14 @@ constexpr void each_args(F)
...
@@ -94,6 +94,14 @@ constexpr void each_args(F)
{
{
}
}
template
<
class
F
,
class
T
>
auto
unpack
(
F
f
,
T
&
x
)
{
return
sequence_c
<
std
::
tuple_size
<
T
>
{}
>
([
&
](
auto
...
is
)
{
f
(
std
::
get
<
is
>
(
x
)...);
});
}
/// Implements a fix-point combinator
/// Implements a fix-point combinator
template
<
class
R
,
class
F
>
template
<
class
R
,
class
F
>
detail
::
fix_f
<
R
,
F
>
fix
(
F
f
)
detail
::
fix_f
<
R
,
F
>
fix
(
F
f
)
...
...
src/include/migraphx/par_dfor.hpp
0 → 100644
View file @
246c4236
#ifndef GUARD_RTGLIB_PAR_DFOR_HPP
#define GUARD_RTGLIB_PAR_DFOR_HPP
#include <migraphx/par_for.hpp>
#include <migraphx/functional.hpp>
#include <array>
#include <numeric>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
template
<
class
...
Ts
>
auto
par_dfor
(
Ts
...
xs
)
{
return
[
=
](
auto
f
)
{
using
array_type
=
std
::
array
<
std
::
size_t
,
sizeof
...(
Ts
)
>
;
array_type
lens
=
{{
static_cast
<
std
::
size_t
>
(
xs
)...}};
auto
n
=
std
::
accumulate
(
lens
.
begin
(),
lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
{});
const
std
::
size_t
min_grain
=
8
;
if
(
n
>
2
*
min_grain
)
{
array_type
strides
;
strides
.
fill
(
1
);
std
::
partial_sum
(
lens
.
rbegin
(),
lens
.
rend
()
-
1
,
strides
.
rbegin
()
+
1
,
std
::
multiplies
<
std
::
size_t
>
());
auto
size
=
std
::
accumulate
(
lens
.
begin
(),
lens
.
end
(),
1
,
std
::
multiplies
<
std
::
size_t
>
());
par_for
(
size
,
min_grain
,
[
&
](
std
::
size_t
i
)
{
array_type
indices
;
std
::
transform
(
strides
.
begin
(),
strides
.
end
(),
lens
.
begin
(),
indices
.
begin
(),
[
&
](
size_t
stride
,
size_t
len
)
{
return
(
i
/
stride
)
%
len
;
});
migraphx
::
unpack
(
f
,
indices
);
});
}
else
{
dfor
(
xs
...)(
f
);
}
};
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/include/migraphx/par_for.hpp
0 → 100644
View file @
246c4236
#ifndef MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_PAR_FOR_HPP
#include <thread>
#include <cmath>
#include <algorithm>
#include <vector>
#include <cassert>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
struct
joinable_thread
:
std
::
thread
{
template
<
class
...
Xs
>
joinable_thread
(
Xs
&&
...
xs
)
:
std
::
thread
(
std
::
forward
<
Xs
>
(
xs
)...)
// NOLINT
{
}
joinable_thread
&
operator
=
(
joinable_thread
&&
other
)
=
default
;
joinable_thread
(
joinable_thread
&&
other
)
=
default
;
~
joinable_thread
()
{
if
(
this
->
joinable
())
this
->
join
();
}
};
template
<
class
F
>
void
par_for_impl
(
std
::
size_t
n
,
std
::
size_t
threadsize
,
F
f
)
{
if
(
threadsize
<=
1
)
{
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
f
(
i
);
}
else
{
std
::
vector
<
joinable_thread
>
threads
(
threadsize
);
const
std
::
size_t
grainsize
=
std
::
ceil
(
static_cast
<
double
>
(
n
)
/
threads
.
size
());
std
::
size_t
work
=
0
;
std
::
generate
(
threads
.
begin
(),
threads
.
end
(),
[
=
,
&
work
]
{
auto
result
=
joinable_thread
([
=
]
{
std
::
size_t
start
=
work
;
std
::
size_t
last
=
std
::
min
(
n
,
work
+
grainsize
);
for
(
std
::
size_t
i
=
start
;
i
<
last
;
i
++
)
{
f
(
i
);
}
});
work
+=
grainsize
;
return
result
;
});
assert
(
work
>=
n
);
}
}
template
<
class
F
>
void
par_for
(
std
::
size_t
n
,
std
::
size_t
min_grain
,
F
f
)
{
const
auto
threadsize
=
std
::
min
<
std
::
size_t
>
(
std
::
thread
::
hardware_concurrency
(),
n
/
min_grain
);
par_for_impl
(
n
,
threadsize
,
f
);
}
template
<
class
F
>
void
par_for
(
std
::
size_t
n
,
F
f
)
{
const
int
min_grain
=
8
;
par_for
(
n
,
min_grain
,
f
);
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/cpu/lowering.cpp
View file @
246c4236
...
@@ -5,6 +5,7 @@
...
@@ -5,6 +5,7 @@
#include <migraphx/operators.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/par_dfor.hpp>
#include <migraphx/cpu/gemm.hpp>
#include <migraphx/cpu/gemm.hpp>
#include <unordered_map>
#include <unordered_map>
#include <utility>
#include <utility>
...
@@ -72,7 +73,7 @@ struct cpu_batch_norm_inference
...
@@ -72,7 +73,7 @@ struct cpu_batch_norm_inference
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_variance
,
arg_gamma
,
arg_bias
)(
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_variance
,
arg_gamma
,
arg_bias
)(
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
par_
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
[
&
](
std
::
size_t
n
,
std
::
size_t
c
,
std
::
size_t
h
,
std
::
size_t
w
)
{
[
&
](
std
::
size_t
n
,
std
::
size_t
c
,
std
::
size_t
h
,
std
::
size_t
w
)
{
assert
((
variance
(
c
)
+
epsilon
)
>
0
);
assert
((
variance
(
c
)
+
epsilon
)
>
0
);
result
(
n
,
c
,
h
,
w
)
=
gamma
(
c
)
*
(
buffer
(
n
,
c
,
h
,
w
)
-
mean
(
c
))
/
result
(
n
,
c
,
h
,
w
)
=
gamma
(
c
)
*
(
buffer
(
n
,
c
,
h
,
w
)
-
mean
(
c
))
/
...
@@ -87,7 +88,7 @@ struct cpu_batch_norm_inference
...
@@ -87,7 +88,7 @@ struct cpu_batch_norm_inference
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_mean
,
arg_gamma
,
arg_bias
)(
visit_all
(
output
,
input
,
mini_batch_mean
,
mini_batch_mean
,
arg_gamma
,
arg_bias
)(
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
[
&
](
auto
result
,
auto
buffer
,
auto
mean
,
auto
variance
,
auto
gamma
,
auto
bias
)
{
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
par_
dfor
(
num_batch
,
num_channels
,
image_height
,
image_width
)(
[
&
](
std
::
size_t
n
,
std
::
size_t
c
,
std
::
size_t
h
,
std
::
size_t
w
)
{
[
&
](
std
::
size_t
n
,
std
::
size_t
c
,
std
::
size_t
h
,
std
::
size_t
w
)
{
assert
((
variance
(
c
,
h
,
w
)
+
epsilon
)
>
0
);
assert
((
variance
(
c
,
h
,
w
)
+
epsilon
)
>
0
);
result
(
n
,
c
,
h
,
w
)
=
gamma
(
c
,
h
,
w
)
*
result
(
n
,
c
,
h
,
w
)
=
gamma
(
c
,
h
,
w
)
*
...
@@ -122,7 +123,7 @@ struct cpu_convolution
...
@@ -122,7 +123,7 @@ struct cpu_convolution
auto
wei_h
=
wei
[
2
];
auto
wei_h
=
wei
[
2
];
auto
wei_w
=
wei
[
3
];
auto
wei_w
=
wei
[
3
];
dfor
(
output_shape
.
lens
()[
0
],
par_
dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
output_shape
.
lens
()[
3
])(
...
@@ -245,7 +246,7 @@ struct cpu_pooling
...
@@ -245,7 +246,7 @@ struct cpu_pooling
auto
in_h
=
input
.
get_shape
().
lens
()[
2
];
auto
in_h
=
input
.
get_shape
().
lens
()[
2
];
auto
in_w
=
input
.
get_shape
().
lens
()[
3
];
auto
in_w
=
input
.
get_shape
().
lens
()[
3
];
dfor
(
output_shape
.
lens
()[
0
],
par_
dfor
(
output_shape
.
lens
()[
0
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
1
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
2
],
output_shape
.
lens
()[
3
])(
output_shape
.
lens
()[
3
])(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment