Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
dc23d605
Commit
dc23d605
authored
Oct 10, 2019
by
Shucai Xiao
Committed by
mvermeulen
Oct 10, 2019
Browse files
Accelerate calculating conflict table (#382)
* accelerate conflict table computation * removed an unnecessary comma
parent
a797f890
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
83 additions
and
17 deletions
+83
-17
src/include/migraphx/par_for.hpp
src/include/migraphx/par_for.hpp
+17
-3
src/schedule.cpp
src/schedule.cpp
+66
-14
No files found.
src/include/migraphx/par_for.hpp
View file @
dc23d605
...
...
@@ -27,13 +27,25 @@ struct joinable_thread : std::thread
}
};
// Invokes f with a work-item index and a thread index.
//
// The trailing return type makes this overload viable only when the
// expression f(i, tid) is well-formed, so callables that accept a single
// index argument do not select it.
//
// i   - index of the work item to process
// tid - index of the thread on whose behalf the item is processed
// f   - callable invoked as f(i, tid)
//
// Returns the result of f(i, tid); nothing when f returns void.
template <class F>
auto thread_invoke(std::size_t i, std::size_t tid, F f) -> decltype(f(i, tid))
{
    // The declared return type is decltype(f(i, tid)); returning the call
    // result avoids flowing off the end of a non-void function when f yields
    // a value, and is still valid when f returns void.
    return f(i, tid);
}
// Invokes f with a work-item index only, ignoring the thread index.
//
// The trailing return type makes this overload viable only when the
// expression f(i) is well-formed, so it serves callables that take just the
// index. The second (unnamed) parameter is accepted and discarded, which lets
// call sites pass a thread index regardless of the callable's arity.
//
// i - index of the work item to process
// f - callable invoked as f(i)
//
// Returns the result of f(i); nothing when f returns void.
template <class F>
auto thread_invoke(std::size_t i, std::size_t, F f) -> decltype(f(i))
{
    // The declared return type is decltype(f(i)); returning the call result
    // avoids flowing off the end of a non-void function when f yields a
    // value, and is still valid when f returns void.
    return f(i);
}
template
<
class
F
>
void
par_for_impl
(
std
::
size_t
n
,
std
::
size_t
threadsize
,
F
f
)
{
if
(
threadsize
<=
1
)
{
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
f
(
i
);
thread_invoke
(
i
,
0
,
f
);
}
else
{
...
...
@@ -45,16 +57,18 @@ void par_for_impl(std::size_t n, std::size_t threadsize, F f)
std
::
size_t
grainsize
=
std
::
ceil
(
static_cast
<
double
>
(
n
)
/
threads
.
size
());
std
::
size_t
work
=
0
;
std
::
generate
(
threads
.
begin
(),
threads
.
end
(),
[
=
,
&
work
]
{
std
::
size_t
tid
=
0
;
std
::
generate
(
threads
.
begin
(),
threads
.
end
(),
[
=
,
&
work
,
&
tid
]
{
auto
result
=
joinable_thread
([
=
]
{
std
::
size_t
start
=
work
;
std
::
size_t
last
=
std
::
min
(
n
,
work
+
grainsize
);
for
(
std
::
size_t
i
=
start
;
i
<
last
;
i
++
)
{
f
(
i
);
thread_invoke
(
i
,
tid
,
f
);
}
});
work
+=
grainsize
;
++
tid
;
return
result
;
});
assert
(
work
>=
n
);
...
...
src/schedule.cpp
View file @
dc23d605
...
...
@@ -4,12 +4,16 @@
#include <migraphx/op/identity.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <unordered_map>
#include <unordered_set>
#include <thread>
#include <mutex>
#include <set>
#include <deque>
#include <chrono>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
...
@@ -303,31 +307,78 @@ struct stream_info
std
::
unordered_map
<
instruction_ref
,
std
::
unordered_set
<
instruction_ref
>>
get_conflicts
(
program
&
p
)
{
std
::
unordered_map
<
instruction_ref
,
std
::
unordered_set
<
instruction_ref
>>
conflict_table
;
using
conflict_table_type
=
std
::
unordered_map
<
instruction_ref
,
std
::
unordered_set
<
instruction_ref
>>
;
conflict_table_type
conflict_table
;
auto
concur_ins
=
this
->
find_concurrent_instructions
(
p
);
for
(
auto
&&
merge
:
concur_ins
)
std
::
vector
<
conflict_table_type
>
thread_conflict_tables
(
std
::
thread
::
hardware_concurrency
());
std
::
vector
<
instruction_ref
>
index_to_ins
;
index_to_ins
.
reserve
(
concur_ins
.
size
());
std
::
transform
(
concur_ins
.
begin
(),
concur_ins
.
end
(),
std
::
back_inserter
(
index_to_ins
),
[](
auto
&&
it
)
{
return
it
.
first
;
});
par_for
(
concur_ins
.
size
(),
[
&
](
auto
ins_index
,
auto
tid
)
{
auto
merge_first
=
index_to_ins
[
ins_index
];
assert
(
concur_ins
.
count
(
merge_first
)
>
0
);
auto
&
merge_second
=
concur_ins
.
at
(
merge_first
);
// ensure there are enough elements for different threads
assert
(
tid
<
thread_conflict_tables
.
size
());
auto
&
thrd_table
=
thread_conflict_tables
.
at
(
tid
);
std
::
unordered_set
<
instruction_ref
>
checked_ins_set
;
auto
range_i
=
range
(
merge_second
.
begin
(),
std
::
prev
(
merge_second
.
end
()));
for
(
auto
it_i
:
iterator_for
(
range_i
))
{
dfor
(
merge
.
second
.
size
(),
merge
.
second
.
size
())([
&
](
auto
i
,
auto
j
)
{
if
(
i
==
j
)
return
;
for
(
auto
ins1
:
merge
.
second
[
i
])
std
::
unordered_set
<
instruction_ref
>
ins1_set
;
std
::
copy_if
(
it_i
->
begin
(),
it_i
->
end
(),
std
::
inserter
(
ins1_set
,
ins1_set
.
end
()),
[
&
](
auto
i
)
{
return
not
contains
(
checked_ins_set
,
i
);
});
checked_ins_set
.
insert
(
ins1_set
.
begin
(),
ins1_set
.
end
());
auto
range_j
=
range
(
std
::
next
(
it_i
),
merge_second
.
end
());
std
::
unordered_set
<
instruction_ref
>
ins2_set
;
for
(
auto
it_j
:
iterator_for
(
range_j
))
{
std
::
copy_if
(
it_j
->
begin
(),
it_j
->
end
(),
std
::
inserter
(
ins2_set
,
ins2_set
.
end
()),
[
&
](
auto
i
)
{
return
not
contains
(
checked_ins_set
,
i
);
});
}
for
(
auto
ins1
:
ins1_set
)
{
auto
p1
=
std
::
distance
(
ins1
,
merge
.
first
);
for
(
auto
ins2
:
merge
.
second
[
j
]
)
auto
p1
=
std
::
distance
(
ins1
,
merge
_
first
);
for
(
auto
ins2
:
ins2_set
)
{
if
(
ins1
==
ins2
)
continue
;
auto
p2
=
std
::
distance
(
ins2
,
merge
.
first
);
auto
p2
=
std
::
distance
(
ins2
,
merge
_
first
);
// The smaller distance means the instruction occurs later
if
(
p1
>
p2
)
conflict
_table
[
ins2
].
insert
(
ins1
);
thrd
_table
[
ins2
].
insert
(
ins1
);
else
conflict_table
[
ins1
].
insert
(
ins2
);
thrd_table
[
ins1
].
insert
(
ins2
);
}
}
}
});
// merge thread_conflict_tables together
for
(
auto
&
tbl
:
thread_conflict_tables
)
{
for
(
auto
&
it
:
tbl
)
{
conflict_table
[
it
.
first
].
insert
(
it
.
second
.
begin
(),
it
.
second
.
end
());
}
}
// Remove duplicates
// Remove instructions from the conflict table of an earlier instruction
for
(
auto
&&
ip
:
conflict_table
)
{
auto
ins1
=
ip
.
first
;
...
...
@@ -335,6 +386,7 @@ struct stream_info
if
(
contains
(
conflict_table
[
ins2
],
ins1
))
conflict_table
[
ins2
].
erase
(
ins1
);
}
return
conflict_table
;
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment