Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
c6d92ecc
Commit
c6d92ecc
authored
Mar 01, 2019
by
Paul
Browse files
Schedule concurrently without memory coloring
parent
9cd5cf4e
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
120 additions
and
5 deletions
+120
-5
src/schedule.cpp
src/schedule.cpp
+109
-0
src/targets/gpu/hip.cpp
src/targets/gpu/hip.cpp
+2
-0
src/targets/gpu/schedule_model.cpp
src/targets/gpu/schedule_model.cpp
+7
-3
src/targets/gpu/target.cpp
src/targets/gpu/target.cpp
+2
-2
No files found.
src/schedule.cpp
View file @
c6d92ecc
...
@@ -5,12 +5,68 @@
...
@@ -5,12 +5,68 @@
#include <migraphx/functional.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/ranges.hpp>
#include <unordered_map>
#include <unordered_map>
#include <set>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
// Bookkeeping for the scheduler: remembers which execution stream each
// instruction was assigned to and answers queries about points where the
// dataflow crosses stream boundaries (splits and merges).
struct stream_info
{
    std::unordered_map<instruction_ref, std::size_t> ins2stream;

    // Record that instruction `ins` runs on stream `n`.
    void set_stream(instruction_ref ins, std::size_t n) { ins2stream[ins] = n; }

    // Stream previously assigned to `ins`; throws std::out_of_range if none.
    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }

    // True when `ins` already has a stream assignment.
    bool has_stream(instruction_ref ins) const { return ins2stream.count(ins) > 0; }

    // True when the instructions in `v` do not all share a single stream.
    // Fewer than two instructions can never differ.
    bool different(const std::vector<instruction_ref>& v) const
    {
        if(v.size() < 2)
            return false;
        const auto first = get_stream(v.front());
        return std::any_of(
            v.begin(), v.end(), [&](instruction_ref x) { return get_stream(x) != first; });
    }

    // A split point fans its outputs out onto more than one stream.
    bool is_split_point(instruction_ref ins) const { return different(ins->outputs()); }

    // A merge point joins inputs arriving from more than one stream.
    bool is_merge_point(instruction_ref ins) const { return different(ins->inputs()); }

    // Sorted, de-duplicated list of streams (other than the one `ins` runs
    // on) that produce an input of `ins` — i.e. the streams `ins` must
    // synchronize with before executing.
    std::vector<std::size_t> wait_for(instruction_ref ins) const
    {
        std::set<std::size_t> pending;
        const auto self_stream = get_stream(ins);
        for(auto input : ins->inputs())
        {
            const auto s = get_stream(input);
            if(s != self_stream)
                pending.insert(s);
        }
        return {pending.begin(), pending.end()};
    }
};
void
schedule
::
apply
(
program
&
p
)
const
void
schedule
::
apply
(
program
&
p
)
const
{
{
const
std
::
size_t
min_partition_threshold
=
2
;
// Compute accumulated weights
// Compute accumulated weights
std
::
unordered_map
<
instruction_ref
,
std
::
size_t
>
weights
;
std
::
unordered_map
<
instruction_ref
,
std
::
size_t
>
weights
;
auto
last
=
std
::
prev
(
p
.
end
());
auto
last
=
std
::
prev
(
p
.
end
());
...
@@ -26,6 +82,44 @@ void schedule::apply(program& p) const
...
@@ -26,6 +82,44 @@ void schedule::apply(program& p) const
return
weights
[
ins
];
return
weights
[
ins
];
})(
last
);
})(
last
);
// Assign streams
auto
streams
=
model
.
concurrency
();
stream_info
si
;
for
(
std
::
size_t
stream
=
0
;
stream
<
streams
;
stream
++
)
{
fix
([
&
](
auto
self
,
auto
ins
)
{
// Only assign a stream if not already assigned
if
(
not
si
.
has_stream
(
ins
))
si
.
set_stream
(
ins
,
stream
);
instruction_ref
child
=
p
.
end
();
std
::
size_t
w
=
0
;
for
(
auto
i
:
ins
->
inputs
())
{
const
auto
weight
=
weights
[
i
];
// Skip instructions that already have a stream assignment or whose weight is too low
if
(
si
.
has_stream
(
i
)
or
weight
<=
min_partition_threshold
)
{
self
(
i
);
}
// Accumulate the max weight
else
if
(
weight
>
w
)
{
child
=
i
;
w
=
weight
;
}
}
if
(
child
!=
p
.
end
())
self
(
child
);
})(
last
);
}
// Assign remaining instructions
for
(
auto
ins
:
iterator_for
(
p
))
{
if
(
si
.
has_stream
(
ins
))
continue
;
si
.
set_stream
(
ins
,
streams
-
1
);
}
// Topo sort
// Topo sort
fix
([
&
](
auto
self
,
auto
ins
)
{
fix
([
&
](
auto
self
,
auto
ins
)
{
for
(
auto
i
:
ins
->
inputs
())
for
(
auto
i
:
ins
->
inputs
())
...
@@ -33,6 +127,21 @@ void schedule::apply(program& p) const
...
@@ -33,6 +127,21 @@ void schedule::apply(program& p) const
for
(
auto
i
:
ins
->
inputs
())
for
(
auto
i
:
ins
->
inputs
())
self
(
i
);
self
(
i
);
})(
last
);
})(
last
);
// Schedule instructions
for
(
auto
ins
:
iterator_for
(
p
))
{
if
(
si
.
is_merge_point
(
ins
))
{
assert
(
not
si
.
wait_for
(
ins
).
empty
());
model
.
wait
(
p
,
ins
,
si
.
get_stream
(
ins
),
si
.
wait_for
(
ins
));
continue
;
}
// Skip scheduling instructions with no context
if
(
is_context_free
(
ins
->
get_operator
())
or
ins
->
get_operator
().
name
().
front
()
==
'@'
)
continue
;
model
.
schedule_instruction
(
p
,
ins
,
si
.
get_stream
(
ins
));
}
}
}
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace MIGRAPHX_INLINE_NS
...
...
src/targets/gpu/hip.cpp
View file @
c6d92ecc
...
@@ -43,6 +43,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
...
@@ -43,6 +43,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
template
<
class
T
>
template
<
class
T
>
std
::
vector
<
T
>
read_from_gpu
(
const
void
*
x
,
std
::
size_t
sz
)
std
::
vector
<
T
>
read_from_gpu
(
const
void
*
x
,
std
::
size_t
sz
)
{
{
gpu_sync
();
std
::
vector
<
T
>
result
(
sz
);
std
::
vector
<
T
>
result
(
sz
);
auto
status
=
hipMemcpy
(
result
.
data
(),
x
,
sz
*
sizeof
(
T
),
hipMemcpyDeviceToHost
);
auto
status
=
hipMemcpy
(
result
.
data
(),
x
,
sz
*
sizeof
(
T
),
hipMemcpyDeviceToHost
);
if
(
status
!=
hipSuccess
)
if
(
status
!=
hipSuccess
)
...
@@ -52,6 +53,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
...
@@ -52,6 +53,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
hip_ptr
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
hip_ptr
write_to_gpu
(
const
void
*
x
,
std
::
size_t
sz
,
bool
host
=
false
)
{
{
gpu_sync
();
auto
result
=
allocate_gpu
(
sz
,
host
);
auto
result
=
allocate_gpu
(
sz
,
host
);
auto
status
=
hipMemcpy
(
result
.
get
(),
x
,
sz
,
hipMemcpyHostToDevice
);
auto
status
=
hipMemcpy
(
result
.
get
(),
x
,
sz
,
hipMemcpyHostToDevice
);
if
(
status
!=
hipSuccess
)
if
(
status
!=
hipSuccess
)
...
...
src/targets/gpu/schedule_model.cpp
View file @
c6d92ecc
...
@@ -62,7 +62,7 @@ struct set_stream
...
@@ -62,7 +62,7 @@ struct set_stream
// At finalization, point the gpu context at this operator's stream.
// The shape arguments are part of the finalize signature but unused here.
void finalize(context& ctx, const shape&, const std::vector<shape>&)
{
    ctx.set_stream(stream);
}
};
};
// Number of hip streams this model may schedule work across.
std::size_t schedule_model::concurrency() const { return streams; }
void
schedule_model
::
schedule_instruction
(
program
&
p
,
instruction_ref
ins
,
std
::
size_t
n
)
const
void
schedule_model
::
schedule_instruction
(
program
&
p
,
instruction_ref
ins
,
std
::
size_t
n
)
const
{
{
p
.
insert_instruction
(
ins
,
set_stream
{
n
});
p
.
insert_instruction
(
ins
,
set_stream
{
n
});
...
@@ -99,7 +99,11 @@ static const std::unordered_map<std::string, std::size_t>& weight_map()
...
@@ -99,7 +99,11 @@ static const std::unordered_map<std::string, std::size_t>& weight_map()
std
::
size_t
schedule_model
::
weight
(
const
operation
&
op
)
const
std
::
size_t
schedule_model
::
weight
(
const
operation
&
op
)
const
{
{
if
(
weight_map
().
count
(
op
.
name
())
==
0
)
if
(
weight_map
().
count
(
op
.
name
())
==
0
)
{
if
(
is_context_free
(
op
)
or
op
.
name
()[
0
]
==
'@'
)
return
0
;
return
0
;
return
1
;
}
return
weight_map
().
at
(
op
.
name
());
return
weight_map
().
at
(
op
.
name
());
}
}
...
...
src/targets/gpu/target.cpp
View file @
c6d92ecc
...
@@ -54,8 +54,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
...
@@ -54,8 +54,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx) const
dead_code_elimination
{},
dead_code_elimination
{},
write_literals
{
&
ctx
},
write_literals
{
&
ctx
},
schedule
{
gpu
::
schedule_model
{
ctx
.
get_current_device
().
nstreams
()}},
schedule
{
gpu
::
schedule_model
{
ctx
.
get_current_device
().
nstreams
()}},
memory_coloring
{
"hip::allocate"
},
//
memory_coloring{"hip::allocate"},
eliminate_workspace
{},
//
eliminate_workspace{},
eliminate_allocation
{
"hip::allocate"
},
eliminate_allocation
{
"hip::allocate"
},
check_context
<
context
>
{},
check_context
<
context
>
{},
dead_code_elimination
{}
dead_code_elimination
{}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment