gaoqiong / MIGraphX · Commits

Commit 2b0b77f1 (unverified)
Authored Mar 12, 2019 by Paul Fultz II; committed by GitHub on Mar 12, 2019
Parents: fa38a3a6, 7ede2b3b

Merge pull request #175 from ROCmSoftwarePlatform/stream_execution_checkin

Stream execution checkin

Changes: 25 files in the commit; this page shows 20 changed files with 1101 additions and 65 deletions (+1101 −65).
Changed files on this page:

    src/CMakeLists.txt                                         +1    −0
    src/dead_code_elimination.cpp                              +3    −3
    src/include/migraphx/functional.hpp                        +12   −0
    src/include/migraphx/iterator_for.hpp                      +54   −5
    src/include/migraphx/operators.hpp                         +10   −0
    src/include/migraphx/program.hpp                           +6    −0
    src/include/migraphx/schedule.hpp                          +27   −0
    src/include/migraphx/schedule_model.hpp                    +286  −0
    src/include/migraphx/streamutils.hpp                       +3    −1
    src/include/migraphx/target.hpp                            +0    −2
    src/opt/common_header.hpp                                  +0    −30
    src/opt/memory_coloring_impl.hpp                           +17   −1
    src/program.cpp                                            +15   −5
    src/schedule.cpp                                           +432  −0
    src/targets/gpu/CMakeLists.txt                             +1    −0
    src/targets/gpu/convolution.cpp                            +20   −15
    src/targets/gpu/hip.cpp                                    +2    −0
    src/targets/gpu/include/migraphx/gpu/context.hpp           +55   −3
    src/targets/gpu/include/migraphx/gpu/schedule_model.hpp    +30   −0
    src/targets/gpu/schedule_model.cpp                         +127  −0
src/CMakeLists.txt

@@ -17,6 +17,7 @@ add_library(migraphx
     instruction.cpp
     program.cpp
     shape.cpp
+    schedule.cpp
     simplify_algebra.cpp
     simplify_reshapes.cpp
     opt/memory_coloring.cpp
src/dead_code_elimination.cpp

@@ -41,9 +41,9 @@ void dead_code_elimination::apply(program& p) const
         // Skip the last instruction
         if(i == last)
             break;
-        // Skip instruction with empty shape as output unless its a builtin or undefined
-        if(i->get_shape().elements() == 0 and not(i->name().front() == '@') and
-           not(i->name() == "undefined"))
+        // Skip instruction with empty shape as output unless its a builtin or undefined or identity
+        if(i->get_shape().elements() == 0 and i->name().front() != '@' and
+           i->name() != "undefined" and i->name() != "identity")
             continue;
         assert(bidistance(p, i, last) > 0);
         fix([&](auto self, auto leaf) {
src/include/migraphx/functional.hpp

@@ -137,6 +137,18 @@ auto fold(F f)
     return [=](auto&&... xs) {
         return fold_impl(f, std::forward<decltype(xs)>(xs)...);
     };
 }

+template <class F, class Proj>
+auto by(F f, Proj proj)
+{
+    return [=](auto&&... xs) { return f(proj(std::forward<decltype(xs)>(xs))...); };
+}
+
+template <class T>
+auto index_of(T& x)
+{
+    return [&](auto&& y) { return x[y]; };
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
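The new `by` combinator composes a comparator with a projection; later in this commit the scheduler uses it to sort instructions by weight. Below is a minimal, self-contained sketch of how it behaves: the `by` definition is copied from the diff above, while the sample data and `main` are illustrative only.

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Copied from the diff above: apply a projection before comparing/combining
    template <class F, class Proj>
    auto by(F f, Proj proj)
    {
        return [=](auto&&... xs) { return f(proj(std::forward<decltype(xs)>(xs))...); };
    }

    int main()
    {
        // Hypothetical data: sort strings by length, descending, the same way
        // the scheduler sorts instruction arguments by accumulated weight.
        std::vector<std::string> words = {"stream", "gpu", "schedule"};
        std::sort(words.begin(), words.end(),
                  by(std::greater<>{}, [](const std::string& s) { return s.size(); }));
        for(const auto& w : words)
            std::cout << w << "\n"; // schedule, stream, gpu
        return 0;
    }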
src/include/migraphx/iterator_for.hpp

@@ -3,21 +3,64 @@
 #include <cassert>
 #include <type_traits>
 #include <iterator>
 #include <migraphx/config.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-template <class T>
+struct iterator_for_select
+{
+    template <class T>
+    static T deref(T x)
+    {
+        return x;
+    }
+    template <class T>
+    static auto begin(T* x)
+    {
+        return x->begin();
+    }
+    template <class T>
+    static auto end(T* x)
+    {
+        return x->end();
+    }
+};
+
+struct iterator_for_select_reverse
+{
+    template <class T>
+    static auto deref(T x)
+    {
+        return std::prev(x.base());
+    }
+    template <class T>
+    static auto begin(T* x)
+    {
+        return std::make_reverse_iterator(x->end());
+    }
+    template <class T>
+    static auto end(T* x)
+    {
+        return std::make_reverse_iterator(x->begin());
+    }
+};
+
+template <class T, class Selector = iterator_for_select>
 struct iterator_for_range
 {
     T* base;
-    using base_iterator = std::remove_reference_t<decltype(base->begin())>;
+    using base_iterator = std::remove_reference_t<decltype(Selector::begin(base))>;

     struct iterator
     {
         base_iterator i;
-        base_iterator operator*() const { return i; }
+        auto operator*() const { return Selector::deref(i); }
         base_iterator operator++() { return ++i; }
         bool operator!=(const iterator& rhs) const { return i != rhs.i; }
     };

@@ -25,12 +68,12 @@ struct iterator_for_range
     iterator begin()
     {
         assert(base != nullptr);
-        return {base->begin()};
+        return {Selector::begin(base)};
     }

     iterator end()
     {
         assert(base != nullptr);
-        return {base->end()};
+        return {Selector::end(base)};
     }
 };

 template <class T>

@@ -39,6 +82,12 @@ iterator_for_range<T> iterator_for(T& x)
     return {&x};
 }

+template <class T>
+iterator_for_range<T, iterator_for_select_reverse> reverse_iterator_for(T& x)
+{
+    return {&x};
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
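`reverse_iterator_for` lets a pass such as `find_concurrent_instructions` walk a program back-to-front while still handing out ordinary forward iterators: `iterator_for_select_reverse::deref` converts the reverse iterator back with std::prev(x.base()). A small self-contained sketch of that conversion, using a std::list of ints purely for illustration rather than MIGraphX instructions:

    #include <iostream>
    #include <iterator>
    #include <list>

    int main()
    {
        std::list<int> xs = {1, 2, 3, 4};
        // Walk the list in reverse, but recover the underlying forward iterator
        // the same way iterator_for_select_reverse::deref does.
        for(auto rit = std::make_reverse_iterator(xs.end());
            rit != std::make_reverse_iterator(xs.begin());
            ++rit)
        {
            std::list<int>::iterator fwd = std::prev(rit.base());
            std::cout << *fwd << "\n"; // prints 4, 3, 2, 1
        }
        return 0;
    }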
src/include/migraphx/operators.hpp

@@ -1174,9 +1174,19 @@ struct load
     }
     argument compute(const shape&, const std::vector<argument>& args) const
     {
+        if((offset + s.bytes()) > args[0].get_shape().bytes())
+            MIGRAPHX_THROW("Load access is out of bounds");
         return {s, args[0].data() + offset};
     }
     int output_alias(const std::vector<shape>&) const { return 0; }
+
+    friend std::ostream& operator<<(std::ostream& os, const load& op)
+    {
+        os << op.name() << "[";
+        os << "offset=" << op.offset << ",";
+        os << "end=" << (op.offset + op.s.bytes()) << "]";
+        return os;
+    }
 };

 struct outline
src/include/migraphx/program.hpp

@@ -9,6 +9,7 @@
 #include <migraphx/instruction_ref.hpp>
 #include <migraphx/target.hpp>
 #include <migraphx/tracer.hpp>
+#include <migraphx/env.hpp>
 #include <migraphx/config.hpp>
 #include <algorithm>
 #include <iostream>

@@ -16,6 +17,9 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_COMPILE)
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_EVAL)
+
 struct program_impl;

 const operation& get_operation(instruction_ref ins);

@@ -107,6 +111,8 @@ struct program
     void dry_run(parameter_map params) const;

+    void annotate(std::ostream& os, std::function<void(instruction_ref)> a) const;
+
     friend std::ostream& operator<<(std::ostream& os, const program& p);
     friend bool operator==(const program& x, const program& y);
     friend bool operator!=(const program& x, const program& y) { return !(x == y); }
src/include/migraphx/schedule.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_SCHEDULE_HPP
#define MIGRAPHX_GUARD_RTGLIB_SCHEDULE_HPP

#include <string>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/schedule_model.hpp>
#include <migraphx/config.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct program;

/**
 * Schedule instructions for concurrent execution
 */
struct schedule
{
    schedule_model model{};
    std::string name() const { return "schedule"; }
    void apply(program& p) const;
};

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
src/include/migraphx/schedule_model.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_SCHEDULE_MODEL_HPP
#define MIGRAPHX_GUARD_SCHEDULE_MODEL_HPP

#include <cassert>
#include <string>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include <migraphx/config.hpp>
#include <migraphx/instruction_ref.hpp>
#include <vector>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct program;
struct operation;

#ifdef DOXYGEN

/// An interface for target-dependent model for the scheduler
struct schedule_model
{
    /// Get the number of concurrent instruction allowed
    std::size_t concurrency() const;
    /// Schedule a concurrent instruction
    void sched(program& p, instruction_ref ins, std::size_t n) const;
    // Insert necessary waits before an instruction
    void wait(program& p, instruction_ref ins, std::size_t wait_id) const;
    // Insert necessary records after an instruction
    void record(program& p, instruction_ref ins, std::size_t wait_id) const;
    /// Compute weights for an operation
    std::size_t weight(const operation& op) const;
};

#else

/*
 * Type-erased interface for:
 *
 * struct schedule_model
 * {
 *      std::size_t concurrency() const;
 *      void sched(program& p,instruction_ref ins,std::size_t n) const;
 *      void wait(program& p,instruction_ref ins,std::size_t wait_id) const;
 *      void record(program& p,instruction_ref ins,std::size_t wait_id) const;
 *      std::size_t weight(const operation& op) const;
 * };
 *
 */

struct schedule_model
{
    // Constructors
    schedule_model() = default;

    template <typename PrivateDetailTypeErasedT>
    schedule_model(PrivateDetailTypeErasedT value)
        : private_detail_te_handle_mem_var(
              std::make_shared<private_detail_te_handle_type<
                  typename std::remove_reference<PrivateDetailTypeErasedT>::type>>(
                  std::forward<PrivateDetailTypeErasedT>(value)))
    {
    }

    // Assignment
    template <typename PrivateDetailTypeErasedT>
    schedule_model& operator=(PrivateDetailTypeErasedT value)
    {
        if(private_detail_te_handle_mem_var.unique())
            *private_detail_te_handle_mem_var = std::forward<PrivateDetailTypeErasedT>(value);
        else if(!private_detail_te_handle_mem_var)
            private_detail_te_handle_mem_var = std::make_shared<PrivateDetailTypeErasedT>(
                std::forward<PrivateDetailTypeErasedT>(value));
        return *this;
    }

    // Cast
    template <typename PrivateDetailTypeErasedT>
    PrivateDetailTypeErasedT* any_cast()
    {
        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
                   ? std::addressof(
                         static_cast<private_detail_te_handle_type<
                             typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                             private_detail_te_get_handle())
                             .private_detail_te_value)
                   : nullptr;
    }

    template <typename PrivateDetailTypeErasedT>
    const typename std::remove_cv<PrivateDetailTypeErasedT>::type* any_cast() const
    {
        return private_detail_te_get_handle().type() == typeid(PrivateDetailTypeErasedT)
                   ? std::addressof(
                         static_cast<const private_detail_te_handle_type<
                             typename std::remove_cv<PrivateDetailTypeErasedT>::type>&>(
                             private_detail_te_get_handle())
                             .private_detail_te_value)
                   : nullptr;
    }

    const std::type_info& type_id() const
    {
        if(private_detail_te_handle_empty())
            return typeid(std::nullptr_t);
        else
            return private_detail_te_get_handle().type();
    }

    std::size_t concurrency() const
    {
        assert((*this).private_detail_te_handle_mem_var);
        return (*this).private_detail_te_get_handle().concurrency();
    }

    void sched(program& p, instruction_ref ins, std::size_t n) const
    {
        assert((*this).private_detail_te_handle_mem_var);
        (*this).private_detail_te_get_handle().sched(p, ins, n);
    }

    void wait(program& p, instruction_ref ins, std::size_t wait_id) const
    {
        assert((*this).private_detail_te_handle_mem_var);
        (*this).private_detail_te_get_handle().wait(p, ins, wait_id);
    }

    void record(program& p, instruction_ref ins, std::size_t wait_id) const
    {
        assert((*this).private_detail_te_handle_mem_var);
        (*this).private_detail_te_get_handle().record(p, ins, wait_id);
    }

    std::size_t weight(const operation& op) const
    {
        assert((*this).private_detail_te_handle_mem_var);
        return (*this).private_detail_te_get_handle().weight(op);
    }

    friend bool is_shared(const schedule_model& private_detail_x,
                          const schedule_model& private_detail_y)
    {
        return private_detail_x.private_detail_te_handle_mem_var ==
               private_detail_y.private_detail_te_handle_mem_var;
    }

    private:
    struct private_detail_te_handle_base_type
    {
        virtual ~private_detail_te_handle_base_type() {}
        virtual std::shared_ptr<private_detail_te_handle_base_type> clone() const = 0;
        virtual const std::type_info& type() const                                = 0;

        virtual std::size_t concurrency() const                                         = 0;
        virtual void sched(program& p, instruction_ref ins, std::size_t n) const        = 0;
        virtual void wait(program& p, instruction_ref ins, std::size_t wait_id) const   = 0;
        virtual void record(program& p, instruction_ref ins, std::size_t wait_id) const = 0;
        virtual std::size_t weight(const operation& op) const                           = 0;
    };

    template <typename PrivateDetailTypeErasedT>
    struct private_detail_te_handle_type : private_detail_te_handle_base_type
    {
        template <typename PrivateDetailTypeErasedU = PrivateDetailTypeErasedT>
        private_detail_te_handle_type(
            PrivateDetailTypeErasedT value,
            typename std::enable_if<std::is_reference<PrivateDetailTypeErasedU>::value>::type* =
                nullptr)
            : private_detail_te_value(value)
        {
        }

        template <typename PrivateDetailTypeErasedU = PrivateDetailTypeErasedT>
        private_detail_te_handle_type(
            PrivateDetailTypeErasedT value,
            typename std::enable_if<!std::is_reference<PrivateDetailTypeErasedU>::value,
                                    int>::type* = nullptr) noexcept
            : private_detail_te_value(std::move(value))
        {
        }

        std::shared_ptr<private_detail_te_handle_base_type> clone() const override
        {
            return std::make_shared<private_detail_te_handle_type>(private_detail_te_value);
        }

        const std::type_info& type() const override { return typeid(private_detail_te_value); }

        std::size_t concurrency() const override { return private_detail_te_value.concurrency(); }

        void sched(program& p, instruction_ref ins, std::size_t n) const override
        {
            private_detail_te_value.sched(p, ins, n);
        }

        void wait(program& p, instruction_ref ins, std::size_t wait_id) const override
        {
            private_detail_te_value.wait(p, ins, wait_id);
        }

        void record(program& p, instruction_ref ins, std::size_t wait_id) const override
        {
            private_detail_te_value.record(p, ins, wait_id);
        }

        std::size_t weight(const operation& op) const override
        {
            return private_detail_te_value.weight(op);
        }

        PrivateDetailTypeErasedT private_detail_te_value;
    };

    template <typename PrivateDetailTypeErasedT>
    struct private_detail_te_handle_type<std::reference_wrapper<PrivateDetailTypeErasedT>>
        : private_detail_te_handle_type<PrivateDetailTypeErasedT&>
    {
        private_detail_te_handle_type(std::reference_wrapper<PrivateDetailTypeErasedT> ref)
            : private_detail_te_handle_type<PrivateDetailTypeErasedT&>(ref.get())
        {
        }
    };

    bool private_detail_te_handle_empty() const
    {
        return private_detail_te_handle_mem_var == nullptr;
    }

    const private_detail_te_handle_base_type& private_detail_te_get_handle() const
    {
        assert(private_detail_te_handle_mem_var != nullptr);
        return *private_detail_te_handle_mem_var;
    }

    private_detail_te_handle_base_type& private_detail_te_get_handle()
    {
        assert(private_detail_te_handle_mem_var != nullptr);
        if(!private_detail_te_handle_mem_var.unique())
            private_detail_te_handle_mem_var = private_detail_te_handle_mem_var->clone();
        return *private_detail_te_handle_mem_var;
    }

    std::shared_ptr<private_detail_te_handle_base_type> private_detail_te_handle_mem_var;
};

template <typename ValueType>
inline const ValueType* any_cast(const schedule_model* x)
{
    return x->any_cast<ValueType>();
}

template <typename ValueType>
inline ValueType* any_cast(schedule_model* x)
{
    return x->any_cast<ValueType>();
}

template <typename ValueType>
inline ValueType& any_cast(schedule_model& x)
{
    auto* y = x.any_cast<typename std::remove_reference<ValueType>::type>();
    if(y == nullptr)
        throw std::bad_cast();
    return *y;
}

template <typename ValueType>
inline const ValueType& any_cast(const schedule_model& x)
{
    const auto* y = x.any_cast<typename std::remove_reference<ValueType>::type>();
    if(y == nullptr)
        throw std::bad_cast();
    return *y;
}
#endif

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
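The type-erased `schedule_model` accepts any value providing the five member functions listed in the comment block above, holding it behind a shared handle that is cloned on non-unique mutation. Below is a minimal self-contained sketch of the same handle pattern, reduced to a single concurrency() method so it compiles without the MIGraphX headers; the names any_model and fake_model are illustrative only, not part of the library.

    #include <cassert>
    #include <iostream>
    #include <memory>
    #include <utility>

    // Stripped-down version of the handle idiom used by schedule_model:
    // any type with concurrency() can be stored and called through one interface.
    struct any_model
    {
        template <class T>
        any_model(T value) : handle(std::make_shared<model<T>>(std::move(value)))
        {
        }

        std::size_t concurrency() const
        {
            assert(handle != nullptr);
            return handle->concurrency();
        }

        private:
        struct base
        {
            virtual ~base()                         = default;
            virtual std::size_t concurrency() const = 0;
        };

        template <class T>
        struct model : base
        {
            model(T v) : value(std::move(v)) {}
            std::size_t concurrency() const override { return value.concurrency(); }
            T value;
        };

        std::shared_ptr<const base> handle;
    };

    // An illustrative model, playing the role of gpu::schedule_model.
    struct fake_model
    {
        std::size_t streams = 4;
        std::size_t concurrency() const { return streams; }
    };

    int main()
    {
        any_model m = fake_model{};
        std::cout << m.concurrency() << "\n"; // 4
        return 0;
    }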
src/include/migraphx/streamutils.hpp

@@ -36,6 +36,8 @@ inline stream_range_container<Range> stream_range(const Range& r)
 namespace detail {

+inline void stream_write_value_impl(rank<2>, std::ostream& os, const std::string& x) { os << x; }
+
 template <class Range>
 auto stream_write_value_impl(rank<1>, std::ostream& os, const Range& r)
     -> decltype(r.begin(), r.end(), void())

@@ -53,7 +55,7 @@ void stream_write_value_impl(rank<0>, std::ostream& os, const T& x)
 template <class T>
 void stream_write_value(std::ostream& os, const T& x)
 {
-    detail::stream_write_value_impl(rank<1>{}, os, x);
+    detail::stream_write_value_impl(rank<2>{}, os, x);
 }

 } // namespace MIGRAPHX_INLINE_NS
src/include/migraphx/target.hpp

@@ -22,10 +22,8 @@ struct target
 {
     /// A unique name used to identify the target
     std::string name() const;
-    /// The transformation passes to be run
     /**
      * @brief The transformation pass to be run during compilation.
-     * @details [long description]
      *
      * @param ctx This is the target-dependent context that is created by `get_context`
      * @return The passes to be ran
src/opt/common_header.hpp (deleted, mode 100644 → 0; contents shown at fa38a3a6)

#ifndef MIGRAPHX_GUARD_RTGLIB_COMMON_HEADER_HPP
#define MIGRAPHX_GUARD_RTGLIB_COMMON_HEADER_HPP

#include <migraphx/program.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/pass_config.hpp>
#include <migraphx/config.hpp>
#include <set>
#include <list>
#include <vector>
#include <queue>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

//#define MIGRAPHX_DEBUG_OPT
#ifdef MIGRAPHX_DEBUG_OPT
#define MIGRAPHX_DEBUG(s) s
#else
#define MIGRAPHX_DEBUG(s)
#endif // MIGRAPHX_DEBUG_OPT

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_RTGLIB_COMMON_HEADER_HPP
src/opt/memory_coloring_impl.hpp

 #ifndef MIGRAPHX_GUARD_RTGLIB_MEMORY_COLORING_IMPL_HPP
 #define MIGRAPHX_GUARD_RTGLIB_MEMORY_COLORING_IMPL_HPP
-#include "common_header.hpp"
+#include <migraphx/program.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/operators.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/pass_config.hpp>
+#include <migraphx/config.hpp>
+#include <set>
+#include <list>
+#include <vector>
+#include <queue>
+
+#ifdef MIGRAPHX_DEBUG_OPT
+#define MIGRAPHX_DEBUG(s) s
+#else
+#define MIGRAPHX_DEBUG(s)
+#endif // MIGRAPHX_DEBUG_OPT

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
src/program.cpp

@@ -15,9 +15,6 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_COMPILE)
-MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_EVAL)
-
 struct program_impl
 {
     // A list is used to keep references to an instruction stable

@@ -107,11 +104,9 @@ instruction_ref program::insert_instruction(instruction_ref ins,
                args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
            "Argument is not an exisiting instruction");
     assert(not starts_with(op.name(), "@"));
-    // TODO: Use move
     shape r     = compute_shape(op, args);
     auto result = impl->instructions.insert(ins, {op, r, std::move(args)});
     instruction::backreference(result);
-    // assert(result->inputs() == args);
     assert(result->valid(begin()));
     return result;
 }

@@ -510,6 +505,16 @@ void program::perf_report(std::ostream& os, std::size_t n, parameter_map params)
 void program::debug_print() const { std::cout << *this << std::endl; }
 void program::debug_print(instruction_ref ins) const
 {
+    if(ins == this->end())
+    {
+        std::cout << "End instruction" << std::endl;
+        return;
+    }
+    if(not has_instruction(ins))
+    {
+        std::cout << "Instruction not part of program" << std::endl;
+        return;
+    }
     std::stringstream ss;
     print_program(ss, *this, [&](auto x, auto&& names) {
         if(x == ins)

@@ -532,6 +537,11 @@ void program::dry_run(std::unordered_map<std::string, argument> params) const
     generic_eval(*this, ctx, std::move(params), [](auto&&...) { return argument{}; });
 }

+void program::annotate(std::ostream& os, std::function<void(instruction_ref)> a) const
+{
+    print_program(os, *this, [&](auto ins, auto&&) { a(ins); });
+}
+
 bool operator==(const program& x, const program& y) { return to_string(x) == to_string(y); }

 std::ostream& operator<<(std::ostream& os, const program& p)
src/schedule.cpp (new file, mode 100644)

#include <migraphx/schedule.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/ranges.hpp>
#include <unordered_map>
#include <unordered_set>
#include <set>
#include <deque>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

auto get_inputs()
{
    return [](auto i) { return i->inputs(); };
}

auto get_outputs()
{
    return [](auto i) { return i->outputs(); };
}

struct stream_info
{
    std::unordered_map<instruction_ref, std::size_t> ins2stream;
    std::unordered_map<instruction_ref, std::size_t> weights;
    std::unordered_map<instruction_ref, std::size_t> iweights;

    void accumulate_weights(instruction_ref last, const schedule_model& model)
    {
        fix<std::size_t>([&](auto self, auto ins) -> std::size_t {
            if(not contains(weights, ins))
            {
                std::size_t weight = 0;
                auto&& op          = ins->get_operator();
                if(not is_context_free(op) and op.name()[0] != '@')
                    weight = model.weight(op);
                iweights[ins] = weight;
                weights[ins] =
                    std::accumulate(ins->inputs().begin(),
                                    ins->inputs().end(),
                                    weight,
                                    [&](std::size_t w, instruction_ref i) { return w + self(i); });
            }
            return weights[ins];
        })(last);
    }

    std::vector<instruction_ref>::iterator sort_args(std::vector<instruction_ref>& args)
    {
        if(args.size() < 2)
        {
            return args.end();
        }

        const std::size_t min_partition_threshold = 2;
        auto compare = by(std::greater<>{}, [&](auto x) {
            return std::make_tuple(this->weights[x], x->inputs().size());
        });
        std::sort(args.begin(), args.end(), compare);

        auto it = std::lower_bound(std::next(args.begin()),
                                   args.end(),
                                   min_partition_threshold,
                                   [&](auto i, std::size_t w) { return this->weights[i] > w; });
        assert(it == args.end() or this->weights[*it] <= min_partition_threshold);
        assert(it == args.end() or std::prev(it) == args.begin() or
               this->weights[*std::prev(it)] > min_partition_threshold);
        return it;
    }

    struct partition
    {
        std::size_t weight = 0;
        std::vector<instruction_ref> instructions{};

        void add(instruction_ref ins, std::size_t w)
        {
            weight += w;
            instructions.push_back(ins);
        }
    };

    void assign_streams(program& p, std::size_t n)
    {
        partition critical;
        std::unordered_map<instruction_ref, std::deque<partition>> partitions;
        partitions.reserve(weights.size());
        fix([&](auto self, auto ins, auto& part) {
            assert(ins != p.end());
            if(contains(partitions, ins))
                return;
            assert(p.has_instruction(ins));
            // Add an entry so we know the instruction was visited
            partitions[ins];

            part.add(ins, this->iweights[ins]);

            auto args         = ins->inputs();
            auto threshold_it = this->sort_args(args);

            if(not args.empty())
            {
                assert(threshold_it != args.begin());
                self(args.front(), part);
                for(auto i : range(std::next(args.begin()), threshold_it))
                {
                    partitions[ins].emplace_back();
                    self(i, partitions[ins].back());
                }
                for(auto i : range(threshold_it, args.end()))
                {
                    self(i, part);
                }
            }

            // Sort instructions
            p.move_instruction(ins, p.end());
        })(std::prev(p.end()), critical);

        // Set the critical partition to stream 0
        set_stream(critical, 0);
        std::vector<std::size_t> streams(n - 1);
        // Assign streams for the other partitions
        for(auto&& ins_part : partitions)
        {
            std::sort(ins_part.second.begin(),
                      ins_part.second.end(),
                      by(std::greater<>{}, [](auto&& x) {
                          return std::make_tuple(x.weight, x.instructions.size());
                      }));
            for(auto&& part : ins_part.second)
            {
                auto stream = std::min_element(streams.begin(), streams.end()) - streams.begin();
                set_stream(part, stream + 1);
                streams[stream] += part.weight;
            }
        }
    }

    void set_stream(const partition& p, std::size_t n)
    {
        for(auto ins : p.instructions)
            if(iweights[ins] > 0)
                set_stream(ins, n);
    }

    void set_stream(instruction_ref ins, std::size_t n)
    {
        assert(iweights[ins] > 0);
        ins2stream[ins] = n;
    }

    std::size_t get_stream(instruction_ref ins) const { return ins2stream.at(ins); }

    bool has_stream(instruction_ref ins) const { return contains(ins2stream, ins); }

    template <class F>
    bool different(F f, std::size_t stream) const
    {
        bool result = false;
        f([&](auto s) {
            if(s != stream)
            {
                result = true;
                return false;
            }
            // cppcheck-suppress uselessAssignmentArg
            stream = s;
            return true;
        });
        return result;
    }

    template <class F>
    bool different(F f) const
    {
        bool result = false;
        f([&](auto s) {
            result = this->different(f, s);
            return false;
        });
        return result;
    }

    template <class Selector>
    auto get_streams_from(instruction_ref start, Selector select) const
    {
        return [=](auto f) {
            return fix<bool>([&](auto self, auto ins) {
                for(auto i : select(ins))
                {
                    if(iweights.at(i) == 0)
                    {
                        if(not self(i))
                            return false;
                    }
                    else
                    {
                        if(not f(this->get_stream(i)))
                            return false;
                    }
                }
                return true;
            })(start);
        };
    }

    std::unordered_set<std::size_t> get_streams(instruction_ref ins) const
    {
        if(has_stream(ins))
            return {get_stream(ins)};
        std::unordered_set<std::size_t> result;
        get_streams_from(ins, get_inputs())([&](auto s) {
            result.insert(s);
            return true;
        });
        return result;
    }

    template <class... Ts>
    bool is_merge_point(instruction_ref ins, Ts... xs) const
    {
        return different(get_streams_from(ins, get_inputs()), xs...);
    }

    template <class... Ts>
    bool is_split_point(instruction_ref ins, Ts... xs) const
    {
        return different(get_streams_from(ins, get_outputs()), xs...);
    }

    std::vector<instruction_ref> get_recorded_instructions(instruction_ref start)
    {
        std::vector<instruction_ref> result;
        std::unordered_map<std::size_t, instruction_ref> m;
        fix([&](auto self, auto ins) {
            for(auto i : ins->inputs())
            {
                if(iweights.at(i) == 0)
                {
                    self(i);
                    continue;
                }
                auto stream = this->get_stream(i);
                if(not contains(m, stream))
                    m[stream] = i;
                else
                    m[stream] = std::min(m[stream], i, by(std::less<>{}, [&](auto x) {
                                             return std::distance(x, start);
                                         }));
            }
        })(start);
        std::transform(m.begin(), m.end(), std::back_inserter(result), [](auto&& p) {
            return p.second;
        });
        return result;
    }

    std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>>
    find_concurrent_instructions(program& p)
    {
        std::unordered_map<instruction_ref, std::vector<std::vector<instruction_ref>>> result;
        std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> merge_from;
        result.reserve(p.size());
        merge_from.reserve(p.size());
        for(auto ins : reverse_iterator_for(p))
        {
            for(auto&& arg : ins->outputs())
            {
                if(is_merge_point(arg))
                    merge_from[ins].insert(arg);
                merge_from[ins].insert(merge_from[arg].begin(), merge_from[arg].end());
            }

            auto streams = this->get_streams(ins);
            // Collect concur instructions for each merge point.
            for(auto& merge : merge_from[ins])
            {
                for(auto stream : streams)
                {
                    if(result[merge].size() <= stream)
                        result[merge].resize(stream + 1);
                    auto&& r = result[merge][stream];
                    r.push_back(ins);
                    // Copy inputs if they dont have a stream(and are not a builtin and context
                    // free). Inputs without a stream can have a implicit dependency
                    std::copy_if(ins->inputs().begin(),
                                 ins->inputs().end(),
                                 std::back_inserter(r),
                                 [&](auto x) {
                                     return not this->has_stream(x) and
                                            not is_context_free(x->get_operator()) and
                                            x->name().front() != '@';
                                 });
                }
            }
        }
        return result;
    }

    std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>>
    get_conflicts(program& p)
    {
        std::unordered_map<instruction_ref, std::unordered_set<instruction_ref>> conflict_table;
        auto concur_ins = this->find_concurrent_instructions(p);
        for(auto&& merge : concur_ins)
        {
            dfor(merge.second.size(), merge.second.size())([&](auto i, auto j) {
                if(i == j)
                    return;
                for(auto ins1 : merge.second[i])
                {
                    auto p1 = std::distance(ins1, merge.first);
                    for(auto ins2 : merge.second[j])
                    {
                        if(ins1 == ins2)
                            continue;
                        auto p2 = std::distance(ins2, merge.first);
                        // The smaller distance means the instruction occurs later
                        if(p1 > p2)
                            conflict_table[ins2].insert(ins1);
                        else
                            conflict_table[ins1].insert(ins2);
                    }
                }
            });
        }
        // Remove duplicates
        for(auto&& ip : conflict_table)
        {
            auto ins1 = ip.first;
            for(auto ins2 : ip.second)
                if(contains(conflict_table[ins2], ins1))
                    conflict_table[ins2].erase(ins1);
        }
        return conflict_table;
    }
};

void schedule::apply(program& p) const
{
    stream_info si;
    auto last = std::prev(p.end());
    si.accumulate_weights(last, model);
    si.assign_streams(p, model.concurrency());

    if(enabled(MIGRAPHX_TRACE_COMPILE{}))
    {
        p.annotate(std::cout, [&](auto ins) {
            std::cout << ":";
            std::cout << " weight=" << si.weights.at(ins);
            std::cout << " input={";
            si.get_streams_from(ins, get_inputs())([&](auto s) {
                std::cout << s << ",";
                return true;
            });
            std::cout << "}";
            if(si.has_stream(ins))
                std::cout << " stream=" << si.get_stream(ins);
        });
        std::cout << std::endl;
    }

    // Schedule instructions
    std::size_t wait_id = 0;
    std::unordered_map<instruction_ref, std::size_t> ins2wait;
    std::unordered_map<std::size_t, std::unordered_set<std::size_t>> waited_for;
    std::unordered_map<instruction_ref, std::unordered_set<std::size_t>> ins2waited;
    ins2wait.reserve(p.size());
    ins2waited.reserve(p.size());
    for(auto ins : iterator_for(p))
    {
        // Only schedule instructions that have a stream
        if(not si.has_stream(ins))
            continue;
        assert(si.weights[ins] > 0);
        // Schedule instruction on the stream
        auto stream = si.get_stream(ins);
        assert(stream < model.concurrency());
        model.sched(p, ins, stream);
        // Insert wait instructions
        if(si.is_merge_point(ins, stream))
        {
            for(auto i : si.get_recorded_instructions(ins))
            {
                if(not si.has_stream(i))
                    continue;
                auto istream = si.get_stream(i);
                if(stream == istream)
                    continue;
                // Create a new event if it hasn't been recorded
                if(not contains(ins2wait, i))
                {
                    ins2wait[i] = wait_id;
                    model.record(p, i, wait_id);
                    wait_id++;
                }
                auto w = ins2wait.at(i);
                // If we already waited for the event on this stream then dont
                // insert another wait event
                if(not contains(waited_for[stream], w))
                    model.wait(p, ins, w);
                // Store the event as waited
                waited_for[stream].insert(w);
                // Store all wait events that have been waited on prior to the recorded instruction
                waited_for[stream].insert(ins2waited[i].begin(), ins2waited[i].end());
            }
        }
        // Store wait events that have already been waited on
        if(si.is_split_point(ins, stream))
        {
            ins2waited[ins] = waited_for[stream];
        }
    }

    // Add memory conflicts
    auto conflict_table = si.get_conflicts(p);
    for(auto&& ip : conflict_table)
    {
        if(ip.second.empty())
            continue;
        std::vector<instruction_ref> args;
        args.push_back(ip.first);
        args.insert(args.end(), ip.second.begin(), ip.second.end());
        p.insert_instruction(std::next(ip.first), op::identity{}, args);
    }
}

} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
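In assign_streams above, the critical partition is pinned to stream 0 and every remaining partition is placed on whichever other stream currently has the smallest accumulated weight. A small standalone sketch of that greedy step follows; the concurrency value and partition weights are made up for illustration, while the std::min_element selection mirrors the code above.

    #include <algorithm>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    int main()
    {
        const std::size_t concurrency = 4; // stream 0 is reserved for the critical partition
        std::vector<std::size_t> streams(concurrency - 1, 0); // load per non-critical stream

        // Hypothetical partition weights, already sorted heaviest-first,
        // as assign_streams does with by(std::greater<>{}, ...).
        std::vector<std::size_t> partition_weights = {9, 7, 4, 3, 2};

        for(auto w : partition_weights)
        {
            // Pick the least-loaded stream, mirroring the std::min_element step.
            auto stream = std::min_element(streams.begin(), streams.end()) - streams.begin();
            std::cout << "weight " << w << " -> stream " << (stream + 1) << "\n";
            streams[stream] += w;
        }
        return 0;
    }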
src/targets/gpu/CMakeLists.txt

@@ -64,6 +64,7 @@ add_library(migraphx_gpu
     pad.cpp
     gather.cpp
     lrn.cpp
+    schedule_model.cpp
 )
 set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
 rocm_clang_tidy_check(migraphx_gpu)
src/targets/gpu/convolution.cpp

@@ -21,7 +21,7 @@ argument miopen_convolution::compute(context& ctx,
     float alpha = 1;
     float beta  = 0;
-    miopenConvolutionForward(ctx.get_stream().get_miopen(),
+    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
                              &alpha,
                              x_desc.get(),
                              args[0].implicit(),

@@ -34,6 +34,8 @@ argument miopen_convolution::compute(context& ctx,
                              args[3].implicit(),
                              args[2].implicit(),
                              args[2].get_shape().bytes());
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("Running convolution failed");
     return args[3];
 }

@@ -89,8 +91,11 @@ void miopen_convolution::finalize(context& ctx,
 {
     if(handle == ctx.get_stream().get_miopen())
         return;
-    // TODO: Check that workspace hasn't changed
-    compile(ctx, output_shape, std::move(inputs));
+    // Check that workspace hasn't changed
+    auto size = inputs.at(2).bytes();
+    auto ws   = compile(ctx, output_shape, std::move(inputs));
+    if(ws.bytes() > size)
+        MIGRAPHX_THROW("Workspace has changed during finalization.");
 }

 } // namespace gpu
src/targets/gpu/hip.cpp

@@ -43,6 +43,7 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
 template <class T>
 std::vector<T> read_from_gpu(const void* x, std::size_t sz)
 {
+    gpu_sync();
     std::vector<T> result(sz);
     auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
     if(status != hipSuccess)

@@ -52,6 +53,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
 hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
 {
+    gpu_sync();
     auto result = allocate_gpu(sz, host);
     auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
     if(status != hipSuccess)
src/targets/gpu/include/migraphx/gpu/context.hpp

@@ -13,11 +13,17 @@ namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NULL_STREAM)

+using hip_event_ptr = MIGRAPHX_MANAGE_PTR(hipEvent_t, hipEventDestroy);
+
 struct hip_device
 {
     hip_device() { add_stream(); }
     hip_device(std::size_t id) : device_id(id) { add_stream(); }
+    hip_device(std::size_t id, std::size_t n) : device_id(id)
+    {
+        for(std::size_t i = 0; i < n; i++)
+            add_stream();
+    }

     struct stream
     {

@@ -32,7 +38,7 @@ struct hip_device
         static hip_stream_ptr create_stream()
         {
             hipStream_t result = nullptr;
-            auto status        = hipStreamCreate(&result);
+            auto status = hipStreamCreateWithFlags(&result, hipStreamNonBlocking);
             if(status != hipSuccess)
                 MIGRAPHX_THROW("Failed to allocate stream");
             return hip_stream_ptr{result};

@@ -77,6 +83,22 @@ struct hip_device
             return rbhandle.get();
         }

+        void wait(hipEvent_t event)
+        {
+            setup();
+            auto status = hipStreamWaitEvent(get(), event, 0);
+            if(status != hipSuccess)
+                MIGRAPHX_THROW("Failed to wait.");
+        }
+
+        void record(hipEvent_t event)
+        {
+            setup();
+            auto status = hipEventRecord(event, get());
+            if(status != hipSuccess)
+                MIGRAPHX_THROW("Failed to record.");
+        }
+
         private:
         std::size_t id            = 0;
         shared<hip_stream_ptr> s  = nullptr;

@@ -88,8 +110,14 @@ struct hip_device
     stream& get_stream() { return streams.at(current_stream); }

+    stream& get_stream(std::size_t n) { return streams.at(n); }
+
+    void set_stream(std::size_t n) { current_stream = n; }
+
+    std::size_t nstreams() const { return streams.size(); }
+
+    std::size_t stream_id() const { return current_stream; }
+
     private:
     std::size_t device_id      = 0;
     std::size_t current_stream = 0;

@@ -98,7 +126,10 @@ struct hip_device
 struct context
 {
-    context(std::size_t n = 0) : current_device(std::make_shared<hip_device>(n)) {}
+    context(std::size_t device_id = 0, std::size_t n = 4)
+        : current_device(std::make_shared<hip_device>(device_id, n))
+    {
+    }

     hip_device& get_current_device()
     {

@@ -107,13 +138,34 @@ struct context
     }

     hip_device::stream& get_stream() { return get_current_device().get_stream(); }
+    hip_device::stream& get_stream(std::size_t n) { return get_current_device().get_stream(n); }
+
+    void set_stream(std::size_t n) { get_current_device().set_stream(n); }
+
+    void create_events(std::size_t num_of_events)
+    {
+        for(std::size_t i = events.size(); i < num_of_events + 1; ++i)
+            events.emplace_back(create_event());
+    }
+
+    hipEvent_t get_event(std::size_t i) const { return events.at(i).get(); }

     std::vector<argument> literals{};

     void finish() const { gpu_sync(); }

+    static hip_event_ptr create_event()
+    {
+        hipEvent_t event;
+        auto status = hipEventCreateWithFlags(&event, hipEventDisableTiming);
+        if(status != hipSuccess)
+            MIGRAPHX_THROW("Failed to create event");
+        return hip_event_ptr{event};
+    }
+
     private:
     // TODO: Make this a vector to support multiple devices
     std::shared_ptr<hip_device> current_device;
+    std::vector<shared<hip_event_ptr>> events;
 };

 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
src/targets/gpu/include/migraphx/gpu/schedule_model.hpp (new file, mode 100644)

#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_SCHEDULE_MODEL_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_SCHEDULE_MODEL_HPP

#include <migraphx/config.hpp>
#include <migraphx/instruction_ref.hpp>
#include <vector>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {

struct program;
struct operation;

namespace gpu {

struct schedule_model
{
    std::size_t streams = 0;
    std::size_t concurrency() const;
    void sched(program& p, instruction_ref ins, std::size_t n) const;
    void wait(program& p, instruction_ref ins, std::size_t wait_id) const;
    void record(program& p, instruction_ref ins, std::size_t wait_id) const;
    std::size_t weight(const operation& op) const;
};

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
src/targets/gpu/schedule_model.cpp (new file, mode 100644)

#include <migraphx/gpu/schedule_model.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/program.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/operation.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {

struct record_event
{
    std::size_t event = 0;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.event, "event"));
    }

    std::string name() const { return "gpu::record_event"; }
    shape compute_shape(const std::vector<shape>&) const { return {}; }
    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
    {
        ctx.get_stream().record(ctx.get_event(event));
        return {};
    }
    void finalize(context& ctx, const shape&, const std::vector<shape>&)
    {
        ctx.create_events(event);
    }
};

struct wait_event
{
    std::size_t event = 0;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.event, "event"));
    }

    std::string name() const { return "gpu::wait_event"; }
    shape compute_shape(const std::vector<shape>&) const { return {}; }
    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
    {
        ctx.get_stream().wait(ctx.get_event(event));
        return {};
    }
};

struct set_stream
{
    std::size_t stream = 0;

    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
        return pack(f(self.stream, "stream"));
    }

    std::string name() const { return "gpu::set_stream"; }
    shape compute_shape(const std::vector<shape>&) const { return {}; }
    argument compute(context& ctx, const shape&, const std::vector<argument>&) const
    {
        ctx.set_stream(stream);
        return {};
    }
    void finalize(context& ctx, const shape&, const std::vector<shape>&)
    {
        ctx.set_stream(stream);
    }
};

std::size_t schedule_model::concurrency() const { return streams; }

void schedule_model::sched(program& p, instruction_ref ins, std::size_t n) const
{
    auto last_stream =
        std::find_if(std::make_reverse_iterator(ins),
                     std::make_reverse_iterator(p.begin()),
                     [&](auto&& i) { return i.name() == "gpu::set_stream"; });
    if(last_stream != std::make_reverse_iterator(p.begin()))
    {
        auto&& op = any_cast<set_stream>(last_stream->get_operator());
        // If the same stream was set earlier then skip
        if(op.stream == n)
            return;
    }
    p.insert_instruction(ins, set_stream{n});
}

void schedule_model::wait(program& p, instruction_ref ins, std::size_t wait_id) const
{
    p.insert_instruction(ins, wait_event{wait_id});
}

void schedule_model::record(program& p, instruction_ref ins, std::size_t wait_id) const
{
    p.insert_instruction(std::next(ins), record_event{wait_id});
}

static std::unordered_map<std::string, std::size_t> create_weight_map()
{
    return {
        {"hip::load_literal", 0},
        {"hip::allocate", 0},
        {"gpu::convolution", 4},
        {"gpu::conv_bias_relu", 4},
        {"gpu::pooling", 2},
        {"gpu::gemm", 2},
        {"gpu::concat", 1},
        {"hip::add_relu", 2},
    };
}

static const std::unordered_map<std::string, std::size_t>& weight_map()
{
    static std::unordered_map<std::string, std::size_t> m = create_weight_map();
    return m;
}

std::size_t schedule_model::weight(const operation& op) const
{
    if(weight_map().count(op.name()) == 0)
    {
        return 1;
    }
    return weight_map().at(op.name());
}

} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
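The GPU model gives each operator a coarse cost: names missing from the table fall back to a weight of 1, and zero-weight operators such as hip::allocate are skipped when streams are assigned (stream_info::set_stream only tags instructions with nonzero weight). A self-contained sketch of the same lookup-with-default; the table entries are copied from create_weight_map() above, and the free function and main are illustrative only.

    #include <iostream>
    #include <string>
    #include <unordered_map>

    // Weight table copied from create_weight_map() above.
    static const std::unordered_map<std::string, std::size_t>& weight_map()
    {
        static const std::unordered_map<std::string, std::size_t> m = {
            {"hip::load_literal", 0},
            {"hip::allocate", 0},
            {"gpu::convolution", 4},
            {"gpu::conv_bias_relu", 4},
            {"gpu::pooling", 2},
            {"gpu::gemm", 2},
            {"gpu::concat", 1},
            {"hip::add_relu", 2},
        };
        return m;
    }

    // Unknown operators default to a weight of 1, as in schedule_model::weight.
    std::size_t weight(const std::string& op_name)
    {
        auto it = weight_map().find(op_name);
        return it == weight_map().end() ? 1 : it->second;
    }

    int main()
    {
        std::cout << weight("gpu::convolution") << "\n"; // 4
        std::cout << weight("gpu::relu") << "\n";        // 1 (not in the table)
        return 0;
    }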