Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
349635ce
Unverified
Commit
349635ce
authored
Aug 27, 2022
by
Paul Fultz II
Committed by
GitHub
Aug 27, 2022
Browse files
Show kernel time when using gpu-driver (#1289)
* Track kernel time
parent
8752875a
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
119 additions
and
27 deletions
+119
-27
src/targets/gpu/code_object_op.cpp
src/targets/gpu/code_object_op.cpp
+2
-1
src/targets/gpu/driver/compile_op.cpp
src/targets/gpu/driver/compile_op.cpp
+5
-2
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
+2
-1
src/targets/gpu/driver/perf.cpp
src/targets/gpu/driver/perf.cpp
+19
-10
src/targets/gpu/driver/run_op.cpp
src/targets/gpu/driver/run_op.cpp
+2
-2
src/targets/gpu/include/migraphx/gpu/context.hpp
src/targets/gpu/include/migraphx/gpu/context.hpp
+48
-0
src/targets/gpu/include/migraphx/gpu/hip.hpp
src/targets/gpu/include/migraphx/gpu/hip.hpp
+2
-0
src/targets/gpu/include/migraphx/gpu/kernel.hpp
src/targets/gpu/include/migraphx/gpu/kernel.hpp
+9
-4
src/targets/gpu/kernel.cpp
src/targets/gpu/kernel.cpp
+30
-7
No files found.
src/targets/gpu/code_object_op.cpp
View file @
349635ce
...
@@ -51,7 +51,8 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
...
@@ -51,7 +51,8 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
std
::
vector
<
void
*>
kargs
(
args
.
size
());
std
::
vector
<
void
*>
kargs
(
args
.
size
());
std
::
transform
(
std
::
transform
(
args
.
begin
(),
args
.
end
(),
kargs
.
begin
(),
[](
const
argument
&
a
)
{
return
a
.
data
();
});
args
.
begin
(),
args
.
end
(),
kargs
.
begin
(),
[](
const
argument
&
a
)
{
return
a
.
data
();
});
k
.
launch
(
ctx
.
get_stream
().
get
(),
global
,
local
,
std
::
move
(
kargs
));
auto
[
start
,
stop
]
=
ctx
.
get_perf_events
();
k
.
launch
(
ctx
.
get_stream
().
get
(),
global
,
local
,
std
::
move
(
kargs
),
start
,
stop
);
return
args
[
get_output_arg
(
args
.
size
())];
return
args
[
get_output_arg
(
args
.
size
())];
}
}
void
code_object_op
::
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
void
code_object_op
::
finalize
(
context
&
,
const
shape
&
,
const
std
::
vector
<
shape
>&
)
...
...
src/targets/gpu/driver/compile_op.cpp
View file @
349635ce
...
@@ -38,8 +38,11 @@ struct compile_op : action<compile_op>
...
@@ -38,8 +38,11 @@ struct compile_op : action<compile_op>
context
ctx
;
context
ctx
;
auto
inputs
=
p
.
parse_shapes
(
v
.
at
(
"inputs"
));
auto
inputs
=
p
.
parse_shapes
(
v
.
at
(
"inputs"
));
auto
op
=
gpu
::
compile_op
(
v
.
at
(
"name"
).
to
<
std
::
string
>
(),
ctx
,
inputs
,
v
);
auto
op
=
gpu
::
compile_op
(
v
.
at
(
"name"
).
to
<
std
::
string
>
(),
ctx
,
inputs
,
v
);
double
t
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
auto
[
host_time
,
device_time
]
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
t
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
op
<<
": "
<<
host_time
<<
"ms"
;
if
(
device_time
>
0
)
std
::
cout
<<
", "
<<
device_time
<<
"ms"
;
std
::
cout
<<
std
::
endl
;
}
}
};
};
...
...
src/targets/gpu/driver/include/migraphx/gpu/driver/perf.hpp
View file @
349635ce
...
@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -33,7 +33,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
driver
{
namespace
driver
{
double
time_op
(
context
&
ctx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
=
100
);
std
::
pair
<
double
,
double
>
time_op
(
context
&
ictx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
=
100
);
}
// namespace driver
}
// namespace driver
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/driver/perf.cpp
View file @
349635ce
...
@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
...
@@ -42,22 +42,31 @@ std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsig
}
}
using
milliseconds
=
std
::
chrono
::
duration
<
double
,
std
::
milli
>
;
using
milliseconds
=
std
::
chrono
::
duration
<
double
,
std
::
milli
>
;
double
time_op
(
context
&
ctx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
)
std
::
pair
<
double
,
double
>
time_op
(
context
&
ictx
,
operation
op
,
const
std
::
vector
<
shape
>&
inputs
,
int
n
)
{
{
// TODO: Use std::ref
// TODO: Use std::ref
migraphx
::
context
gctx
=
ctx
;
migraphx
::
context
ctx
=
ictx
;
auto
&
gctx
=
any_cast
<
migraphx
::
gpu
::
context
>
(
ctx
);
auto
output
=
op
.
compute_shape
(
inputs
);
auto
output
=
op
.
compute_shape
(
inputs
);
op
.
finalize
(
g
ctx
,
output
,
inputs
);
op
.
finalize
(
ctx
,
output
,
inputs
);
auto
args
=
generate_arguments
(
inputs
);
auto
args
=
generate_arguments
(
inputs
);
auto
run
=
[
&
]
{
auto
run
=
[
&
]
{
op
.
compute
(
g
ctx
,
output
,
args
);
op
.
compute
(
ctx
,
output
,
args
);
g
ctx
.
finish
();
ctx
.
finish
();
};
};
gctx
.
enable_perf_measurement
();
run
();
run
();
auto
r
=
range
(
n
);
double
host_time
=
0.0
;
double
t
=
std
::
accumulate
(
double
device_time
=
0.0
;
r
.
begin
(),
r
.
end
(),
double
{
0.0
},
[
&
](
auto
x
,
auto
)
{
return
x
+
time
<
milliseconds
>
(
run
);
});
for
(
auto
i
:
range
(
n
))
return
t
/
n
;
{
(
void
)
i
;
host_time
+=
time
<
milliseconds
>
(
run
);
device_time
+=
gctx
.
get_elapsed_ms
();
}
return
std
::
make_pair
(
host_time
/
n
,
device_time
/
n
);
}
}
}
// namespace driver
}
// namespace driver
...
...
src/targets/gpu/driver/run_op.cpp
View file @
349635ce
...
@@ -43,8 +43,8 @@ struct run_op : action<run_op>
...
@@ -43,8 +43,8 @@ struct run_op : action<run_op>
auto
op
=
make_op
(
name
);
auto
op
=
make_op
(
name
);
if
(
v
.
contains
(
"fields"
))
if
(
v
.
contains
(
"fields"
))
op
.
from_value
(
v
.
at
(
"fields"
));
op
.
from_value
(
v
.
at
(
"fields"
));
double
t
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
auto
[
host_time
,
device_time
]
=
time_op
(
ctx
,
op
,
inputs
,
p
.
get
(
v
,
"iterations"
,
100
));
std
::
cout
<<
op
<<
": "
<<
t
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
op
<<
": "
<<
host_time
<<
"ms"
<<
std
::
endl
;
}
}
};
};
...
...
src/targets/gpu/include/migraphx/gpu/context.hpp
View file @
349635ce
...
@@ -244,6 +244,15 @@ struct context
...
@@ -244,6 +244,15 @@ struct context
return
hip_event_ptr
{
event
};
return
hip_event_ptr
{
event
};
}
}
static
hip_event_ptr
create_event_for_timing
()
{
hipEvent_t
event
;
auto
status
=
hipEventCreate
(
&
event
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to create event"
);
return
hip_event_ptr
{
event
};
}
value
to_value
()
const
value
to_value
()
const
{
{
value
result
;
value
result
;
...
@@ -267,10 +276,49 @@ struct context
...
@@ -267,10 +276,49 @@ struct context
any_ptr
get_queue
()
{
return
get_stream
().
get
();
}
any_ptr
get_queue
()
{
return
get_stream
().
get
();
}
void
enable_perf_measurement
(
bool
b
=
true
)
{
if
(
b
)
{
start_event
=
create_event_for_timing
();
stop_event
=
create_event_for_timing
();
get_stream
().
record
(
start_event
.
get
());
get_stream
().
record
(
stop_event
.
get
());
}
else
{
start_event
=
nullptr
;
stop_event
=
nullptr
;
}
measure_perf
=
b
;
}
std
::
pair
<
hipEvent_t
,
hipEvent_t
>
get_perf_events
()
const
{
if
(
measure_perf
)
return
std
::
make_pair
(
start_event
.
get
(),
stop_event
.
get
());
return
std
::
make_pair
(
nullptr
,
nullptr
);
}
float
get_elapsed_ms
()
const
{
float
result
=
0
;
if
(
start_event
!=
nullptr
and
stop_event
!=
nullptr
)
{
auto
status
=
hipEventElapsedTime
(
&
result
,
start_event
.
get
(),
stop_event
.
get
());
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed hipEventElapsedTime: "
+
hip_error
(
status
));
}
return
result
;
}
private:
private:
// TODO: Make this a vector to support multiple devices
// TODO: Make this a vector to support multiple devices
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
shared_ptr
<
hip_device
>
current_device
;
std
::
vector
<
shared
<
hip_event_ptr
>>
events
;
std
::
vector
<
shared
<
hip_event_ptr
>>
events
;
bool
measure_perf
=
false
;
shared
<
hip_event_ptr
>
start_event
=
nullptr
;
shared
<
hip_event_ptr
>
stop_event
=
nullptr
;
};
};
inline
void
migraphx_to_value
(
value
&
v
,
const
context
&
ctx
)
{
v
=
ctx
.
to_value
();
}
inline
void
migraphx_to_value
(
value
&
v
,
const
context
&
ctx
)
{
v
=
ctx
.
to_value
();
}
...
...
src/targets/gpu/include/migraphx/gpu/hip.hpp
View file @
349635ce
...
@@ -37,6 +37,8 @@ namespace gpu {
...
@@ -37,6 +37,8 @@ namespace gpu {
struct
context
;
struct
context
;
std
::
string
hip_error
(
int
error
);
argument
allocate_gpu
(
const
shape
&
s
,
bool
host
=
false
);
argument
allocate_gpu
(
const
shape
&
s
,
bool
host
=
false
);
argument
register_on_gpu
(
const
argument
&
arg
);
argument
register_on_gpu
(
const
argument
&
arg
);
...
...
src/targets/gpu/include/migraphx/gpu/kernel.hpp
View file @
349635ce
...
@@ -50,17 +50,22 @@ struct kernel
...
@@ -50,17 +50,22 @@ struct kernel
void
launch
(
hipStream_t
stream
,
void
launch
(
hipStream_t
stream
,
std
::
size_t
global
,
std
::
size_t
global
,
std
::
size_t
local
,
std
::
size_t
local
,
const
std
::
vector
<
kernel_argument
>&
args
)
const
;
const
std
::
vector
<
kernel_argument
>&
args
,
hipEvent_t
start
=
nullptr
,
hipEvent_t
stop
=
nullptr
)
const
;
void
launch
(
hipStream_t
stream
,
void
launch
(
hipStream_t
stream
,
std
::
size_t
global
,
std
::
size_t
global
,
std
::
size_t
local
,
std
::
size_t
local
,
std
::
vector
<
void
*>
args
)
const
;
std
::
vector
<
void
*>
args
,
hipEvent_t
start
=
nullptr
,
hipEvent_t
stop
=
nullptr
)
const
;
auto
launch
(
hipStream_t
stream
,
std
::
size_t
global
,
std
::
size_t
local
)
const
template
<
class
...
Ts
>
auto
launch
(
hipStream_t
stream
,
std
::
size_t
global
,
std
::
size_t
local
,
Ts
...
zs
)
const
{
{
return
[
=
](
auto
&&
...
xs
)
{
return
[
=
](
auto
&&
...
xs
)
{
launch
(
stream
,
global
,
local
,
std
::
vector
<
kernel_argument
>
{
xs
...});
launch
(
stream
,
global
,
local
,
std
::
vector
<
kernel_argument
>
{
xs
...}
,
zs
...
);
};
};
}
}
...
...
src/targets/gpu/kernel.cpp
View file @
349635ce
...
@@ -80,7 +80,9 @@ void launch_kernel(hipFunction_t fun,
...
@@ -80,7 +80,9 @@ void launch_kernel(hipFunction_t fun,
std
::
size_t
global
,
std
::
size_t
global
,
std
::
size_t
local
,
std
::
size_t
local
,
void
*
kernargs
,
void
*
kernargs
,
std
::
size_t
size
)
std
::
size_t
size
,
hipEvent_t
start
,
hipEvent_t
stop
)
{
{
assert
(
global
>
0
);
assert
(
global
>
0
);
assert
(
local
>
0
);
assert
(
local
>
0
);
...
@@ -97,34 +99,55 @@ void launch_kernel(hipFunction_t fun,
...
@@ -97,34 +99,55 @@ void launch_kernel(hipFunction_t fun,
#endif
#endif
};
};
auto
status
=
hipExtModuleLaunchKernel
(
auto
status
=
hipExtModuleLaunchKernel
(
fun
,
fun
,
global
,
1
,
1
,
local
,
1
,
1
,
0
,
stream
,
nullptr
,
reinterpret_cast
<
void
**>
(
&
config
));
global
,
1
,
1
,
local
,
1
,
1
,
0
,
stream
,
nullptr
,
reinterpret_cast
<
void
**>
(
&
config
),
start
,
stop
);
if
(
status
!=
hipSuccess
)
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to launch kernel: "
+
hip_error
(
status
));
MIGRAPHX_THROW
(
"Failed to launch kernel: "
+
hip_error
(
status
));
if
(
stop
!=
nullptr
)
{
status
=
hipEventSynchronize
(
stop
);
if
(
status
!=
hipSuccess
)
MIGRAPHX_THROW
(
"Failed to sync event: "
+
hip_error
(
status
));
}
}
}
void
kernel
::
launch
(
hipStream_t
stream
,
void
kernel
::
launch
(
hipStream_t
stream
,
std
::
size_t
global
,
std
::
size_t
global
,
std
::
size_t
local
,
std
::
size_t
local
,
std
::
vector
<
void
*>
args
)
const
std
::
vector
<
void
*>
args
,
hipEvent_t
start
,
hipEvent_t
stop
)
const
{
{
assert
(
impl
!=
nullptr
);
assert
(
impl
!=
nullptr
);
void
*
kernargs
=
args
.
data
();
void
*
kernargs
=
args
.
data
();
std
::
size_t
size
=
args
.
size
()
*
sizeof
(
void
*
);
std
::
size_t
size
=
args
.
size
()
*
sizeof
(
void
*
);
launch_kernel
(
impl
->
fun
,
stream
,
global
,
local
,
kernargs
,
size
);
launch_kernel
(
impl
->
fun
,
stream
,
global
,
local
,
kernargs
,
size
,
start
,
stop
);
}
}
void
kernel
::
launch
(
hipStream_t
stream
,
void
kernel
::
launch
(
hipStream_t
stream
,
std
::
size_t
global
,
std
::
size_t
global
,
std
::
size_t
local
,
std
::
size_t
local
,
const
std
::
vector
<
kernel_argument
>&
args
)
const
const
std
::
vector
<
kernel_argument
>&
args
,
hipEvent_t
start
,
hipEvent_t
stop
)
const
{
{
assert
(
impl
!=
nullptr
);
assert
(
impl
!=
nullptr
);
std
::
vector
<
char
>
kernargs
=
pack_args
(
args
);
std
::
vector
<
char
>
kernargs
=
pack_args
(
args
);
std
::
size_t
size
=
kernargs
.
size
();
std
::
size_t
size
=
kernargs
.
size
();
launch_kernel
(
impl
->
fun
,
stream
,
global
,
local
,
kernargs
.
data
(),
size
);
launch_kernel
(
impl
->
fun
,
stream
,
global
,
local
,
kernargs
.
data
(),
size
,
start
,
stop
);
}
}
}
// namespace gpu
}
// namespace gpu
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment