Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8528d8eb
Unverified
Commit
8528d8eb
authored
Feb 09, 2023
by
Peter Eastman
Committed by
GitHub
Feb 09, 2023
Browse files
Profiling of OpenCL kernels (#3954)
* Profiling of OpenCL kernels * Minor improvements to profiling
parent
58ee361f
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
45 additions
and
1 deletion
+45
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+4
-0
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+41
-1
No files found.
platforms/opencl/include/OpenCLContext.h
View file @
8528d8eb
...
@@ -666,6 +666,7 @@ public:
...
@@ -666,6 +666,7 @@ public:
void
flushQueue
();
void
flushQueue
();
private:
private:
OpenCLPlatform
::
PlatformData
&
platformData
;
OpenCLPlatform
::
PlatformData
&
platformData
;
void
printProfilingEvents
();
int
deviceIndex
;
int
deviceIndex
;
int
platformIndex
;
int
platformIndex
;
int
contextIndex
;
int
contextIndex
;
...
@@ -708,6 +709,9 @@ private:
...
@@ -708,6 +709,9 @@ private:
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
map
<
std
::
string
,
double
>
energyParamDerivWorkspace
;
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
cl
::
Event
>
profilingEvents
;
std
::
vector
<
std
::
string
>
profilingKernelNames
;
cl_ulong
profileStartTime
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLExpressionUtilities
*
expression
;
OpenCLExpressionUtilities
*
expression
;
OpenCLBondedUtilities
*
bonded
;
OpenCLBondedUtilities
*
bonded
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
8528d8eb
...
@@ -52,6 +52,10 @@
...
@@ -52,6 +52,10 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
// Uncomment the following line to enable profiling of all kernel launches. The results are written
// to stdout in the JSON format used by https://ui.perfetto.dev.
//#define ENABLE_PROFILING
#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
#ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#endif
#endif
...
@@ -78,7 +82,7 @@ static bool isSupported(cl::Platform platform) {
...
@@ -78,7 +82,7 @@ static bool isSupported(cl::Platform platform) {
}
}
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
ComputeContext
(
system
),
platformData
(
platformData
),
numForceBuffers
(
0
),
hasAssignedPosqCharges
(
false
),
ComputeContext
(
system
),
platformData
(
platformData
),
numForceBuffers
(
0
),
hasAssignedPosqCharges
(
false
),
profileStartTime
(
0
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
pinnedBuffer
(
NULL
)
{
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
pinnedBuffer
(
NULL
)
{
if
(
precision
==
"single"
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useDoublePrecision
=
false
;
...
@@ -293,7 +297,12 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -293,7 +297,12 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
cl_context_properties
cprops
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
bestPlatform
](),
0
};
cl_context_properties
cprops
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
bestPlatform
](),
0
};
if
(
originalContext
==
NULL
)
{
if
(
originalContext
==
NULL
)
{
context
=
cl
::
Context
(
contextDevices
,
cprops
,
errorCallback
);
context
=
cl
::
Context
(
contextDevices
,
cprops
,
errorCallback
);
#ifdef ENABLE_PROFILING
defaultQueue
=
cl
::
CommandQueue
(
context
,
device
,
CL_QUEUE_PROFILING_ENABLE
);
printf
(
"[ "
);
#else
defaultQueue
=
cl
::
CommandQueue
(
context
,
device
);
defaultQueue
=
cl
::
CommandQueue
(
context
,
device
);
#endif
}
}
else
{
else
{
context
=
originalContext
->
context
;
context
=
originalContext
->
context
;
...
@@ -495,6 +504,10 @@ OpenCLContext::~OpenCLContext() {
...
@@ -495,6 +504,10 @@ OpenCLContext::~OpenCLContext() {
delete
bonded
;
delete
bonded
;
if
(
nonbonded
!=
NULL
)
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
delete
nonbonded
;
#ifdef ENABLE_PROFILING
printProfilingEvents
();
printf
(
" ]
\n
"
);
#endif
}
}
void
OpenCLContext
::
initialize
()
{
void
OpenCLContext
::
initialize
()
{
...
@@ -675,7 +688,16 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
...
@@ -675,7 +688,16 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
blockSize
=
ThreadBlockSize
;
blockSize
=
ThreadBlockSize
;
int
size
=
std
::
min
((
workUnits
+
blockSize
-
1
)
/
blockSize
,
numThreadBlocks
)
*
blockSize
;
int
size
=
std
::
min
((
workUnits
+
blockSize
-
1
)
/
blockSize
,
numThreadBlocks
)
*
blockSize
;
try
{
try
{
#ifdef ENABLE_PROFILING
cl
::
Event
event
;
currentQueue
.
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
size
),
cl
::
NDRange
(
blockSize
),
NULL
,
&
event
);
profilingEvents
.
push_back
(
event
);
profilingKernelNames
.
push_back
(
kernel
.
getInfo
<
CL_KERNEL_FUNCTION_NAME
>
());
if
(
profilingEvents
.
size
()
>=
500
)
printProfilingEvents
();
#else
currentQueue
.
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
size
),
cl
::
NDRange
(
blockSize
));
currentQueue
.
enqueueNDRangeKernel
(
kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
size
),
cl
::
NDRange
(
blockSize
));
#endif
}
}
catch
(
cl
::
Error
err
)
{
catch
(
cl
::
Error
err
)
{
stringstream
str
;
stringstream
str
;
...
@@ -684,6 +706,24 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
...
@@ -684,6 +706,24 @@ void OpenCLContext::executeKernel(cl::Kernel& kernel, int workUnits, int blockSi
}
}
}
}
/**
 * Print every pending profiling event to stdout and then discard them.
 *
 * For each entry in profilingEvents this waits for the event to complete,
 * queries its start and end times, and prints one JSON object containing the
 * start time (relative to the first event ever printed by this context), the
 * duration, and the matching name from profilingKernelNames.  Entries after
 * the first are preceded by ",\n" so that successive calls keep extending a
 * single comma separated list.  Both vectors are emptied before returning.
 *
 * The raw values are multiplied by 0.001 before printing.
 * NOTE(review): OpenCL reports profiling counters in nanoseconds, so the
 * printed "ts" and "dur" values are presumably microseconds, as the trace
 * format expects - confirm against the OpenCL and trace format documentation.
 */
void OpenCLContext::printProfilingEvents() {
    for (size_t i = 0; i < profilingEvents.size(); i++) {
        // Bind by reference: copying a cl::Event only to query it is unnecessary.
        cl::Event& event = profilingEvents[i];
        event.wait();
        cl_ulong start, end;
        event.getProfilingInfo(CL_PROFILING_COMMAND_START, &start);
        event.getProfilingInfo(CL_PROFILING_COMMAND_END, &end);
        // profileStartTime doubles as the "nothing printed yet" flag: it is 0 until
        // the first event is seen, and only later events need a leading separator.
        if (profileStartTime == 0)
            profileStartTime = start;
        else
            printf(",\n");
        // The timestamp is printed in fixed notation with three decimals so that it keeps
        // the full resolution of the integer counter no matter how long profiling has been
        // running.  A "%.6g" conversion keeps only six significant digits, which rounds
        // timestamps more and more coarsely once the relative time exceeds 999999.
        printf("{ \"pid\":1, \"tid\":1, \"ts\":%.3f, \"dur\":%g, \"ph\":\"X\", \"name\":\"%s\" }",
                0.001*(start-profileStartTime), 0.001*(end-start), profilingKernelNames[i].c_str());
    }
    profilingEvents.clear();
    profilingKernelNames.clear();
}
int
OpenCLContext
::
computeThreadBlockSize
(
double
memory
)
const
{
int
OpenCLContext
::
computeThreadBlockSize
(
double
memory
)
const
{
int
maxShared
=
device
.
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
();
int
maxShared
=
device
.
getInfo
<
CL_DEVICE_LOCAL_MEM_SIZE
>
();
// On some implementations, more local memory gets used than we calculate by
// On some implementations, more local memory gets used than we calculate by
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment