Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ef8a3447
"csrc/vscode:/vscode.git/clone" did not exist on "f0d34aabcb7bdcb3a05d022e7d11b3bf4ccf8ee8"
Commit
ef8a3447
authored
Jun 01, 2010
by
Peter Eastman
Browse files
Allow multiple buffers to be cleared by a single kernel, improving performance on small systems.
parent
94a151b1
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
297 additions
and
10 deletions
+297
-10
platforms/cuda/tests/TestCudaPerformance.cpp
platforms/cuda/tests/TestCudaPerformance.cpp
+99
-0
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+48
-1
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+16
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+10
-9
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+27
-0
platforms/opencl/tests/TestOpenCLPerformance.cpp
platforms/opencl/tests/TestOpenCLPerformance.cpp
+97
-0
No files found.
platforms/cuda/tests/TestCudaPerformance.cpp
0 → 100644
View file @
ef8a3447
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/NonbondedForce.h"
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include <sys/time.h>
#include <iostream>
#include <stdlib.h>
using
namespace
OpenMM
;
using
namespace
std
;
void
testPerformance
()
{
const
int
xsize
=
20
;
const
int
ysize
=
21
;
const
int
zsize
=
21
;
const
int
numParticles
=
xsize
*
ysize
*
zsize
;
const
double
spacing
=
0.3
;
CudaPlatform
platform
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
xsize
*
spacing
,
0
,
0
),
Vec3
(
0
,
ysize
*
spacing
,
0
),
Vec3
(
0
,
0
,
zsize
*
spacing
));
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
LangevinIntegrator
integrator
(
1.0
,
0.1
,
0.001
);
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
vector
<
Vec3
>
positions
;
vector
<
Vec3
>
velocities
;
double
charge
=
0.1
;
for
(
int
i
=
0
;
i
<
xsize
;
++
i
)
for
(
int
j
=
0
;
j
<
ysize
;
++
j
)
for
(
int
k
=
0
;
k
<
zsize
;
++
k
)
{
nonbonded
->
addParticle
(
charge
,
0.2
,
0.1
);
charge
=
-
charge
;
positions
.
push_back
(
Vec3
(
i
*
spacing
,
j
*
spacing
,
k
*
spacing
));
velocities
.
push_back
(
Vec3
(
0
,
0
,
0
));
}
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
nonbonded
->
setCutoffDistance
(
3
*
spacing
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
timeval
startTime
;
gettimeofday
(
&
startTime
,
NULL
);
integrator
.
step
(
5000
);
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
timeval
endTime
;
gettimeofday
(
&
endTime
,
NULL
);
double
dt
=
endTime
.
tv_sec
-
startTime
.
tv_sec
+
1e-6
*
(
endTime
.
tv_usec
-
startTime
.
tv_usec
);
std
::
cout
<<
"Elapsed time: "
<<
dt
<<
std
::
endl
;
std
::
cout
<<
"Final energy: "
<<
state
.
getPotentialEnergy
()
+
state
.
getKineticEnergy
()
<<
std
::
endl
;
}
/**
 * Run the performance test.  Returns 1 after reporting any std::exception
 * thrown by the test; otherwise prints "Done" and returns 0.
 */
int main() {
    bool failed = false;
    try {
        testPerformance();
    }
    catch (const std::exception& error) {
        std::cout << "exception: " << error.what() << std::endl;
        failed = true;
    }
    if (failed)
        return 1;
    std::cout << "Done" << std::endl;
    return 0;
}
platforms/opencl/src/OpenCLContext.cpp
View file @
ef8a3447
...
@@ -45,7 +45,8 @@
...
@@ -45,7 +45,8 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
OpenCLContext
::
OpenCLContext
(
int
numParticles
,
int
deviceIndex
)
:
time
(
0.0
),
stepCount
(
0
),
computeForceCount
(
0
)
{
OpenCLContext
::
OpenCLContext
(
int
numParticles
,
int
deviceIndex
)
:
time
(
0.0
),
stepCount
(
0
),
computeForceCount
(
0
),
posq
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
energyBuffer
(
NULL
),
atomIndex
(
NULL
),
integration
(
NULL
),
nonbonded
(
NULL
)
{
try
{
try
{
std
::
vector
<
cl
::
Platform
>
platforms
;
std
::
vector
<
cl
::
Platform
>
platforms
;
cl
::
Platform
::
get
(
&
platforms
);
cl
::
Platform
::
get
(
&
platforms
);
...
@@ -98,6 +99,9 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
...
@@ -98,6 +99,9 @@ OpenCLContext::OpenCLContext(int numParticles, int deviceIndex) : time(0.0), ste
utilities
=
createProgram
(
OpenCLKernelSources
::
utilities
);
utilities
=
createProgram
(
OpenCLKernelSources
::
utilities
);
clearBufferKernel
=
cl
::
Kernel
(
utilities
,
"clearBuffer"
);
clearBufferKernel
=
cl
::
Kernel
(
utilities
,
"clearBuffer"
);
clearTwoBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearTwoBuffers"
);
clearThreeBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearThreeBuffers"
);
clearFourBuffersKernel
=
cl
::
Kernel
(
utilities
,
"clearFourBuffers"
);
reduceFloat4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceFloat4Buffer"
);
reduceFloat4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceFloat4Buffer"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...
@@ -164,8 +168,10 @@ void OpenCLContext::initialize(const System& system) {
...
@@ -164,8 +168,10 @@ void OpenCLContext::initialize(const System& system) {
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
forces
.
size
();
i
++
)
numForceBuffers
=
std
::
max
(
numForceBuffers
,
forces
[
i
]
->
getRequiredForceBuffers
());
numForceBuffers
=
std
::
max
(
numForceBuffers
,
forces
[
i
]
->
getRequiredForceBuffers
());
forceBuffers
=
new
OpenCLArray
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
,
false
);
forceBuffers
=
new
OpenCLArray
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
,
false
);
addAutoclearBuffer
(
forceBuffers
->
getDeviceBuffer
(),
forceBuffers
->
getSize
()
*
4
);
force
=
new
OpenCLArray
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
,
true
);
force
=
new
OpenCLArray
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
,
true
);
energyBuffer
=
new
OpenCLArray
<
cl_float
>
(
*
this
,
numThreadBlocks
*
ThreadBlockSize
,
"energyBuffer"
,
true
);
energyBuffer
=
new
OpenCLArray
<
cl_float
>
(
*
this
,
numThreadBlocks
*
ThreadBlockSize
,
"energyBuffer"
,
true
);
addAutoclearBuffer
(
energyBuffer
->
getDeviceBuffer
(),
energyBuffer
->
getSize
());
atomIndex
=
new
OpenCLArray
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
,
true
);
atomIndex
=
new
OpenCLArray
<
cl_int
>
(
*
this
,
paddedNumAtoms
,
"atomIndex"
,
true
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
(
*
atomIndex
)[
i
]
=
i
;
(
*
atomIndex
)[
i
]
=
i
;
...
@@ -257,6 +263,47 @@ void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
...
@@ -257,6 +263,47 @@ void OpenCLContext::clearBuffer(cl::Memory& memory, int size) {
executeKernel
(
clearBufferKernel
,
size
);
executeKernel
(
clearBufferKernel
,
size
);
}
}
/**
 * Register a buffer to be processed by clearAutoclearBuffers().
 *
 * Only the address of the Memory object is recorded, not a copy, so the object
 * passed in must remain valid for as long as it stays registered.
 *
 * @param memory the Memory to clear
 * @param size   the number of float elements in the buffer
 */
void OpenCLContext::addAutoclearBuffer(cl::Memory& memory, int size) {
    cl::Memory* const registered = &memory;
    autoclearBuffers.push_back(registered);
    autoclearBufferSizes.push_back(size);
}
/**
 * Clear every buffer that was registered with addAutoclearBuffer().
 *
 * Buffers are handled in registration order.  While at least four remain they
 * are passed four at a time to clearFourBuffersKernel; the final three, two or
 * one buffers go to clearThreeBuffersKernel, clearTwoBuffersKernel or
 * clearBuffer() respectively.  The size handed to executeKernel() for each
 * launch is the largest element count among the buffers in that launch.
 */
void OpenCLContext::clearAutoclearBuffers() {
    int base = 0;
    const int total = (int) autoclearBufferSizes.size();
    while (total-base >= 4) {
        clearFourBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
        clearFourBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
        clearFourBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
        clearFourBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
        clearFourBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
        clearFourBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
        clearFourBuffersKernel.setArg<cl::Memory>(6, *autoclearBuffers[base+3]);
        clearFourBuffersKernel.setArg<cl_int>(7, autoclearBufferSizes[base+3]);
        // The fourth term must be the size of the fourth buffer (index base+3),
        // not the first buffer's size plus three; otherwise a fourth buffer that
        // is the largest of the group is not taken into account.
        const int largest = max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]);
        executeKernel(clearFourBuffersKernel, largest);
        base += 4;
    }
    // At most three buffers are left at this point.
    const int remaining = total-base;
    if (remaining == 3) {
        clearThreeBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
        clearThreeBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
        clearThreeBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
        clearThreeBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
        clearThreeBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
        clearThreeBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
        executeKernel(clearThreeBuffersKernel, max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]));
    }
    else if (remaining == 2) {
        clearTwoBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
        clearTwoBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
        clearTwoBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
        clearTwoBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
        executeKernel(clearTwoBuffersKernel, max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]));
    }
    else if (remaining == 1) {
        clearBuffer(*autoclearBuffers[base], autoclearBufferSizes[base]);
    }
}
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
<
mm_float4
>&
array
,
int
numBuffers
)
{
void
OpenCLContext
::
reduceBuffer
(
OpenCLArray
<
mm_float4
>&
array
,
int
numBuffers
)
{
int
bufferSize
=
array
.
getSize
()
/
numBuffers
;
int
bufferSize
=
array
.
getSize
()
/
numBuffers
;
reduceFloat4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
reduceFloat4Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
array
.
getDeviceBuffer
());
...
...
platforms/opencl/src/OpenCLContext.h
View file @
ef8a3447
...
@@ -234,6 +234,17 @@ public:
...
@@ -234,6 +234,17 @@ public:
* @param size the number of float elements in the buffer
* @param size the number of float elements in the buffer
*/
*/
void
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
void
clearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
* @param memory the Memory to clear
* @param size the number of float elements in the buffer
*/
void
addAutoclearBuffer
(
cl
::
Memory
&
memory
,
int
size
);
/**
* Clear all buffers that have been registered with addAutoclearBuffer().
*/
void
clearAutoclearBuffers
();
/**
/**
* Given a collection of buffers packed into an array, sum them and store
* Given a collection of buffers packed into an array, sum them and store
* the sum in the first buffer.
* the sum in the first buffer.
...
@@ -374,6 +385,9 @@ private:
...
@@ -374,6 +385,9 @@ private:
cl
::
CommandQueue
queue
;
cl
::
CommandQueue
queue
;
cl
::
Program
utilities
;
cl
::
Program
utilities
;
cl
::
Kernel
clearBufferKernel
;
cl
::
Kernel
clearBufferKernel
;
cl
::
Kernel
clearTwoBuffersKernel
;
cl
::
Kernel
clearThreeBuffersKernel
;
cl
::
Kernel
clearFourBuffersKernel
;
cl
::
Kernel
reduceFloat4Kernel
;
cl
::
Kernel
reduceFloat4Kernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
...
@@ -384,6 +398,8 @@ private:
...
@@ -384,6 +398,8 @@ private:
OpenCLArray
<
mm_float4
>*
forceBuffers
;
OpenCLArray
<
mm_float4
>*
forceBuffers
;
OpenCLArray
<
cl_float
>*
energyBuffer
;
OpenCLArray
<
cl_float
>*
energyBuffer
;
OpenCLArray
<
cl_int
>*
atomIndex
;
OpenCLArray
<
cl_int
>*
atomIndex
;
std
::
vector
<
cl
::
Memory
*>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLIntegrationUtilities
*
integration
;
OpenCLNonbondedUtilities
*
nonbonded
;
OpenCLNonbondedUtilities
*
nonbonded
;
};
};
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
ef8a3447
...
@@ -73,7 +73,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context
...
@@ -73,7 +73,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context
if
(
cl
.
getNonbondedUtilities
().
getUseCutoff
()
&&
cl
.
getComputeForceCount
()
%
100
==
0
)
if
(
cl
.
getNonbondedUtilities
().
getUseCutoff
()
&&
cl
.
getComputeForceCount
()
%
100
==
0
)
cl
.
reorderAtoms
();
cl
.
reorderAtoms
();
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
clear
Buffer
(
cl
.
getForce
Buffers
()
)
;
cl
.
clear
Autoclear
Buffers
();
cl
.
getNonbondedUtilities
().
prepareInteractions
();
cl
.
getNonbondedUtilities
().
prepareInteractions
();
}
}
...
@@ -86,7 +86,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& contex
...
@@ -86,7 +86,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& contex
if
(
cl
.
getNonbondedUtilities
().
getUseCutoff
()
&&
cl
.
getComputeForceCount
()
%
100
==
0
)
if
(
cl
.
getNonbondedUtilities
().
getUseCutoff
()
&&
cl
.
getComputeForceCount
()
%
100
==
0
)
cl
.
reorderAtoms
();
cl
.
reorderAtoms
();
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
setComputeForceCount
(
cl
.
getComputeForceCount
()
+
1
);
cl
.
clear
Buffer
(
cl
.
getEnergy
Buffer
(
)
);
cl
.
clear
Autoclear
Buffer
s
();
cl
.
getNonbondedUtilities
().
prepareInteractions
();
cl
.
getNonbondedUtilities
().
prepareInteractions
();
}
}
...
@@ -1580,6 +1580,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
...
@@ -1580,6 +1580,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"obcParams"
,
"float"
,
2
,
sizeof
(
cl_float2
),
params
->
getDeviceBuffer
()));;
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"obcParams"
,
"float"
,
2
,
sizeof
(
cl_float2
),
params
->
getDeviceBuffer
()));;
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"bornForce"
,
"float"
,
1
,
sizeof
(
cl_float
),
bornForce
->
getDeviceBuffer
()));;
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"bornForce"
,
"float"
,
1
,
sizeof
(
cl_float
),
bornForce
->
getDeviceBuffer
()));;
cl
.
addForce
(
new
OpenCLGBSAOBCForceInfo
(
nb
.
getNumForceBuffers
(),
force
));
cl
.
addForce
(
new
OpenCLGBSAOBCForceInfo
(
nb
.
getNumForceBuffers
(),
force
));
cl
.
addAutoclearBuffer
(
bornSum
->
getDeviceBuffer
(),
bornSum
->
getSize
());
cl
.
addAutoclearBuffer
(
bornForce
->
getDeviceBuffer
(),
bornForce
->
getSize
());
}
}
void
OpenCLCalcGBSAOBCForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
void
OpenCLCalcGBSAOBCForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
...
@@ -1655,8 +1657,6 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
...
@@ -1655,8 +1657,6 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bornRadii
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bornRadii
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
obcChain
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
obcChain
->
getDeviceBuffer
());
}
}
cl
.
clearBuffer
(
*
bornSum
);
cl
.
clearBuffer
(
*
bornForce
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
8
,
cl
.
getPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getInvPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
9
,
cl
.
getInvPeriodicBoxSize
());
...
@@ -2278,6 +2278,10 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2278,6 +2278,10 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
}
}
}
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
cl
.
addAutoclearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
()
/
sizeof
(
cl_float
));
}
}
}
void
OpenCLCalcCustomGBForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
void
OpenCLCalcCustomGBForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
...
@@ -2285,6 +2289,8 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
...
@@ -2285,6 +2289,8 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
if
(
!
hasInitializedKernels
)
{
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
hasInitializedKernels
=
true
;
valueBuffers
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNumForceBuffers
(),
"customGBValueBuffers"
);
valueBuffers
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNumForceBuffers
(),
"customGBValueBuffers"
);
cl
.
addAutoclearBuffer
(
valueBuffers
->
getDeviceBuffer
(),
valueBuffers
->
getSize
());
cl
.
clearBuffer
(
*
valueBuffers
);
int
index
=
0
;
int
index
=
0
;
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
pairValueKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
pairValueKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
...
@@ -2404,11 +2410,6 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
...
@@ -2404,11 +2410,6 @@ void OpenCLCalcCustomGBForceKernel::executeForces(ContextImpl& context) {
if
(
changed
)
if
(
changed
)
globals
->
upload
(
globalParamValues
);
globals
->
upload
(
globalParamValues
);
}
}
cl
.
clearBuffer
(
*
valueBuffers
);
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
cl
.
clearBuffer
(
buffer
.
getMemory
(),
buffer
.
getSize
()
*
energyDerivs
->
getNumObjects
()
/
sizeof
(
cl_float
));
}
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
pairValueKernel
.
setArg
<
mm_float4
>
(
10
,
cl
.
getPeriodicBoxSize
());
pairValueKernel
.
setArg
<
mm_float4
>
(
10
,
cl
.
getPeriodicBoxSize
());
pairValueKernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getInvPeriodicBoxSize
());
pairValueKernel
.
setArg
<
mm_float4
>
(
11
,
cl
.
getInvPeriodicBoxSize
());
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
ef8a3447
...
@@ -15,6 +15,33 @@ __kernel void clearBuffer(__global float* buffer, int size) {
...
@@ -15,6 +15,33 @@ __kernel void clearBuffer(__global float* buffer, int size) {
buffer[i]
=
0.0f
;
buffer[i]
=
0.0f
;
}
}
/**
 * Fill two float buffers with 0 in a single kernel launch.
 *
 * Each buffer is handed, together with its element count, to clearBuffer().
 */
__kernel void clearTwoBuffers(__global float* buffer1, int size1, __global float* buffer2, int size2) {
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
}
/**
 * Fill three float buffers with 0 in a single kernel launch.
 *
 * Each buffer is handed, together with its element count, to clearBuffer().
 */
__kernel void clearThreeBuffers(__global float* buffer1, int size1, __global float* buffer2, int size2,
        __global float* buffer3, int size3) {
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
    clearBuffer(buffer3, size3);
}
/**
 * Fill four float buffers with 0 in a single kernel launch.
 *
 * Each buffer is handed, together with its element count, to clearBuffer().
 */
__kernel void clearFourBuffers(__global float* buffer1, int size1, __global float* buffer2, int size2,
        __global float* buffer3, int size3, __global float* buffer4, int size4) {
    clearBuffer(buffer1, size1);
    clearBuffer(buffer2, size2);
    clearBuffer(buffer3, size3);
    clearBuffer(buffer4, size4);
}
/**
/**
*
Sum
a
collection
of
buffers
into
the
first
one.
*
Sum
a
collection
of
buffers
into
the
first
one.
*/
*/
...
...
platforms/opencl/tests/TestOpenCLPerformance.cpp
0 → 100644
View file @
ef8a3447
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "OpenCLPlatform.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include <sys/time.h>
#include <iostream>
#include <stdlib.h>
using
namespace
OpenMM
;
using
namespace
std
;
void
testPerformance
()
{
const
int
xsize
=
20
;
const
int
ysize
=
21
;
const
int
zsize
=
21
;
const
int
numParticles
=
xsize
*
ysize
*
zsize
;
const
double
spacing
=
0.3
;
OpenCLPlatform
platform
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
xsize
*
spacing
,
0
,
0
),
Vec3
(
0
,
ysize
*
spacing
,
0
),
Vec3
(
0
,
0
,
zsize
*
spacing
));
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
LangevinIntegrator
integrator
(
1.0
,
0.1
,
0.001
);
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
vector
<
Vec3
>
positions
;
vector
<
Vec3
>
velocities
;
double
charge
=
0.1
;
for
(
int
i
=
0
;
i
<
xsize
;
++
i
)
for
(
int
j
=
0
;
j
<
ysize
;
++
j
)
for
(
int
k
=
0
;
k
<
zsize
;
++
k
)
{
nonbonded
->
addParticle
(
charge
,
0.2
,
0.1
);
charge
=
-
charge
;
positions
.
push_back
(
Vec3
(
i
*
spacing
,
j
*
spacing
,
k
*
spacing
));
velocities
.
push_back
(
Vec3
(
0
,
0
,
0
));
}
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
nonbonded
->
setCutoffDistance
(
3
*
spacing
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
timeval
startTime
;
gettimeofday
(
&
startTime
,
NULL
);
integrator
.
step
(
50
);
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Forces
|
State
::
Energy
);
timeval
endTime
;
gettimeofday
(
&
endTime
,
NULL
);
double
dt
=
endTime
.
tv_sec
-
startTime
.
tv_sec
+
1e-6
*
(
endTime
.
tv_usec
-
startTime
.
tv_usec
);
std
::
cout
<<
"Elapsed time: "
<<
dt
<<
std
::
endl
;
std
::
cout
<<
"Final energy: "
<<
state
.
getPotentialEnergy
()
+
state
.
getKineticEnergy
()
<<
std
::
endl
;
}
/**
 * Run the performance test.  Returns 1 after reporting any std::exception
 * thrown by the test; otherwise prints "Done" and returns 0.
 */
int main() {
    bool failed = false;
    try {
        testPerformance();
    }
    catch (const std::exception& error) {
        std::cout << "exception: " << error.what() << std::endl;
        failed = true;
    }
    if (failed)
        return 1;
    std::cout << "Done" << std::endl;
    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment