Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
eb232608
Commit
eb232608
authored
Apr 22, 2015
by
John Chodera (MSKCC)
Browse files
Merge remote-tracking branch 'upstream/master'
parents
62581e9c
7f8c5089
Changes
267
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
73 additions
and
76 deletions
+73
-76
platforms/cpu/src/CpuCustomGBForce.cpp
platforms/cpu/src/CpuCustomGBForce.cpp
+0
-2
platforms/cpu/src/CpuCustomManyParticleForce.cpp
platforms/cpu/src/CpuCustomManyParticleForce.cpp
+0
-2
platforms/cpu/src/CpuCustomNonbondedForce.cpp
platforms/cpu/src/CpuCustomNonbondedForce.cpp
+0
-2
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+6
-7
platforms/cpu/src/CpuLangevinDynamics.cpp
platforms/cpu/src/CpuLangevinDynamics.cpp
+0
-2
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+1
-2
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+0
-1
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+0
-1
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+1
-1
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+3
-1
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+1
-1
platforms/cuda/include/CudaParallelKernels.h
platforms/cuda/include/CudaParallelKernels.h
+4
-1
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+8
-6
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+7
-19
platforms/cuda/src/CudaParallelKernels.cpp
platforms/cuda/src/CudaParallelKernels.cpp
+28
-16
platforms/cuda/src/kernels/findInteractingBlocks.cu
platforms/cuda/src/kernels/findInteractingBlocks.cu
+2
-2
platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
+4
-4
platforms/cuda/tests/TestCudaVariableLangevinIntegrator.cpp
platforms/cuda/tests/TestCudaVariableLangevinIntegrator.cpp
+4
-4
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+3
-1
No files found.
platforms/cpu/src/CpuCustomGBForce.cpp
View file @
eb232608
...
...
@@ -25,8 +25,6 @@
#include <string.h>
#include <sstream>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h"
#include "CpuCustomGBForce.h"
...
...
platforms/cpu/src/CpuCustomManyParticleForce.cpp
View file @
eb232608
...
...
@@ -26,8 +26,6 @@
#include <sstream>
#include <utility>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h"
#include "CpuCustomManyParticleForce.h"
...
...
platforms/cpu/src/CpuCustomNonbondedForce.cpp
View file @
eb232608
...
...
@@ -25,8 +25,6 @@
#include <string.h>
#include <sstream>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h"
#include "ReferenceForce.h"
#include "CpuCustomNonbondedForce.h"
...
...
platforms/cpu/src/CpuKernels.cpp
View file @
eb232608
...
...
@@ -231,13 +231,13 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
throw
OpenMMException
(
"Particle coordinate is nan"
);
}
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
// Sum the forces from all the threads.
SumForceTask
task
(
context
.
getSystem
().
getNumParticles
(),
extractForces
(
context
),
data
);
data
.
threads
.
execute
(
task
);
data
.
threads
.
waitForThreads
();
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
return
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
,
valid
);
}
CpuCalcPeriodicTorsionForceKernel
::~
CpuCalcPeriodicTorsionForceKernel
()
{
...
...
@@ -508,13 +508,12 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
if
(
nonbondedMethod
==
PME
)
{
// If available, use the optimized PME implementation.
try
{
vector
<
string
>
kernelNames
;
kernelNames
.
push_back
(
"CalcPmeReciprocalForce"
);
useOptimizedPme
=
getPlatform
().
supportsKernels
(
kernelNames
);
if
(
useOptimizedPme
)
{
optimizedPme
=
getPlatform
().
createKernel
(
CalcPmeReciprocalForceKernel
::
Name
(),
context
);
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
initialize
(
gridSize
[
0
],
gridSize
[
1
],
gridSize
[
2
],
numParticles
,
ewaldAlpha
);
useOptimizedPme
=
true
;
}
catch
(
OpenMMException
&
ex
)
{
// The CPU PME plugin isn't available.
}
}
}
...
...
platforms/cpu/src/CpuLangevinDynamics.cpp
View file @
eb232608
...
...
@@ -23,8 +23,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMLog.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuLangevinDynamics.h"
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
eb232608
...
...
@@ -24,7 +24,6 @@
#include <complex>
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
...
...
@@ -217,7 +216,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, c
// setup reciprocal box
float
recipBoxSize
[
3
]
=
{
TWO_PI
/
periodicBoxVectors
[
0
][
0
],
TWO_PI
/
periodicBoxVectors
[
1
][
1
],
TWO_PI
/
periodicBoxVectors
[
2
][
2
]};
float
recipBoxSize
[
3
]
=
{
(
float
)
(
TWO_PI
/
periodicBoxVectors
[
0
][
0
]
)
,
(
float
)
(
TWO_PI
/
periodicBoxVectors
[
1
][
1
]
)
,
(
float
)
(
TWO_PI
/
periodicBoxVectors
[
2
][
2
]
)
};
// setup K-vectors
...
...
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
eb232608
...
...
@@ -22,7 +22,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec4.h"
...
...
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
eb232608
...
...
@@ -22,7 +22,6 @@
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "SimTKOpenMMCommon.h"
#include "SimTKOpenMMUtilities.h"
#include "CpuNonbondedForceVec8.h"
#include "openmm/OpenMMException.h"
...
...
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
eb232608
...
...
@@ -62,7 +62,7 @@ void testNeighborList(bool periodic, bool triclinic) {
boxVectors
[
1
]
=
RealVec
(
0
,
15
,
0
);
boxVectors
[
2
]
=
RealVec
(
0
,
0
,
22
);
}
const
float
boxSize
[
3
]
=
{
boxVectors
[
0
][
0
],
boxVectors
[
1
][
1
],
boxVectors
[
2
][
2
]};
const
float
boxSize
[
3
]
=
{
(
float
)
boxVectors
[
0
][
0
],
(
float
)
boxVectors
[
1
][
1
],
(
float
)
boxVectors
[
2
][
2
]};
const
int
blockSize
=
8
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
...
...
platforms/cuda/include/CudaKernels.h
View file @
eb232608
...
...
@@ -92,11 +92,13 @@ public:
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @param valid the method may set this to false to indicate the results are invalid and the force/energy
* calculation should be repeated
* @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/
double
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
);
double
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
);
private:
CudaContext
&
cu
;
};
...
...
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
eb232608
...
...
@@ -278,7 +278,7 @@ private:
std
::
string
kernelSource
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
double
cutoff
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
numForceThreadBlocks
,
forceThreadBlockSize
,
nonbondedForceGroup
,
numAtoms
;
};
...
...
platforms/cuda/include/CudaParallelKernels.h
View file @
eb232608
...
...
@@ -69,11 +69,13 @@ public:
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @param valid the method may set this to false to indicate the results are invalid and the force/energy
* calculation should be repeated
* @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/
double
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
);
double
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
);
private:
class
BeginComputationTask
;
class
FinishComputationTask
;
...
...
@@ -81,6 +83,7 @@ private:
std
::
vector
<
Kernel
>
kernels
;
std
::
vector
<
long
long
>
completionTimes
;
std
::
vector
<
double
>
contextNonbondedFractions
;
int
*
tileCounts
;
CudaArray
*
contextForces
;
void
*
pinnedPositionBuffer
;
long
long
*
pinnedForceBuffer
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
eb232608
...
...
@@ -77,12 +77,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
this
->
compiler
=
"
\"
"
+
compiler
+
"
\"
"
;
try
{
compilerKernel
=
platformData
.
context
->
getPlatform
().
createKernel
(
CudaCompilerKernel
::
Name
(),
*
platformData
.
context
);
hasCompilerKernel
=
true
;
}
catch
(...)
{
// The runtime compiler plugin isn't available.
if
(
platformData
.
context
!=
NULL
)
{
try
{
compilerKernel
=
platformData
.
context
->
getPlatform
().
createKernel
(
CudaCompilerKernel
::
Name
(),
*
platformData
.
context
);
hasCompilerKernel
=
true
;
}
catch
(...)
{
// The runtime compiler plugin isn't available.
}
}
if
(
hostCompiler
.
size
()
>
0
)
this
->
compiler
=
compiler
+
" --compiler-bindir "
+
hostCompiler
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
eb232608
...
...
@@ -104,7 +104,7 @@ void CudaCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool
nb
.
prepareInteractions
();
}
double
CudaCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
double
CudaCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
cu
.
getBondedUtilities
().
computeInteractions
(
groups
);
if
((
groups
&
(
1
<<
cu
.
getNonbondedUtilities
().
getForceGroup
()))
!=
0
)
cu
.
getNonbondedUtilities
().
computeInteractions
();
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
eb232608
...
...
@@ -62,10 +62,10 @@ private:
bool
useDouble
;
};
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
nonbondedForceGroup
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
nonbondedForceGroup
(
0
)
,
forceRebuildNeighborList
(
true
)
{
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
...
...
@@ -264,14 +264,6 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortedBlockCenter
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockCenter"
);
sortedBlockBoundingBox
=
new
CudaArray
(
context
,
numAtomBlocks
+
1
,
4
*
elementSize
,
"sortedBlockBoundingBox"
);
oldPositions
=
new
CudaArray
(
context
,
numAtoms
,
4
*
elementSize
,
"oldPositions"
);
if
(
context
.
getUseDoublePrecision
())
{
vector
<
double4
>
oldPositionsVec
(
numAtoms
,
make_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
float4
>
oldPositionsVec
(
numAtoms
,
make_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
rebuildNeighborList
=
CudaArray
::
create
<
int
>
(
context
,
1
,
"rebuildNeighborList"
);
blockSorter
=
new
CudaSort
(
context
,
new
BlockSortTrait
(
context
.
getUseDoublePrecision
()),
numAtomBlocks
);
vector
<
unsigned
int
>
count
(
1
,
0
);
...
...
@@ -322,6 +314,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortBoxDataArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
findInteractingBlocksKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
...
...
@@ -363,6 +356,7 @@ void CudaNonbondedUtilities::prepareInteractions() {
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
}
void
CudaNonbondedUtilities
::
computeInteractions
()
{
...
...
@@ -400,14 +394,7 @@ void CudaNonbondedUtilities::updateNeighborListSize() {
if
(
forceArgs
.
size
()
>
0
)
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
if
(
context
.
getUseDoublePrecision
())
{
vector
<
double4
>
oldPositionsVec
(
numAtoms
,
make_double4
(
1e30
,
1e30
,
1e30
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
else
{
vector
<
float4
>
oldPositionsVec
(
numAtoms
,
make_float4
(
1e30
f
,
1e30
f
,
1e30
f
,
0
));
oldPositions
->
upload
(
oldPositionsVec
);
}
forceRebuildNeighborList
=
true
;
}
void
CudaNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
@@ -419,8 +406,9 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
startBlockIndex
=
(
int
)
(
startFraction
*
numAtomBlocks
);
numBlocks
=
(
int
)
(
endFraction
*
numAtomBlocks
)
-
startBlockIndex
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
startTileIndex
=
(
int
)
(
startFraction
*
totalTiles
);
;
startTileIndex
=
(
int
)
(
startFraction
*
totalTiles
);
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
forceRebuildNeighborList
=
true
;
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
...
...
platforms/cuda/src/CudaParallelKernels.cpp
View file @
eb232608
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2011-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -63,8 +63,8 @@ if (result != CUDA_SUCCESS) { \
class
CudaParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
CudaContext
::
WorkTask
{
public:
BeginComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
,
CUevent
event
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
),
event
(
event
)
{
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
void
*
pinnedMemory
,
CUevent
event
,
int
&
numTiles
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
pinnedMemory
(
pinnedMemory
),
event
(
event
)
,
numTiles
(
numTiles
)
{
}
void
execute
()
{
// Copy coordinates over to this device and execute the kernel.
...
...
@@ -76,6 +76,8 @@ public:
cu
.
getPosq
().
upload
(
pinnedMemory
,
false
);
}
kernel
.
beginComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
if
(
cu
.
getNonbondedUtilities
().
getUsePeriodic
())
cu
.
getNonbondedUtilities
().
getInteractionCount
().
download
(
&
numTiles
,
false
);
}
private:
ContextImpl
&
context
;
...
...
@@ -85,19 +87,20 @@ private:
int
groups
;
void
*
pinnedMemory
;
CUevent
event
;
int
&
numTiles
;
};
class
CudaParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
CudaContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
CudaContext
&
cu
,
CudaCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
)
:
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
double
&
energy
,
long
long
&
completionTime
,
long
long
*
pinnedMemory
,
CudaArray
&
contextForces
,
bool
&
valid
,
int
&
numTiles
)
:
context
(
context
),
cu
(
cu
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
groups
(
groups
),
energy
(
energy
),
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
)
{
completionTime
(
completionTime
),
pinnedMemory
(
pinnedMemory
),
contextForces
(
contextForces
)
,
valid
(
valid
),
numTiles
(
numTiles
)
{
}
void
execute
()
{
// Execute the kernel, then download forces.
energy
+=
kernel
.
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
);
energy
+=
kernel
.
finishComputation
(
context
,
includeForce
,
includeEnergy
,
groups
,
valid
);
if
(
cu
.
getComputeForceCount
()
<
200
)
{
// Record timing information for load balancing. Since this takes time, only do it at the start of the simulation.
...
...
@@ -117,6 +120,10 @@ public:
cu
.
getForce
().
download
(
&
pinnedMemory
[(
cu
.
getContextIndex
()
-
1
)
*
numAtoms
*
3
]);
}
}
if
(
cu
.
getNonbondedUtilities
().
getUsePeriodic
()
&&
numTiles
>
cu
.
getNonbondedUtilities
().
getInteractingTiles
().
getSize
())
{
valid
=
false
;
cu
.
getNonbondedUtilities
().
updateNeighborListSize
();
}
}
private:
ContextImpl
&
context
;
...
...
@@ -128,11 +135,13 @@ private:
long
long
&
completionTime
;
long
long
*
pinnedMemory
;
CudaArray
&
contextForces
;
bool
&
valid
;
int
&
numTiles
;
};
CudaParallelCalcForcesAndEnergyKernel
::
CudaParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
),
completionTimes
(
data
.
contexts
.
size
()),
contextNonbondedFractions
(
data
.
contexts
.
size
()),
contextForces
(
NULL
),
pinnedPositionBuffer
(
NULL
),
pinnedForceBuffer
(
NULL
)
{
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
),
completionTimes
(
data
.
contexts
.
size
()),
contextNonbondedFractions
(
data
.
contexts
.
size
()),
tileCounts
(
NULL
),
contextForces
(
NULL
),
pinnedPositionBuffer
(
NULL
),
pinnedForceBuffer
(
NULL
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
CudaCalcForcesAndEnergyKernel
(
name
,
platform
,
*
data
.
contexts
[
i
])));
}
...
...
@@ -147,6 +156,8 @@ CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel()
cuMemFreeHost
(
pinnedForceBuffer
);
cuEventDestroy
(
event
);
cuStreamDestroy
(
peerCopyStream
);
if
(
tileCounts
!=
NULL
)
cuMemFreeHost
(
tileCounts
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
...
...
@@ -154,12 +165,14 @@ void CudaParallelCalcForcesAndEnergyKernel::initialize(const System& system) {
cu
.
setAsCurrent
();
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
parallel
);
sumKernel
=
cu
.
getKernel
(
module
,
"sumForces"
);
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
int
numContexts
=
data
.
contexts
.
size
();
for
(
int
i
=
0
;
i
<
numContexts
;
i
++
)
getKernel
(
i
).
initialize
(
system
);
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
()
;
i
++
)
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
c
ontext
NonbondedFractions
.
size
()
;
for
(
int
i
=
0
;
i
<
numContexts
;
i
++
)
contextNonbondedFractions
[
i
]
=
1
/
(
double
)
numC
ontext
s
;
CHECK_RESULT
(
cuEventCreate
(
&
event
,
0
),
"Error creating event"
);
CHECK_RESULT
(
cuStreamCreate
(
&
peerCopyStream
,
CU_STREAM_NON_BLOCKING
),
"Error creating stream"
);
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
tileCounts
,
numContexts
*
sizeof
(
int
),
0
),
"Error creating tile count buffer"
);
}
void
CudaParallelCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
...
...
@@ -189,22 +202,21 @@ void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& contex
data
.
contextEnergy
[
i
]
=
0.0
;
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
,
event
));
thread
.
addTask
(
new
BeginComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
pinnedPositionBuffer
,
event
,
tileCounts
[
i
]
));
}
}
#include <cstdio>
double
CudaParallelCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
double
CudaParallelCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
i
];
CudaContext
::
WorkThread
&
thread
=
cu
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cu
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
groups
,
data
.
contextEnergy
[
i
],
completionTimes
[
i
],
pinnedForceBuffer
,
*
contextForces
,
valid
,
tileCounts
[
i
]
));
}
data
.
syncContexts
();
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contextEnergy
.
size
();
i
++
)
energy
+=
data
.
contextEnergy
[
i
];
if
(
includeForce
)
{
if
(
includeForce
&&
valid
)
{
// Sum the forces from all devices.
CudaContext
&
cu
=
*
data
.
contexts
[
0
];
...
...
platforms/cuda/src/kernels/findInteractingBlocks.cu
View file @
eb232608
...
...
@@ -43,7 +43,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
extern
"C"
__global__
void
sortBoxData
(
const
real2
*
__restrict__
sortedBlock
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockBoundingBox
,
real4
*
__restrict__
sortedBlockCenter
,
real4
*
__restrict__
sortedBlockBoundingBox
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
oldPositions
,
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
rebuildNeighborList
)
{
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
rebuildNeighborList
,
bool
forceRebuild
)
{
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_BLOCKS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
(
int
)
sortedBlock
[
i
].
y
;
sortedBlockCenter
[
i
]
=
blockCenter
[
index
];
...
...
@@ -52,7 +52,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
// Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.
bool
rebuild
=
f
alse
;
bool
rebuild
=
f
orceRebuild
;
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
delta
=
oldPositions
[
i
]
-
posq
[
i
];
if
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
>
0.25
f
*
PADDING
*
PADDING
)
...
...
platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
View file @
eb232608
...
...
@@ -77,10 +77,10 @@ void testHarmonicBonds() {
}
void
testLargeSystem
()
{
const
int
numMolecules
=
5
0
;
const
int
numMolecules
=
2
5
;
const
int
numParticles
=
numMolecules
*
2
;
const
double
cutoff
=
2.0
;
const
double
boxSize
=
5
.0
;
const
double
boxSize
=
4
.0
;
const
double
tolerance
=
5
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
...
...
@@ -134,10 +134,10 @@ void testLargeSystem() {
}
void
testVirtualSites
()
{
const
int
numMolecules
=
5
0
;
const
int
numMolecules
=
2
5
;
const
int
numParticles
=
numMolecules
*
3
;
const
double
cutoff
=
2.0
;
const
double
boxSize
=
5
.0
;
const
double
boxSize
=
4
.0
;
const
double
tolerance
=
5
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
...
...
platforms/cuda/tests/TestCudaVariableLangevinIntegrator.cpp
View file @
eb232608
...
...
@@ -296,7 +296,7 @@ void testArgonBox() {
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
system
.
addForce
(
nonbonded
);
VariableLangevinIntegrator
integrator
(
temp
,
6.0
,
1e-
5
);
VariableLangevinIntegrator
integrator
(
temp
,
6.0
,
1e-
4
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setVelocitiesToTemperature
(
temp
);
...
...
@@ -308,13 +308,13 @@ void testArgonBox() {
// Make sure the temperature is correct.
double
ke
=
0.0
;
for
(
int
i
=
0
;
i
<
2
000
;
++
i
)
{
double
t
=
2.0
+
0.0
1
*
(
i
+
1
);
for
(
int
i
=
0
;
i
<
1
000
;
++
i
)
{
double
t
=
2.0
+
0.0
2
*
(
i
+
1
);
integrator
.
stepTo
(
t
);
State
state
=
context
.
getState
(
State
::
Energy
);
ke
+=
state
.
getKineticEnergy
();
}
ke
/=
2
000
;
ke
/=
1
000
;
double
expected
=
1.5
*
numParticles
*
BOLTZ
*
temp
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
ke
,
0.01
);
}
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
eb232608
...
...
@@ -71,11 +71,13 @@ public:
* @param includeForce true if forces should be computed
* @param includeEnergy true if potential energy should be computed
* @param groups a set of bit flags for which force groups to include
* @param valid the method may set this to false to indicate the results are invalid and the force/energy
* calculation should be repeated
* @return the potential energy of the system. This value is added to all values returned by ForceImpls'
* calcForcesAndEnergy() methods. That is, each force kernel may <i>either</i> return its contribution to the
* energy directly, <i>or</i> add it to an internal buffer so that it will be included here.
*/
double
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
);
double
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
);
private:
OpenCLContext
&
cl
;
};
...
...
Prev
1
2
3
4
5
6
…
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment