Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
387008ce
Commit
387008ce
authored
Jun 22, 2012
by
Peter Eastman
Browse files
Continuing to implement new CUDA platform: checkpointing, parallelization across multiple devices
parent
17ae3aae
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1624 additions
and
122 deletions
+1624
-122
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+3
-1
platforms/cuda2/src/CudaKernelFactory.cpp
platforms/cuda2/src/CudaKernelFactory.cpp
+33
-33
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+67
-45
platforms/cuda2/src/CudaParallelKernels.cpp
platforms/cuda2/src/CudaParallelKernels.cpp
+742
-0
platforms/cuda2/src/CudaParallelKernels.h
platforms/cuda2/src/CudaParallelKernels.h
+581
-0
platforms/cuda2/src/kernels/findInteractingBlocks.cu
platforms/cuda2/src/kernels/findInteractingBlocks.cu
+2
-1
platforms/cuda2/src/kernels/parallel.cu
platforms/cuda2/src/kernels/parallel.cu
+13
-0
platforms/cuda2/src/kernels/utilities.cu
platforms/cuda2/src/kernels/utilities.cu
+0
-30
platforms/cuda2/tests/TestCudaCheckpoints.cpp
platforms/cuda2/tests/TestCudaCheckpoints.cpp
+171
-0
platforms/cuda2/tests/TestCudaCustomAngleForce.cpp
platforms/cuda2/tests/TestCudaCustomAngleForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaCustomBondForce.cpp
platforms/cuda2/tests/TestCudaCustomBondForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaCustomCompoundBondForce.cpp
platforms/cuda2/tests/TestCudaCustomCompoundBondForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaCustomExternalForce.cpp
platforms/cuda2/tests/TestCudaCustomExternalForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaCustomTorsionForce.cpp
platforms/cuda2/tests/TestCudaCustomTorsionForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaHarmonicAngleForce.cpp
platforms/cuda2/tests/TestCudaHarmonicAngleForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaHarmonicBondForce.cpp
platforms/cuda2/tests/TestCudaHarmonicBondForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
+2
-2
platforms/cuda2/tests/TestCudaPeriodicTorsionForce.cpp
platforms/cuda2/tests/TestCudaPeriodicTorsionForce.cpp
+1
-1
platforms/cuda2/tests/TestCudaRBTorsionForce.cpp
platforms/cuda2/tests/TestCudaRBTorsionForce.cpp
+1
-1
No files found.
platforms/cuda2/src/CudaContext.cpp
View file @
387008ce
...
...
@@ -113,7 +113,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
deviceIndex
=
i
;
bestSpeed
=
speed
;
bestCompute
=
major
;
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
}
}
}
...
...
@@ -121,6 +120,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
throw
OpenMMException
(
"No compatible CUDA device is available"
);
CHECK_RESULT
(
cuDeviceGet
(
&
device
,
deviceIndex
));
this
->
deviceIndex
=
deviceIndex
;
int
major
,
minor
;
CHECK_RESULT
(
cuDeviceComputeCapability
(
&
major
,
&
minor
,
device
));
gpuArchitecture
=
intToString
(
major
)
+
intToString
(
minor
);
defaultOptimizationOptions
=
"--use_fast_math"
;
unsigned
int
flags
=
CU_CTX_MAP_HOST
;
if
(
useBlockingSync
)
...
...
platforms/cuda2/src/CudaKernelFactory.cpp
View file @
387008ce
...
...
@@ -26,7 +26,7 @@
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
//
#include "CudaParallelKernels.h"
#include "CudaParallelKernels.h"
#include "CudaPlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/OpenMMException.h"
...
...
@@ -35,38 +35,38 @@ using namespace OpenMM;
KernelImpl
*
CudaKernelFactory
::
createKernelImpl
(
std
::
string
name
,
const
Platform
&
platform
,
ContextImpl
&
context
)
const
{
CudaPlatform
::
PlatformData
&
data
=
*
static_cast
<
CudaPlatform
::
PlatformData
*>
(
context
.
getPlatformData
());
//
if (data.contexts.size() > 1) {
//
// We are running in parallel on multiple devices, so we may want to create a parallel kernel.
//
//
if (name == CalcForcesAndEnergyKernel::Name())
//
return new CudaParallelCalcForcesAndEnergyKernel(name, platform, data);
//
if (name == CalcHarmonicBondForceKernel::Name())
//
return new CudaParallelCalcHarmonicBondForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCustomBondForceKernel::Name())
//
return new CudaParallelCalcCustomBondForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcHarmonicAngleForceKernel::Name())
//
return new CudaParallelCalcHarmonicAngleForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCustomAngleForceKernel::Name())
//
return new CudaParallelCalcCustomAngleForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcPeriodicTorsionForceKernel::Name())
//
return new CudaParallelCalcPeriodicTorsionForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcRBTorsionForceKernel::Name())
//
return new CudaParallelCalcRBTorsionForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCMAPTorsionForceKernel::Name())
//
return new CudaParallelCalcCMAPTorsionForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCustomTorsionForceKernel::Name())
//
return new CudaParallelCalcCustomTorsionForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcNonbondedForceKernel::Name())
//
return new CudaParallelCalcNonbondedForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCustomNonbondedForceKernel::Name())
//
return new CudaParallelCalcCustomNonbondedForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCustomExternalForceKernel::Name())
//
return new CudaParallelCalcCustomExternalForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCustomHbondForceKernel::Name())
//
return new CudaParallelCalcCustomHbondForceKernel(name, platform, data, context.getSystem());
//
if (name == CalcCustomCompoundBondForceKernel::Name())
//
return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
//
}
if
(
data
.
contexts
.
size
()
>
1
)
{
// We are running in parallel on multiple devices, so we may want to create a parallel kernel.
if
(
name
==
CalcForcesAndEnergyKernel
::
Name
())
return
new
CudaParallelCalcForcesAndEnergyKernel
(
name
,
platform
,
data
);
if
(
name
==
CalcHarmonicBondForceKernel
::
Name
())
return
new
CudaParallelCalcHarmonicBondForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomBondForceKernel
::
Name
())
return
new
CudaParallelCalcCustomBondForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcHarmonicAngleForceKernel
::
Name
())
return
new
CudaParallelCalcHarmonicAngleForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomAngleForceKernel
::
Name
())
return
new
CudaParallelCalcCustomAngleForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcPeriodicTorsionForceKernel
::
Name
())
return
new
CudaParallelCalcPeriodicTorsionForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcRBTorsionForceKernel
::
Name
())
return
new
CudaParallelCalcRBTorsionForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCMAPTorsionForceKernel
::
Name
())
return
new
CudaParallelCalcCMAPTorsionForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomTorsionForceKernel
::
Name
())
return
new
CudaParallelCalcCustomTorsionForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
return
new
CudaParallelCalcNonbondedForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomNonbondedForceKernel
::
Name
())
return
new
CudaParallelCalcCustomNonbondedForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomExternalForceKernel
::
Name
())
return
new
CudaParallelCalcCustomExternalForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
return
new
CudaParallelCalcCustomHbondForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
CudaParallelCalcCustomCompoundBondForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
}
CudaContext
&
cu
=
*
data
.
contexts
[
0
];
if
(
name
==
CalcForcesAndEnergyKernel
::
Name
())
return
new
CudaCalcForcesAndEnergyKernel
(
name
,
platform
,
cu
);
...
...
platforms/cuda2/src/CudaKernels.cpp
View file @
387008ce
...
...
@@ -37,7 +37,7 @@
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaIntegrationUtilities.h"
//
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "CudaKernelSources.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/Operation.h"
...
...
@@ -282,48 +282,62 @@ void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, cons
/**
 * Write the state held by this context to a checkpoint stream.
 *
 * Records are written in this order: format version (int, currently 1), time (double),
 * step count (int), compute-force count (int), the posq array, the velm array, the atom
 * index vector, the position cell offsets, the periodic box size (double4), followed by
 * whatever cu.getIntegrationUtilities().createCheckpoint() and
 * SimTKOpenMMUtilities::createCheckpoint() append.
 *
 * @param context    the owning context (not used directly here)
 * @param stream     the stream to write the checkpoint to
 */
void CudaUpdateStateDataKernel::createCheckpoint(ContextImpl& context, ostream& stream) {
    cu.setAsCurrent();

    // Header: version, time and counters.
    int version = 1;
    stream.write(reinterpret_cast<const char*>(&version), sizeof(version));
    double time = cu.getTime();
    stream.write(reinterpret_cast<const char*>(&time), sizeof(time));
    int stepCount = cu.getStepCount();
    stream.write(reinterpret_cast<const char*>(&stepCount), sizeof(stepCount));
    int computeForceCount = cu.getComputeForceCount();
    stream.write(reinterpret_cast<const char*>(&computeForceCount), sizeof(computeForceCount));

    // Positions and velocities are staged through the pinned buffer; the element size
    // follows the precision mode of the context.
    const size_t elementSize = (cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
    int bufferSize = cu.getPaddedNumAtoms()*elementSize;
    char* buffer = (char*) cu.getPinnedBuffer();
    cu.getPosq().download(buffer);
    stream.write(buffer, bufferSize);
    cu.getVelm().download(buffer);
    stream.write(buffer, bufferSize);

    // Host-side bookkeeping arrays.
    stream.write(reinterpret_cast<const char*>(&cu.getAtomIndex()[0]), sizeof(int)*cu.getAtomIndex().size());
    stream.write(reinterpret_cast<const char*>(&cu.getPosCellOffsets()[0]), sizeof(int4)*cu.getPosCellOffsets().size());
    double4 box = cu.getPeriodicBoxSize();
    stream.write(reinterpret_cast<const char*>(&box), sizeof(double4));

    // Let the helper components append their own state.
    cu.getIntegrationUtilities().createCheckpoint(stream);
    SimTKOpenMMUtilities::createCheckpoint(stream);
}
/**
 * Restore the state of this context (and the shared time/counters/box of every context
 * in the platform data) from a checkpoint stream, reading the records in the order
 * createCheckpoint() writes them.
 *
 * @param context    the owning context (not used directly here)
 * @param stream     the stream to read the checkpoint from
 * @throws OpenMMException if the version record is not 1, or if the header could not be
 *         read completely from the stream
 */
void CudaUpdateStateDataKernel::loadCheckpoint(ContextImpl& context, istream& stream) {
    cu.setAsCurrent();

    // Header fields are zero-initialized so that a failed read (empty or truncated
    // stream) is rejected deterministically instead of comparing an indeterminate value.
    int version = 0;
    stream.read((char*) &version, sizeof(int));
    if (version != 1)
        throw OpenMMException("Checkpoint was created with a different version of OpenMM");
    double time = 0.0;
    stream.read((char*) &time, sizeof(double));
    int stepCount = 0, computeForceCount = 0;
    stream.read((char*) &stepCount, sizeof(int));
    stream.read((char*) &computeForceCount, sizeof(int));
    if (!stream)
        throw OpenMMException("Checkpoint is truncated or could not be read");

    // Time and counters are applied to every context in the platform data.
    vector<CudaContext*>& contexts = cu.getPlatformData().contexts;
    for (int i = 0; i < (int) contexts.size(); i++) {
        contexts[i]->setTime(time);
        contexts[i]->setStepCount(stepCount);
        contexts[i]->setComputeForceCount(computeForceCount);
    }

    // Positions and velocities are staged through the pinned buffer; the element size
    // follows the precision mode of the context.
    int bufferSize = cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4));
    char* buffer = (char*) cu.getPinnedBuffer();
    stream.read(buffer, bufferSize);
    cu.getPosq().upload(buffer);
    stream.read(buffer, bufferSize);
    cu.getVelm().upload(buffer);

    // Atom ordering: read into the host vector, then push it to the device array.
    stream.read((char*) &cu.getAtomIndex()[0], sizeof(int)*cu.getAtomIndex().size());
    cu.getAtomIndexArray().upload(cu.getAtomIndex());
    stream.read((char*) &cu.getPosCellOffsets()[0], sizeof(int4)*cu.getPosCellOffsets().size());

    // Periodic box, applied to every context.
    double4 box;
    stream.read((char*) &box, sizeof(double4));
    for (int i = 0; i < (int) contexts.size(); i++)
        contexts[i]->setPeriodicBoxSize(box.x, box.y, box.z);

    // Let the helper components restore their own state.
    cu.getIntegrationUtilities().loadCheckpoint(stream);
    SimTKOpenMMUtilities::loadCheckpoint(stream);

    // The atom order may have changed, so notify the reorder listeners.
    for (int i = 0; i < (int) cu.getReorderListeners().size(); i++)
        cu.getReorderListeners()[i]->execute();
}
void
CudaApplyConstraintsKernel
::
initialize
(
const
System
&
system
)
{
...
...
@@ -840,6 +854,7 @@ private:
};
/**
 * Release the parameter storage owned by this kernel. The kernel's context is made
 * current first.
 */
CudaCalcPeriodicTorsionForceKernel::~CudaCalcPeriodicTorsionForceKernel() {
    cu.setAsCurrent();
    // Deleting a null pointer is a no-op, so no explicit NULL test is needed.
    delete params;
}
...
...
@@ -926,6 +941,7 @@ private:
};
CudaCalcRBTorsionForceKernel
::~
CudaCalcRBTorsionForceKernel
()
{
cu
.
setAsCurrent
();
if
(
params1
!=
NULL
)
delete
params1
;
if
(
params2
!=
NULL
)
...
...
@@ -3983,8 +3999,8 @@ CudaIntegrateVerletStepKernel::~CudaIntegrateVerletStepKernel() {
}
void
CudaIntegrateVerletStepKernel
::
initialize
(
const
System
&
system
,
const
VerletIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
...
...
@@ -3995,6 +4011,7 @@ void CudaIntegrateVerletStepKernel::initialize(const System& system, const Verle
}
void
CudaIntegrateVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
VerletIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
double
dt
=
integrator
.
getStepSize
();
...
...
@@ -4042,8 +4059,8 @@ CudaIntegrateLangevinStepKernel::~CudaIntegrateLangevinStepKernel() {
}
void
CudaIntegrateLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
LangevinIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
...
...
@@ -4056,6 +4073,7 @@ void CudaIntegrateLangevinStepKernel::initialize(const System& system, const Lan
}
void
CudaIntegrateLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
LangevinIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
double
temperature
=
integrator
.
getTemperature
();
...
...
@@ -4120,8 +4138,8 @@ CudaIntegrateBrownianStepKernel::~CudaIntegrateBrownianStepKernel() {
}
void
CudaIntegrateBrownianStepKernel
::
initialize
(
const
System
&
system
,
const
BrownianIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
...
...
@@ -4133,6 +4151,7 @@ void CudaIntegrateBrownianStepKernel::initialize(const System& system, const Bro
}
void
CudaIntegrateBrownianStepKernel
::
execute
(
ContextImpl
&
context
,
const
BrownianIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
double
temperature
=
integrator
.
getTemperature
();
...
...
@@ -4175,8 +4194,8 @@ CudaIntegrateVariableVerletStepKernel::~CudaIntegrateVariableVerletStepKernel()
}
void
CudaIntegrateVariableVerletStepKernel
::
initialize
(
const
System
&
system
,
const
VariableVerletIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
...
...
@@ -4188,6 +4207,7 @@ void CudaIntegrateVariableVerletStepKernel::initialize(const System& system, con
}
double
CudaIntegrateVariableVerletStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableVerletIntegrator
&
integrator
,
double
maxTime
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
...
...
@@ -4252,8 +4272,8 @@ CudaIntegrateVariableLangevinStepKernel::~CudaIntegrateVariableLangevinStepKerne
}
void
CudaIntegrateVariableLangevinStepKernel
::
initialize
(
const
System
&
system
,
const
VariableLangevinIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
map
<
string
,
string
>
defines
;
defines
[
"NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getNumAtoms
());
...
...
@@ -4268,6 +4288,7 @@ void CudaIntegrateVariableLangevinStepKernel::initialize(const System& system, c
}
double
CudaIntegrateVariableLangevinStepKernel
::
execute
(
ContextImpl
&
context
,
const
VariableLangevinIntegrator
&
integrator
,
double
maxTime
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
...
...
@@ -4412,8 +4433,8 @@ CudaIntegrateCustomStepKernel::~CudaIntegrateCustomStepKernel() {
}
void
CudaIntegrateCustomStepKernel
::
initialize
(
const
System
&
system
,
const
CustomIntegrator
&
integrator
)
{
cu
.
setAsCurrent
();
cu
.
getPlatformData
().
initializeContexts
(
system
);
cu
.
setAsCurrent
();
cu
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
integrator
.
getRandomNumberSeed
());
numGlobalVariables
=
integrator
.
getNumGlobalVariables
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
...
...
@@ -4492,6 +4513,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
}
void
CudaIntegrateCustomStepKernel
::
execute
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
)
{
cu
.
setAsCurrent
();
CudaIntegrationUtilities
&
integration
=
cu
.
getIntegrationUtilities
();
int
numAtoms
=
cu
.
getNumAtoms
();
int
numSteps
=
integrator
.
getNumComputations
();
...
...
platforms/cuda2/src/CudaParallelKernels.cpp
0 → 100644
View file @
387008ce
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaParallelKernels.h"
#include "CudaKernelSources.h"
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Evaluate a CUDA driver call exactly once and throw an OpenMMException describing the
 * failure if it did not return CUDA_SUCCESS.
 *
 * The enclosing scope must provide a string named `errorMessage` and an object named
 * `cu` with a getErrorString() method. The status is captured in a local first, so an
 * argument with side effects (e.g. an allocation call) is not re-executed while the
 * error message is being built.
 */
#define CHECK_RESULT(result) \
    { \
        CUresult checkResultStatus_ = (result); \
        if (checkResultStatus_ != CUDA_SUCCESS) { \
            std::stringstream m; \
            m<<errorMessage<<": "<<cu.getErrorString(checkResultStatus_)<<" ("<<checkResultStatus_<<")"<<" at "<<__FILE__<<":"<<__LINE__; \
            throw OpenMMException(m.str());\
        } \
    }
/**
 * Get the current clock time, measured in microseconds.
 *
 * The Windows variant counts from 1-1-1601 (FILETIME) while the other variant uses
 * gettimeofday(), so the values are only meaningful for comparing times taken on the
 * same platform.
 */
#ifdef _MSC_VER
    #include <Windows.h>
    static long long getTime() {
        FILETIME ft;
        GetSystemTimeAsFileTime(&ft); // 100-nanoseconds since 1-1-1601
        ULARGE_INTEGER result;
        result.LowPart = ft.dwLowDateTime;
        result.HighPart = ft.dwHighDateTime;
        return result.QuadPart/10;
    }
#else
    #include <sys/time.h>
    static long long getTime() {
        struct timeval tod;
        gettimeofday(&tod, 0);
        // Force 64-bit arithmetic: with a 32-bit time_t the product 1000000*tv_sec
        // would overflow before being widened to the return type.
        return 1000000LL*tod.tv_sec+tod.tv_usec;
    }
#endif
/**
 * Work task that starts a force/energy computation on one context. Instances are queued
 * on a context's WorkThread by beginComputation().
 */
class CudaParallelCalcForcesAndEnergyKernel::BeginComputationTask : public CudaContext::WorkTask {
public:
    BeginComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
            bool includeForce, bool includeEnergy, int groups, void* pinnedMemory) :
            context(context), cu(cu), kernel(kernel), includeForce(includeForce),
            includeEnergy(includeEnergy), groups(groups), pinnedMemory(pinnedMemory) {
    }
    void execute() {
        cu.setAsCurrent();
        // Contexts other than the first receive their coordinates from the pinned host
        // buffer (filled from context 0 by the caller) before the kernel is started.
        const bool isSecondaryContext = (cu.getContextIndex() > 0);
        if (isSecondaryContext) {
            cu.getPosq().upload(pinnedMemory, false);
        }
        kernel.beginComputation(context, includeForce, includeEnergy, groups);
    }
private:
    ContextImpl& context;
    CudaContext& cu;
    CudaCalcForcesAndEnergyKernel& kernel;
    bool includeForce, includeEnergy;
    int groups;
    void* pinnedMemory;
};
/**
 * Work task that finishes a force/energy computation on one context: it adds the
 * context's energy to `energy`, makes the forces available to the caller, and records
 * the time at which the context finished in `completionTime`.
 */
class CudaParallelCalcForcesAndEnergyKernel::FinishComputationTask : public CudaContext::WorkTask {
public:
    FinishComputationTask(ContextImpl& context, CudaContext& cu, CudaCalcForcesAndEnergyKernel& kernel,
            bool includeForce, bool includeEnergy, int groups, double& energy, long long& completionTime, void* pinnedMemory) :
            context(context), cu(cu), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy),
            groups(groups), energy(energy), completionTime(completionTime), pinnedMemory(pinnedMemory) {
    }
    void execute() {
        // Execute the kernel, then download forces.

        energy += kernel.finishComputation(context, includeForce, includeEnergy, groups);
        if (includeForce) {
            if (cu.getContextIndex() > 0) {
                // The pinned buffer holds one block of 3*paddedNumAtoms long long values
                // for each context after the first. A void* cannot be subscripted, so
                // view it with the element type it was allocated with.
                int numAtoms = cu.getPaddedNumAtoms();
                long long* forceBuffer = static_cast<long long*>(pinnedMemory);
                cu.getForce().download(&forceBuffer[(cu.getContextIndex()-1)*numAtoms*3]);
            }
            else {
                // Context 0 keeps its forces on the device; just wait for it to finish.
                string errorMessage = "Error synchronizing CUDA context";
                CHECK_RESULT(cuCtxSynchronize());
            }
        }
        completionTime = getTime();
    }
private:
    ContextImpl& context;
    CudaContext& cu;
    CudaCalcForcesAndEnergyKernel& kernel;
    bool includeForce, includeEnergy;
    int groups;
    double& energy;
    long long& completionTime;
    void* pinnedMemory;
};
/**
 * Create the parallel kernel, wrapping one CudaCalcForcesAndEnergyKernel per context in
 * the platform data. Device and pinned buffers start out unallocated (NULL).
 */
CudaParallelCalcForcesAndEnergyKernel::CudaParallelCalcForcesAndEnergyKernel(string name, const Platform& platform, CudaPlatform::PlatformData& data) :
        CalcForcesAndEnergyKernel(name, platform), data(data), completionTimes(data.contexts.size()), contextTiles(data.contexts.size()),
        contextForces(NULL), pinnedPositionBuffer(NULL), pinnedForceBuffer(NULL) {
    const int numContexts = (int) data.contexts.size();
    for (int index = 0; index < numContexts; index++) {
        CudaContext& deviceContext = *data.contexts[index];
        kernels.push_back(Kernel(new CudaCalcForcesAndEnergyKernel(name, platform, deviceContext)));
    }
}
/**
 * Free the device array and the two pinned host buffers, with context 0 made current.
 */
CudaParallelCalcForcesAndEnergyKernel::~CudaParallelCalcForcesAndEnergyKernel() {
    data.contexts[0]->setAsCurrent();
    // Deleting a null pointer is a no-op, so no explicit NULL test is needed here.
    delete contextForces;
    if (pinnedPositionBuffer != NULL) {
        cuMemFreeHost(pinnedPositionBuffer);
    }
    if (pinnedForceBuffer != NULL) {
        cuMemFreeHost(pinnedForceBuffer);
    }
}
void
CudaParallelCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
CudaContext
&
cu
=
*
data
.
contexts
[
0
];
cu
.
setAsCurrent
();
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
parallel
);
sumKernel
=
cu
.
getKernel
(
module
,
"sumForces"
);
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
);
}
/**
 * Start a force/energy computation on every context.
 *
 * On first use this allocates the device array that receives the forces of the other
 * contexts and the two pinned host buffers used for staging. It then downloads the
 * positions from context 0 into the pinned position buffer and queues a
 * BeginComputationTask on each context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForce   passed through to each context's kernel
 * @param includeEnergy  passed through to each context's kernel
 * @param groups         passed through to each context's kernel
 * @throws OpenMMException if pinned host memory cannot be allocated
 */
void CudaParallelCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
    CudaContext& cu = *data.contexts[0];
    cu.setAsCurrent();

    // Each buffer is guarded individually. If one allocation throws, the buffers that
    // were already created are kept and a later call allocates only the missing ones,
    // rather than skipping allocation and using NULL pinned buffers.
    if (contextForces == NULL)
        contextForces = CudaArray::create<long long>(cu, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms(), "contextForces");
    if (pinnedForceBuffer == NULL || pinnedPositionBuffer == NULL) {
        string errorMessage = "Error allocating pinned memory";
        if (pinnedForceBuffer == NULL) {
            CHECK_RESULT(cuMemHostAlloc(&pinnedForceBuffer, 3*(data.contexts.size()-1)*cu.getPaddedNumAtoms()*sizeof(long long), CU_MEMHOSTALLOC_PORTABLE));
        }
        if (pinnedPositionBuffer == NULL) {
            CHECK_RESULT(cuMemHostAlloc(&pinnedPositionBuffer, cu.getPaddedNumAtoms()*(cu.getUseDoublePrecision() ? sizeof(double4) : sizeof(float4)), CU_MEMHOSTALLOC_PORTABLE));
        }
    }

    // Copy coordinates over to each device and execute the kernel.

    cu.getPosq().download(pinnedPositionBuffer);
    for (int i = 0; i < (int) data.contexts.size(); i++) {
        data.contextEnergy[i] = 0.0;
        CudaContext& deviceContext = *data.contexts[i];
        CudaContext::WorkThread& thread = deviceContext.getWorkThread();
        thread.addTask(new BeginComputationTask(context, deviceContext, getKernel(i), includeForce, includeEnergy, groups, pinnedPositionBuffer));
    }
}
/**
 * Finish the force/energy computation on every context and combine the results.
 *
 * A FinishComputationTask is queued on each context's work thread, then
 * data.syncContexts() is called (presumably waiting for the queued tasks - confirm
 * against PlatformData). The per-context energies are summed. When forces were
 * requested, the forces staged in the pinned buffer are uploaded to context 0 and added
 * by the sumForces kernel, and a few nonbonded tiles are moved from the context that
 * finished last to the one that finished first.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForce   passed through to each context's kernel; also enables the force
 *                       summation and tile rebalancing
 * @param includeEnergy  passed through to each context's kernel
 * @param groups         passed through to each context's kernel
 * @return the sum of data.contextEnergy over all entries
 */
double CudaParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups) {
    const int numContexts = (int) data.contexts.size();
    for (int index = 0; index < numContexts; index++) {
        CudaContext& deviceContext = *data.contexts[index];
        CudaContext::WorkThread& worker = deviceContext.getWorkThread();
        worker.addTask(new FinishComputationTask(context, deviceContext, getKernel(index), includeForce, includeEnergy, groups,
                data.contextEnergy[index], completionTimes[index], pinnedForceBuffer));
    }
    data.syncContexts();

    double totalEnergy = 0.0;
    const int numEnergies = (int) data.contextEnergy.size();
    for (int index = 0; index < numEnergies; index++)
        totalEnergy += data.contextEnergy[index];
    if (!includeForce)
        return totalEnergy;

    // Sum the forces from all devices.

    CudaContext& primary = *data.contexts[0];
    contextForces->upload(pinnedForceBuffer, false);
    int bufferSize = 3*primary.getPaddedNumAtoms();
    int numBuffers = data.contexts.size()-1;
    void* args[] = {&primary.getForce().getDevicePointer(), &contextForces->getDevicePointer(), &bufferSize, &numBuffers};
    primary.executeKernel(sumKernel, args, bufferSize);

    // Balance work between the contexts by transferring a few nonbonded tiles from the context that
    // finished last to the one that finished first.

    int fastest = 0;
    int slowest = 0;
    int totalTiles = 0;
    const int numTimes = (int) completionTimes.size();
    for (int index = 0; index < numTimes; index++) {
        if (completionTimes[index] < completionTimes[fastest])
            fastest = index;
        if (completionTimes[index] > completionTimes[slowest])
            slowest = index;
        contextTiles[index] = data.contexts[index]->getNonbondedUtilities().getNumTiles();
        totalTiles += contextTiles[index];
    }

    // Move 0.1% of all tiles, but at least one and no more than the slowest context has.
    int tilesToTransfer = totalTiles/1000;
    if (tilesToTransfer < 1)
        tilesToTransfer = 1;
    if (tilesToTransfer > contextTiles[slowest])
        tilesToTransfer = contextTiles[slowest];
    contextTiles[fastest] += tilesToTransfer;
    contextTiles[slowest] -= tilesToTransfer;

    // Hand each context a contiguous range of tiles of its new size.
    int rangeStart = 0;
    const int numRanges = (int) contextTiles.size();
    for (int index = 0; index < numRanges; index++) {
        data.contexts[index]->getNonbondedUtilities().setTileRange(rangeStart, contextTiles[index]);
        rangeStart += contextTiles[index];
    }
    return totalEnergy;
}
/**
 * Work task wrapping a single CudaCalcHarmonicBondForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcHarmonicBondForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcHarmonicBondForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcHarmonicBondForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcHarmonicBondForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcHarmonicBondForceKernel::CudaParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcHarmonicBondForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcHarmonicBondForceKernel* impl = new CudaCalcHarmonicBondForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcHarmonicBondForceKernel
::
initialize
(
const
System
&
system
,
const
HarmonicBondForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcHarmonicBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the HarmonicBondForce passed to each kernel
 */
void CudaParallelCalcHarmonicBondForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcHarmonicBondForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcCustomBondForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCustomBondForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCustomBondForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCustomBondForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCustomBondForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCustomBondForceKernel::CudaParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCustomBondForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCustomBondForceKernel* impl = new CudaCalcCustomBondForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCustomBondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomBondForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCustomBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the CustomBondForce passed to each kernel
 */
void CudaParallelCalcCustomBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomBondForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcCustomBondForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcHarmonicAngleForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcHarmonicAngleForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcHarmonicAngleForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcHarmonicAngleForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcHarmonicAngleForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcHarmonicAngleForceKernel::CudaParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcHarmonicAngleForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcHarmonicAngleForceKernel* impl = new CudaCalcHarmonicAngleForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcHarmonicAngleForceKernel
::
initialize
(
const
System
&
system
,
const
HarmonicAngleForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the HarmonicAngleForce passed to each kernel
 */
void CudaParallelCalcHarmonicAngleForceKernel::copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcHarmonicAngleForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcCustomAngleForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCustomAngleForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCustomAngleForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCustomAngleForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCustomAngleForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCustomAngleForceKernel::CudaParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCustomAngleForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCustomAngleForceKernel* impl = new CudaCalcCustomAngleForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCustomAngleForceKernel
::
initialize
(
const
System
&
system
,
const
CustomAngleForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCustomAngleForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the CustomAngleForce passed to each kernel
 */
void CudaParallelCalcCustomAngleForceKernel::copyParametersToContext(ContextImpl& context, const CustomAngleForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcCustomAngleForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcPeriodicTorsionForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcPeriodicTorsionForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcPeriodicTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcPeriodicTorsionForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcPeriodicTorsionForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcPeriodicTorsionForceKernel::CudaParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcPeriodicTorsionForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcPeriodicTorsionForceKernel* impl = new CudaCalcPeriodicTorsionForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcPeriodicTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
PeriodicTorsionForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the PeriodicTorsionForce passed to each kernel
 */
void CudaParallelCalcPeriodicTorsionForceKernel::copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcPeriodicTorsionForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcRBTorsionForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcRBTorsionForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcRBTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcRBTorsionForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcRBTorsionForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcRBTorsionForceKernel::CudaParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcRBTorsionForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcRBTorsionForceKernel* impl = new CudaCalcRBTorsionForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcRBTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
RBTorsionForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the RBTorsionForce passed to each kernel
 */
void CudaParallelCalcRBTorsionForceKernel::copyParametersToContext(ContextImpl& context, const RBTorsionForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcRBTorsionForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcCMAPTorsionForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCMAPTorsionForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCMAPTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCMAPTorsionForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCMAPTorsionForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCMAPTorsionForceKernel::CudaParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCMAPTorsionForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCMAPTorsionForceKernel* impl = new CudaCalcCMAPTorsionForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCMAPTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
CMAPTorsionForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCMAPTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Work task wrapping a single CudaCalcCustomTorsionForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCustomTorsionForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCustomTorsionForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCustomTorsionForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCustomTorsionForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCustomTorsionForceKernel::CudaParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCustomTorsionForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCustomTorsionForceKernel* impl = new CudaCalcCustomTorsionForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCustomTorsionForceKernel
::
initialize
(
const
System
&
system
,
const
CustomTorsionForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCustomTorsionForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the CustomTorsionForce passed to each kernel
 */
void CudaParallelCalcCustomTorsionForceKernel::copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcCustomTorsionForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcNonbondedForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcNonbondedForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcNonbondedForceKernel& kernel;
    bool includeForce, includeEnergy, includeDirect, includeReciprocal;
    double& energy;
public:
    /**
     * @param context            the context passed through to the kernel
     * @param kernel             the per-device kernel to execute
     * @param includeForce       forwarded to the kernel's execute()
     * @param includeEnergy      forwarded to the kernel's execute()
     * @param includeDirect      forwarded to the kernel's execute()
     * @param includeReciprocal  forwarded to the kernel's execute()
     * @param energy             accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcNonbondedForceKernel& kernel, bool includeForce,
            bool includeEnergy, bool includeDirect, bool includeReciprocal, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy),
            includeDirect(includeDirect), includeReciprocal(includeReciprocal), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy, includeDirect, includeReciprocal);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcNonbondedForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcNonbondedForceKernel::CudaParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcNonbondedForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcNonbondedForceKernel* impl = new CudaCalcNonbondedForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context            the context in which to execute this kernel
 * @param includeForces      forwarded to each per-device kernel
 * @param includeEnergy      forwarded to each per-device kernel
 * @param includeDirect      forwarded to each per-device kernel
 * @param includeReciprocal  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, includeDirect, includeReciprocal,
                data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the NonbondedForce passed to each kernel
 */
void CudaParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const NonbondedForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcNonbondedForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcCustomNonbondedForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCustomNonbondedForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCustomNonbondedForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCustomNonbondedForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCustomNonbondedForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCustomNonbondedForceKernel::CudaParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCustomNonbondedForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCustomNonbondedForceKernel* impl = new CudaCalcCustomNonbondedForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCustomNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the CustomNonbondedForce passed to each kernel
 */
void CudaParallelCalcCustomNonbondedForceKernel::copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcCustomNonbondedForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcCustomExternalForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCustomExternalForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCustomExternalForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCustomExternalForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCustomExternalForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCustomExternalForceKernel::CudaParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCustomExternalForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCustomExternalForceKernel* impl = new CudaCalcCustomExternalForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCustomExternalForceKernel
::
initialize
(
const
System
&
system
,
const
CustomExternalForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCustomExternalForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the CustomExternalForce passed to each kernel
 */
void CudaParallelCalcCustomExternalForceKernel::copyParametersToContext(ContextImpl& context, const CustomExternalForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcCustomExternalForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcCustomHbondForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCustomHbondForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCustomHbondForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCustomHbondForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCustomHbondForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCustomHbondForceKernel::CudaParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCustomHbondForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCustomHbondForceKernel* impl = new CudaCalcCustomHbondForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCustomHbondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomHbondForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCustomHbondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the CustomHbondForce passed to each kernel
 */
void CudaParallelCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& context, const CustomHbondForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcCustomHbondForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
/**
 * Work task wrapping a single CudaCalcCustomCompoundBondForceKernel. Executing it runs
 * that kernel and adds the returned value to a caller-supplied energy accumulator.
 */
class CudaParallelCalcCustomCompoundBondForceKernel::Task : public CudaContext::WorkTask {
private:
    ContextImpl& context;
    CudaCalcCustomCompoundBondForceKernel& kernel;
    bool includeForce, includeEnergy;
    double& energy;
public:
    /**
     * @param context        the context passed through to the kernel
     * @param kernel         the per-device kernel to execute
     * @param includeForce   forwarded to the kernel's execute()
     * @param includeEnergy  forwarded to the kernel's execute()
     * @param energy         accumulator that receives the kernel's return value
     */
    Task(ContextImpl& context, CudaCalcCustomCompoundBondForceKernel& kernel, bool includeForce, bool includeEnergy, double& energy) :
            context(context), kernel(kernel), includeForce(includeForce), includeEnergy(includeEnergy), energy(energy) {
    }
    /** Run the wrapped kernel and accumulate its result. */
    void execute() {
        const double contribution = kernel.execute(context, includeForce, includeEnergy);
        energy += contribution;
    }
};
/**
 * Create the parallel kernel, constructing one CudaCalcCustomCompoundBondForceKernel
 * for every context listed in the platform data.
 */
CudaParallelCalcCustomCompoundBondForceKernel::CudaParallelCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system) :
        CalcCustomCompoundBondForceKernel(name, platform), data(data) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaCalcCustomCompoundBondForceKernel* impl = new CudaCalcCustomCompoundBondForceKernel(name, platform, *data.contexts[device], system);
        kernels.push_back(Kernel(impl));
    }
}
void
CudaParallelCalcCustomCompoundBondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomCompoundBondForce
&
force
)
{
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
getKernel
(
i
).
initialize
(
system
,
force
);
}
/**
 * Queue one Task per context on that context's work thread.
 *
 * @param context        the context in which to execute this kernel
 * @param includeForces  forwarded to each per-device kernel
 * @param includeEnergy  forwarded to each per-device kernel
 * @return always 0.0; each Task adds its energy to data.contextEnergy instead
 */
double CudaParallelCalcCustomCompoundBondForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
    const int numContexts = (int) data.contexts.size();
    for (int device = 0; device < numContexts; device++) {
        CudaContext::WorkThread& worker = data.contexts[device]->getWorkThread();
        worker.addTask(new Task(context, getKernel(device), includeForces, includeEnergy, data.contextEnergy[device]));
    }
    return 0.0;
}
/**
 * Forward copyParametersToContext() to every per-device kernel.
 *
 * @param context  the context passed to each kernel
 * @param force    the CustomCompoundBondForce passed to each kernel
 */
void CudaParallelCalcCustomCompoundBondForceKernel::copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force) {
    const int numKernels = (int) kernels.size();
    for (int index = 0; index < numKernels; index++) {
        CudaCalcCustomCompoundBondForceKernel& deviceKernel = getKernel(index);
        deviceKernel.copyParametersToContext(context, force);
    }
}
platforms/cuda2/src/CudaParallelKernels.h
0 → 100644
View file @
387008ce
#ifndef OPENMM_CUDAPARALLELKERNELS_H_
#define OPENMM_CUDAPARALLELKERNELS_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaPlatform.h"
#include "CudaContext.h"
#include "CudaKernels.h"
namespace
OpenMM
{
/**
 * Parallel version of the kernel that is invoked at the beginning and end of force and energy
 * computations. It gives the Platform a chance to clear buffers and do other initialization at the
 * beginning, and to do any necessary work at the end to determine the final results. It keeps a
 * list of Kernel objects whose implementations are CudaCalcForcesAndEnergyKernel instances.
 */
class CudaParallelCalcForcesAndEnergyKernel : public CalcForcesAndEnergyKernel {
public:
    CudaParallelCalcForcesAndEnergyKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data);
    ~CudaParallelCalcForcesAndEnergyKernel();
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcForcesAndEnergyKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcForcesAndEnergyKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcForcesAndEnergyKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     */
    void initialize(const System& system);
    /**
     * Called at the start of each force/energy computation, before calcForcesAndEnergy() has been
     * called on any ForceImpl.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForce   true if forces should be computed
     * @param includeEnergy  true if potential energy should be computed
     * @param groups         a set of bit flags for which force groups to include
     */
    void beginComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
    /**
     * Called at the end of each force/energy computation, after calcForcesAndEnergy() has been
     * called on every ForceImpl.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForce   true if forces should be computed
     * @param includeEnergy  true if potential energy should be computed
     * @param groups         a set of bit flags for which force groups to include
     * @return the potential energy of the system.  This value is added to all values returned by
     * ForceImpls' calcForcesAndEnergy() methods.  That is, each force kernel may <i>either</i>
     * return its contribution to the energy directly, <i>or</i> add it to an internal buffer so
     * that it will be included here.
     */
    double finishComputation(ContextImpl& context, bool includeForce, bool includeEnergy, int groups);
private:
    // Forward declarations only; the definitions are not part of this header.
    class BeginComputationTask;
    class FinishComputationTask;
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;              // the wrapped kernels returned by getKernel()
    std::vector<long long> completionTimes;
    std::vector<int> contextTiles;
    CudaArray* contextForces;
    void* pinnedPositionBuffer;
    void* pinnedForceBuffer;
    CUfunction sumKernel;
};
/**
 * Parallel version of the kernel invoked by HarmonicBondForce to calculate the forces acting on
 * the system and the energy of the system. It keeps a list of Kernel objects whose
 * implementations are CudaCalcHarmonicBondForceKernel instances.
 */
class CudaParallelCalcHarmonicBondForceKernel : public CalcHarmonicBondForceKernel {
public:
    CudaParallelCalcHarmonicBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcHarmonicBondForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcHarmonicBondForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcHarmonicBondForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the HarmonicBondForce this kernel will be used for
     */
    void initialize(const System& system, const HarmonicBondForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the HarmonicBondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const HarmonicBondForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CustomBondForce to calculate the forces acting on the
 * system and the energy of the system. It keeps a list of Kernel objects whose implementations
 * are CudaCalcCustomBondForceKernel instances.
 */
class CudaParallelCalcCustomBondForceKernel : public CalcCustomBondForceKernel {
public:
    CudaParallelCalcCustomBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCustomBondForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCustomBondForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCustomBondForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomBondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomBondForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomBondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomBondForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by HarmonicAngleForce to calculate the forces acting on
 * the system and the energy of the system. It keeps a list of Kernel objects whose
 * implementations are CudaCalcHarmonicAngleForceKernel instances.
 */
class CudaParallelCalcHarmonicAngleForceKernel : public CalcHarmonicAngleForceKernel {
public:
    CudaParallelCalcHarmonicAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcHarmonicAngleForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcHarmonicAngleForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcHarmonicAngleForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the HarmonicAngleForce this kernel will be used for
     */
    void initialize(const System& system, const HarmonicAngleForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the HarmonicAngleForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const HarmonicAngleForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CustomAngleForce to calculate the forces acting on the
 * system and the energy of the system. It keeps a list of Kernel objects whose implementations
 * are CudaCalcCustomAngleForceKernel instances.
 */
class CudaParallelCalcCustomAngleForceKernel : public CalcCustomAngleForceKernel {
public:
    CudaParallelCalcCustomAngleForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCustomAngleForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCustomAngleForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCustomAngleForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomAngleForce this kernel will be used for
     */
    void initialize(const System& system, const CustomAngleForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomAngleForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomAngleForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by PeriodicTorsionForce to calculate the forces acting on
 * the system and the energy of the system. It keeps a list of Kernel objects whose
 * implementations are CudaCalcPeriodicTorsionForceKernel instances.
 */
class CudaParallelCalcPeriodicTorsionForceKernel : public CalcPeriodicTorsionForceKernel {
public:
    CudaParallelCalcPeriodicTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcPeriodicTorsionForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcPeriodicTorsionForceKernel& getKernel(int index) {
        return dynamic_cast<CudaCalcPeriodicTorsionForceKernel&>(kernels[index].getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the PeriodicTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const PeriodicTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the PeriodicTorsionForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const PeriodicTorsionForce& force);
private:
    // Task is an implementation detail (only forward-declared here), so it is kept private
    // like the Task classes of the other parallel kernels.
    class Task;
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by RBTorsionForce to calculate the forces acting on the
 * system and the energy of the system. It keeps a list of Kernel objects whose implementations
 * are CudaCalcRBTorsionForceKernel instances.
 */
class CudaParallelCalcRBTorsionForceKernel : public CalcRBTorsionForceKernel {
public:
    CudaParallelCalcRBTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcRBTorsionForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcRBTorsionForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcRBTorsionForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the RBTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const RBTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the RBTorsionForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const RBTorsionForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CMAPTorsionForce to calculate the forces acting on the
 * system and the energy of the system. It keeps a list of Kernel objects whose implementations
 * are CudaCalcCMAPTorsionForceKernel instances.
 */
class CudaParallelCalcCMAPTorsionForceKernel : public CalcCMAPTorsionForceKernel {
public:
    CudaParallelCalcCMAPTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCMAPTorsionForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCMAPTorsionForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCMAPTorsionForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CMAPTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const CMAPTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CustomTorsionForce to calculate the forces acting on
 * the system and the energy of the system. It keeps a list of Kernel objects whose
 * implementations are CudaCalcCustomTorsionForceKernel instances.
 */
class CudaParallelCalcCustomTorsionForceKernel : public CalcCustomTorsionForceKernel {
public:
    CudaParallelCalcCustomTorsionForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCustomTorsionForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCustomTorsionForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCustomTorsionForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomTorsionForce this kernel will be used for
     */
    void initialize(const System& system, const CustomTorsionForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomTorsionForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomTorsionForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by NonbondedForce to calculate the forces acting on the
 * system. It keeps a list of Kernel objects whose implementations are
 * CudaCalcNonbondedForceKernel instances.
 */
class CudaParallelCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
    CudaParallelCalcNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcNonbondedForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcNonbondedForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcNonbondedForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the NonbondedForce this kernel will be used for
     */
    void initialize(const System& system, const NonbondedForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context            the context in which to execute this kernel
     * @param includeForces      true if forces should be calculated
     * @param includeEnergy      true if the energy should be calculated
     * @param includeDirect      true if direct space interactions should be included
     * @param includeReciprocal  true if reciprocal space interactions should be included
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy, bool includeDirect, bool includeReciprocal);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the NonbondedForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const NonbondedForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CustomNonbondedForce to calculate the forces acting on
 * the system. It keeps a list of Kernel objects whose implementations are
 * CudaCalcCustomNonbondedForceKernel instances.
 */
class CudaParallelCalcCustomNonbondedForceKernel : public CalcCustomNonbondedForceKernel {
public:
    CudaParallelCalcCustomNonbondedForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCustomNonbondedForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCustomNonbondedForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCustomNonbondedForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomNonbondedForce this kernel will be used for
     */
    void initialize(const System& system, const CustomNonbondedForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomNonbondedForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomNonbondedForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CustomExternalForce to calculate the forces acting on
 * the system and the energy of the system. It keeps a list of Kernel objects whose
 * implementations are CudaCalcCustomExternalForceKernel instances.
 */
class CudaParallelCalcCustomExternalForceKernel : public CalcCustomExternalForceKernel {
public:
    CudaParallelCalcCustomExternalForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCustomExternalForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCustomExternalForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCustomExternalForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomExternalForce this kernel will be used for
     */
    void initialize(const System& system, const CustomExternalForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomExternalForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomExternalForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CustomHbondForce to calculate the forces acting on the
 * system. It keeps a list of Kernel objects whose implementations are
 * CudaCalcCustomHbondForceKernel instances.
 */
class CudaParallelCalcCustomHbondForceKernel : public CalcCustomHbondForceKernel {
public:
    CudaParallelCalcCustomHbondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCustomHbondForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCustomHbondForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCustomHbondForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomHbondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomHbondForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomHbondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomHbondForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
/**
 * Parallel version of the kernel invoked by CustomCompoundBondForce to calculate the forces acting
 * on the system. It keeps a list of Kernel objects whose implementations are
 * CudaCalcCustomCompoundBondForceKernel instances.
 */
class CudaParallelCalcCustomCompoundBondForceKernel : public CalcCustomCompoundBondForceKernel {
public:
    CudaParallelCalcCustomCompoundBondForceKernel(std::string name, const Platform& platform, CudaPlatform::PlatformData& data, System& system);
    /**
     * Get one of the wrapped kernels.
     *
     * @param index    the position of the kernel within this object's kernel list
     * @return the kernel's implementation as a CudaCalcCustomCompoundBondForceKernel
     *         (the dynamic_cast throws std::bad_cast if it has any other type)
     */
    CudaCalcCustomCompoundBondForceKernel& getKernel(int index) {
        Kernel& wrapped = kernels[index];
        return dynamic_cast<CudaCalcCustomCompoundBondForceKernel&>(wrapped.getImpl());
    }
    /**
     * Initialize the kernel.
     *
     * @param system     the System this kernel will be applied to
     * @param force      the CustomCompoundBondForce this kernel will be used for
     */
    void initialize(const System& system, const CustomCompoundBondForce& force);
    /**
     * Execute the kernel to calculate the forces and/or energy.
     *
     * @param context        the context in which to execute this kernel
     * @param includeForces  true if forces should be calculated
     * @param includeEnergy  true if the energy should be calculated
     * @return the potential energy due to the force
     */
    double execute(ContextImpl& context, bool includeForces, bool includeEnergy);
    /**
     * Copy changed parameters over to a context.
     *
     * @param context    the context to copy parameters to
     * @param force      the CustomCompoundBondForce to copy the parameters from
     */
    void copyParametersToContext(ContextImpl& context, const CustomCompoundBondForce& force);
private:
    class Task;                          // forward declaration; not defined in this header
    CudaPlatform::PlatformData& data;
    std::vector<Kernel> kernels;         // the wrapped kernels returned by getKernel()
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDAPARALLELKERNELS_H_*/
platforms/cuda2/src/kernels/findInteractingBlocks.cu
View file @
387008ce
...
...
@@ -101,13 +101,14 @@ __device__ void storeInteractionData(ushort2* buffer, int* valid, short* sum, us
extern
"C"
__global__
void
findBlocksWithInteractions
(
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockBoundingBox
,
unsigned
int
*
__restrict__
interactionCount
,
ushort2
*
__restrict__
interactingTiles
,
unsigned
int
*
__restrict__
interactionFlags
,
const
real4
*
__restrict__
posq
,
unsigned
int
maxTiles
,
unsigned
int
startTileIndex
,
unsigned
int
end
Tile
Index
)
{
unsigned
int
num
Tile
s
)
{
__shared__
ushort2
buffer
[
BUFFER_SIZE
];
__shared__
int
valid
[
BUFFER_SIZE
];
__shared__
short
sum
[
BUFFER_SIZE
];
__shared__
ushort2
temp
[
BUFFER_SIZE
];
__shared__
int
bufferFull
;
__shared__
int
globalIndex
;
unsigned
int
endTileIndex
=
startTileIndex
+
numTiles
;
int
valuesInBuffer
=
0
;
if
(
threadIdx
.
x
==
0
)
bufferFull
=
false
;
...
...
platforms/cuda2/src/kernels/parallel.cu
0 → 100644
View file @
387008ce
/**
 * Sum the forces computed by different contexts.
 *
 * For every element index below bufferSize, the values buffer[index], buffer[index+bufferSize],
 * ... (one per buffer, numBuffers in total) are added to force[index].
 *
 * @param force       array of at least bufferSize elements that is updated in place
 * @param buffer      numBuffers consecutive segments of bufferSize elements each
 * @param bufferSize  the number of elements in each segment
 * @param numBuffers  the number of segments stored in buffer
 */
extern "C" __global__ void sumForces(long long* __restrict__ force, long long* __restrict__ buffer, int bufferSize, int numBuffers) {
    int totalSize = bufferSize*numBuffers;
    int index = blockDim.x*blockIdx.x+threadIdx.x;
    while (index < bufferSize) {
        long long total = force[index];
        // Visit the element at this offset in each segment of buffer.
        for (int element = index; element < totalSize; element += bufferSize)
            total += buffer[element];
        force[index] = total;
        // Advance by the total number of threads in the grid.
        index += blockDim.x*gridDim.x;
    }
}
platforms/cuda2/src/kernels/utilities.cu
View file @
387008ce
...
...
@@ -73,34 +73,4 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer
(
buffer6
,
size6
);
}
/**
 * Sum a collection of buffers into the first one.
 *
 * buffer is read as numBuffers consecutive segments of bufferSize float4 elements.  Each element
 * of the first segment is replaced by the sum of the elements at the same offset in all segments.
 */
__global__ void reduceFloat4Buffer(float4* __restrict__ buffer, int bufferSize, int numBuffers) {
    int totalSize = bufferSize*numBuffers;
    for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
        // Start from the first segment's value, then add the matching element of every later segment.
        float4 total = buffer[index];
        for (int element = index+bufferSize; element < totalSize; element += bufferSize)
            total += buffer[element];
        buffer[index] = total;
    }
}
/**
 * Sum the various buffers containing forces.
 *
 * longBuffer holds fixed-point force components: the three components for element `index` are
 * read from longBuffer[index], longBuffer[index+bufferSize] and longBuffer[index+2*bufferSize],
 * and each is converted to floating point by multiplying with 1/0xFFFFFFFF.  The converted value
 * is added to the matching element of every segment of buffer (numBuffers segments of bufferSize
 * float4 elements) and the total is stored in the first segment.
 *
 * The fixed-point values are declared as long long so that they are 64 bits wide on every
 * platform; plain long is only 32 bits on LLP64 targets such as Windows, which would misread
 * the buffer.
 */
__global__ void reduceForces(const long long* __restrict__ longBuffer, float4* __restrict__ buffer, int bufferSize, int numBuffers) {
    int totalSize = bufferSize*numBuffers;
    float scale = 1.0f/(float) 0xFFFFFFFF;
    for (int index = blockDim.x*blockIdx.x+threadIdx.x; index < bufferSize; index += blockDim.x*gridDim.x) {
        float4 sum = make_float4(scale*longBuffer[index], scale*longBuffer[index+bufferSize], scale*longBuffer[index+2*bufferSize], 0.0f);
        for (int i = index; i < totalSize; i += bufferSize)
            sum += buffer[i];
        buffer[index] = sum;
    }
}
}
\ No newline at end of file
platforms/cuda2/tests/TestCudaCheckpoints.cpp
0 → 100644
View file @
387008ce
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests creating and loading checkpoints with the CUDA platform.
*/
#include "CudaPlatform.h"
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/AndersenThermostat.h"
#include "openmm/Context.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <sstream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
/**
 * Assert that two States describe the same simulation state: time, particle positions and
 * velocities, periodic box vectors, and every parameter recorded in s1.
 *
 * Both States must have been created with positions, velocities and parameters.
 *
 * @param s1    the reference state
 * @param s2    the state expected to match s1
 */
void compareStates(State& s1, State& s2) {
    ASSERT_EQUAL_TOL(s1.getTime(), s2.getTime(), TOL);

    // Per-particle data.  Check the particle counts first so the loop cannot index past the end of s2.
    int numParticles = s1.getPositions().size();
    ASSERT_EQUAL(numParticles, (int) s2.getPositions().size());
    for (int i = 0; i < numParticles; i++) {
        ASSERT_EQUAL_VEC(s1.getPositions()[i], s2.getPositions()[i], TOL);
        ASSERT_EQUAL_VEC(s1.getVelocities()[i], s2.getVelocities()[i], TOL);
    }

    // The box vectors and parameters do not depend on the particle index, so they are checked
    // once, outside the loop.  This also covers them when the states contain no particles.
    Vec3 a1, b1, c1, a2, b2, c2;
    s1.getPeriodicBoxVectors(a1, b1, c1);
    s2.getPeriodicBoxVectors(a2, b2, c2);
    ASSERT_EQUAL_VEC(a1, a2, TOL);
    ASSERT_EQUAL_VEC(b1, b2, TOL);
    ASSERT_EQUAL_VEC(c1, c2, TOL);
    const map<string, double>& params1 = s1.getParameters();
    const map<string, double>& params2 = s2.getParameters();
    for (map<string, double>::const_iterator iter = params1.begin(); iter != params1.end(); ++iter) {
        map<string, double>::const_iterator match = params2.find(iter->first);
        // A parameter missing from s2 is reported as a failure instead of dereferencing end().
        ASSERT_EQUAL(true, match != params2.end());
        ASSERT_EQUAL(iter->second, match->second);
    }
}
void
testCheckpoint
()
{
const
int
numParticles
=
100
;
const
double
boxSize
=
5.0
;
const
double
temperature
=
200.0
;
CudaPlatform
platform
;
System
system
;
system
.
addForce
(
new
AndersenThermostat
(
0.0
,
100.0
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
system
.
addForce
(
nonbonded
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
i
%
2
==
0
?
0.1
:
-
0.1
,
0.2
,
0.1
);
bool
clash
;
do
{
clash
=
false
;
positions
[
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
Vec3
delta
=
positions
[
i
]
-
positions
[
j
];
if
(
sqrt
(
delta
.
dot
(
delta
))
<
0.1
)
clash
=
true
;
}
}
while
(
clash
);
}
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
context
.
setPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
context
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
);
// Run for a little while.
integrator
.
step
(
100
);
// Record the current state and make a checkpoint.
State
s1
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
stringstream
stream1
(
ios_base
::
out
|
ios_base
::
in
|
ios_base
::
binary
);
context
.
createCheckpoint
(
stream1
);
// Continue the simulation for a few more steps and record the state again.
integrator
.
step
(
10
);
State
s2
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
// Restore from the checkpoint and see if everything gets restored correctly.
context
.
setPeriodicBoxVectors
(
Vec3
(
2
*
boxSize
,
0
,
0
),
Vec3
(
0
,
2
*
boxSize
,
0
),
Vec3
(
0
,
0
,
2
*
boxSize
));
context
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
+
10
);
context
.
loadCheckpoint
(
stream1
);
State
s3
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s1
,
s3
);
// Now simulate from there and see if the trajectory is identical.
integrator
.
step
(
10
);
State
s4
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s2
,
s4
);
// Create a new Context that uses multiple devices.
string
deviceIndex
=
platform
.
getPropertyValue
(
context
,
CudaPlatform
::
CudaDeviceIndex
());
map
<
string
,
string
>
props
;
props
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
+
","
+
deviceIndex
;
VerletIntegrator
integrator2
(
0.001
);
Context
context2
(
system
,
integrator2
,
platform
,
props
);
context2
.
setPositions
(
positions
);
context2
.
setPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
context2
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
);
// Now repeat all of the above tests with it.
integrator2
.
step
(
100
);
State
s5
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
stringstream
stream2
(
ios_base
::
out
|
ios_base
::
in
|
ios_base
::
binary
);
context2
.
createCheckpoint
(
stream2
);
integrator2
.
step
(
10
);
State
s6
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
context2
.
setPeriodicBoxVectors
(
Vec3
(
2
*
boxSize
,
0
,
0
),
Vec3
(
0
,
2
*
boxSize
,
0
),
Vec3
(
0
,
0
,
2
*
boxSize
));
context2
.
setParameter
(
AndersenThermostat
::
Temperature
(),
temperature
+
10
);
context2
.
loadCheckpoint
(
stream2
);
State
s7
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s5
,
s7
);
integrator2
.
step
(
10
);
State
s8
=
context2
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Parameters
);
compareStates
(
s6
,
s8
);
}
/**
 * Run the checkpoint test.  Prints "Done" and returns 0 on success; if the test throws, prints
 * the exception message and returns 1.
 */
int main() {
    bool failed = false;
    try {
        testCheckpoint();
    }
    catch (const exception& e) {
        cout << "exception: " << e.what() << endl;
        failed = true;
    }
    if (failed)
        return 1;
    cout << "Done" << endl;
    return 0;
}
platforms/cuda2/tests/TestCudaCustomAngleForce.cpp
View file @
387008ce
...
...
@@ -164,7 +164,7 @@ void testParallelComputation() {
int
main
()
{
try
{
testAngles
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaCustomBondForce.cpp
View file @
387008ce
...
...
@@ -169,7 +169,7 @@ int main() {
try
{
testBonds
();
testManyParameters
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaCustomCompoundBondForce.cpp
View file @
387008ce
...
...
@@ -205,7 +205,7 @@ int main() {
try
{
testBond
();
testPositionDependence
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaCustomExternalForce.cpp
View file @
387008ce
...
...
@@ -166,7 +166,7 @@ int main() {
try
{
testForce
();
testManyParameters
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaCustomNonbondedForce.cpp
View file @
387008ce
...
...
@@ -424,7 +424,7 @@ int main() {
testPeriodic
();
testTabulatedFunction
();
testCoulombLennardJones
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaCustomTorsionForce.cpp
View file @
387008ce
...
...
@@ -205,7 +205,7 @@ int main() {
try
{
testTorsions
();
testRange
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaHarmonicAngleForce.cpp
View file @
387008ce
...
...
@@ -130,7 +130,7 @@ void testParallelComputation() {
int
main
()
{
try
{
testAngles
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaHarmonicBondForce.cpp
View file @
387008ce
...
...
@@ -121,7 +121,7 @@ void testParallelComputation() {
int
main
()
{
try
{
testBonds
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
View file @
387008ce
...
...
@@ -814,8 +814,8 @@ int main() {
testBlockInteractions
(
true
);
testDispersionCorrection
();
testChangingParameters
();
//
testParallelComputation(false);
//
testParallelComputation(true);
testParallelComputation
(
false
);
testParallelComputation
(
true
);
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaPeriodicTorsionForce.cpp
View file @
387008ce
...
...
@@ -124,7 +124,7 @@ void testParallelComputation() {
int
main
()
{
try
{
testPeriodicTorsions
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda2/tests/TestCudaRBTorsionForce.cpp
View file @
387008ce
...
...
@@ -143,7 +143,7 @@ void testParallelComputation() {
int
main
()
{
try
{
testRBTorsions
();
//
testParallelComputation();
testParallelComputation
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment