Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c9bf6eee
"wrappers/python/vscode:/vscode.git/clone" did not exist on "b2c35a8b05a522c7716d928e6002861e3f1af0f1"
Commit
c9bf6eee
authored
Aug 06, 2009
by
Peter Eastman
Browse files
Began CUDA implementation of CustomNonbondedForce
parent
364130a5
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1328 additions
and
47 deletions
+1328
-47
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+2
-2
platforms/cuda/src/CudaKernelFactory.cpp
platforms/cuda/src/CudaKernelFactory.cpp
+2
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+97
-19
platforms/cuda/src/CudaKernels.h
platforms/cuda/src/CudaKernels.h
+35
-1
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+3
-2
platforms/cuda/src/kernels/cudaKernels.h
platforms/cuda/src/kernels/cudaKernels.h
+6
-0
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+20
-2
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+215
-3
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+9
-0
platforms/cuda/src/kernels/kCalculateCustomNonbondedForces.cu
...forms/cuda/src/kernels/kCalculateCustomNonbondedForces.cu
+331
-0
platforms/cuda/src/kernels/kCalculateCustomNonbondedForces.h
platforms/cuda/src/kernels/kCalculateCustomNonbondedForces.h
+370
-0
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
+220
-0
platforms/reference/src/ReferenceKernels.cpp
platforms/reference/src/ReferenceKernels.cpp
+15
-15
platforms/reference/src/ReferenceKernels.h
platforms/reference/src/ReferenceKernels.h
+3
-3
No files found.
platforms/cuda/include/CudaPlatform.h
View file @
c9bf6eee
...
...
@@ -81,8 +81,8 @@ public:
_gpuContext
*
gpu
;
KernelImpl
*
primaryKernel
;
bool
removeCM
;
bool
hasBonds
,
hasAngles
,
hasPeriodicTorsions
,
hasRB
,
hasNonbonded
;
int
nonbondedMethod
;
bool
hasBonds
,
hasAngles
,
hasPeriodicTorsions
,
hasRB
,
hasNonbonded
,
hasCustomNonbonded
;
int
nonbondedMethod
,
customNonbondedMethod
;
int
cmMotionFrequency
;
int
stepCount
,
computeForceCount
;
double
time
;
...
...
platforms/cuda/src/CudaKernelFactory.cpp
View file @
c9bf6eee
...
...
@@ -47,6 +47,8 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
return
new
CudaCalcRBTorsionForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcNonbondedForceKernel
::
Name
())
return
new
CudaCalcNonbondedForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcCustomNonbondedForceKernel
::
Name
())
return
new
CudaCalcCustomNonbondedForceKernel
(
name
,
platform
,
data
,
context
.
getSystem
());
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
return
new
CudaCalcGBSAOBCForceKernel
(
name
,
platform
,
data
);
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
c9bf6eee
...
...
@@ -51,15 +51,11 @@ static void calcForces(ContextImpl& context, CudaPlatform::PlatformData& data) {
kReduceObcGbsaBornForces
(
gpu
);
kCalculateObcGbsaForces2
(
gpu
);
}
else
if
(
data
.
hasNonbonded
)
{
else
if
(
data
.
hasNonbonded
)
kCalculateCDLJForces
(
gpu
);
}
if
(
data
.
hasCustomNonbonded
)
kCalculateCustomNonbondedForces
(
gpu
,
data
.
hasNonbonded
);
kCalculateLocalForces
(
gpu
);
/*
if (gpu->bIncludeGBSA)
kReduceBornSumAndForces(gpu);
else
*/
kReduceForces
(
gpu
);
}
...
...
@@ -255,16 +251,15 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Identify which exceptions are 1-4 interactions.
vector
<
pair
<
int
,
int
>
>
exclusions
;
vector
<
int
>
nb14
s
;
vector
<
int
>
exception
s
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
int
particle1
,
particle2
;
double
chargeProd
,
sigma
,
epsilon
;
force
.
getExceptionParameters
(
i
,
particle1
,
particle2
,
chargeProd
,
sigma
,
epsilon
);
exclusions
.
push_back
(
pair
<
int
,
int
>
(
particle1
,
particle2
));
if
(
chargeProd
!=
0.0
||
epsilon
!=
0.0
)
nb14
s
.
push_back
(
i
);
exception
s
.
push_back
(
i
);
}
num14
=
nb14s
.
size
();
// Initialize nonbonded interactions.
...
...
@@ -329,15 +324,16 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
// Initialize 1-4 nonbonded interactions.
{
vector
<
int
>
particle1
(
num14
);
vector
<
int
>
particle2
(
num14
);
vector
<
float
>
c6
(
num14
);
vector
<
float
>
c12
(
num14
);
vector
<
float
>
q1
(
num14
);
vector
<
float
>
q2
(
num14
);
for
(
int
i
=
0
;
i
<
num14
;
i
++
)
{
int
numExceptions
=
exceptions
.
size
();
vector
<
int
>
particle1
(
numExceptions
);
vector
<
int
>
particle2
(
numExceptions
);
vector
<
float
>
c6
(
numExceptions
);
vector
<
float
>
c12
(
numExceptions
);
vector
<
float
>
q1
(
numExceptions
);
vector
<
float
>
q2
(
numExceptions
);
for
(
int
i
=
0
;
i
<
numExceptions
;
i
++
)
{
double
charge
,
sig
,
eps
;
force
.
getExceptionParameters
(
nb14
s
[
i
],
particle1
[
i
],
particle2
[
i
],
charge
,
sig
,
eps
);
force
.
getExceptionParameters
(
exception
s
[
i
],
particle1
[
i
],
particle2
[
i
],
charge
,
sig
,
eps
);
c6
[
i
]
=
(
float
)
(
4
*
eps
*
pow
(
sig
,
6.0
));
c12
[
i
]
=
(
float
)
(
4
*
eps
*
pow
(
sig
,
12.0
));
q1
[
i
]
=
(
float
)
charge
;
...
...
@@ -358,6 +354,88 @@ double CudaCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
return
0.0
;
}
CudaCalcCustomNonbondedForceKernel
::~
CudaCalcCustomNonbondedForceKernel
()
{
}
void
CudaCalcCustomNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
)
{
if
(
data
.
primaryKernel
==
NULL
)
data
.
primaryKernel
=
this
;
data
.
hasCustomNonbonded
=
true
;
numParticles
=
force
.
getNumParticles
();
_gpuContext
*
gpu
=
data
.
gpu
;
// Identify which exceptions are actual interactions.
vector
<
pair
<
int
,
int
>
>
exclusions
;
vector
<
int
>
exceptions
;
{
vector
<
double
>
parameters
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
int
particle1
,
particle2
;
force
.
getExceptionParameters
(
i
,
particle1
,
particle2
,
parameters
);
exclusions
.
push_back
(
pair
<
int
,
int
>
(
particle1
,
particle2
));
if
(
parameters
.
size
()
>
0
)
exceptions
.
push_back
(
i
);
}
}
// Initialize nonbonded interactions.
vector
<
int
>
particle
(
numParticles
);
vector
<
vector
<
double
>
>
parameters
(
numParticles
);
vector
<
vector
<
int
>
>
exclusionList
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
force
.
getParticleParameters
(
i
,
parameters
[
i
]);
particle
[
i
]
=
i
;
exclusionList
[
i
].
push_back
(
i
);
}
for
(
int
i
=
0
;
i
<
(
int
)
exclusions
.
size
();
i
++
)
{
exclusionList
[
exclusions
[
i
].
first
].
push_back
(
exclusions
[
i
].
second
);
exclusionList
[
exclusions
[
i
].
second
].
push_back
(
exclusions
[
i
].
first
);
}
CudaNonbondedMethod
method
=
NO_CUTOFF
;
if
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
)
method
=
CUTOFF
;
if
(
force
.
getNonbondedMethod
()
==
CustomNonbondedForce
::
CutoffPeriodic
)
{
Vec3
boxVectors
[
3
];
force
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
gpuSetPeriodicBoxSize
(
gpu
,
(
float
)
boxVectors
[
0
][
0
],
(
float
)
boxVectors
[
1
][
1
],
(
float
)
boxVectors
[
2
][
2
]);
method
=
PERIODIC
;
}
data
.
customNonbondedMethod
=
method
;
// Initialize exceptions.
int
numExceptions
=
exceptions
.
size
();
vector
<
int
>
exceptionParticle1
(
numExceptions
);
vector
<
int
>
exceptionParticle2
(
numExceptions
);
vector
<
vector
<
double
>
>
exceptionParams
(
numExceptions
);
for
(
int
i
=
0
;
i
<
numExceptions
;
i
++
)
force
.
getExceptionParameters
(
exceptions
[
i
],
exceptionParticle1
[
i
],
exceptionParticle2
[
i
],
exceptionParams
[
i
]);
// Record information for the expressions.
vector
<
string
>
paramNames
;
vector
<
string
>
combiningRules
;
for
(
int
i
=
0
;
i
<
force
.
getNumParameters
();
i
++
)
{
paramNames
.
push_back
(
force
.
getParameterName
(
i
));
combiningRules
.
push_back
(
force
.
getParameterCombiningRule
(
i
));
}
gpuSetCustomNonbondedParameters
(
gpu
,
parameters
,
exclusionList
,
exceptionParticle1
,
exceptionParticle2
,
exceptionParams
,
method
,
(
float
)
force
.
getCutoffDistance
(),
force
.
getEnergyFunction
(),
combiningRules
,
paramNames
);
}
void
CudaCalcCustomNonbondedForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
if
(
data
.
primaryKernel
==
this
)
calcForces
(
context
,
data
);
}
double
CudaCalcCustomNonbondedForceKernel
::
executeEnergy
(
ContextImpl
&
context
)
{
if
(
data
.
primaryKernel
==
this
)
return
calcEnergy
(
context
,
system
);
return
0.0
;
}
CudaCalcGBSAOBCForceKernel
::~
CudaCalcGBSAOBCForceKernel
()
{
}
...
...
platforms/cuda/src/CudaKernels.h
View file @
c9bf6eee
...
...
@@ -256,7 +256,41 @@ public:
double
executeEnergy
(
ContextImpl
&
context
);
private:
CudaPlatform
::
PlatformData
&
data
;
int
numParticles
,
num14
;
int
numParticles
;
System
&
system
;
};
/**
* This kernel is invoked by CustomNonbondedForce to calculate the forces acting on the system.
*/
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaPlatform
::
PlatformData
&
data
,
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
system
(
system
)
{
}
~
CudaCalcCustomNonbondedForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomNonbondedForce this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void
executeForces
(
ContextImpl
&
context
);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the CustomNonbondedForce
*/
double
executeEnergy
(
ContextImpl
&
context
);
private:
CudaPlatform
::
PlatformData
&
data
;
int
numParticles
;
System
&
system
;
};
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
c9bf6eee
...
...
@@ -53,6 +53,7 @@ CudaPlatform::CudaPlatform() {
registerKernelFactory
(
CalcPeriodicTorsionForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcRBTorsionForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
...
...
@@ -104,8 +105,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
delete
data
;
}
CudaPlatform
::
PlatformData
::
PlatformData
(
_gpuContext
*
gpu
)
:
gpu
(
gpu
),
removeCM
(
false
),
nonbondedMethod
(
0
),
hasBonds
(
false
),
hasAngles
(
false
),
hasPeriodicTorsions
(
false
),
hasRB
(
false
),
hasNonbonded
(
false
),
primaryKernel
(
NULL
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
CudaPlatform
::
PlatformData
::
PlatformData
(
_gpuContext
*
gpu
)
:
gpu
(
gpu
),
removeCM
(
false
),
nonbondedMethod
(
0
),
customNonbondedMethod
(
0
),
hasBonds
(
false
),
hasAngles
(
false
),
hasPeriodicTorsions
(
false
),
hasRB
(
false
),
hasNonbonded
(
false
),
hasCustomNonbonded
(
false
),
primaryKernel
(
NULL
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
)
{
stringstream
device
;
device
<<
gpu
->
device
;
propertyValues
[
CudaPlatform
::
CudaDevice
()]
=
device
.
str
();
...
...
platforms/cuda/src/kernels/cudaKernels.h
View file @
c9bf6eee
...
...
@@ -35,6 +35,7 @@ extern void kGenerateRandoms(gpuContext gpu);
// Main loop
extern
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
);
extern
void
kCalculateCDLJForces
(
gpuContext
gpu
);
extern
void
kCalculateCustomNonbondedForces
(
gpuContext
gpu
,
bool
neighborListValid
);
extern
void
kReduceObcGbsaBornForces
(
gpuContext
gpu
);
extern
void
kCalculateObcGbsaForces2
(
gpuContext
gpu
);
extern
void
kCalculateLocalForces
(
gpuContext
gpu
);
...
...
@@ -64,6 +65,8 @@ extern void SetCalculateCDLJObcGbsaForces1Sim(gpuContext gpu);
extern
void
GetCalculateCDLJObcGbsaForces1Sim
(
gpuContext
gpu
);
extern
void
SetCalculateCDLJForcesSim
(
gpuContext
gpu
);
extern
void
GetCalculateCDLJForcesSim
(
gpuContext
gpu
);
extern
void
SetCalculateCustomNonbondedForcesSim
(
gpuContext
gpu
);
extern
void
GetCalculateCustomNonbondedForcesSim
(
gpuContext
gpu
);
extern
void
SetCalculateLocalForcesSim
(
gpuContext
gpu
);
extern
void
GetCalculateLocalForcesSim
(
gpuContext
gpu
);
extern
void
SetCalculateObcGbsaBornSumSim
(
gpuContext
gpu
);
...
...
@@ -88,3 +91,6 @@ extern void SetBrownianUpdateSim(gpuContext gpu);
extern
void
GetBrownianUpdateSim
(
gpuContext
gpu
);
extern
void
SetRandomSim
(
gpuContext
gpu
);
extern
void
GetRandomSim
(
gpuContext
gpu
);
extern
void
SetCustomNonbondedForceExpression
(
const
Expression
<
128
>&
expression
);
extern
void
SetCustomNonbondedEnergyExpression
(
const
Expression
<
128
>&
expression
);
extern
void
SetCustomNonbondedCombiningRules
(
const
Expression
<
64
>*
expressions
);
platforms/cuda/src/kernels/cudatypes.h
View file @
c9bf6eee
...
...
@@ -231,7 +231,7 @@ static const int G8X_RANDOM_THREADS_PER_BLOCK = 256;
static
const
int
GT2XX_RANDOM_THREADS_PER_BLOCK
=
384
;
static
const
int
G8X_NONBOND_WORKUNITS_PER_SM
=
220
;
static
const
int
GT2XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
unsigned
int
MAX_STACK_SIZE
=
8
;
enum
CudaNonbondedMethod
{
NO_CUTOFF
,
...
...
@@ -240,6 +240,18 @@ enum CudaNonbondedMethod
EWALD
};
enum
ExpressionOp
{
CONSTANT
=
0
,
VARIABLE0
,
VARIABLE1
,
VARIABLE2
,
VARIABLE3
,
VARIABLE4
,
VARIABLE5
,
VARIABLE6
,
VARIABLE7
,
VARIABLE8
,
CUSTOM
,
ADD
,
SUBTRACT
,
MULTIPLY
,
DIVIDE
,
POWER
,
NEGATE
,
SQRT
,
EXP
,
LOG
,
SIN
,
COS
,
SEC
,
CSC
,
TAN
,
COT
,
ASIN
,
ACOS
,
ATAN
,
SQUARE
,
CUBE
,
RECIPROCAL
,
INCREMENT
,
DECREMENT
};
template
<
int
SIZE
>
struct
Expression
{
int
op
[
SIZE
];
float
arg
[
SIZE
];
int
length
,
stackSize
;
};
struct
cudaGmxSimulation
{
// Constants
unsigned
int
atoms
;
// Number of atoms
...
...
@@ -291,7 +303,9 @@ struct cudaGmxSimulation {
float
bigFloat
;
// Floating point value used as a flag for Shaken atoms
float
epsfac
;
// Epsilon factor for CDLJ calculations
CudaNonbondedMethod
nonbondedMethod
;
// How to handle nonbonded interactions
float
nonbondedCutoffSqr
;
// Cutoff distance for CDLJ calculations
CudaNonbondedMethod
customNonbondedMethod
;
// How to handle custom nonbonded interactions
float
nonbondedCutoff
;
// Cutoff distance for nonbonded interactions
float
nonbondedCutoffSqr
;
// Square of the cutoff distance for nonbonded interactions
float
periodicBoxSizeX
;
// The X dimension of the periodic box
float
periodicBoxSizeY
;
// The Y dimension of the periodic box
float
periodicBoxSizeZ
;
// The Z dimension of the periodic box
...
...
@@ -324,6 +338,10 @@ struct cudaGmxSimulation {
float
collisionFrequency
;
// Collision frequency for Andersen thermostat
float2
*
pObcData
;
// Pointer to fixed Born data
float2
*
pAttr
;
// Pointer to additional atom attributes (sig, eps)
float4
*
pCustomParams
;
// Pointer to atom parameters for custom nonbonded force
int4
*
pCustomExceptionID
;
// Atom indices for custom nonbonded exceptions
float4
*
pCustomExceptionParams
;
// Parameters for custom nonbonded exceptions
unsigned
int
customExceptions
;
// Number of custom nonbonded exceptions
float2
*
pEwaldCosSinSum
;
// Pointer to the cos/sin sums (ewald)
unsigned
int
bonds
;
// Number of bonds
int4
*
pBondID
;
// Bond atom and output buffer IDs
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
c9bf6eee
...
...
@@ -49,8 +49,10 @@ using namespace std;
#include "hilbert.h"
#include "openmm/OpenMMException.h"
#include "quern.h"
#include "Lepton.h"
using
OpenMM
::
OpenMMException
;
using
Lepton
::
Operation
;
struct
ShakeCluster
{
int
centralID
;
...
...
@@ -130,6 +132,116 @@ static const float BOLTZ = (RGAS / KILO); // (k
#define DUMP_PARAMETERS 0
template
<
int
SIZE
>
static
Expression
<
SIZE
>
createExpression
(
const
string
&
expression
,
const
Lepton
::
ExpressionProgram
&
program
,
const
vector
<
string
>&
variables
)
{
Expression
<
SIZE
>
exp
;
if
(
program
.
getNumOperations
()
>
SIZE
)
throw
OpenMMException
(
"Expression contains too many operations: "
+
expression
);
exp
.
length
=
program
.
getNumOperations
();
exp
.
stackSize
=
program
.
getStackSize
();
for
(
int
i
=
0
;
i
<
program
.
getNumOperations
();
i
++
)
{
const
Operation
&
op
=
program
.
getOperation
(
i
);
switch
(
op
.
getId
())
{
case
Operation
::
CONSTANT
:
exp
.
op
[
i
]
=
CONSTANT
;
exp
.
arg
[
i
]
=
op
.
evaluate
(
NULL
,
map
<
string
,
double
>
());
break
;
case
Operation
::
VARIABLE
:
if
(
variables
.
size
()
>
0
&&
op
.
getName
()
==
variables
[
0
])
exp
.
op
[
i
]
=
VARIABLE0
;
else
if
(
variables
.
size
()
>
1
&&
op
.
getName
()
==
variables
[
1
])
exp
.
op
[
i
]
=
VARIABLE1
;
else
if
(
variables
.
size
()
>
2
&&
op
.
getName
()
==
variables
[
2
])
exp
.
op
[
i
]
=
VARIABLE2
;
else
if
(
variables
.
size
()
>
3
&&
op
.
getName
()
==
variables
[
3
])
exp
.
op
[
i
]
=
VARIABLE3
;
else
if
(
variables
.
size
()
>
4
&&
op
.
getName
()
==
variables
[
4
])
exp
.
op
[
i
]
=
VARIABLE4
;
else
if
(
variables
.
size
()
>
5
&&
op
.
getName
()
==
variables
[
5
])
exp
.
op
[
i
]
=
VARIABLE5
;
else
if
(
variables
.
size
()
>
6
&&
op
.
getName
()
==
variables
[
6
])
exp
.
op
[
i
]
=
VARIABLE6
;
else
if
(
variables
.
size
()
>
7
&&
op
.
getName
()
==
variables
[
7
])
exp
.
op
[
i
]
=
VARIABLE7
;
else
if
(
variables
.
size
()
>
8
&&
op
.
getName
()
==
variables
[
8
])
exp
.
op
[
i
]
=
VARIABLE8
;
else
throw
OpenMMException
(
"Unknown variable '"
+
op
.
getName
()
+
"' in expression: "
+
expression
);
break
;
case
Operation
::
ADD
:
exp
.
op
[
i
]
=
ADD
;
break
;
case
Operation
::
SUBTRACT
:
exp
.
op
[
i
]
=
SUBTRACT
;
break
;
case
Operation
::
MULTIPLY
:
exp
.
op
[
i
]
=
MULTIPLY
;
break
;
case
Operation
::
DIVIDE
:
exp
.
op
[
i
]
=
DIVIDE
;
break
;
case
Operation
::
POWER
:
exp
.
op
[
i
]
=
POWER
;
break
;
case
Operation
::
NEGATE
:
exp
.
op
[
i
]
=
NEGATE
;
break
;
case
Operation
::
SQRT
:
exp
.
op
[
i
]
=
SQRT
;
break
;
case
Operation
::
EXP
:
exp
.
op
[
i
]
=
EXP
;
break
;
case
Operation
::
LOG
:
exp
.
op
[
i
]
=
LOG
;
break
;
case
Operation
::
SIN
:
exp
.
op
[
i
]
=
SIN
;
break
;
case
Operation
::
COS
:
exp
.
op
[
i
]
=
COS
;
break
;
case
Operation
::
SEC
:
exp
.
op
[
i
]
=
SEC
;
break
;
case
Operation
::
CSC
:
exp
.
op
[
i
]
=
CSC
;
break
;
case
Operation
::
TAN
:
exp
.
op
[
i
]
=
TAN
;
break
;
case
Operation
::
COT
:
exp
.
op
[
i
]
=
COT
;
break
;
case
Operation
::
ASIN
:
exp
.
op
[
i
]
=
ASIN
;
break
;
case
Operation
::
ACOS
:
exp
.
op
[
i
]
=
ACOS
;
break
;
case
Operation
::
ATAN
:
exp
.
op
[
i
]
=
ATAN
;
break
;
case
Operation
::
SQUARE
:
exp
.
op
[
i
]
=
SQUARE
;
break
;
case
Operation
::
CUBE
:
exp
.
op
[
i
]
=
CUBE
;
break
;
case
Operation
::
RECIPROCAL
:
exp
.
op
[
i
]
=
RECIPROCAL
;
break
;
case
Operation
::
INCREMENT
:
exp
.
op
[
i
]
=
INCREMENT
;
break
;
case
Operation
::
DECREMENT
:
exp
.
op
[
i
]
=
DECREMENT
;
break
;
}
}
return
exp
;
}
extern
"C"
void
gpuSetBondParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
length
,
const
vector
<
float
>&
k
)
{
...
...
@@ -376,14 +488,33 @@ void gpuSetLJ14Parameters(gpuContext gpu, float epsfac, float fudge, const vecto
psLJ14Parameter
->
Upload
();
}
static
void
setExclusions
(
gpuContext
gpu
,
const
vector
<
vector
<
int
>
>&
exclusions
)
{
if
(
gpu
->
exclusions
.
size
()
>
0
)
{
bool
ok
=
(
exclusions
.
size
()
==
gpu
->
exclusions
.
size
());
for
(
int
i
=
0
;
i
<
exclusions
.
size
()
&&
ok
;
i
++
)
{
if
(
exclusions
[
i
].
size
()
!=
gpu
->
exclusions
[
i
].
size
())
ok
=
false
;
else
{
for
(
int
j
=
0
;
j
<
exclusions
[
i
].
size
();
j
++
)
if
(
find
(
gpu
->
exclusions
[
i
].
begin
(),
gpu
->
exclusions
[
i
].
end
(),
exclusions
[
i
][
j
])
==
gpu
->
exclusions
[
i
].
end
())
ok
=
false
;
}
}
if
(
!
ok
)
throw
OpenMMException
(
"All nonbonded forces must have identical sets of exceptions"
);
}
gpu
->
exclusions
=
exclusions
;
}
extern
"C"
void
gpuSetCoulombParameters
(
gpuContext
gpu
,
float
epsfac
,
const
vector
<
int
>&
atom
,
const
vector
<
float
>&
c6
,
const
vector
<
float
>&
c12
,
const
vector
<
float
>&
q
,
const
vector
<
char
>&
symbol
,
const
vector
<
vector
<
int
>
>&
exclusions
,
CudaNonbondedMethod
method
)
{
unsigned
int
coulombs
=
atom
.
size
();
unsigned
int
coulombs
=
c6
.
size
();
gpu
->
sim
.
epsfac
=
epsfac
;
gpu
->
sim
.
nonbondedMethod
=
method
;
gpu
->
exclusions
=
exclusions
;
if
(
coulombs
>
0
)
setExclusions
(
gpu
,
exclusions
);
for
(
unsigned
int
i
=
0
;
i
<
coulombs
;
i
++
)
{
...
...
@@ -419,10 +550,84 @@ void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const vector<int>& at
extern
"C"
void
gpuSetNonbondedCutoff
(
gpuContext
gpu
,
float
cutoffDistance
,
float
solventDielectric
)
{
if
(
gpu
->
sim
.
nonbondedCutoff
!=
0.0
f
&&
gpu
->
sim
.
nonbondedCutoff
!=
cutoffDistance
)
throw
OpenMMException
(
"All nonbonded forces must use the same cutoff"
);
gpu
->
sim
.
nonbondedCutoff
=
cutoffDistance
;
gpu
->
sim
.
nonbondedCutoffSqr
=
cutoffDistance
*
cutoffDistance
;
gpu
->
sim
.
reactionFieldK
=
pow
(
cutoffDistance
,
-
3.0
f
)
*
(
solventDielectric
-
1.0
f
)
/
(
2.0
f
*
solventDielectric
+
1.0
f
);
}
extern
"C"
void
gpuSetCustomNonbondedParameters
(
gpuContext
gpu
,
const
vector
<
vector
<
double
>
>&
parameters
,
const
vector
<
vector
<
int
>
>&
exclusions
,
const
vector
<
int
>&
exceptionAtom1
,
const
vector
<
int
>&
exceptionAtom2
,
const
vector
<
vector
<
double
>
>&
exceptionParams
,
CudaNonbondedMethod
method
,
float
cutoffDistance
,
const
string
&
energyExp
,
const
vector
<
string
>&
combiningRules
,
const
vector
<
string
>&
paramNames
)
{
if
(
gpu
->
sim
.
nonbondedCutoff
!=
0.0
f
&&
gpu
->
sim
.
nonbondedCutoff
!=
cutoffDistance
)
throw
OpenMMException
(
"All nonbonded forces must use the same cutoff"
);
if
(
paramNames
.
size
()
>
4
)
throw
OpenMMException
(
"CudaPlatform only supports four per-atom parameters for custom nonbonded forces"
);
gpu
->
sim
.
nonbondedCutoff
=
cutoffDistance
;
gpu
->
sim
.
nonbondedCutoffSqr
=
cutoffDistance
*
cutoffDistance
;
gpu
->
sim
.
customNonbondedMethod
=
method
;
gpu
->
sim
.
customExceptions
=
exceptionAtom1
.
size
();
setExclusions
(
gpu
,
exclusions
);
gpu
->
psCustomParams
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
paddedNumberOfAtoms
,
1
,
"CustomParams"
);
gpu
->
sim
.
pCustomParams
=
gpu
->
psCustomParams
->
_pDevData
;
gpu
->
psCustomExceptionID
=
new
CUDAStream
<
int4
>
(
gpu
->
sim
.
customExceptions
,
1
,
"CustomExceptionId"
);
gpu
->
sim
.
pCustomExceptionID
=
gpu
->
psCustomExceptionID
->
_pDevData
;
gpu
->
psCustomExceptionParams
=
new
CUDAStream
<
float4
>
(
gpu
->
sim
.
customExceptions
,
1
,
"CustomExceptionParams"
);
gpu
->
sim
.
pCustomExceptionParams
=
gpu
->
psCustomExceptionParams
->
_pDevData
;
for
(
int
i
=
0
;
i
<
parameters
.
size
();
i
++
)
{
if
(
parameters
[
i
].
size
()
>
0
)
(
*
gpu
->
psCustomParams
)[
i
].
x
=
parameters
[
i
][
0
];
if
(
parameters
[
i
].
size
()
>
1
)
(
*
gpu
->
psCustomParams
)[
i
].
y
=
parameters
[
i
][
1
];
if
(
parameters
[
i
].
size
()
>
2
)
(
*
gpu
->
psCustomParams
)[
i
].
z
=
parameters
[
i
][
2
];
if
(
parameters
[
i
].
size
()
>
3
)
(
*
gpu
->
psCustomParams
)[
i
].
w
=
parameters
[
i
][
3
];
}
for
(
int
i
=
0
;
i
<
exceptionAtom1
.
size
();
i
++
)
{
(
*
gpu
->
psCustomExceptionID
)[
i
].
x
=
exceptionAtom1
[
i
];
(
*
gpu
->
psCustomExceptionID
)[
i
].
y
=
exceptionAtom2
[
i
];
(
*
gpu
->
psCustomExceptionID
)[
i
].
z
=
gpu
->
pOutputBufferCounter
[
exceptionAtom1
[
i
]]
++
;
(
*
gpu
->
psCustomExceptionID
)[
i
].
w
=
gpu
->
pOutputBufferCounter
[
exceptionAtom2
[
i
]]
++
;
if
(
exceptionParams
[
i
].
size
()
>
0
)
(
*
gpu
->
psCustomExceptionParams
)[
i
].
x
=
exceptionParams
[
i
][
0
];
if
(
exceptionParams
[
i
].
size
()
>
1
)
(
*
gpu
->
psCustomExceptionParams
)[
i
].
y
=
exceptionParams
[
i
][
1
];
if
(
exceptionParams
[
i
].
size
()
>
2
)
(
*
gpu
->
psCustomExceptionParams
)[
i
].
z
=
exceptionParams
[
i
][
2
];
if
(
exceptionParams
[
i
].
size
()
>
3
)
(
*
gpu
->
psCustomExceptionParams
)[
i
].
w
=
exceptionParams
[
i
][
3
];
}
gpu
->
psCustomParams
->
Upload
();
gpu
->
psCustomExceptionID
->
Upload
();
gpu
->
psCustomExceptionParams
->
Upload
();
// Create the Expressions.
vector
<
string
>
variables
;
variables
.
push_back
(
"r"
);
for
(
int
i
=
0
;
i
<
paramNames
.
size
();
i
++
)
variables
.
push_back
(
paramNames
[
i
]);
SetCustomNonbondedEnergyExpression
(
createExpression
<
128
>
(
energyExp
,
Lepton
::
Parser
::
parse
(
energyExp
).
optimize
().
createProgram
(),
variables
));
SetCustomNonbondedForceExpression
(
createExpression
<
128
>
(
energyExp
,
Lepton
::
Parser
::
parse
(
energyExp
).
differentiate
(
"r"
).
optimize
().
createProgram
(),
variables
));
Expression
<
64
>
paramExpressions
[
4
];
vector
<
string
>
combiningRuleParams
;
combiningRuleParams
.
push_back
(
""
);
for
(
int
j
=
1
;
j
<
3
;
j
++
)
{
for
(
int
i
=
0
;
i
<
paramNames
.
size
();
i
++
)
{
stringstream
name
;
name
<<
paramNames
[
i
]
<<
j
;
combiningRuleParams
.
push_back
(
name
.
str
());
}
}
for
(
int
i
=
0
;
i
<
paramNames
.
size
();
i
++
)
paramExpressions
[
i
]
=
createExpression
<
64
>
(
combiningRules
[
i
],
Lepton
::
Parser
::
parse
(
combiningRules
[
i
]).
optimize
().
createProgram
(),
combiningRuleParams
);
SetCustomNonbondedCombiningRules
(
paramExpressions
);
}
extern
"C"
void
gpuSetEwaldParameters
(
gpuContext
gpu
,
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
)
{
...
...
@@ -1216,6 +1421,7 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
sim
.
surfaceAreaFactor
=
surfaceAreaFactor
;
gpu
->
sim
.
electricConstant
=
electricConstant
;
gpu
->
sim
.
nonbondedMethod
=
NO_CUTOFF
;
gpu
->
sim
.
nonbondedCutoff
=
0.0
f
;
gpu
->
sim
.
nonbondedCutoffSqr
=
0.0
f
;
gpu
->
sim
.
bigFloat
=
99999999.0
f
;
...
...
@@ -1408,6 +1614,11 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
if
(
gpu
->
psCustomParams
!=
NULL
)
{
delete
gpu
->
psCustomParams
;
delete
gpu
->
psCustomExceptionID
;
delete
gpu
->
psCustomExceptionParams
;
}
if
(
gpu
->
psEwaldCosSinSum
!=
NULL
)
delete
gpu
->
psEwaldCosSinSum
;
delete
gpu
->
psObcData
;
...
...
@@ -1761,6 +1972,7 @@ int gpuSetConstants(gpuContext gpu)
{
SetCalculateCDLJForcesSim
(
gpu
);
SetCalculateCDLJObcGbsaForces1Sim
(
gpu
);
SetCalculateCustomNonbondedForcesSim
(
gpu
);
SetCalculateLocalForcesSim
(
gpu
);
SetCalculateObcGbsaBornSumSim
(
gpu
);
SetCalculateObcGbsaForces2Sim
(
gpu
);
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
c9bf6eee
...
...
@@ -88,6 +88,9 @@ struct _gpuContext {
CUDAStream
<
float4
>*
psxVector4
;
CUDAStream
<
float4
>*
psvVector4
;
CUDAStream
<
float2
>*
psSigEps2
;
CUDAStream
<
float4
>*
psCustomParams
;
// Atom parameters for custom nonbonded force
CUDAStream
<
int4
>*
psCustomExceptionID
;
// Atom indices for custom nonbonded exceptions
CUDAStream
<
float4
>*
psCustomExceptionParams
;
// Parameters for custom nonbonded exceptions
CUDAStream
<
float2
>*
psEwaldCosSinSum
;
CUDAStream
<
float2
>*
psObcData
;
CUDAStream
<
float
>*
psObcChain
;
...
...
@@ -178,6 +181,12 @@ void gpuSetCoulombParameters(gpuContext gpu, float epsfac, const std::vector<int
extern
"C"
void
gpuSetNonbondedCutoff
(
gpuContext
gpu
,
float
cutoffDistance
,
float
solventDielectric
);
extern
"C"
void
gpuSetCustomNonbondedParameters
(
gpuContext
gpu
,
const
std
::
vector
<
std
::
vector
<
double
>
>&
parameters
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusions
,
const
std
::
vector
<
int
>&
exceptionAtom1
,
const
std
::
vector
<
int
>&
exceptionAtom2
,
const
std
::
vector
<
std
::
vector
<
double
>
>&
exceptionParams
,
CudaNonbondedMethod
method
,
float
cutoffDistance
,
const
std
::
string
&
energyExp
,
const
std
::
vector
<
std
::
string
>&
combiningRules
,
const
std
::
vector
<
std
::
string
>&
paramNames
);
extern
"C"
void
gpuSetEwaldParameters
(
gpuContext
gpu
,
float
alpha
,
int
kmaxx
,
int
kmaxy
,
int
kmaxz
);
...
...
platforms/cuda/src/kernels/kCalculateCustomNonbondedForces.cu
0 → 100644
View file @
c9bf6eee
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include <stdio.h>
#include <cuda.h>
#include <vector_functions.h>
#include <cstdlib>
#include <string>
#include <iostream>
#include <fstream>
using
namespace
std
;
#include "gputypes.h"
#include "cudatypes.h"
#define UNROLLXX 0
#define UNROLLXY 0
struct
Atom
{
float
x
;
float
y
;
float
z
;
float4
params
;
float
fx
;
float
fy
;
float
fz
;
};
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
Expression
<
128
>
forceExp
;
static
__constant__
Expression
<
128
>
energyExp
;
static
__constant__
Expression
<
64
>
combiningRules
[
4
];
void
SetCalculateCustomNonbondedForcesSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetCalculateCustomNonbondedForcesSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
void
SetCustomNonbondedForceExpression
(
const
Expression
<
128
>&
expression
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
forceExp
,
&
expression
,
sizeof
(
forceExp
));
RTERROR
(
status
,
"SetCustomNonbondedForceExpression: cudaMemcpyToSymbol failed"
);
}
void
SetCustomNonbondedEnergyExpression
(
const
Expression
<
128
>&
expression
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
energyExp
,
&
expression
,
sizeof
(
energyExp
));
RTERROR
(
status
,
"SetCustomNonbondedEnergyExpression: cudaMemcpyToSymbol failed"
);
}
void
SetCustomNonbondedCombiningRules
(
const
Expression
<
64
>*
expressions
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
combiningRules
,
expressions
,
sizeof
(
combiningRules
));
RTERROR
(
status
,
"SetCustomNonbondedCombiningRules: cudaMemcpyToSymbol failed"
);
}
template
<
int
SIZE
>
__device__
float
kEvaluateExpression_kernel
(
Expression
<
SIZE
>*
expression
,
float
*
stack
,
float
var0
,
float4
vars1
,
float4
vars2
)
{
int
stackPointer
=
-
1
;
for
(
int
i
=
0
;
i
<
expression
->
length
;
i
++
)
{
switch
(
expression
->
op
[
i
])
{
case
CONSTANT
:
stack
[
++
stackPointer
]
=
expression
->
arg
[
i
];
break
;
case
VARIABLE0
:
stack
[
++
stackPointer
]
=
var0
;
break
;
case
VARIABLE1
:
stack
[
++
stackPointer
]
=
vars1
.
x
;
break
;
// case VARIABLE2:
// stack[++stackPointer] = vars1.y;
// break;
// case VARIABLE3:
// stack[++stackPointer] = vars1.z;
// break;
// case VARIABLE4:
// stack[++stackPointer] = vars1.w;
// break;
case
VARIABLE5
:
stack
[
++
stackPointer
]
=
vars2
.
x
;
break
;
// case VARIABLE6:
// stack[++stackPointer] = vars2.y;
// break;
// case VARIABLE7:
// stack[++stackPointer] = vars2.z;
// break;
// case VARIABLE8:
// stack[++stackPointer] = vars2.w;
// break;
case
ADD
:
{
float
temp
=
stack
[
stackPointer
];
stack
[
stackPointer
]
=
temp
+
stack
[
--
stackPointer
];
break
;
}
case
SUBTRACT
:
{
float
temp
=
stack
[
stackPointer
];
stack
[
stackPointer
]
=
temp
-
stack
[
--
stackPointer
];
break
;
}
case
MULTIPLY
:
{
float
temp
=
stack
[
stackPointer
];
stack
[
stackPointer
]
=
temp
*
stack
[
--
stackPointer
];
break
;
}
case
DIVIDE
:
{
float
temp
=
stack
[
stackPointer
];
stack
[
stackPointer
]
=
temp
/
stack
[
--
stackPointer
];
break
;
}
case
POWER
:
{
float
temp
=
stack
[
stackPointer
];
stack
[
stackPointer
]
=
pow
(
temp
,
stack
[
--
stackPointer
]);
break
;
}
case
NEGATE
:
stack
[
stackPointer
]
=
-
stack
[
stackPointer
];
break
;
case
SQRT
:
stack
[
stackPointer
]
=
sqrt
(
stack
[
stackPointer
]);
break
;
case
EXP
:
stack
[
stackPointer
]
=
exp
(
stack
[
stackPointer
]);
break
;
case
LOG
:
stack
[
stackPointer
]
=
log
(
stack
[
stackPointer
]);
break
;
case
SIN
:
stack
[
stackPointer
]
=
sin
(
stack
[
stackPointer
]);
break
;
case
COS
:
stack
[
stackPointer
]
=
cos
(
stack
[
stackPointer
]);
break
;
case
SEC
:
stack
[
stackPointer
]
=
1.0
f
/
cos
(
stack
[
stackPointer
]);
break
;
case
CSC
:
stack
[
stackPointer
]
=
1.0
f
/
sin
(
stack
[
stackPointer
]);
break
;
case
TAN
:
stack
[
stackPointer
]
=
tan
(
stack
[
stackPointer
]);
break
;
case
COT
:
stack
[
stackPointer
]
=
1.0
f
/
tan
(
stack
[
stackPointer
]);
break
;
case
ASIN
:
stack
[
stackPointer
]
=
asin
(
stack
[
stackPointer
]);
break
;
case
ACOS
:
stack
[
stackPointer
]
=
acos
(
stack
[
stackPointer
]);
break
;
case
ATAN
:
stack
[
stackPointer
]
=
atan
(
stack
[
stackPointer
]);
break
;
case
SQUARE
:
{
float
temp
=
stack
[
stackPointer
];
stack
[
stackPointer
]
=
temp
*
temp
;
break
;
}
case
CUBE
:
{
float
temp
=
stack
[
stackPointer
];
stack
[
stackPointer
]
=
temp
*
temp
*
temp
;
break
;
}
case
RECIPROCAL
:
stack
[
stackPointer
]
=
1.0
f
/
stack
[
stackPointer
];
break
;
case
INCREMENT
:
stack
[
stackPointer
]
=
stack
[
stackPointer
]
+
1.0
f
;
break
;
case
DECREMENT
:
stack
[
stackPointer
]
=
stack
[
stackPointer
]
-
1.0
f
;
break
;
}
}
return
stack
[
stackPointer
];
}
// Include versions of the kernels for N^2 calculations.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateCustomNonbondedForces.h"
// Include versions of the kernels with cutoffs.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_CUTOFF
#define METHOD_NAME(a, b) a##Cutoff##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##CutoffByWarp##b
#include "kCalculateCustomNonbondedForces.h"
// Include versions of the kernels with periodic boundary conditions.
#undef METHOD_NAME
#undef USE_OUTPUT_BUFFER_PER_WARP
#define USE_PERIODIC
#define METHOD_NAME(a, b) a##Periodic##b
#include "kCalculateCustomNonbondedForces.h"
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##PeriodicByWarp##b
#include "kCalculateCustomNonbondedForces.h"
__global__
void
kFindBlockBoundsCutoff_kernel
();
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
workUnit
);
__global__
void
kFindBlockBoundsPeriodic_kernel
();
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
workUnit
);
void
kCalculateCustomNonbondedForces
(
gpuContext
gpu
,
bool
neighborListValid
)
{
// printf("kCalculateCustomNonbondedCutoffForces\n");
CUDPPResult
result
;
switch
(
gpu
->
sim
.
customNonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCustomNonbondedN2ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
MAX_STACK_SIZE
*
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCustomNonbondedN2Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
MAX_STACK_SIZE
*
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCustomNonbondedN2Forces"
);
break
;
case
CUTOFF
:
if
(
!
neighborListValid
)
{
kFindBlockBoundsCutoff_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsCutoff"
);
kFindBlocksWithInteractionsCutoff_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsCutoff"
);
result
=
cudppCompact
(
gpu
->
cudpp
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractionCount
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
);
if
(
result
!=
CUDPP_SUCCESS
)
{
printf
(
"Error in cudppCompact: %d
\n
"
,
result
);
exit
(
-
1
);
}
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCustomNonbondedCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
MAX_STACK_SIZE
*
sizeof
(
float
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCustomNonbondedCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
MAX_STACK_SIZE
*
sizeof
(
float
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCustomNonbondedCutoffForces"
);
break
;
case
PERIODIC
:
if
(
!
neighborListValid
)
{
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
LAUNCHERROR
(
"kFindBlockBoundsPeriodic"
);
kFindBlocksWithInteractionsPeriodic_kernel
<<<
gpu
->
sim
.
interaction_blocks
,
gpu
->
sim
.
interaction_threads_per_block
>>>
();
LAUNCHERROR
(
"kFindBlocksWithInteractionsPeriodic"
);
result
=
cudppCompact
(
gpu
->
cudpp
,
gpu
->
sim
.
pInteractingWorkUnit
,
gpu
->
sim
.
pInteractionCount
,
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
pInteractionFlag
,
gpu
->
sim
.
workUnits
);
if
(
result
!=
CUDPP_SUCCESS
)
{
printf
(
"Error in cudppCompact: %d
\n
"
,
result
);
exit
(
-
1
);
}
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCustomNonbondedPeriodicByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
MAX_STACK_SIZE
*
sizeof
(
float
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCustomNonbondedPeriodicForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
MAX_STACK_SIZE
*
sizeof
(
float
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCustomNonbondedPeriodicForces"
);
break
;
}
}
platforms/cuda/src/kernels/kCalculateCustomNonbondedForces.h
0 → 100644
View file @
c9bf6eee
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
/**
* This file contains the kernels for evalauating nonbonded forces. It is included
* several times in kCalculateCustomNonbondedForces.cu with different #defines to generate
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCustomNonbonded
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
float
stack
[];
Atom
*
sA
=
(
Atom
*
)
&
stack
[
MAX_STACK_SIZE
*
blockDim
.
x
];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float4
apos
;
// Local atom x, y, z, q
float3
af
;
// Local atom fx, fy, fz
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float4
params
=
make_float4
(
0
,
0
,
0
,
0
);
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
z
=
0
.
0
f
;
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
// Read fixed atom data into registers and GRF
sA
[
threadIdx
.
x
].
x
=
apos
.
x
;
sA
[
threadIdx
.
x
].
y
=
apos
.
y
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
params
=
params
;
if
(
!
bExclusionFlag
)
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float4
combinedParams
;
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r
=
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
);
float
invR
=
1
.
0
f
/
r
;
float
dEdR
=
-
kEvaluateExpression_kernel
(
&
forceExp
,
&
stack
[
MAX_STACK_SIZE
*
threadIdx
.
x
],
r
,
combinedParams
,
combinedParams
)
*
invR
;
#ifdef USE_CUTOFF
if
(
r
>
cSim
.
nonbondedCutoff
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
}
}
else
// bExclusion
{
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
xi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
xi
*
(
xi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float4
combinedParams
;
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r
=
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
);
float
invR
=
1
.
0
f
/
r
;
float
dEdR
=
-
kEvaluateExpression_kernel
(
&
forceExp
,
&
stack
[
MAX_STACK_SIZE
*
threadIdx
.
x
],
r
,
combinedParams
,
combinedParams
)
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r
>
cSim
.
nonbondedCutoff
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
excl
>>=
1
;
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4a
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
unsigned
int
offset
=
x
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
#endif
}
else
// 100% utilization
{
// Read fixed atom data into registers and GRF
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
float4
temp
=
cSim
.
pPosq
[
j
];
sA
[
threadIdx
.
x
].
x
=
temp
.
x
;
sA
[
threadIdx
.
x
].
y
=
temp
.
y
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
params
=
make_float4
(
0
,
0
,
0
,
0
);;
}
sA
[
threadIdx
.
x
].
fx
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fz
=
0
.
0
f
;
if
(
!
bExclusionFlag
)
{
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
if
(
flags
==
0xFFFFFFFF
)
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float4
combinedParams
;
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r
=
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
);
float
invR
=
1
.
0
f
/
r
;
float
dEdR
=
-
kEvaluateExpression_kernel
(
&
forceExp
,
&
stack
[
MAX_STACK_SIZE
*
threadIdx
.
x
],
r
,
combinedParams
,
combinedParams
)
*
invR
;
#ifdef USE_CUTOFF
if
(
r
>
cSim
.
nonbondedCutoff
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
float4
combinedParams
;
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r
=
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
);
float
invR
=
1
.
0
f
/
r
;
float
dEdR
=
-
kEvaluateExpression_kernel
(
&
forceExp
,
&
stack
[
MAX_STACK_SIZE
*
threadIdx
.
x
],
r
,
combinedParams
,
combinedParams
)
*
invR
;
#ifdef USE_CUTOFF
if
(
r
>
cSim
.
nonbondedCutoff
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tempBuffer
[
threadIdx
.
x
].
x
=
dx
;
tempBuffer
[
threadIdx
.
x
].
y
=
dy
;
tempBuffer
[
threadIdx
.
x
].
z
=
dz
;
// Sum the forces on atom j.
if
(
tgx
%
2
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
1
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
1
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
1
].
z
;
}
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
2
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
2
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
2
].
z
;
}
if
(
tgx
%
8
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
4
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
4
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
4
].
z
;
}
if
(
tgx
%
16
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
8
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
8
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
8
].
z
;
}
if
(
tgx
==
0
)
{
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
].
x
+
tempBuffer
[
threadIdx
.
x
+
16
].
x
;
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
].
y
+
tempBuffer
[
threadIdx
.
x
+
16
].
y
;
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
].
z
+
tempBuffer
[
threadIdx
.
x
+
16
].
z
;
}
}
}
}
#endif
}
else
// bExclusion
{
// Read fixed atom data into registers and GRF
unsigned
int
xi
=
x
>>
GRIDBITS
;
unsigned
int
yi
=
y
>>
GRIDBITS
;
unsigned
int
cell
=
xi
+
yi
*
cSim
.
paddedNumberOfAtoms
/
GRID
-
yi
*
(
yi
+
1
)
/
2
;
unsigned
int
excl
=
cSim
.
pExclusion
[
cSim
.
pExclusionIndex
[
cell
]
+
tgx
];
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
GRID
-
tgx
));
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float4
combinedParams
;
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r
=
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
);
float
invR
=
1
.
0
f
/
r
;
float
dEdR
=
-
kEvaluateExpression_kernel
(
&
forceExp
,
&
stack
[
MAX_STACK_SIZE
*
threadIdx
.
x
],
r
,
combinedParams
,
combinedParams
)
*
invR
;
#ifdef USE_CUTOFF
if
(
!
(
excl
&
0x1
)
||
r
>
cSim
.
nonbondedCutoff
)
#else
if
(
!
(
excl
&
0x1
))
#endif
{
dEdR
=
0
.
0
f
;
}
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
// Write results
float4
of
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
offset
=
x
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
af
.
x
;
of
.
y
+=
af
.
y
;
of
.
z
+=
af
.
z
;
cSim
.
pForce4a
[
offset
]
=
of
;
offset
=
y
+
tgx
+
warp
*
cSim
.
stride
;
of
=
cSim
.
pForce4a
[
offset
];
of
.
x
+=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
+=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
+=
sA
[
threadIdx
.
x
].
fz
;
cSim
.
pForce4a
[
offset
]
=
of
;
#else
of
.
x
=
af
.
x
;
of
.
y
=
af
.
y
;
of
.
z
=
af
.
z
;
of
.
w
=
0
.
0
f
;
unsigned
int
offset
=
x
+
tgx
+
(
y
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
of
.
x
=
sA
[
threadIdx
.
x
].
fx
;
of
.
y
=
sA
[
threadIdx
.
x
].
fy
;
of
.
z
=
sA
[
threadIdx
.
x
].
fz
;
offset
=
y
+
tgx
+
(
x
>>
GRIDBITS
)
*
cSim
.
stride
;
cSim
.
pForce4a
[
offset
]
=
of
;
#endif
lasty
=
y
;
}
pos
++
;
}
}
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
0 → 100644
View file @
c9bf6eee
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests all the different force terms in the Cuda implementation of CustomNonbondedForce.
*/
#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/CustomNonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testSimpleExpression
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"-0.1*r^3"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
double
force
=
0.1
*
3
*
(
2
*
2
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
-
0.1
*
(
2
*
2
*
2
),
state
.
getPotentialEnergy
(),
TOL
);
}
void
testParameters
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"scale*a*(r*b)^3"
);
forceField
->
addParameter
(
"a"
,
"a1*a2"
);
forceField
->
addParameter
(
"b"
,
"c+b1+b2"
);
forceField
->
addGlobalParameter
(
"scale"
,
3.0
);
forceField
->
addGlobalParameter
(
"c"
,
-
1.0
);
vector
<
double
>
params
(
2
);
params
[
0
]
=
1.5
;
params
[
1
]
=
2.0
;
forceField
->
addParticle
(
params
);
params
[
0
]
=
2.0
;
params
[
1
]
=
3.0
;
forceField
->
addParticle
(
params
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
context
.
setPositions
(
positions
);
context
.
setParameter
(
"scale"
,
1.0
);
context
.
setParameter
(
"c"
,
0.0
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
vector
<
Vec3
>
forces
=
state
.
getForces
();
double
force
=
-
3.0
*
3
*
5.0
*
(
10
*
10
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
3.0
*
(
10
*
10
*
10
),
state
.
getPotentialEnergy
(),
TOL
);
context
.
setParameter
(
"scale"
,
1.5
);
context
.
setParameter
(
"c"
,
1.0
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
forces
=
state
.
getForces
();
force
=
-
1.5
*
3.0
*
3
*
6.0
*
(
12
*
12
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
force
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
force
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
1.5
*
3.0
*
(
12
*
12
*
12
),
state
.
getPotentialEnergy
(),
TOL
);
}
void
testExceptions
()
{
CudaPlatform
platform
;
System
system
;
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"a*r"
);
nonbonded
->
addParameter
(
"a"
,
"a1+a2"
);
vector
<
double
>
params
(
1
);
vector
<
Vec3
>
positions
(
4
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
system
.
addParticle
(
1.0
);
params
[
0
]
=
i
+
1
;
nonbonded
->
addParticle
(
params
);
positions
[
i
]
=
Vec3
(
i
,
0
,
0
);
}
nonbonded
->
addException
(
0
,
1
,
vector
<
double
>
());
nonbonded
->
addException
(
1
,
2
,
vector
<
double
>
());
nonbonded
->
addException
(
2
,
3
,
vector
<
double
>
());
params
[
0
]
=
0.5
;
nonbonded
->
addException
(
0
,
2
,
params
);
nonbonded
->
addException
(
1
,
3
,
params
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
+
1
+
4
,
0
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
(
0.5
+
1
+
4
),
0
,
0
),
forces
[
3
],
TOL
);
ASSERT_EQUAL_TOL
((
1
+
4
)
*
3
+
0.5
*
2
+
0.5
*
2
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testCutoff
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"r"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffNonPeriodic
);
forceField
->
setCutoffDistance
(
2.5
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2
,
0
);
positions
[
2
]
=
Vec3
(
0
,
3
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
1
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
1
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
2
+
1
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testPeriodic
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
forceField
=
new
CustomNonbondedForce
(
"r"
);
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
addParticle
(
vector
<
double
>
());
forceField
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
forceField
->
setCutoffDistance
(
2.0
);
forceField
->
setPeriodicBoxVectors
(
Vec3
(
4
,
0
,
0
),
Vec3
(
0
,
4
,
0
),
Vec3
(
0
,
0
,
4
));
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
0
,
2.1
,
0
);
positions
[
2
]
=
Vec3
(
0
,
3
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
2
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
2
,
0
),
forces
[
1
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_TOL
(
1.9
+
1
+
0.9
,
state
.
getPotentialEnergy
(),
TOL
);
}
int
main
()
{
try
{
testSimpleExpression
();
// testParameters();
// testExceptions();
testCutoff
();
testPeriodic
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/reference/src/ReferenceKernels.cpp
View file @
c9bf6eee
...
...
@@ -476,19 +476,19 @@ double ReferenceCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
ReferenceCalcCustomNonbondedForceKernel
::~
ReferenceCalcCustomNonbondedForceKernel
()
{
disposeRealArray
(
particleParamArray
,
numParticles
);
disposeIntArray
(
exclusionArray
,
numParticles
);
disposeIntArray
(
bonded14
IndexArray
,
num
14
);
disposeRealArray
(
bonded14
ParamArray
,
num
14
);
disposeIntArray
(
exception
IndexArray
,
num
Exceptions
);
disposeRealArray
(
exception
ParamArray
,
num
Exceptions
);
if
(
neighborList
!=
NULL
)
delete
neighborList
;
}
void
ReferenceCalcCustomNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
CustomNonbondedForce
&
force
)
{
// Identify which exceptions are
1-4
interactions.
// Identify which exceptions are
actual
interactions.
numParticles
=
force
.
getNumParticles
();
exclusions
.
resize
(
numParticles
);
vector
<
int
>
nb14
s
;
vector
<
int
>
exception
s
;
vector
<
double
>
parameters
;
for
(
int
i
=
0
;
i
<
force
.
getNumExceptions
();
i
++
)
{
int
particle1
,
particle2
;
...
...
@@ -496,15 +496,15 @@ void ReferenceCalcCustomNonbondedForceKernel::initialize(const System& system, c
exclusions
[
particle1
].
insert
(
particle2
);
exclusions
[
particle2
].
insert
(
particle1
);
if
(
parameters
.
size
()
>
0
)
nb14
s
.
push_back
(
i
);
exception
s
.
push_back
(
i
);
}
// Build the arrays.
num
14
=
nb14
s
.
size
();
num
Exceptions
=
exception
s
.
size
();
int
numParameters
=
force
.
getNumParameters
();
bonded14
IndexArray
=
allocateIntArray
(
num
14
,
2
);
bonded14
ParamArray
=
allocateRealArray
(
num
14
,
numParameters
);
exception
IndexArray
=
allocateIntArray
(
num
Exceptions
,
2
);
exception
ParamArray
=
allocateRealArray
(
num
Exceptions
,
numParameters
);
particleParamArray
=
allocateRealArray
(
numParticles
,
numParameters
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
force
.
getParticleParameters
(
i
,
parameters
);
...
...
@@ -520,13 +520,13 @@ void ReferenceCalcCustomNonbondedForceKernel::initialize(const System& system, c
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
exclusionArray
[
i
][
++
index
]
=
*
iter
;
}
for
(
int
i
=
0
;
i
<
num
14
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num
Exceptions
;
++
i
)
{
int
particle1
,
particle2
;
force
.
getExceptionParameters
(
nb14
s
[
i
],
particle1
,
particle2
,
parameters
);
bonded14
IndexArray
[
i
][
0
]
=
particle1
;
bonded14
IndexArray
[
i
][
1
]
=
particle2
;
force
.
getExceptionParameters
(
exception
s
[
i
],
particle1
,
particle2
,
parameters
);
exception
IndexArray
[
i
][
0
]
=
particle1
;
exception
IndexArray
[
i
][
1
]
=
particle2
;
for
(
int
j
=
0
;
j
<
numParameters
;
j
++
)
bonded14
ParamArray
[
i
][
j
]
=
static_cast
<
RealOpenMM
>
(
parameters
[
j
]);
exception
ParamArray
[
i
][
j
]
=
static_cast
<
RealOpenMM
>
(
parameters
[
j
]);
}
nonbondedMethod
=
CalcCustomNonbondedForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
nonbondedCutoff
=
(
RealOpenMM
)
force
.
getCutoffDistance
();
...
...
@@ -568,7 +568,7 @@ void ReferenceCalcCustomNonbondedForceKernel::executeForces(ContextImpl& context
for
(
int
i
=
0
;
i
<
globalParameterNames
.
size
();
i
++
)
globalParameters
[
globalParameterNames
[
i
]]
=
context
.
getParameter
(
globalParameterNames
[
i
]);
ixn
.
calculatePairIxn
(
numParticles
,
posData
,
particleParamArray
,
exclusionArray
,
0
,
globalParameters
,
forceData
,
0
,
0
);
ixn
.
calculateExceptionIxn
(
num
14
,
bonded14
IndexArray
,
posData
,
bonded14
ParamArray
,
globalParameters
,
forceData
,
0
,
0
);
ixn
.
calculateExceptionIxn
(
num
Exceptions
,
exception
IndexArray
,
posData
,
exception
ParamArray
,
globalParameters
,
forceData
,
0
,
0
);
}
double
ReferenceCalcCustomNonbondedForceKernel
::
executeEnergy
(
ContextImpl
&
context
)
{
...
...
@@ -587,7 +587,7 @@ double ReferenceCalcCustomNonbondedForceKernel::executeEnergy(ContextImpl& conte
for
(
int
i
=
0
;
i
<
globalParameterNames
.
size
();
i
++
)
globalParameters
[
globalParameterNames
[
i
]]
=
context
.
getParameter
(
globalParameterNames
[
i
]);
ixn
.
calculatePairIxn
(
numParticles
,
posData
,
particleParamArray
,
exclusionArray
,
0
,
globalParameters
,
forceData
,
0
,
&
energy
);
ixn
.
calculateExceptionIxn
(
num
14
,
bonded14
IndexArray
,
posData
,
bonded14
ParamArray
,
globalParameters
,
forceData
,
0
,
&
energy
);
ixn
.
calculateExceptionIxn
(
num
Exceptions
,
exception
IndexArray
,
posData
,
exception
ParamArray
,
globalParameters
,
forceData
,
0
,
&
energy
);
disposeRealArray
(
forceData
,
numParticles
);
return
energy
;
}
...
...
platforms/reference/src/ReferenceKernels.h
View file @
c9bf6eee
...
...
@@ -304,9 +304,9 @@ public:
*/
double
executeEnergy
(
ContextImpl
&
context
);
private:
int
numParticles
,
num
14
;
int
**
exclusionArray
,
**
bonded14
IndexArray
;
RealOpenMM
**
particleParamArray
,
**
bonded14
ParamArray
;
int
numParticles
,
num
Exceptions
;
int
**
exclusionArray
,
**
exception
IndexArray
;
RealOpenMM
**
particleParamArray
,
**
exception
ParamArray
;
RealOpenMM
nonbondedCutoff
,
periodicBoxSize
[
3
];
std
::
vector
<
std
::
set
<
int
>
>
exclusions
;
Lepton
::
ExpressionProgram
energyExpression
,
forceExpression
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment