Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ffab317b
"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "46376ea3e4d0ff37cdaa5e55e034dbf5c3296b81"
Commit
ffab317b
authored
Oct 10, 2009
by
Peter Eastman
Browse files
Began OpenCL implementation of GBSA
parent
efac00cf
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
857 additions
and
237 deletions
+857
-237
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+2
-2
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+101
-86
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+40
-32
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+105
-72
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+75
-12
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-1
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+263
-0
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+38
-28
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+2
-4
platforms/opencl/tests/TestOpenCLGBSAOBCForce.cpp
platforms/opencl/tests/TestOpenCLGBSAOBCForce.cpp
+230
-0
No files found.
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
ffab317b
...
@@ -50,8 +50,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
...
@@ -50,8 +50,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
OpenCLCalcNonbondedForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
return
new
OpenCLCalcNonbondedForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
// if (name == CalcCustomNonbondedForceKernel::Name())
// if (name == CalcCustomNonbondedForceKernel::Name())
// return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
// return new OpenCLCalcCustomNonbondedForceKernel(name, platform, cl, context.getSystem());
//
if (name == CalcGBSAOBCForceKernel::Name())
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
//
return new OpenCLCalcGBSAOBCForceKernel(name, platform, cl);
return
new
OpenCLCalcGBSAOBCForceKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
return
new
OpenCLIntegrateVerletStepKernel
(
name
,
platform
,
cl
);
return
new
OpenCLIntegrateVerletStepKernel
(
name
,
platform
,
cl
);
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
if
(
name
==
IntegrateLangevinStepKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
ffab317b
...
@@ -54,6 +54,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context
...
@@ -54,6 +54,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginForceComputation(ContextImpl& context
}
}
void
OpenCLCalcForcesAndEnergyKernel
::
finishForceComputation
(
ContextImpl
&
context
)
{
void
OpenCLCalcForcesAndEnergyKernel
::
finishForceComputation
(
ContextImpl
&
context
)
{
cl
.
getNonbondedUtilities
().
computeInteractions
();
cl
.
reduceBuffer
(
cl
.
getForceBuffers
(),
cl
.
getNumForceBuffers
());
cl
.
reduceBuffer
(
cl
.
getForceBuffers
(),
cl
.
getNumForceBuffers
());
cl
.
getNonbondedUtilities
().
prepareInteractions
();
cl
.
getNonbondedUtilities
().
prepareInteractions
();
}
}
...
@@ -67,6 +68,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& contex
...
@@ -67,6 +68,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginEnergyComputation(ContextImpl& contex
}
}
double
OpenCLCalcForcesAndEnergyKernel
::
finishEnergyComputation
(
ContextImpl
&
context
)
{
double
OpenCLCalcForcesAndEnergyKernel
::
finishEnergyComputation
(
ContextImpl
&
context
)
{
cl
.
getNonbondedUtilities
().
computeInteractions
();
OpenCLArray
<
cl_float
>&
energy
=
cl
.
getEnergyBuffer
();
OpenCLArray
<
cl_float
>&
energy
=
cl
.
getEnergyBuffer
();
energy
.
download
();
energy
.
download
();
double
sum
=
0.0
f
;
double
sum
=
0.0
f
;
...
@@ -584,7 +586,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -584,7 +586,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
// gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
// gpuSetCoulombParameters(gpu, 138.935485f, particle, c6, c12, q, symbol, exclusionList, method);
string
source
=
cl
.
loadSourceFromFile
(
"coulombLennardJones.cl"
,
defines
);
string
source
=
cl
.
loadSourceFromFile
(
"coulombLennardJones.cl"
,
defines
);
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
force
.
getCutoffDistance
(),
exclusionList
,
source
);
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
force
.
getCutoffDistance
(),
exclusionList
,
source
);
cl
.
getNonbondedUtilities
().
addParameter
(
"sigmaEpsilon"
,
"float2"
,
8
,
sigmaEpsilon
->
getDeviceBuffer
());
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"sigmaEpsilon"
,
"float2"
,
8
,
sigmaEpsilon
->
getDeviceBuffer
())
)
;
cutoffSquared
=
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
();
cutoffSquared
=
force
.
getCutoffDistance
()
*
force
.
getCutoffDistance
();
// Compute the Ewald self energy.
// Compute the Ewald self energy.
...
@@ -629,7 +631,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -629,7 +631,6 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
}
}
void
OpenCLCalcNonbondedForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
void
OpenCLCalcNonbondedForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
cl
.
getNonbondedUtilities
().
computeInteractions
();
if
(
exceptionIndices
!=
NULL
)
{
if
(
exceptionIndices
!=
NULL
)
{
int
numExceptions
=
exceptionIndices
->
getSize
();
int
numExceptions
=
exceptionIndices
->
getSize
();
exceptionsKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
exceptionsKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
...
@@ -766,90 +767,104 @@ double OpenCLCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
...
@@ -766,90 +767,104 @@ double OpenCLCalcNonbondedForceKernel::executeEnergy(ContextImpl& context) {
// SetCustomNonbondedGlobalParams(&globalParamValues[0]);
// SetCustomNonbondedGlobalParams(&globalParamValues[0]);
//}
//}
//
//
//OpenCLCalcGBSAOBCForceKernel::~OpenCLCalcGBSAOBCForceKernel() {
OpenCLCalcGBSAOBCForceKernel
::~
OpenCLCalcGBSAOBCForceKernel
()
{
//}
if
(
params
!=
NULL
)
//
delete
params
;
//void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOBCForce& force) {
if
(
bornSum
!=
NULL
)
//
delete
bornSum
;
// int numParticles = system.getNumParticles();
if
(
bornRadii
!=
NULL
)
// _gpuContext* gpu = data.gpu;
delete
bornRadii
;
// vector<float> radius(numParticles);
if
(
bornForce
!=
NULL
)
// vector<float> scale(numParticles);
delete
bornForce
;
// vector<float> charge(numParticles);
if
(
obcChain
!=
NULL
)
// for (int i = 0; i < numParticles; i++) {
delete
obcChain
;
// double particleCharge, particleRadius, scalingFactor;
}
// force.getParticleParameters(i, particleCharge, particleRadius, scalingFactor);
// radius[i] = (float) particleRadius;
void
OpenCLCalcGBSAOBCForceKernel
::
initialize
(
const
System
&
system
,
const
GBSAOBCForce
&
force
)
{
// scale[i] = (float) scalingFactor;
// charge[i] = (float) particleCharge;
params
=
new
OpenCLArray
<
mm_float2
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"gbsaObcParams"
);
// }
bornRadii
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"bornRadii"
);
// gpuSetObcParameters(gpu, (float) force.getSoluteDielectric(), (float) force.getSolventDielectric(), radius, scale, charge);
bornForce
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"bornForce"
);
//}
obcChain
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
(),
"obcChain"
);
//
OpenCLArray
<
mm_float4
>&
posq
=
cl
.
getPosq
();
//void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
int
numParticles
=
force
.
getNumParticles
();
//}
vector
<
mm_float2
>
paramsVector
(
numParticles
);
//
const
double
dielectricOffset
=
0.009
;
//static void initializeIntegration(const System& system, OpenCLPlatform::PlatformData& data, const Integrator& integrator) {
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
//
double
charge
,
radius
,
scalingFactor
;
// // Initialize any terms that haven't already been handled by a Force.
force
.
getParticleParameters
(
i
,
charge
,
radius
,
scalingFactor
);
//
radius
-=
dielectricOffset
;
// _gpuContext* gpu = data.gpu;
paramsVector
[
i
]
=
(
mm_float2
)
{(
float
)
radius
,
(
float
)
(
scalingFactor
*
radius
)};
// if (!data.hasBonds)
posq
[
i
].
w
=
(
float
)
charge
;
// gpuSetBondParameters(gpu, vector<int>(), vector<int>(), vector<float>(), vector<float>());
}
// if (!data.hasAngles)
posq
.
upload
();
// gpuSetBondAngleParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>());
params
->
upload
(
paramsVector
);
// if (!data.hasPeriodicTorsions)
prefactor
=
2.0
*-
166.02691
*
0.4184
*
((
1.0
/
force
.
getSoluteDielectric
())
-
(
1.0
/
force
.
getSolventDielectric
()));
// gpuSetDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<int>());
}
// if (!data.hasRB)
// gpuSetRbDihedralParameters(gpu, vector<int>(), vector<int>(), vector<int>(), vector<int>(), vector<float>(), vector<float>(),
void
OpenCLCalcGBSAOBCForceKernel
::
executeForces
(
ContextImpl
&
context
)
{
// vector<float>(), vector<float>(), vector<float>(), vector<float>());
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
// if (!data.hasNonbonded) {
if
(
bornSum
==
NULL
)
{
// gpuSetCoulombParameters(gpu, 138.935485f, vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<char>(), vector<vector<int> >(), NO_CUTOFF);
// These objects cannot be created in initialize(), because the OpenCLNonbondedUtilities has not been initialized yet then.
// gpuSetLJ14Parameters(gpu, 138.935485f, 1.0f, vector<int>(), vector<int>(), vector<float>(), vector<float>(), vector<float>(), vector<float>());
// }
bornSum
=
new
OpenCLArray
<
cl_float
>
(
cl
,
cl
.
getPaddedNumAtoms
()
*
cl
.
getNumForceBuffers
(),
"bornSum"
);
//
map
<
string
,
string
>
defines
;
// // Set masses.
if
(
nb
.
getForceBufferPerAtomBlock
())
//
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
// int numParticles = system.getNumParticles();
if
(
nb
.
getUseCutoff
())
// vector<float> mass(numParticles);
defines
[
"USE_CUTOFF"
]
=
"1"
;
// for (int i = 0; i < numParticles; i++)
if
(
nb
.
getUsePeriodic
())
// mass[i] = (float) system.getParticleMass(i);
defines
[
"USE_PERIODIC"
]
=
"1"
;
// gpuSetMass(gpu, mass);
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
loadSourceFromFile
(
"gbsaObc.cl"
),
defines
);
//
computeBornSumKernel
=
cl
::
Kernel
(
program
,
"computeBornSum"
);
// // Set constraints.
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
//
}
// int numConstraints = system.getNumConstraints();
cl
.
clearBuffer
(
*
bornSum
);
// vector<int> particle1(numConstraints);
cl
.
clearBuffer
(
*
bornForce
);
// vector<int> particle2(numConstraints);
// vector<float> distance(numConstraints);
// Compute the Born sum.
// vector<float> invMass1(numConstraints);
// vector<float> invMass2(numConstraints);
computeBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getNumAtoms
());
// for (int i = 0; i < numConstraints; i++) {
computeBornSumKernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getPaddedNumAtoms
());
// int particle1Index, particle2Index;
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
bornSum
->
getDeviceBuffer
());
// double constraintDistance;
computeBornSumKernel
.
setArg
(
3
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
// system.getConstraintParameters(i, particle1Index, particle2Index, constraintDistance);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
// particle1[i] = particle1Index;
computeBornSumKernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
// particle2[i] = particle2Index;
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
params
->
getDeviceBuffer
());
// distance[i] = (float) constraintDistance;
computeBornSumKernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float2
),
NULL
);
// invMass1[i] = 1.0f/mass[particle1Index];
if
(
nb
.
getUseCutoff
())
{
// invMass2[i] = 1.0f/mass[particle2Index];
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
// }
computeBornSumKernel
.
setArg
<
cl_float
>
(
9
,
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
());
// gpuSetConstraintParameters(gpu, particle1, particle2, distance, invMass1, invMass2, (float)integrator.getConstraintTolerance());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
10
,
nb
.
getPeriodicBoxSize
());
//
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
11
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
// // Finish initialization.
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
//
computeBornSumKernel
.
setArg
(
13
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
// gpuBuildThreadBlockWorkList(gpu);
}
// gpuBuildExclusionList(gpu);
else
{
// gpuBuildOutputBuffers(gpu);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
nb
.
getTiles
().
getDeviceBuffer
());
// gpuSetConstants(gpu);
computeBornSumKernel
.
setArg
<
cl_uint
>
(
9
,
nb
.
getTiles
().
getSize
());
// kClearBornForces(gpu);
}
// kClearForces(gpu);
cl
.
executeKernel
(
computeBornSumKernel
,
nb
.
getTiles
().
getSize
()
*
OpenCLContext
::
TileSize
);
// cudaThreadSynchronize();
//}
// Reduce the Born sum.
//
//double OpenCLCalcGBSAOBCForceKernel::executeEnergy(ContextImpl& context) {
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getNumAtoms
());
// return 0.0;
reduceBornSumKernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getPaddedNumAtoms
());
//}
reduceBornSumKernel
.
setArg
<
cl_int
>
(
2
,
cl
.
getNumForceBuffers
());
reduceBornSumKernel
.
setArg
<
cl_float
>
(
3
,
1.0
f
);
reduceBornSumKernel
.
setArg
<
cl_float
>
(
4
,
0.8
f
);
reduceBornSumKernel
.
setArg
<
cl_float
>
(
5
,
4.85
f
);
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
bornSum
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
params
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
bornRadii
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
obcChain
->
getDeviceBuffer
());
cl
.
executeKernel
(
reduceBornSumKernel
,
cl
.
getPaddedNumAtoms
());
}
double
OpenCLCalcGBSAOBCForceKernel
::
executeEnergy
(
ContextImpl
&
context
)
{
executeForces
(
context
);
return
0.0
;
}
OpenCLIntegrateVerletStepKernel
::~
OpenCLIntegrateVerletStepKernel
()
{
OpenCLIntegrateVerletStepKernel
::~
OpenCLIntegrateVerletStepKernel
()
{
}
}
...
...
platforms/opencl/src/OpenCLKernels.h
View file @
ffab317b
...
@@ -368,38 +368,46 @@ private:
...
@@ -368,38 +368,46 @@ private:
// std::vector<float> globalParamValues;
// std::vector<float> globalParamValues;
// System& system;
// System& system;
//};
//};
//
///**
/**
// * This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
* This kernel is invoked by GBSAOBCForce to calculate the forces acting on the system.
// */
*/
//class OpenCLCalcGBSAOBCForceKernel : public CalcGBSAOBCForceKernel {
class
OpenCLCalcGBSAOBCForceKernel
:
public
CalcGBSAOBCForceKernel
{
//public:
public:
// OpenCLCalcGBSAOBCForceKernel(std::string name, const Platform& platform, OpenCLContext& cl) : CalcGBSAOBCForceKernel(name, platform), cl(cl) {
OpenCLCalcGBSAOBCForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
)
:
CalcGBSAOBCForceKernel
(
name
,
platform
),
cl
(
cl
),
bornSum
(
NULL
)
{
// }
}
// ~OpenCLCalcGBSAOBCForceKernel();
~
OpenCLCalcGBSAOBCForceKernel
();
// /**
/**
// * Initialize the kernel.
* Initialize the kernel.
// *
*
// * @param system the System this kernel will be applied to
* @param system the System this kernel will be applied to
// * @param force the GBSAOBCForce this kernel will be used for
* @param force the GBSAOBCForce this kernel will be used for
// */
*/
// void initialize(const System& system, const GBSAOBCForce& force);
void
initialize
(
const
System
&
system
,
const
GBSAOBCForce
&
force
);
// /**
/**
// * Execute the kernel to calculate the forces.
* Execute the kernel to calculate the forces.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// */
*/
// void executeForces(ContextImpl& context);
void
executeForces
(
ContextImpl
&
context
);
// /**
/**
// * Execute the kernel to calculate the energy.
* Execute the kernel to calculate the energy.
// *
*
// * @param context the context in which to execute this kernel
* @param context the context in which to execute this kernel
// * @return the potential energy due to the GBSAOBCForce
* @return the potential energy due to the GBSAOBCForce
// */
*/
// double executeEnergy(ContextImpl& context);
double
executeEnergy
(
ContextImpl
&
context
);
//private:
private:
// OpenCLContext& cl;
double
prefactor
;
//};
OpenCLContext
&
cl
;
OpenCLArray
<
mm_float2
>*
params
;
OpenCLArray
<
cl_float
>*
bornSum
;
OpenCLArray
<
cl_float
>*
bornRadii
;
OpenCLArray
<
cl_float
>*
bornForce
;
OpenCLArray
<
cl_float
>*
obcChain
;
cl
::
Kernel
computeBornSumKernel
;
cl
::
Kernel
reduceBornSumKernel
;
};
/**
/**
* This kernel is invoked by VerletIntegrator to take one time step.
* This kernel is invoked by VerletIntegrator to take one time step.
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
ffab317b
...
@@ -96,8 +96,8 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
...
@@ -96,8 +96,8 @@ void OpenCLNonbondedUtilities::addInteraction(bool usesCutoff, bool usesPeriodic
}
}
}
}
void
OpenCLNonbondedUtilities
::
addParameter
(
const
string
&
name
,
const
string
&
type
,
int
size
,
cl
::
Buffer
&
buff
er
)
{
void
OpenCLNonbondedUtilities
::
addParameter
(
const
ParameterInfo
&
paramet
er
)
{
parameters
.
push_back
(
P
arameter
Info
(
name
,
type
,
size
,
buffer
)
);
parameters
.
push_back
(
p
arameter
);
}
}
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
...
@@ -117,50 +117,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -117,50 +117,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create kernels.
// Create kernels.
map
<
string
,
string
>
replacements
;
forceKernel
=
createInteractionKernel
(
kernelSource
,
parameters
,
true
);
replacements
[
"COMPUTE_INTERACTION"
]
=
kernelSource
;
stringstream
args
;
for
(
int
i
=
0
;
i
<
parameters
.
size
();
i
++
)
{
args
<<
", __global "
;
args
<<
parameters
[
i
].
type
;
args
<<
"* global_"
;
args
<<
parameters
[
i
].
name
;
args
<<
", __local "
;
args
<<
parameters
[
i
].
type
;
args
<<
"* local_"
;
args
<<
parameters
[
i
].
name
;
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
stringstream
load1
;
for
(
int
i
=
0
;
i
<
parameters
.
size
();
i
++
)
{
load1
<<
parameters
[
i
].
type
;
load1
<<
" "
;
load1
<<
parameters
[
i
].
name
;
load1
<<
"1 = global_"
;
load1
<<
parameters
[
i
].
name
;
load1
<<
"[i];"
;
}
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
stringstream
load2j
;
for
(
int
i
=
0
;
i
<
parameters
.
size
();
i
++
)
{
load2j
<<
parameters
[
i
].
type
;
load2j
<<
" "
;
load2j
<<
parameters
[
i
].
name
;
load2j
<<
"2 = local_"
;
load2j
<<
parameters
[
i
].
name
;
load2j
<<
"[tbx+j];"
;
}
replacements
[
"LOAD_ATOM2_PARAMETERS_J"
]
=
load2j
.
str
();
stringstream
load2tj
;
for
(
int
i
=
0
;
i
<
parameters
.
size
();
i
++
)
{
load2tj
<<
parameters
[
i
].
type
;
load2tj
<<
" "
;
load2tj
<<
parameters
[
i
].
name
;
load2tj
<<
"2 = local_"
;
load2tj
<<
parameters
[
i
].
name
;
load2tj
<<
"[tbx+tj];"
;
}
replacements
[
"LOAD_ATOM2_PARAMETERS_TJ"
]
=
load2tj
.
str
();
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
if
(
forceBufferPerAtomBlock
)
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
...
@@ -168,8 +125,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -168,8 +125,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
usePeriodic
)
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
cl
::
Program
forceProgram
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
"nonbonded.cl"
,
replacements
),
defines
);
forceKernel
=
cl
::
Kernel
(
forceProgram
,
"computeNonbonded"
);
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
"findInteractingBlocks.cl"
),
defines
);
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
"findInteractingBlocks.cl"
),
defines
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
...
@@ -271,7 +226,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -271,7 +226,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
}
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
hasComputedInteractions
=
false
;
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
;
return
;
...
@@ -308,34 +262,113 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -308,34 +262,113 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
hasComputedInteractions
)
invokeInteractionKernel
(
forceKernel
,
parameters
);
return
;
}
hasComputedInteractions
=
true
;
forceKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getPaddedNumAtoms
());
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>
params
,
bool
useExclusions
)
const
{
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
context
.
getForceBuffers
().
getDeviceBuffer
());
map
<
string
,
string
>
replacements
;
forceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
forceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
stringstream
args
;
forceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
exclusions
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
params
.
size
();
i
++
)
{
forceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
exclusionIndex
->
getDeviceBuffer
());
args
<<
", __global "
;
forceKernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
args
<<
params
[
i
].
getType
();
forceKernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
args
<<
"* global_"
;
args
<<
params
[
i
].
getName
();
args
<<
", __local "
;
args
<<
params
[
i
].
getType
();
args
<<
"* local_"
;
args
<<
params
[
i
].
getName
();
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
// local_sigmaEpsilon[get_local_id(0)] = sigmaEpsilon1;
stringstream
loadLocal1
;
for
(
int
i
=
0
;
i
<
params
.
size
();
i
++
)
{
loadLocal1
<<
"local_"
;
loadLocal1
<<
params
[
i
].
getName
();
loadLocal1
<<
"[get_local_id(0)] = "
;
loadLocal1
<<
params
[
i
].
getName
();
loadLocal1
<<
"1;"
;
}
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_1"
]
=
loadLocal1
.
str
();
// local_sigmaEpsilon[get_local_id(0)] = global_sigmaEpsilon[j];
stringstream
loadLocal2
;
for
(
int
i
=
0
;
i
<
params
.
size
();
i
++
)
{
loadLocal2
<<
"local_"
;
loadLocal2
<<
params
[
i
].
getName
();
loadLocal2
<<
"[get_local_id(0)] = global_"
;
loadLocal2
<<
params
[
i
].
getName
();
loadLocal2
<<
"[j];"
;
}
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"
]
=
loadLocal2
.
str
();
stringstream
load1
;
for
(
int
i
=
0
;
i
<
params
.
size
();
i
++
)
{
load1
<<
params
[
i
].
getType
();
load1
<<
" "
;
load1
<<
params
[
i
].
getName
();
load1
<<
"1 = global_"
;
load1
<<
params
[
i
].
getName
();
load1
<<
"[i];"
;
}
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
stringstream
load2j
;
for
(
int
i
=
0
;
i
<
params
.
size
();
i
++
)
{
load2j
<<
params
[
i
].
getType
();
load2j
<<
" "
;
load2j
<<
params
[
i
].
getName
();
load2j
<<
"2 = local_"
;
load2j
<<
params
[
i
].
getName
();
load2j
<<
"[tbx+j];"
;
}
replacements
[
"LOAD_ATOM2_PARAMETERS_J"
]
=
load2j
.
str
();
stringstream
load2tj
;
for
(
int
i
=
0
;
i
<
params
.
size
();
i
++
)
{
load2tj
<<
params
[
i
].
getType
();
load2tj
<<
" "
;
load2tj
<<
params
[
i
].
getName
();
load2tj
<<
"2 = local_"
;
load2tj
<<
params
[
i
].
getName
();
load2tj
<<
"[tbx+tj];"
;
}
replacements
[
"LOAD_ATOM2_PARAMETERS_TJ"
]
=
load2tj
.
str
();
map
<
string
,
string
>
defines
;
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
useCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
usePeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
useExclusions
)
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
cl
::
Program
program
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
"nonbonded.cl"
,
replacements
),
defines
);
return
cl
::
Kernel
(
program
,
"computeNonbonded"
);
}
void
OpenCLNonbondedUtilities
::
invokeInteractionKernel
(
cl
::
Kernel
kernel
,
const
vector
<
ParameterInfo
>&
params
)
{
kernel
.
setArg
<
cl_int
>
(
0
,
context
.
getPaddedNumAtoms
());
kernel
.
setArg
<
cl
::
Buffer
>
(
1
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
4
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
5
,
exclusionIndex
->
getDeviceBuffer
());
kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
int
paramBase
=
10
;
int
paramBase
=
10
;
if
(
useCutoff
)
{
if
(
useCutoff
)
{
paramBase
=
14
;
paramBase
=
14
;
forceK
ernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactingTiles
->
getDeviceBuffer
());
k
ernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactingTiles
->
getDeviceBuffer
());
forceK
ernel
.
setArg
<
cl_float
>
(
9
,
cutoff
*
cutoff
);
k
ernel
.
setArg
<
cl_float
>
(
9
,
cutoff
*
cutoff
);
forceK
ernel
.
setArg
<
mm_float4
>
(
10
,
periodicBoxSize
);
k
ernel
.
setArg
<
mm_float4
>
(
10
,
periodicBoxSize
);
forceK
ernel
.
setArg
<
cl
::
Buffer
>
(
11
,
interactionFlags
->
getDeviceBuffer
());
k
ernel
.
setArg
<
cl
::
Buffer
>
(
11
,
interactionFlags
->
getDeviceBuffer
());
forceK
ernel
.
setArg
<
cl
::
Buffer
>
(
12
,
interactionCount
->
getDeviceBuffer
());
k
ernel
.
setArg
<
cl
::
Buffer
>
(
12
,
interactionCount
->
getDeviceBuffer
());
forceK
ernel
.
setArg
(
13
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
k
ernel
.
setArg
(
13
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
}
}
else
{
else
{
forceK
ernel
.
setArg
<
cl
::
Buffer
>
(
8
,
tiles
->
getDeviceBuffer
());
k
ernel
.
setArg
<
cl
::
Buffer
>
(
8
,
tiles
->
getDeviceBuffer
());
forceK
ernel
.
setArg
<
cl_uint
>
(
9
,
tiles
->
getSize
());
k
ernel
.
setArg
<
cl_uint
>
(
9
,
tiles
->
getSize
());
}
}
for
(
int
i
=
0
;
i
<
(
int
)
param
eter
s
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
forceK
ernel
.
setArg
<
cl
::
Buffer
>
(
i
*
2
+
paramBase
,
*
param
eter
s
[
i
].
b
uffer
);
k
ernel
.
setArg
<
cl
::
Buffer
>
(
i
*
2
+
paramBase
,
params
[
i
].
getB
uffer
()
);
forceK
ernel
.
setArg
(
i
*
2
+
paramBase
+
1
,
OpenCLContext
::
ThreadBlockSize
*
param
eter
s
[
i
].
s
ize
,
NULL
);
k
ernel
.
setArg
(
i
*
2
+
paramBase
+
1
,
OpenCLContext
::
ThreadBlockSize
*
params
[
i
].
getS
ize
()
,
NULL
);
}
}
context
.
executeKernel
(
forceK
ernel
,
tiles
->
getSize
()
*
OpenCLContext
::
TileSize
);
context
.
executeKernel
(
k
ernel
,
tiles
->
getSize
()
*
OpenCLContext
::
TileSize
);
}
}
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
ffab317b
...
@@ -37,16 +37,37 @@ namespace OpenMM {
...
@@ -37,16 +37,37 @@ namespace OpenMM {
class
OpenCLCompact
;
class
OpenCLCompact
;
/**
/**
* This class implements features that are used by several different force. It provides
* This class provides a generic interface for calculating nonbonded interactions. It does this in two
* a generic interface for calculating nonbonded interactions.
* ways. First, it can be used to create and invoke Kernels that evaluate nonbonded interactions. Clients
* only need to provide the code for evaluating a single interaction and the list of parameters it depends on.
* A complete kernel is then synthesized using an appropriate algorithm to evaluate all interactions on all
* atoms.
*
* Second, this class itself creates and invokes a single "default" interaction kernel, allowing several
* different forces to be evaluated at once for greater efficiency. Call addInteraction() and addParameter()
* to add interactions to this default kernel.
*
* During each force or energy evaluation, the following sequence of steps takes place:
*
* 1. Data structures (e.g. neighbor lists) are calculated to allow nonbonded interactions to be evaluated
* quickly.
*
* 2. calcForces() or calcEnergy() is called on each ForceImpl in the System.
*
* 3. Finally, the default interaction kernel is invoked to calculate all interactions that were added
* to it.
*
* This sequence means that the default interaction kernel may depend on quantities that were calculated
* by ForceImpls during calcForces() or calcEnergy().
*/
*/
class
OpenCLNonbondedUtilities
{
class
OpenCLNonbondedUtilities
{
public:
public:
class
ParameterInfo
;
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
);
OpenCLNonbondedUtilities
(
OpenCLContext
&
context
);
~
OpenCLNonbondedUtilities
();
~
OpenCLNonbondedUtilities
();
/**
/**
* Add a nonbonded interaction.
* Add a nonbonded interaction
to be evaluated by the default interaction kernel
.
*
*
* @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesCutoff specifies whether a cutoff should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
* @param usesPeriodic specifies whether periodic boundary conditions should be applied to this interaction
...
@@ -56,14 +77,9 @@ public:
...
@@ -56,14 +77,9 @@ public:
*/
*/
void
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
double
cutoffDistance
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusionList
,
const
std
::
string
&
kernel
);
void
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
double
cutoffDistance
,
const
std
::
vector
<
std
::
vector
<
int
>
>&
exclusionList
,
const
std
::
string
&
kernel
);
/**
/**
* Add a per-atom parameter that interactions may depend on.
* Add a per-atom parameter that the default interaction kernel may depend on.
*
* @param name the name of the parameter
* @param type the data type of the parameter
* @param size the size of the parameter in bytes
* @param buffer the buffer containing the parameter values
*/
*/
void
addParameter
(
const
std
::
string
&
name
,
const
std
::
string
&
type
,
int
size
,
cl
::
Buffer
&
buff
er
);
void
addParameter
(
const
ParameterInfo
&
paramet
er
);
/**
/**
* Initialize this object in preparation for a simulation.
* Initialize this object in preparation for a simulation.
*/
*/
...
@@ -86,6 +102,12 @@ public:
...
@@ -86,6 +102,12 @@ public:
bool
getUsePeriodic
()
{
bool
getUsePeriodic
()
{
return
usePeriodic
;
return
usePeriodic
;
}
}
/**
* Get whether there is one force buffer per atom block.
*/
bool
getForceBufferPerAtomBlock
()
{
return
forceBufferPerAtomBlock
;
}
/**
/**
* Get the cutoff distance.
* Get the cutoff distance.
*/
*/
...
@@ -143,8 +165,24 @@ public:
...
@@ -143,8 +165,24 @@ public:
OpenCLArray
<
cl_uint
>&
getInteractionFlags
()
{
OpenCLArray
<
cl_uint
>&
getInteractionFlags
()
{
return
*
interactionFlags
;
return
*
interactionFlags
;
}
}
/**
* Create a Kernel for evaluating a nonbonded interaction. Cutoffs and periodic boundary conditions
* are assumed to be the same as those for the default interaction Kernel, since this kernel will use
* the same neighbor list.
*
* @param source the source code for evaluating the force and energy
* @param params the per-atom parameters this kernel may depend on
* @param useExclusions specifies whether exclusions are applied to this interaction
*/
cl
::
Kernel
createInteractionKernel
(
const
std
::
string
&
source
,
const
std
::
vector
<
ParameterInfo
>
params
,
bool
useExclusions
)
const
;
/**
* Invoke a Kernel that was created by createInteractionKernel.
*
* @param kernel the Kernel to invoke
* @param params the per-atom parameters to pass to the Kernel
*/
void
invokeInteractionKernel
(
cl
::
Kernel
kernel
,
const
std
::
vector
<
ParameterInfo
>&
params
);
private:
private:
class
ParameterInfo
;
OpenCLContext
&
context
;
OpenCLContext
&
context
;
cl
::
Kernel
forceKernel
;
cl
::
Kernel
forceKernel
;
cl
::
Kernel
findBlockBoundsKernel
;
cl
::
Kernel
findBlockBoundsKernel
;
...
@@ -164,16 +202,41 @@ private:
...
@@ -164,16 +202,41 @@ private:
std
::
string
kernelSource
;
std
::
string
kernelSource
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
double
cutoff
;
double
cutoff
;
bool
useCutoff
,
usePeriodic
,
forceBufferPerAtomBlock
,
hasComputedInteractions
;
bool
useCutoff
,
usePeriodic
,
forceBufferPerAtomBlock
;
int
numForceBuffers
;
int
numForceBuffers
;
mm_float4
periodicBoxSize
;
mm_float4
periodicBoxSize
;
};
};
/**
* This class stores information about a per-atom parameter that may be used in a nonbonded kernel.
*/
class
OpenCLNonbondedUtilities
::
ParameterInfo
{
class
OpenCLNonbondedUtilities
::
ParameterInfo
{
public:
public:
/**
* Create a ParameterInfo object.
*
* @param name the name of the parameter
* @param type the data type of the parameter
* @param size the size of the parameter in bytes
* @param buffer the buffer containing the parameter values
*/
ParameterInfo
(
const
std
::
string
&
name
,
const
std
::
string
&
type
,
int
size
,
cl
::
Buffer
&
buffer
)
:
ParameterInfo
(
const
std
::
string
&
name
,
const
std
::
string
&
type
,
int
size
,
cl
::
Buffer
&
buffer
)
:
name
(
name
),
type
(
type
),
size
(
size
),
buffer
(
&
buffer
)
{
name
(
name
),
type
(
type
),
size
(
size
),
buffer
(
&
buffer
)
{
}
}
const
std
::
string
&
getName
()
const
{
return
name
;
}
const
std
::
string
&
getType
()
const
{
return
type
;
}
int
getSize
()
const
{
return
size
;
}
cl
::
Buffer
&
getBuffer
()
const
{
return
*
buffer
;
}
private:
std
::
string
name
;
std
::
string
name
;
std
::
string
type
;
std
::
string
type
;
int
size
;
int
size
;
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
ffab317b
...
@@ -53,7 +53,7 @@ OpenCLPlatform::OpenCLPlatform() {
...
@@ -53,7 +53,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
CalcRBTorsionForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcRBTorsionForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
// registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
// registerKernelFactory(CalcCustomNonbondedForceKernel::Name(), factory);
//
registerKernelFactory(CalcGBSAOBCForceKernel::Name(), factory);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
// registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
// registerKernelFactory(IntegrateBrownianStepKernel::Name(), factory);
...
...
platforms/opencl/src/kernels/gbsaObc.cl
0 → 100644
View file @
ffab317b
const
unsigned
int
TileSize
=
32
;
const
float
dielectricOffset
=
0.009f
;
/**
*
Compute
the
Born
sum.
*/
__kernel
void
computeBornSum
(
int
numAtoms,
int
paddedNumAtoms,
__global
float*
global_bornSum,
__local
float*
local_bornSum,
__global
float4*
posq,
__local
float4*
local_posq,
__global
float2*
global_params,
__local
float2*
local_params,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
float
cutoffSquared,
float4
periodicBoxSize,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
float*
tempBuffer
)
{
#
else
unsigned
int
numTiles
)
{
#
endif
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TileSize
;
unsigned
int
warp
=
get_global_id
(
0
)
/TileSize
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
unsigned
int
x
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
*TileSize
;
x
=
(
x>>17
)
*TileSize
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TileSize-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
i
=
x
+
tgx
;
float
bornSum
=
0.0f
;
float4
posq1
=
posq[i]
;
float2
params1
=
global_params[i]
;
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_params[get_local_id
(
0
)
]
=
params1
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
tile
=
xi+xi*paddedNumAtoms/TileSize-xi*
(
xi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y/periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z/periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
i
<
numAtoms
&&
x+j
<
numAtoms
&&
r2
<
cutoffSquared
)
{
#
else
if
(
i
<
numAtoms
&&
x+j
<
numAtoms
)
{
#
endif
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float2
params2
=
local_params[tbx+j]
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
float
u_ij
=
1.0f/rScaledRadiusJ
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
bornSum
+=
2.0f*
(
1.0f/params1.x-l_ij
)
;
}
}
}
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*paddedNumAtoms
;
#
else
unsigned
int
offset
=
x
+
tgx
+
warp*paddedNumAtoms
;
#
endif
global_bornSum[offset]
+=
bornSum
;
}
else
{
//
This
is
an
off-diagonal
tile.
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
local_params[get_local_id
(
0
)
]
=
global_params[j]
;
}
local_bornSum[get_local_id
(
0
)
]
=
0.0f
;
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
}
else
{
//
Compute
only
a
subset
of
the
interactions
in
this
tile.
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
if
((
flags&
(
1<<j
))
!=
0
)
{
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y/periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z/periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
i
<
numAtoms
&&
x+j
<
numAtoms
&&
r2
<
cutoffSquared
)
{
#
else
if
(
i
<
numAtoms
&&
x+j
<
numAtoms
)
{
#
endif
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float2
params2
=
local_params[tbx+j]
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
float
u_ij
=
1.0f/rScaledRadiusJ
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
bornSum
+=
2.0f*
(
1.0f/params1.x-l_ij
)
;
}
float
rScaledRadiusI
=
r+params1.y
;
if
((
j
!=
tgx
)
&&
(
params2.x
<
rScaledRadiusJ
))
{
float
l_ij
=
1.0f/max
(
params2.x,
fabs
(
r-params1.y
))
;
float
u_ij
=
1.0f/rScaledRadiusJ
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params2.x
<
params1.x-r
)
term
+=
2.0f*
(
1.0f/params2.x-l_ij
)
;
tempBuffer[get_local_id
(
0
)
]
=
term
;
}
}
//
Sum
the
forces
on
atom
j.
if
(
tgx
%
2
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+1]
;
if
(
tgx
%
4
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+2]
;
if
(
tgx
%
8
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+4]
;
if
(
tgx
%
16
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+8]
;
if
(
tgx
==
0
)
local_bornSum[tbx+j]
+=
tempBuffer[get_local_id
(
0
)
]
+
tempBuffer[get_local_id
(
0
)
+16]
;
}
}
}
}
else
#
endif
{
//
Compute
the
full
set
of
interactions
in
this
tile.
unsigned
int
xi
=
x/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
tile
=
xi+yi*paddedNumAtoms/TileSize-yi*
(
yi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y/periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z/periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
i
<
numAtoms
&&
x+tj
<
numAtoms
&&
r2
<
cutoffSquared
)
{
#
else
if
(
i
<
numAtoms
&&
x+tj
<
numAtoms
)
{
#
endif
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float2
params2
=
local_params[tbx+tj]
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
tj
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
float
u_ij
=
1.0f/rScaledRadiusJ
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
bornSum
+=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params2.y*params2.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params1.x
<
params2.x-r
)
bornSum
+=
2.0f*
(
1.0f/params1.x-l_ij
)
;
}
float
rScaledRadiusI
=
r+params1.y
;
if
((
tj
!=
tgx
)
&&
(
params2.x
<
rScaledRadiusJ
))
{
float
l_ij
=
1.0f/max
(
params2.x,
fabs
(
r-params1.y
))
;
float
u_ij
=
1.0f/rScaledRadiusJ
;
float
l_ij2
=
l_ij*l_ij
;
float
u_ij2
=
u_ij*u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
)
;
float
term
=
l_ij
-
u_ij
+
0.25f*r*
(
u_ij2-l_ij2
)
+
(
0.50f*invR*ratio
)
+
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params2.x
<
params1.x-r
)
term
+=
2.0f*
(
1.0f/params2.x-l_ij
)
;
local_bornSum[tbx+tj]
=
term
;
}
}
tj
=
(
tj
+
1
)
&
(
TileSize
-
1
)
;
}
}
//
Write
results
float4
of
;
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*paddedNumAtoms
;
#
else
unsigned
int
offset1
=
x
+
tgx
+
warp*paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
warp*paddedNumAtoms
;
#
endif
global_bornSum[offset1]
+=
bornSum
;
global_bornSum[offset2]
+=
local_bornSum[get_local_id
(
0
)
]
;
lasty
=
y
;
}
pos++
;
}
}
/**
*
Reduce
the
Born
sums
to
compute
the
Born
radii.
*/
__kernel
void
reduceBornSum
(
int
numAtoms,
int
bufferSize,
int
numBuffers,
float
alpha,
float
beta,
float
gamma,
__global
float*
bornSum,
__global
float2*
params,
__global
float*
bornRadii,
__global
float*
obcChain
)
{
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
//
Get
summed
Born
data
int
totalSize
=
bufferSize*numBuffers
;
float
sum
=
bornSum[index]
;
for
(
int
i
=
index+bufferSize
; i < totalSize; i += bufferSize)
sum
+=
bornSum[i]
;
bornSum[index]
=
sum
;
//
Now
calculate
Born
radius
and
OBC
term.
float
offsetRadius
=
params[index].x
;
sum
*=
0.5f*offsetRadius
;
float
sum2
=
sum*sum
;
float
sum3
=
sum*sum2
;
float
tanhSum
=
tanh
(
alpha*sum
-
beta*sum2
+
gamma*sum3
)
;
float
nonOffsetRadii
=
offsetRadius
+
dielectricOffset
;
float
radius
=
1.0f/
(
1.0f/offsetRadius
-
tanhSum/nonOffsetRadii
)
;
float
chain
=
offsetRadius*
(
alpha
-
2.0f*beta*sum
+
3.0f*gamma*sum2
)
;
chain
=
(
1.0f-tanhSum*tanhSum
)
*chain
/
nonOffsetRadii
;
bornRadii[index]
=
radius
;
obcChain[index]
=
chain
;
index
+=
get_global_size
(
0
)
;
}
}
platforms/opencl/src/kernels/nonbonded.cl
View file @
ffab317b
...
@@ -39,12 +39,16 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
...
@@ -39,12 +39,16 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
//
This
tile
is
on
the
diagonal.
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_sigmaEpsilon[get_local_id
(
0
)
]
=
sigmaEpsilon1
;
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned
int
xi
=
x/TileSize
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
tile
=
xi+xi*paddedNumAtoms/TileSize-xi*
(
xi+1
)
/2
;
unsigned
int
tile
=
xi+xi*paddedNumAtoms/TileSize-xi*
(
xi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
;
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
;
#
endif
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
...
@@ -59,33 +63,35 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
...
@@ -59,33 +63,35 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
float
tempEnergy
=
0.0f
;
float
tempEnergy
=
0.0f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
#
ifdef
USE_CUTOFF
#
if
defined
USE_CUTOFF
|
| defined USE_EXCLUSIONS
#if defined USE_CUTOFF && defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded || r2 > cutoffSquared) {
if (isExcluded || r2 > cutoffSquared) {
#else
#elif defined USE_CUTOFF
if (r2 > cutoffSquared) {
#elif defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded) {
if (isExcluded) {
#endif
#endif
dEdR = 0.0f;
dEdR = 0.0f;
tempEnergy = 0.0f;
tempEnergy = 0.0f;
}
}
#endif
energy += 0.5f*tempEnergy;
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
force.xyz -= delta.xyz;
excl >>= 1;
}
}
// Write results
// Write results
float4 of;
float4 of;
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
#ifdef USE_OUTPUT_BUFFER_PER_BLOCK
of.xyz = force.xyz;
of.w = 0.0f;
unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms;
unsigned int offset = x + tgx + (x/TileSize)*paddedNumAtoms;
forceBuffers[offset] = of;
#else
#else
unsigned int offset = x + tgx + warp*paddedNumAtoms;
unsigned int offset = x + tgx + warp*paddedNumAtoms;
#endif
of = forceBuffers[offset];
of = forceBuffers[offset];
of.xyz += force.xyz;
of.xyz += force.xyz;
forceBuffers[offset] = of;
forceBuffers[offset] = of;
#endif
}
}
else {
else {
// This is an off-diagonal tile.
// This is an off-diagonal tile.
...
@@ -93,7 +99,7 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
...
@@ -93,7 +99,7 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
if (lasty != y) {
if (lasty != y) {
unsigned int j = y + tgx;
unsigned int j = y + tgx;
local_posq[get_local_id(0)] = posq[j];
local_posq[get_local_id(0)] = posq[j];
local_sigmaEpsilon[get_local_id(0)] = global_sigmaEpsilon[j];
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
local_force[get_local_id(0)] = 0.0f;
local_force[get_local_id(0)] = 0.0f;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -157,10 +163,14 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
...
@@ -157,10 +163,14 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
unsigned int xi = x/TileSize;
unsigned int xi = x/TileSize;
unsigned int yi = y/TileSize;
unsigned int yi = y/TileSize;
unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2;
unsigned int tile = xi+yi*paddedNumAtoms/TileSize-yi*(yi+1)/2;
#ifdef USE_EXCLUSIONS
unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
unsigned int excl = (hasExclusions ? exclusions[exclusionIndices[tile]+tgx] : 0xFFFFFFFF);
excl = (excl >> tgx) |
(
excl
<<
(
TileSize
-
tgx
))
;
excl = (excl >> tgx) |
(
excl
<<
(
TileSize
-
tgx
))
;
#
endif
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
...
@@ -175,19 +185,24 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
...
@@ -175,19 +185,24 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
float
dEdR
=
0.0f
;
float
dEdR
=
0.0f
;
float
tempEnergy
=
0.0f
;
float
tempEnergy
=
0.0f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
#
ifdef
USE_CUTOFF
#
if
defined
USE_CUTOFF
|
| defined USE_EXCLUSIONS
#if defined USE_CUTOFF && defined USE_EXCLUSIONS
excl >>= 1;
if (isExcluded |
|
r2
>
cutoffSquared
)
{
if (isExcluded |
|
r2
>
cutoffSquared
)
{
#
else
#
elif
defined
USE_CUTOFF
if
(
r2
>
cutoffSquared
)
{
#
elif
defined
USE_EXCLUSIONS
excl
>>=
1
;
if
(
isExcluded
)
{
if
(
isExcluded
)
{
#
endif
#
endif
dEdR
=
0.0f
;
dEdR
=
0.0f
;
tempEnergy
=
0.0f
;
tempEnergy
=
0.0f
;
}
}
#
endif
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
local_force[tbx+tj].xyz
+=
delta.xyz
;
local_force[tbx+tj].xyz
+=
delta.xyz
;
excl
>>=
1
;
tj
=
(
tj
+
1
)
&
(
TileSize
-
1
)
;
tj
=
(
tj
+
1
)
&
(
TileSize
-
1
)
;
}
}
}
}
...
@@ -195,23 +210,18 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
...
@@ -195,23 +210,18 @@ __kernel void computeNonbonded(int paddedNumAtoms, __global float4* forceBuffers
//
Write
results
//
Write
results
float4
of
;
float4
of
;
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
of.xyz
=
force.xyz
;
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*paddedNumAtoms
;
of.w
=
0.0f
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*paddedNumAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
y/TileSize
)
*paddedNumAtoms
;
forceBuffers[offset]
=
of
;
of
=
local_force[get_local_id
(
0
)
]
;
offset
=
y
+
tgx
+
(
x/TileSize
)
*paddedNumAtoms
;
forceBuffers[offset]
=
of
;
#
else
#
else
unsigned
int
offset
=
x
+
tgx
+
warp*paddedNumAtoms
;
unsigned
int
offset1
=
x
+
tgx
+
warp*paddedNumAtoms
;
of
=
forceBuffers[offset]
;
unsigned
int
offset2
=
y
+
tgx
+
warp*paddedNumAtoms
;
#
endif
of
=
forceBuffers[offset1]
;
of.xyz
+=
force.xyz
;
of.xyz
+=
force.xyz
;
forceBuffers[offset]
=
of
;
forceBuffers[offset1]
=
of
;
offset
=
y
+
tgx
+
warp*paddedNumAtoms
;
of
=
forceBuffers[offset2]
;
of
=
forceBuffers[offset]
;
of.xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
of.xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
forceBuffers[offset]
=
of
;
forceBuffers[offset2]
=
of
;
#
endif
lasty
=
y
;
lasty
=
y
;
}
}
pos++
;
pos++
;
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
ffab317b
...
@@ -4,12 +4,11 @@
...
@@ -4,12 +4,11 @@
__kernel
void
clearBuffer
(
__global
float*
buffer,
int
size
)
{
__kernel
void
clearBuffer
(
__global
float*
buffer,
int
size
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
step
=
get_global_size
(
0
)
;
__global
float4*
buffer4
=
(
__global
float4*
)
buffer
;
__global
float4*
buffer4
=
(
__global
float4*
)
buffer
;
int
sizeDiv4
=
size/4
;
int
sizeDiv4
=
size/4
;
while
(
index
<
sizeDiv4
)
{
while
(
index
<
sizeDiv4
)
{
buffer4[index]
=
(
float4
)
(
0.0f
)
;
buffer4[index]
=
(
float4
)
(
0.0f
)
;
index
+=
step
;
index
+=
get_global_size
(
0
)
;
}
}
if
(
get_global_id
(
0
)
==
0
)
if
(
get_global_id
(
0
)
==
0
)
for
(
int
i
=
sizeDiv4*4
; i < size; i++)
for
(
int
i
=
sizeDiv4*4
; i < size; i++)
...
@@ -22,13 +21,12 @@ __kernel void clearBuffer(__global float* buffer, int size) {
...
@@ -22,13 +21,12 @@ __kernel void clearBuffer(__global float* buffer, int size) {
__kernel
void
reduceFloat4Buffer
(
__global
float4*
buffer,
int
bufferSize,
int
numBuffers
)
{
__kernel
void
reduceFloat4Buffer
(
__global
float4*
buffer,
int
bufferSize,
int
numBuffers
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
step
=
get_global_size
(
0
)
;
int
totalSize
=
bufferSize*numBuffers
;
int
totalSize
=
bufferSize*numBuffers
;
while
(
index
<
bufferSize
)
{
while
(
index
<
bufferSize
)
{
float4
sum
=
buffer[index]
;
float4
sum
=
buffer[index]
;
for
(
int
i
=
index+bufferSize
; i < totalSize; i += bufferSize)
for
(
int
i
=
index+bufferSize
; i < totalSize; i += bufferSize)
sum
+=
buffer[i]
;
sum
+=
buffer[i]
;
buffer[index]
=
sum
;
buffer[index]
=
sum
;
index
+=
step
;
index
+=
get_global_size
(
0
)
;
}
}
}
}
platforms/opencl/tests/TestOpenCLGBSAOBCForce.cpp
0 → 100644
View file @
ffab317b
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2009 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the OpenCL implementation of GBSAOBCForce.
*/
#include "../../../tests/AssertionUtilities.h"
#include "openmm/Context.h"
#include "OpenCLPlatform.h"
#include "ReferencePlatform.h"
#include "openmm/GBSAOBCForce.h"
#include "openmm/System.h"
#include "openmm/LangevinIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "../src/sfmt/SFMT.h"
#include "openmm/NonbondedForce.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testSingleParticle
()
{
OpenCLPlatform
platform
;
System
system
;
system
.
addParticle
(
2.0
);
LangevinIntegrator
integrator
(
0
,
0.1
,
0.01
);
GBSAOBCForce
*
gbsa
=
new
GBSAOBCForce
();
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
gbsa
->
addParticle
(
0.5
,
0.15
,
1
);
nonbonded
->
addParticle
(
0.5
,
1
,
0
);
system
.
addForce
(
gbsa
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
1
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Energy
);
double
bornRadius
=
0.15
-
0.009
;
// dielectric offset
double
eps0
=
EPSILON0
;
double
bornEnergy
=
(
-
0.5
*
0.5
/
(
8
*
PI_M
*
eps0
))
*
(
1.0
/
gbsa
->
getSoluteDielectric
()
-
1.0
/
gbsa
->
getSolventDielectric
())
/
bornRadius
;
double
extendedRadius
=
bornRadius
+
0.14
;
// probe radius
double
nonpolarEnergy
=
CAL2JOULE
*
PI_M
*
0.0216
*
(
10
*
extendedRadius
)
*
(
10
*
extendedRadius
)
*
std
::
pow
(
0.15
/
bornRadius
,
6.0
);
// Where did this formula come from? Just copied it from CpuImplicitSolvent.cpp
// ASSERT_EQUAL_TOL((bornEnergy+nonpolarEnergy), state.getPotentialEnergy(), 0.01);
}
void
testCutoffAndPeriodic
()
{
OpenCLPlatform
cl
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
LangevinIntegrator
integrator
(
0
,
0.1
,
0.01
);
GBSAOBCForce
*
gbsa
=
new
GBSAOBCForce
();
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
gbsa
->
addParticle
(
-
1
,
0.15
,
1
);
nonbonded
->
addParticle
(
-
1
,
1
,
0
);
gbsa
->
addParticle
(
1
,
0.15
,
1
);
nonbonded
->
addParticle
(
1
,
1
,
0
);
const
double
cutoffDistance
=
3.0
;
const
double
boxSize
=
10.0
;
nonbonded
->
setCutoffDistance
(
cutoffDistance
);
gbsa
->
setCutoffDistance
(
cutoffDistance
);
system
.
setPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
system
.
addForce
(
gbsa
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
2
,
0
,
0
);
// Calculate the forces for both cutoff and periodic with two different atom positions.
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffNonPeriodic
);
Context
context
(
system
,
integrator
,
cl
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffPeriodic
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state2
=
context
.
getState
(
State
::
Forces
);
positions
[
1
][
0
]
+=
boxSize
;
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffNonPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffNonPeriodic
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state3
=
context
.
getState
(
State
::
Forces
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
gbsa
->
setNonbondedMethod
(
GBSAOBCForce
::
CutoffPeriodic
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state4
=
context
.
getState
(
State
::
Forces
);
// All forces should be identical, exception state3 which should be zero.
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
0
],
state2
.
getForces
()[
0
],
0.01
);
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
1
],
state2
.
getForces
()[
1
],
0.01
);
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
0
],
state4
.
getForces
()[
0
],
0.01
);
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
1
],
state4
.
getForces
()[
1
],
0.01
);
ASSERT_EQUAL_VEC
(
state3
.
getForces
()[
0
],
Vec3
(
0
,
0
,
0
),
0.01
);
ASSERT_EQUAL_VEC
(
state3
.
getForces
()[
1
],
Vec3
(
0
,
0
,
0
),
0.01
);
}
void
testForce
(
int
numParticles
,
NonbondedForce
::
NonbondedMethod
method
,
GBSAOBCForce
::
NonbondedMethod
method2
)
{
OpenCLPlatform
cl
;
ReferencePlatform
reference
;
System
system
;
LangevinIntegrator
integrator
(
0
,
0.1
,
0.01
);
GBSAOBCForce
*
gbsa
=
new
GBSAOBCForce
();
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
1.0
);
double
charge
=
i
%
2
==
0
?
-
1
:
1
;
gbsa
->
addParticle
(
charge
,
0.15
,
1
);
nonbonded
->
addParticle
(
charge
,
1
,
0
);
}
nonbonded
->
setNonbondedMethod
(
method
);
gbsa
->
setNonbondedMethod
(
method2
);
nonbonded
->
setCutoffDistance
(
3.0
);
gbsa
->
setCutoffDistance
(
3.0
);
int
grid
=
(
int
)
floor
(
0.5
+
pow
(
numParticles
,
1.0
/
3.0
));
if
(
method
==
NonbondedForce
::
CutoffPeriodic
)
{
double
boxSize
=
(
grid
+
1
)
*
1.1
;
system
.
setPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
}
system
.
addForce
(
gbsa
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
cl
);
Context
refContext
(
system
,
integrator
,
reference
);
// Set random (but uniformly distributed) positions for all the particles.
vector
<
Vec3
>
positions
(
numParticles
);
init_gen_rand
(
0
);
for
(
int
i
=
0
;
i
<
grid
;
i
++
)
for
(
int
j
=
0
;
j
<
grid
;
j
++
)
for
(
int
k
=
0
;
k
<
grid
;
k
++
)
positions
[
i
*
grid
*
grid
+
j
*
grid
+
k
]
=
Vec3
(
i
*
1.1
,
j
*
1.1
,
k
*
1.1
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
positions
[
i
]
=
positions
[
i
]
+
Vec3
(
0.5
*
genrand_real2
(),
0.5
*
genrand_real2
(),
0.5
*
genrand_real2
());
context
.
setPositions
(
positions
);
refContext
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
State
refState
=
refContext
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Make sure the OpenCL and Reference platforms agree.
double
norm
=
0.0
;
double
diff
=
0.0
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
f
=
state
.
getForces
()[
i
];
norm
+=
f
[
0
]
*
f
[
0
]
+
f
[
1
]
*
f
[
1
]
+
f
[
2
]
*
f
[
2
];
Vec3
delta
=
f
-
refState
.
getForces
()[
i
];
diff
+=
delta
[
0
]
*
delta
[
0
]
+
delta
[
1
]
*
delta
[
1
]
+
delta
[
2
]
*
delta
[
2
];
}
norm
=
std
::
sqrt
(
norm
);
diff
=
std
::
sqrt
(
diff
);
ASSERT_EQUAL_TOL
(
0.0
,
diff
,
0.001
*
norm
);
ASSERT_EQUAL_TOL
(
state
.
getPotentialEnergy
(),
refState
.
getPotentialEnergy
(),
1e-3
);
// Take a small step in the direction of the energy gradient. (This doesn't work with cutoffs, since the energy
// changes discontinuously at the cutoff distance.)
if
(
method
==
NonbondedForce
::
NoCutoff
)
{
const
double
delta
=
1e-2
;
double
step
=
delta
/
norm
;
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
Vec3
p
=
positions
[
i
];
Vec3
f
=
state
.
getForces
()[
i
];
positions
[
i
]
=
Vec3
(
p
[
0
]
-
f
[
0
]
*
step
,
p
[
1
]
-
f
[
1
]
*
step
,
p
[
2
]
-
f
[
2
]
*
step
);
}
context
.
setPositions
(
positions
);
// See whether the potential energy changed by the expected amount.
State
state2
=
context
.
getState
(
State
::
Energy
);
ASSERT_EQUAL_TOL
(
norm
,
(
state2
.
getPotentialEnergy
()
-
state
.
getPotentialEnergy
())
/
delta
,
1e-3
*
abs
(
state
.
getPotentialEnergy
()));
}
}
int
main
()
{
try
{
testSingleParticle
();
testCutoffAndPeriodic
();
// for (int i = 5; i < 11; i++) {
// testForce(i*i*i, NonbondedForce::NoCutoff, GBSAOBCForce::NoCutoff);
// testForce(i*i*i, NonbondedForce::CutoffNonPeriodic, GBSAOBCForce::CutoffNonPeriodic);
// testForce(i*i*i, NonbondedForce::CutoffPeriodic, GBSAOBCForce::CutoffPeriodic);
// }
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment