Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
fd473eea
Commit
fd473eea
authored
Oct 29, 2015
by
Peter Eastman
Browse files
Merge branch 'master' into nucleic
parents
0a751b5b
6a985cfd
Changes
279
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
756 additions
and
93 deletions
+756
-93
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+2
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+463
-12
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+84
-34
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+4
-0
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-0
platforms/opencl/src/kernels/customCentroidBond.cl
platforms/opencl/src/kernels/customCentroidBond.cl
+141
-0
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+2
-2
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
+2
-2
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+3
-3
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+2
-2
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+2
-2
platforms/opencl/src/kernels/ewald.cl
platforms/opencl/src/kernels/ewald.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+2
-2
platforms/opencl/src/kernels/gbsaObcReductions.cl
platforms/opencl/src/kernels/gbsaObcReductions.cl
+2
-2
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+2
-2
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+24
-10
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+13
-13
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+2
-2
platforms/opencl/staticTarget/CMakeLists.txt
platforms/opencl/staticTarget/CMakeLists.txt
+1
-1
No files found.
Too many changes to show.
To preserve performance only
279 of 279+
files are displayed.
Plain diff
Email patch
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
fd473eea
...
...
@@ -102,6 +102,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
OpenCLCalcCustomExternalForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomHbondForceKernel
::
Name
())
return
new
OpenCLCalcCustomHbondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomCentroidBondForceKernel
::
Name
())
return
new
OpenCLCalcCustomCentroidBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomCompoundBondForceKernel
::
Name
())
return
new
OpenCLCalcCustomCompoundBondForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomManyParticleForceKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
fd473eea
...
...
@@ -31,6 +31,7 @@
#include "openmm/internal/AndersenThermostatImpl.h"
#include "openmm/internal/CMAPTorsionForceImpl.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/CustomCentroidBondForceImpl.h"
#include "openmm/internal/CustomCompoundBondForceImpl.h"
#include "openmm/internal/CustomHbondForceImpl.h"
#include "openmm/internal/CustomManyParticleForceImpl.h"
...
...
@@ -127,7 +128,7 @@ void OpenCLCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, boo
double
OpenCLCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
cl
.
getBondedUtilities
().
computeInteractions
(
groups
);
cl
.
getNonbondedUtilities
().
computeInteractions
(
groups
);
cl
.
getNonbondedUtilities
().
computeInteractions
(
groups
,
includeForces
,
includeEnergy
);
double
sum
=
0.0
;
for
(
vector
<
OpenCLContext
::
ForcePostComputation
*>::
iterator
iter
=
cl
.
getPostComputations
().
begin
();
iter
!=
cl
.
getPostComputations
().
end
();
++
iter
)
sum
+=
(
*
iter
)
->
computeForceAndEnergy
(
includeForces
,
includeEnergy
,
groups
);
...
...
@@ -135,7 +136,7 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
cl
.
getIntegrationUtilities
().
distributeForcesFromVirtualSites
();
if
(
includeEnergy
)
{
OpenCLArray
&
energyArray
=
cl
.
getEnergyBuffer
();
if
(
cl
.
getUseDoublePrecision
())
{
if
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
()
)
{
double
*
energy
=
(
double
*
)
cl
.
getPinnedBuffer
();
energyArray
.
download
(
energy
);
for
(
int
i
=
0
;
i
<
energyArray
.
getSize
();
i
++
)
...
...
@@ -1551,8 +1552,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
else
cl
.
getPosq
().
upload
(
posqf
);
sigmaEpsilon
->
upload
(
sigmaEpsilonVector
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
NonbondedForce
::
CutoffNonPeriodic
);
nonbondedMethod
=
CalcNonbondedForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
bool
useCutoff
=
(
nonbondedMethod
!=
NoCutoff
);
bool
usePeriodic
=
(
nonbondedMethod
!=
NoCutoff
&&
nonbondedMethod
!=
CutoffNonPeriodic
);
map
<
string
,
string
>
defines
;
defines
[
"HAS_COULOMB"
]
=
(
hasCoulomb
?
"1"
:
"0"
);
defines
[
"HAS_LENNARD_JONES"
]
=
(
hasLJ
?
"1"
:
"0"
);
...
...
@@ -1580,7 +1582,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
dispersionCoefficient
=
0.0
;
alpha
=
0
;
ewaldSelfEnergy
=
0.0
;
if
(
force
.
getN
onbondedMethod
()
==
NonbondedForce
::
Ewald
)
{
if
(
n
onbondedMethod
==
Ewald
)
{
// Compute the Ewald parameters.
int
kmaxx
,
kmaxy
,
kmaxz
;
...
...
@@ -1606,10 +1608,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
cosSinSums
=
new
OpenCLArray
(
cl
,
(
2
*
kmaxx
-
1
)
*
(
2
*
kmaxy
-
1
)
*
(
2
*
kmaxz
-
1
),
elementSize
,
"cosSinSums"
);
}
}
else
if
(
force
.
getN
onbondedMethod
()
==
NonbondedForce
::
PME
)
{
else
if
(
n
onbondedMethod
==
PME
)
{
// Compute the PME parameters.
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
NonbondedForceImpl
::
calcPMEParameters
(
system
,
force
,
alpha
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
gridSizeX
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeX
);
gridSizeY
=
OpenCLFFT3D
::
findLegalDimension
(
gridSizeY
);
...
...
@@ -2056,14 +2057,26 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
// Compute other values.
NonbondedForce
::
NonbondedMethod
method
=
force
.
getNonbondedMethod
();
if
(
method
==
NonbondedForce
::
Ewald
||
method
==
NonbondedForce
::
PME
)
if
(
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
)
ewaldSelfEnergy
=
(
cl
.
getContextIndex
()
==
0
?
-
ONE_4PI_EPS0
*
alpha
*
sumSquaredCharges
/
sqrt
(
M_PI
)
:
0.0
);
if
(
force
.
getUseDispersionCorrection
()
&&
cl
.
getContextIndex
()
==
0
&&
(
method
==
NonbondedForce
::
CutoffPeriodic
||
method
==
NonbondedForce
::
Ewald
||
method
==
NonbondedForce
::
PME
))
if
(
force
.
getUseDispersionCorrection
()
&&
cl
.
getContextIndex
()
==
0
&&
(
nonbondedMethod
==
CutoffPeriodic
||
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
))
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
context
.
getSystem
(),
force
);
cl
.
invalidateMolecules
();
}
void
OpenCLCalcNonbondedForceKernel
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
if
(
nonbondedMethod
!=
PME
)
throw
OpenMMException
(
"getPMEParametersInContext: This Context is not using PME"
);
if
(
cl
.
getPlatformData
().
useCpuPme
)
cpuPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
else
{
alpha
=
this
->
alpha
;
nx
=
gridSizeX
;
ny
=
gridSizeY
;
nz
=
gridSizeZ
;
}
}
class
OpenCLCustomNonbondedForceInfo
:
public
OpenCLForceInfo
{
public:
OpenCLCustomNonbondedForceInfo
(
int
requiredBuffers
,
const
CustomNonbondedForce
&
force
)
:
OpenCLForceInfo
(
requiredBuffers
),
force
(
force
)
{
...
...
@@ -3642,7 +3655,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if
(
useLong
)
{
pairEnergyKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
longEnergyDerivs
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
numComputedValues
;
++
i
)
pairEnergyKernel
.
setArg
(
index
++
,
nb
.
getForceThreadBlockSize
()
*
elementSize
,
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
nb
.
getForceThreadBlockSize
()
)
*
elementSize
,
NULL
);
}
else
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
...
...
@@ -3808,7 +3821,9 @@ void OpenCLCalcCustomExternalForceKernel::initialize(const System& system, const
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
cl_float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
Lepton
::
ParsedExpression
energyExpression
=
Lepton
::
Parser
::
parse
(
force
.
getEnergyFunction
()).
optimize
();
map
<
string
,
Lepton
::
CustomFunction
*>
customFunctions
;
customFunctions
[
"periodicdistance"
]
=
cl
.
getExpressionUtilities
().
getPeriodicDistancePlaceholder
();
Lepton
::
ParsedExpression
energyExpression
=
Lepton
::
Parser
::
parse
(
force
.
getEnergyFunction
(),
customFunctions
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionX
=
energyExpression
.
differentiate
(
"x"
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionY
=
energyExpression
.
differentiate
(
"y"
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionZ
=
energyExpression
.
differentiate
(
"z"
).
optimize
();
...
...
@@ -4433,6 +4448,442 @@ void OpenCLCalcCustomHbondForceKernel::copyParametersToContext(ContextImpl& cont
cl
.
invalidateMolecules
();
}
class
OpenCLCustomCentroidBondForceInfo
:
public
OpenCLForceInfo
{
public:
OpenCLCustomCentroidBondForceInfo
(
const
CustomCentroidBondForce
&
force
)
:
OpenCLForceInfo
(
0
),
force
(
force
)
{
}
int
getNumParticleGroups
()
{
return
force
.
getNumBonds
();
}
void
getParticlesInGroup
(
int
index
,
vector
<
int
>&
particles
)
{
vector
<
double
>
parameters
;
vector
<
int
>
groups
;
force
.
getBondParameters
(
index
,
groups
,
parameters
);
for
(
int
i
=
0
;
i
<
groups
.
size
();
i
++
)
{
vector
<
int
>
groupParticles
;
vector
<
double
>
weights
;
force
.
getGroupParameters
(
groups
[
i
],
groupParticles
,
weights
);
particles
.
insert
(
particles
.
end
(),
groupParticles
.
begin
(),
groupParticles
.
end
());
}
}
bool
areGroupsIdentical
(
int
group1
,
int
group2
)
{
vector
<
int
>
groups1
,
groups2
;
vector
<
double
>
parameters1
,
parameters2
;
force
.
getBondParameters
(
group1
,
groups1
,
parameters1
);
force
.
getBondParameters
(
group2
,
groups2
,
parameters2
);
for
(
int
i
=
0
;
i
<
(
int
)
parameters1
.
size
();
i
++
)
if
(
parameters1
[
i
]
!=
parameters2
[
i
])
return
false
;
for
(
int
i
=
0
;
i
<
groups1
.
size
();
i
++
)
{
vector
<
int
>
groupParticles
;
vector
<
double
>
weights1
,
weights2
;
force
.
getGroupParameters
(
groups1
[
i
],
groupParticles
,
weights1
);
force
.
getGroupParameters
(
groups2
[
i
],
groupParticles
,
weights2
);
if
(
weights1
.
size
()
!=
weights2
.
size
())
return
false
;
for
(
int
j
=
0
;
j
<
weights1
.
size
();
j
++
)
if
(
weights1
[
j
]
!=
weights2
[
j
])
return
false
;
}
return
true
;
}
private:
const
CustomCentroidBondForce
&
force
;
};
OpenCLCalcCustomCentroidBondForceKernel
::~
OpenCLCalcCustomCentroidBondForceKernel
()
{
if
(
params
!=
NULL
)
delete
params
;
if
(
globals
!=
NULL
)
delete
globals
;
if
(
groupParticles
!=
NULL
)
delete
groupParticles
;
if
(
groupWeights
!=
NULL
)
delete
groupWeights
;
if
(
groupOffsets
!=
NULL
)
delete
groupOffsets
;
if
(
groupForces
!=
NULL
)
delete
groupForces
;
if
(
bondGroups
!=
NULL
)
delete
bondGroups
;
if
(
centerPositions
!=
NULL
)
delete
centerPositions
;
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
delete
tabulatedFunctions
[
i
];
}
void
OpenCLCalcCustomCentroidBondForceKernel
::
initialize
(
const
System
&
system
,
const
CustomCentroidBondForce
&
force
)
{
numBonds
=
force
.
getNumBonds
();
if
(
numBonds
==
0
)
return
;
if
(
!
cl
.
getSupports64BitGlobalAtomics
())
throw
OpenMMException
(
"CustomCentroidBondForce requires a device that supports 64 bit atomic operations"
);
cl
.
addForce
(
new
OpenCLCustomCentroidBondForceInfo
(
force
));
// Record the groups.
numGroups
=
force
.
getNumGroups
();
vector
<
cl_int
>
groupParticleVec
;
vector
<
cl_float
>
groupWeightVecFloat
;
vector
<
cl_double
>
groupWeightVecDouble
;
vector
<
cl_int
>
groupOffsetVec
;
groupOffsetVec
.
push_back
(
0
);
for
(
int
i
=
0
;
i
<
numGroups
;
i
++
)
{
vector
<
int
>
particles
;
vector
<
double
>
weights
;
force
.
getGroupParameters
(
i
,
particles
,
weights
);
groupParticleVec
.
insert
(
groupParticleVec
.
end
(),
particles
.
begin
(),
particles
.
end
());
groupOffsetVec
.
push_back
(
groupParticleVec
.
size
());
}
vector
<
vector
<
double
>
>
normalizedWeights
;
CustomCentroidBondForceImpl
::
computeNormalizedWeights
(
force
,
system
,
normalizedWeights
);
if
(
cl
.
getUseDoublePrecision
())
{
for
(
int
i
=
0
;
i
<
numGroups
;
i
++
)
groupWeightVecDouble
.
insert
(
groupWeightVecDouble
.
end
(),
normalizedWeights
[
i
].
begin
(),
normalizedWeights
[
i
].
end
());
}
else
{
for
(
int
i
=
0
;
i
<
numGroups
;
i
++
)
for
(
int
j
=
0
;
j
<
normalizedWeights
[
i
].
size
();
j
++
)
groupWeightVecFloat
.
push_back
((
float
)
normalizedWeights
[
i
][
j
]);
}
groupParticles
=
OpenCLArray
::
create
<
int
>
(
cl
,
groupParticleVec
.
size
(),
"groupParticles"
);
groupParticles
->
upload
(
groupParticleVec
);
if
(
cl
.
getUseDoublePrecision
())
{
groupWeights
=
OpenCLArray
::
create
<
double
>
(
cl
,
groupParticleVec
.
size
(),
"groupWeights"
);
groupWeights
->
upload
(
groupWeightVecDouble
);
centerPositions
=
OpenCLArray
::
create
<
mm_double4
>
(
cl
,
numGroups
,
"centerPositions"
);
}
else
{
groupWeights
=
OpenCLArray
::
create
<
float
>
(
cl
,
groupParticleVec
.
size
(),
"groupWeights"
);
groupWeights
->
upload
(
groupWeightVecFloat
);
centerPositions
=
OpenCLArray
::
create
<
mm_float4
>
(
cl
,
numGroups
,
"centerPositions"
);
}
groupOffsets
=
OpenCLArray
::
create
<
int
>
(
cl
,
groupOffsetVec
.
size
(),
"groupOffsets"
);
groupOffsets
->
upload
(
groupOffsetVec
);
groupForces
=
OpenCLArray
::
create
<
long
long
>
(
cl
,
numGroups
*
3
,
"groupForces"
);
cl
.
addAutoclearBuffer
(
*
groupForces
);
// Record the bonds.
int
groupsPerBond
=
force
.
getNumGroupsPerBond
();
vector
<
cl_int
>
bondGroupVec
(
numBonds
*
groupsPerBond
);
params
=
new
OpenCLParameterSet
(
cl
,
force
.
getNumPerBondParameters
(),
numBonds
,
"customCentroidBondParams"
);
vector
<
vector
<
float
>
>
paramVector
(
numBonds
);
for
(
int
i
=
0
;
i
<
numBonds
;
i
++
)
{
vector
<
int
>
groups
;
vector
<
double
>
parameters
;
force
.
getBondParameters
(
i
,
groups
,
parameters
);
for
(
int
j
=
0
;
j
<
groups
.
size
();
j
++
)
bondGroupVec
[
i
+
j
*
numBonds
]
=
groups
[
j
];
paramVector
[
i
].
resize
(
parameters
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
paramVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
}
params
->
setParameterValues
(
paramVector
);
bondGroups
=
OpenCLArray
::
create
<
int
>
(
cl
,
bondGroupVec
.
size
(),
"bondGroups"
);
bondGroups
->
upload
(
bondGroupVec
);
// Record the tabulated functions.
map
<
string
,
Lepton
::
CustomFunction
*>
functions
;
vector
<
pair
<
string
,
string
>
>
functionDefinitions
;
vector
<
const
TabulatedFunction
*>
functionList
;
stringstream
extraArgs
;
for
(
int
i
=
0
;
i
<
force
.
getNumTabulatedFunctions
();
i
++
)
{
functionList
.
push_back
(
&
force
.
getTabulatedFunction
(
i
));
string
name
=
force
.
getTabulatedFunctionName
(
i
);
string
arrayName
=
"table"
+
cl
.
intToString
(
i
);
functionDefinitions
.
push_back
(
make_pair
(
name
,
arrayName
));
functions
[
name
]
=
cl
.
getExpressionUtilities
().
getFunctionPlaceholder
(
force
.
getTabulatedFunction
(
i
));
int
width
;
vector
<
float
>
f
=
cl
.
getExpressionUtilities
().
computeFunctionCoefficients
(
force
.
getTabulatedFunction
(
i
),
width
);
tabulatedFunctions
.
push_back
(
OpenCLArray
::
create
<
float
>
(
cl
,
f
.
size
(),
"TabulatedFunction"
));
tabulatedFunctions
.
back
()
->
upload
(
f
);
extraArgs
<<
", __global const float"
;
if
(
width
>
1
)
extraArgs
<<
width
;
extraArgs
<<
"* restrict "
<<
arrayName
;
}
// Record information about parameters.
globalParamNames
.
resize
(
force
.
getNumGlobalParameters
());
globalParamValues
.
resize
(
force
.
getNumGlobalParameters
());
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
globalParamNames
[
i
]
=
force
.
getGlobalParameterName
(
i
);
globalParamValues
[
i
]
=
(
float
)
force
.
getGlobalParameterDefaultValue
(
i
);
}
map
<
string
,
string
>
variables
;
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
string
index
=
cl
.
intToString
(
i
+
1
);
variables
[
"x"
+
index
]
=
"pos"
+
index
+
".x"
;
variables
[
"y"
+
index
]
=
"pos"
+
index
+
".y"
;
variables
[
"z"
+
index
]
=
"pos"
+
index
+
".z"
;
}
for
(
int
i
=
0
;
i
<
force
.
getNumPerBondParameters
();
i
++
)
{
const
string
&
name
=
force
.
getPerBondParameterName
(
i
);
variables
[
name
]
=
"bondParams"
+
params
->
getParameterSuffix
(
i
);
}
if
(
force
.
getNumGlobalParameters
()
>
0
)
{
globals
=
OpenCLArray
::
create
<
float
>
(
cl
,
force
.
getNumGlobalParameters
(),
"customCentroidBondGlobals"
);
globals
->
upload
(
globalParamValues
);
extraArgs
<<
", __global const float* restrict globals"
;
for
(
int
i
=
0
;
i
<
force
.
getNumGlobalParameters
();
i
++
)
{
const
string
&
name
=
force
.
getGlobalParameterName
(
i
);
string
value
=
"globals["
+
cl
.
intToString
(
i
)
+
"]"
;
variables
[
name
]
=
value
;
}
}
// Now to generate the kernel. First, it needs to calculate all distances, angles,
// and dihedrals the expression depends on.
map
<
string
,
vector
<
int
>
>
distances
;
map
<
string
,
vector
<
int
>
>
angles
;
map
<
string
,
vector
<
int
>
>
dihedrals
;
Lepton
::
ParsedExpression
energyExpression
=
CustomCentroidBondForceImpl
::
prepareExpression
(
force
,
functions
,
distances
,
angles
,
dihedrals
);
map
<
string
,
Lepton
::
ParsedExpression
>
forceExpressions
;
set
<
string
>
computedDeltas
;
vector
<
string
>
atomNames
,
posNames
;
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
string
index
=
cl
.
intToString
(
i
+
1
);
atomNames
.
push_back
(
"P"
+
index
);
posNames
.
push_back
(
"pos"
+
index
);
}
stringstream
compute
;
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
compute
<<
"int group"
<<
(
i
+
1
)
<<
" = bondGroups[index+"
<<
(
i
*
numBonds
)
<<
"];
\n
"
;
compute
<<
"real4 pos"
<<
(
i
+
1
)
<<
" = centerPositions[group"
<<
(
i
+
1
)
<<
"];
\n
"
;
}
int
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
groups
=
iter
->
second
;
string
deltaName
=
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
compute
<<
"real4 delta"
<<
deltaName
<<
" = delta("
<<
posNames
[
groups
[
0
]]
<<
", "
<<
posNames
[
groups
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName
);
}
compute
<<
"real r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
variables
[
iter
->
first
]
=
"r_"
+
deltaName
;
forceExpressions
[
"real dEdDistance"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
groups
=
iter
->
second
;
string
deltaName1
=
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
0
]];
string
deltaName2
=
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute
<<
"real4 delta"
<<
deltaName1
<<
" = delta("
<<
posNames
[
groups
[
1
]]
<<
", "
<<
posNames
[
groups
[
0
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute
<<
"real4 delta"
<<
deltaName2
<<
" = delta("
<<
posNames
[
groups
[
1
]]
<<
", "
<<
posNames
[
groups
[
2
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
}
compute
<<
"real "
<<
angleName
<<
" = computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
variables
[
iter
->
first
]
=
angleName
;
forceExpressions
[
"real dEdAngle"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
groups
=
iter
->
second
;
string
deltaName1
=
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]];
string
deltaName2
=
atomNames
[
groups
[
2
]]
+
atomNames
[
groups
[
1
]];
string
deltaName3
=
atomNames
[
groups
[
2
]]
+
atomNames
[
groups
[
3
]];
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]]
+
atomNames
[
groups
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute
<<
"real4 delta"
<<
deltaName1
<<
" = delta("
<<
posNames
[
groups
[
0
]]
<<
", "
<<
posNames
[
groups
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute
<<
"real4 delta"
<<
deltaName2
<<
" = delta("
<<
posNames
[
groups
[
2
]]
<<
", "
<<
posNames
[
groups
[
1
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
compute
<<
"real4 delta"
<<
deltaName3
<<
" = delta("
<<
posNames
[
groups
[
2
]]
<<
", "
<<
posNames
[
groups
[
3
]]
<<
");
\n
"
;
computedDeltas
.
insert
(
deltaName3
);
}
compute
<<
"real4 "
<<
crossName1
<<
" = computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"real4 "
<<
crossName2
<<
" = computeCross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName3
<<
");
\n
"
;
compute
<<
"real "
<<
dihedralName
<<
" = computeAngle("
<<
crossName1
<<
", "
<<
crossName2
<<
");
\n
"
;
compute
<<
dihedralName
<<
" *= (delta"
<<
deltaName1
<<
".x*"
<<
crossName2
<<
".x + delta"
<<
deltaName1
<<
".y*"
<<
crossName2
<<
".y + delta"
<<
deltaName1
<<
".z*"
<<
crossName2
<<
".z < 0 ? -1 : 1);
\n
"
;
variables
[
iter
->
first
]
=
dihedralName
;
forceExpressions
[
"real dEdDihedral"
+
cl
.
intToString
(
index
)
+
" = "
]
=
energyExpression
.
differentiate
(
iter
->
first
).
optimize
();
}
// Now evaluate the expressions.
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
extraArgs
<<
", __global const "
<<
buffer
.
getType
()
<<
"* restrict globalParams"
<<
i
;
compute
<<
buffer
.
getType
()
<<
" bondParams"
<<
(
i
+
1
)
<<
" = globalParams"
<<
i
<<
"[index];
\n
"
;
}
forceExpressions
[
"energy += "
]
=
energyExpression
;
compute
<<
cl
.
getExpressionUtilities
().
createExpressions
(
forceExpressions
,
variables
,
functionList
,
functionDefinitions
,
"temp"
);
// Finally, apply forces to groups.
vector
<
string
>
forceNames
;
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
string
istr
=
cl
.
intToString
(
i
+
1
);
string
forceName
=
"force"
+
istr
;
forceNames
.
push_back
(
forceName
);
compute
<<
"real3 "
<<
forceName
<<
" = (real3) 0;
\n
"
;
compute
<<
"{
\n
"
;
Lepton
::
ParsedExpression
forceExpressionX
=
energyExpression
.
differentiate
(
"x"
+
istr
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionY
=
energyExpression
.
differentiate
(
"y"
+
istr
).
optimize
();
Lepton
::
ParsedExpression
forceExpressionZ
=
energyExpression
.
differentiate
(
"z"
+
istr
).
optimize
();
map
<
string
,
Lepton
::
ParsedExpression
>
expressions
;
if
(
!
isZeroExpression
(
forceExpressionX
))
expressions
[
forceName
+
".x -= "
]
=
forceExpressionX
;
if
(
!
isZeroExpression
(
forceExpressionY
))
expressions
[
forceName
+
".y -= "
]
=
forceExpressionY
;
if
(
!
isZeroExpression
(
forceExpressionZ
))
expressions
[
forceName
+
".z -= "
]
=
forceExpressionZ
;
if
(
expressions
.
size
()
>
0
)
compute
<<
cl
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functionList
,
functionDefinitions
,
"coordtemp"
);
compute
<<
"}
\n
"
;
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
distances
.
begin
();
iter
!=
distances
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
groups
=
iter
->
second
;
string
deltaName
=
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]];
string
value
=
"(dEdDistance"
+
cl
.
intToString
(
index
)
+
"/r_"
+
deltaName
+
")*delta"
+
deltaName
+
".xyz"
;
compute
<<
forceNames
[
groups
[
0
]]
<<
" += "
<<
"-"
<<
value
<<
";
\n
"
;
compute
<<
forceNames
[
groups
[
1
]]
<<
" += "
<<
value
<<
";
\n
"
;
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
angles
.
begin
();
iter
!=
angles
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
groups
=
iter
->
second
;
string
deltaName1
=
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
0
]];
string
deltaName2
=
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]];
compute
<<
"{
\n
"
;
compute
<<
"real4 crossProd = cross(delta"
<<
deltaName2
<<
", delta"
<<
deltaName1
<<
");
\n
"
;
compute
<<
"real lengthCross = max(length(crossProd), (real) 1e-6f);
\n
"
;
compute
<<
"real4 deltaCross0 = -cross(delta"
<<
deltaName1
<<
", crossProd)*dEdAngle"
<<
cl
.
intToString
(
index
)
<<
"/(delta"
<<
deltaName1
<<
".w*lengthCross);
\n
"
;
compute
<<
"real4 deltaCross2 = cross(delta"
<<
deltaName2
<<
", crossProd)*dEdAngle"
<<
cl
.
intToString
(
index
)
<<
"/(delta"
<<
deltaName2
<<
".w*lengthCross);
\n
"
;
compute
<<
"real4 deltaCross1 = -(deltaCross0+deltaCross2);
\n
"
;
compute
<<
forceNames
[
groups
[
0
]]
<<
".xyz += deltaCross0.xyz;
\n
"
;
compute
<<
forceNames
[
groups
[
1
]]
<<
".xyz += deltaCross1.xyz;
\n
"
;
compute
<<
forceNames
[
groups
[
2
]]
<<
".xyz += deltaCross2.xyz;
\n
"
;
compute
<<
"}
\n
"
;
}
index
=
0
;
for
(
map
<
string
,
vector
<
int
>
>::
const_iterator
iter
=
dihedrals
.
begin
();
iter
!=
dihedrals
.
end
();
++
iter
,
++
index
)
{
const
vector
<
int
>&
groups
=
iter
->
second
;
string
deltaName1
=
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]];
string
deltaName2
=
atomNames
[
groups
[
2
]]
+
atomNames
[
groups
[
1
]];
string
deltaName3
=
atomNames
[
groups
[
2
]]
+
atomNames
[
groups
[
3
]];
string
crossName1
=
"cross_"
+
deltaName1
+
"_"
+
deltaName2
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
compute
<<
"{
\n
"
;
compute
<<
"real r = sqrt(delta"
<<
deltaName2
<<
".w);
\n
"
;
compute
<<
"real4 ff;
\n
"
;
compute
<<
"ff.x = (-dEdDihedral"
<<
cl
.
intToString
(
index
)
<<
"*r)/"
<<
crossName1
<<
".w;
\n
"
;
compute
<<
"ff.y = (delta"
<<
deltaName1
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName1
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName1
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.z = (delta"
<<
deltaName3
<<
".x*delta"
<<
deltaName2
<<
".x + delta"
<<
deltaName3
<<
".y*delta"
<<
deltaName2
<<
".y + delta"
<<
deltaName3
<<
".z*delta"
<<
deltaName2
<<
".z)/delta"
<<
deltaName2
<<
".w;
\n
"
;
compute
<<
"ff.w = (dEdDihedral"
<<
cl
.
intToString
(
index
)
<<
"*r)/"
<<
crossName2
<<
".w;
\n
"
;
compute
<<
"real4 internalF0 = ff.x*"
<<
crossName1
<<
";
\n
"
;
compute
<<
"real4 internalF3 = ff.w*"
<<
crossName2
<<
";
\n
"
;
compute
<<
"real4 s = ff.y*internalF0 - ff.z*internalF3;
\n
"
;
compute
<<
forceNames
[
groups
[
0
]]
<<
".xyz += internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
groups
[
1
]]
<<
".xyz += s.xyz-internalF0.xyz;
\n
"
;
compute
<<
forceNames
[
groups
[
2
]]
<<
".xyz += -s.xyz-internalF3.xyz;
\n
"
;
compute
<<
forceNames
[
groups
[
3
]]
<<
".xyz += internalF3.xyz;
\n
"
;
compute
<<
"}
\n
"
;
}
// Save the forces to global memory.
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
compute
<<
"atom_add(&groupForce[group"
<<
(
i
+
1
)
<<
"], (long) (force"
<<
(
i
+
1
)
<<
".x*0x100000000));
\n
"
;
compute
<<
"atom_add(&groupForce[group"
<<
(
i
+
1
)
<<
"+NUM_GROUPS], (long) (force"
<<
(
i
+
1
)
<<
".y*0x100000000));
\n
"
;
compute
<<
"atom_add(&groupForce[group"
<<
(
i
+
1
)
<<
"+NUM_GROUPS*2], (long) (force"
<<
(
i
+
1
)
<<
".z*0x100000000));
\n
"
;
}
map
<
string
,
string
>
replacements
;
replacements
[
"M_PI"
]
=
cl
.
doubleToString
(
M_PI
);
replacements
[
"NUM_GROUPS"
]
=
cl
.
intToString
(
numGroups
);
replacements
[
"NUM_BONDS"
]
=
cl
.
intToString
(
numBonds
);
replacements
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
replacements
[
"EXTRA_ARGS"
]
=
extraArgs
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customCentroidBond
,
replacements
));
index
=
0
;
computeCentersKernel
=
cl
::
Kernel
(
program
,
"computeGroupCenters"
);
computeCentersKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
computeCentersKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupParticles
->
getDeviceBuffer
());
computeCentersKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupWeights
->
getDeviceBuffer
());
computeCentersKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupOffsets
->
getDeviceBuffer
());
computeCentersKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
centerPositions
->
getDeviceBuffer
());
index
=
0
;
groupForcesKernel
=
cl
::
Kernel
(
program
,
"computeGroupForces"
);
groupForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupForces
->
getDeviceBuffer
());
index
++
;
// Energy buffer hasn't been created yet
groupForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
centerPositions
->
getDeviceBuffer
());
groupForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bondGroups
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
tabulatedFunctions
.
size
();
i
++
)
groupForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
tabulatedFunctions
[
i
]
->
getDeviceBuffer
());
if
(
globals
!=
NULL
)
groupForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
groupForcesKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
params
->
getBuffers
()[
i
].
getMemory
());
index
=
0
;
applyForcesKernel
=
cl
::
Kernel
(
program
,
"applyForcesToAtoms"
);
applyForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupParticles
->
getDeviceBuffer
());
applyForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupWeights
->
getDeviceBuffer
());
applyForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupOffsets
->
getDeviceBuffer
());
applyForcesKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
groupForces
->
getDeviceBuffer
());
}
double
OpenCLCalcCustomCentroidBondForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
if
(
numBonds
==
0
)
return
0.0
;
if
(
globals
!=
NULL
)
{
bool
changed
=
false
;
for
(
int
i
=
0
;
i
<
(
int
)
globalParamNames
.
size
();
i
++
)
{
float
value
=
(
float
)
context
.
getParameter
(
globalParamNames
[
i
]);
if
(
value
!=
globalParamValues
[
i
])
changed
=
true
;
globalParamValues
[
i
]
=
value
;
}
if
(
changed
)
globals
->
upload
(
globalParamValues
);
}
cl
.
executeKernel
(
computeCentersKernel
,
OpenCLContext
::
TileSize
*
numGroups
);
groupForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
cl
.
executeKernel
(
groupForcesKernel
,
numBonds
);
applyForcesKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getLongForceBuffer
().
getDeviceBuffer
());
cl
.
executeKernel
(
applyForcesKernel
,
OpenCLContext
::
TileSize
*
numGroups
);
return
0.0
;
}
void
OpenCLCalcCustomCentroidBondForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomCentroidBondForce
&
force
)
{
if
(
numBonds
!=
force
.
getNumBonds
())
throw
OpenMMException
(
"updateParametersInContext: The number of bonds has changed"
);
if
(
numBonds
==
0
)
return
;
// Record the per-bond parameters.
vector
<
vector
<
float
>
>
paramVector
(
numBonds
);
vector
<
int
>
particles
;
vector
<
double
>
parameters
;
for
(
int
i
=
0
;
i
<
numBonds
;
i
++
)
{
force
.
getBondParameters
(
i
,
particles
,
parameters
);
paramVector
[
i
].
resize
(
parameters
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
parameters
.
size
();
j
++
)
paramVector
[
i
][
j
]
=
(
float
)
parameters
[
j
];
}
params
->
setParameterValues
(
paramVector
);
// Mark that the current reordering may be invalid.
cl
.
invalidateMolecules
();
}
class
OpenCLCustomCompoundBondForceInfo
:
public
OpenCLForceInfo
{
public:
OpenCLCustomCompoundBondForceInfo
(
const
CustomCompoundBondForce
&
force
)
:
OpenCLForceInfo
(
0
),
force
(
force
)
{
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
fd473eea
...
...
@@ -180,13 +180,29 @@ void OpenCLNonbondedUtilities::requestExclusions(const vector<vector<int> >& exc
}
static
bool
compareUshort2
(
mm_ushort2
a
,
mm_ushort2
b
)
{
// This version is used on devices with SIMD width of 32 or less. It sorts tiles to improve cache efficiency.
return
((
a
.
y
<
b
.
y
)
||
(
a
.
y
==
b
.
y
&&
a
.
x
<
b
.
x
));
}
static
bool
compareUshort2LargeSIMD
(
mm_ushort2
a
,
mm_ushort2
b
)
{
// This version is used on devices with SIMD width greater than 32. It puts diagonal tiles before off-diagonal
// ones to reduce thread divergence.
if
(
a
.
x
==
a
.
y
)
{
if
(
b
.
x
==
b
.
y
)
return
(
a
.
x
<
b
.
x
);
return
true
;
}
if
(
b
.
x
==
b
.
y
)
return
false
;
return
((
a
.
y
<
b
.
y
)
||
(
a
.
y
==
b
.
y
&&
a
.
x
<
b
.
x
));
}
void
OpenCLNonbondedUtilities
::
initialize
(
const
System
&
system
)
{
if
(
atomExclusions
.
size
()
==
0
)
{
// No exclusions were specifically requested, so just mark every atom as not interacting with itself.
atomExclusions
.
resize
(
context
.
getNumAtoms
());
for
(
int
i
=
0
;
i
<
(
int
)
atomExclusions
.
size
();
i
++
)
atomExclusions
[
i
].
push_back
(
i
);
...
...
@@ -199,7 +215,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
setAtomBlockRange
(
context
.
getContextIndex
()
/
(
double
)
numContexts
,
(
context
.
getContextIndex
()
+
1
)
/
(
double
)
numContexts
);
// Build a list of tiles that contain exclusions.
set
<
pair
<
int
,
int
>
>
tilesWithExclusions
;
for
(
int
atom1
=
0
;
atom1
<
(
int
)
atomExclusions
.
size
();
++
atom1
)
{
int
x
=
atom1
/
OpenCLContext
::
TileSize
;
...
...
@@ -212,7 +228,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
vector
<
mm_ushort2
>
exclusionTilesVec
;
for
(
set
<
pair
<
int
,
int
>
>::
const_iterator
iter
=
tilesWithExclusions
.
begin
();
iter
!=
tilesWithExclusions
.
end
();
++
iter
)
exclusionTilesVec
.
push_back
(
mm_ushort2
((
unsigned
short
)
iter
->
first
,
(
unsigned
short
)
iter
->
second
));
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
mpareUshort2
);
sort
(
exclusionTilesVec
.
begin
(),
exclusionTilesVec
.
end
(),
co
ntext
.
getSIMDWidth
()
<=
32
?
compareUshort2
:
compareUshort2LargeSIMD
);
exclusionTiles
=
OpenCLArray
::
create
<
mm_ushort2
>
(
context
,
exclusionTilesVec
.
size
(),
"exclusionTiles"
);
exclusionTiles
->
upload
(
exclusionTilesVec
);
map
<
pair
<
int
,
int
>
,
int
>
exclusionTileMap
;
...
...
@@ -341,37 +357,43 @@ void OpenCLNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
bool
rebuild
=
false
;
do
{
setPeriodicBoxArgs
(
context
,
kernels
.
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
kernels
.
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
forceRebuildNeighborList
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
kernels
.
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
}
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
)
{
void
OpenCLNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
if
((
forceGroups
&
groupFlags
)
==
0
)
return
;
KernelSet
&
kernels
=
groupKernels
[
forceGroups
];
if
(
kernels
.
hasForces
)
{
cl
::
Kernel
&
kernel
=
(
includeForces
?
(
includeEnergy
?
kernels
.
forceEnergyKernel
:
kernels
.
forceKernel
)
:
kernels
.
energyKernel
);
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernel
)
==
NULL
)
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
if
(
useCutoff
)
setPeriodicBoxArgs
(
context
,
kernels
.
forceKernel
,
9
);
context
.
executeKernel
(
kernels
.
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
setPeriodicBoxArgs
(
context
,
kernel
,
9
);
context
.
executeKernel
(
kernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
void
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
bool
OpenCLNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
return
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
interactingTiles
->
getSize
())
return
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
...
...
@@ -387,14 +409,28 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
iter
->
second
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
KernelSet
&
kernels
=
iter
->
second
;
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
energyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
14
,
maxTiles
);
kernels
.
forceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
17
,
interactingAtoms
->
getDeviceBuffer
());
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
}
forceRebuildNeighborList
=
true
;
return
true
;
}
void
OpenCLNonbondedUtilities
::
setUsePadding
(
bool
padding
)
{
...
...
@@ -410,12 +446,23 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
if
(
useCutoff
)
{
// We are using a cutoff, and the kernels have already been created.
for
(
map
<
int
,
KernelSet
>::
iterator
iter
=
groupKernels
.
begin
();
iter
!=
groupKernels
.
end
();
++
iter
)
{
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
iter
->
second
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
iter
->
second
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
KernelSet
&
kernels
=
iter
->
second
;
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceKernel
)
!=
NULL
)
{
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
energyKernel
)
!=
NULL
)
{
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
energyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
if
(
*
reinterpret_cast
<
cl_kernel
*>
(
&
kernels
.
forceEnergyKernel
)
!=
NULL
)
{
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
kernels
.
forceEnergyKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
}
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
}
forceRebuildNeighborList
=
true
;
}
...
...
@@ -433,8 +480,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
}
kernels
.
hasForces
=
(
source
.
size
()
>
0
);
kernels
.
cutoffDistance
=
cutoff
;
if
(
kernels
.
hasForces
)
kernels
.
forceKernel
=
createInteractionKernel
(
source
,
parameters
,
arguments
,
true
,
true
,
groups
);
kernels
.
source
=
source
;
if
(
useCutoff
)
{
double
padding
=
(
usePadding
?
0.1
*
cutoff
:
0.0
);
double
paddedCutoff
=
cutoff
+
padding
;
...
...
@@ -491,7 +537,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
kernels
.
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
18
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
kernels
.
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
groupSize
-=
32
;
if
(
groupSize
<
32
)
throw
OpenMMException
(
"Failed to create findInteractingBlocks kernel"
);
...
...
@@ -504,7 +550,7 @@ void OpenCLNonbondedUtilities::createKernelsForGroups(int groups) {
groupKernels
[
groups
]
=
kernels
;
}
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
)
{
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
,
int
groups
,
bool
includeForces
,
bool
includeEnergy
)
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
...
...
@@ -603,6 +649,10 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useCutoff
&&
context
.
getSIMDWidth
()
<
32
)
defines
[
"PRUNE_BY_CUTOFF"
]
=
"1"
;
if
(
includeForces
)
defines
[
"INCLUDE_FORCES"
]
=
"1"
;
if
(
includeEnergy
)
defines
[
"INCLUDE_ENERGY"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
double
maxCutoff
=
0.0
;
for
(
int
i
=
0
;
i
<
32
;
i
++
)
{
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
fd473eea
...
...
@@ -579,6 +579,10 @@ void OpenCLParallelCalcNonbondedForceKernel::copyParametersToContext(ContextImpl
getKernel
(
i
).
copyParametersToContext
(
context
,
force
);
}
void
OpenCLParallelCalcNonbondedForceKernel
::
getPMEParameters
(
double
&
alpha
,
int
&
nx
,
int
&
ny
,
int
&
nz
)
const
{
dynamic_cast
<
const
OpenCLCalcNonbondedForceKernel
&>
(
kernels
[
0
].
getImpl
()).
getPMEParameters
(
alpha
,
nx
,
ny
,
nz
);
}
class
OpenCLParallelCalcCustomNonbondedForceKernel
::
Task
:
public
OpenCLContext
::
WorkTask
{
public:
Task
(
ContextImpl
&
context
,
OpenCLCalcCustomNonbondedForceKernel
&
kernel
,
bool
includeForce
,
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
fd473eea
...
...
@@ -74,6 +74,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
CalcCustomGBForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomExternalForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomHbondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCentroidBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomCompoundBondForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomManyParticleForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
...
...
platforms/opencl/src/kernels/customCentroidBond.cl
0 → 100644
View file @
fd473eea
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
/**
*
Compute
the
center
of
each
group.
*/
__kernel
void
computeGroupCenters
(
__global
const
real4*
restrict
posq,
__global
const
int*
restrict
groupParticles,
__global
const
real*
restrict
groupWeights,
__global
const
int*
restrict
groupOffsets,
__global
real4*
restrict
centerPositions
)
{
__local
volatile
real3
temp[64]
;
for
(
int
group
=
get_group_id
(
0
)
; group < NUM_GROUPS; group += get_num_groups(0)) {
//
The
threads
in
this
block
work
together
to
compute
the
center
one
group.
int
firstIndex
=
groupOffsets[group]
;
int
lastIndex
=
groupOffsets[group+1]
;
real3
center
=
(
real3
)
0
;
for
(
int
index
=
get_local_id
(
0
)
; index < lastIndex-firstIndex; index += get_local_size(0)) {
int
atom
=
groupParticles[firstIndex+index]
;
real
weight
=
groupWeights[firstIndex+index]
;
real4
pos
=
posq[atom]
;
center.x
+=
weight*pos.x
;
center.y
+=
weight*pos.y
;
center.z
+=
weight*pos.z
;
}
//
Sum
the
values.
int
thread
=
get_local_id
(
0
)
;
temp[thread].x
=
center.x
;
temp[thread].y
=
center.y
;
temp[thread].z
=
center.z
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
thread
<
32
)
{
temp[thread].x
+=
temp[thread+32].x
;
temp[thread].y
+=
temp[thread+32].y
;
temp[thread].z
+=
temp[thread+32].z
;
}
SYNC_WARPS
;
if
(
thread
<
16
)
{
temp[thread].x
+=
temp[thread+16].x
;
temp[thread].y
+=
temp[thread+16].y
;
temp[thread].z
+=
temp[thread+16].z
;
}
SYNC_WARPS
;
if
(
thread
<
8
)
{
temp[thread].x
+=
temp[thread+8].x
;
temp[thread].y
+=
temp[thread+8].y
;
temp[thread].z
+=
temp[thread+8].z
;
}
SYNC_WARPS
;
if
(
thread
<
4
)
{
temp[thread].x
+=
temp[thread+4].x
;
temp[thread].y
+=
temp[thread+4].y
;
temp[thread].z
+=
temp[thread+4].z
;
}
SYNC_WARPS
;
if
(
thread
<
2
)
{
temp[thread].x
+=
temp[thread+2].x
;
temp[thread].y
+=
temp[thread+2].y
;
temp[thread].z
+=
temp[thread+2].z
;
}
SYNC_WARPS
;
if
(
thread
==
0
)
centerPositions[group]
=
(
real4
)
(
temp[0].x+temp[1].x,
temp[0].y+temp[1].y,
temp[0].z+temp[1].z,
0
)
;
}
}
/**
*
Compute
the
difference
between
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
real4
delta
(
real4
vec1,
real4
vec2
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
/**
*
Compute
the
angle
between
two
vectors.
The
w
component
of
each
vector
should
contain
the
squared
magnitude.
*/
real
computeAngle
(
real4
vec1,
real4
vec2
)
{
real
dotProduct
=
vec1.x*vec2.x
+
vec1.y*vec2.y
+
vec1.z*vec2.z
;
real
cosine
=
dotProduct*RSQRT
(
vec1.w*vec2.w
)
;
real
angle
;
if
(
cosine
>
0.99f
||
cosine
<
-0.99f
)
{
//
We
're
close
to
the
singularity
in
acos
()
,
so
take
the
cross
product
and
use
asin
()
instead.
real4
crossProduct
=
cross
(
vec1,
vec2
)
;
real
scale
=
vec1.w*vec2.w
;
angle
=
asin
(
SQRT
(
dot
(
crossProduct,
crossProduct
)
/scale
))
;
if
(
cosine
<
0
)
angle
=
M_PI-angle
;
}
else
angle
=
acos
(
cosine
)
;
return
angle
;
}
/**
*
Compute
the
cross
product
of
two
vectors,
setting
the
fourth
component
to
the
squared
magnitude.
*/
real4
computeCross
(
real4
vec1,
real4
vec2
)
{
real4
result
=
cross
(
vec1,
vec2
)
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
}
/**
*
Compute
the
forces
on
groups
based
on
the
bonds.
*/
__kernel
void
computeGroupForces
(
__global
long*
restrict
groupForce,
__global
mixed*
restrict
energyBuffer,
__global
const
real4*
restrict
centerPositions,
__global
const
int*
restrict
bondGroups
EXTRA_ARGS
)
{
mixed
energy
=
0
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_BONDS; index += get_global_size(0)) {
COMPUTE_FORCE
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
/**
*
Apply
the
forces
from
the
group
centers
to
the
individual
atoms.
*/
__kernel
void
applyForcesToAtoms
(
__global
const
int*
restrict
groupParticles,
__global
const
real*
restrict
groupWeights,
__global
const
int*
restrict
groupOffsets,
__global
const
long*
restrict
groupForce,
__global
long*
restrict
atomForce
)
{
for
(
int
group
=
get_group_id
(
0
)
; group < NUM_GROUPS; group += get_num_groups(0)) {
long
fx
=
groupForce[group]
;
long
fy
=
groupForce[group+NUM_GROUPS]
;
long
fz
=
groupForce[group+NUM_GROUPS*2]
;
int
firstIndex
=
groupOffsets[group]
;
int
lastIndex
=
groupOffsets[group+1]
;
for
(
int
index
=
get_local_id
(
0
)
; index < lastIndex-firstIndex; index += get_local_size(0)) {
int
atom
=
groupParticles[firstIndex+index]
;
real
weight
=
groupWeights[firstIndex+index]
;
atom_add
(
&atomForce[atom],
(
long
)
(
fx*weight
))
;
atom_add
(
&atomForce[atom+PADDED_NUM_ATOMS],
(
long
)
(
fy*weight
))
;
atom_add
(
&atomForce[atom+2*PADDED_NUM_ATOMS],
(
long
)
(
fz*weight
))
;
}
}
}
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
fd473eea
...
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
mixed
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
...
...
@@ -31,7 +31,7 @@ __kernel void computeN2Energy(
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
fd473eea
...
...
@@ -16,7 +16,7 @@ __kernel void computeN2Energy(
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
mixed
*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
...
...
@@ -27,7 +27,7 @@ __kernel void computeN2Energy(
unsigned
int
numTiles
#
endif
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
//
First
loop:
process
tiles
that
contain
exclusions.
...
...
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
View file @
fd473eea
...
...
@@ -9,9 +9,9 @@
*
Reduce
the
derivatives
computed
in
the
N^2
energy
kernel,
and
compute
all
per-particle
energy
terms.
*/
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
real4*
restrict
forceBuffers,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
//
Reduce
the
derivatives
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
fd473eea
...
...
@@ -53,11 +53,11 @@ real4 computeCross(real4 vec1, real4 vec2) {
/**
* Compute forces on donors.
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
real
energy = 0;
mixed
energy = 0;
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
real4 f3 = (real4) 0;
...
...
@@ -142,7 +142,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
/**
* Compute forces on acceptors.
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
fd473eea
...
...
@@ -72,7 +72,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*
Compute
the
interaction.
*/
__kernel
void
computeInteraction
(
__global
long*
restrict
forceBuffers,
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
long*
restrict
forceBuffers,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
...
...
@@ -84,7 +84,7 @@ __kernel void computeInteraction(
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
#
endif
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
.0f
;
mixed
energy
=
0
;
//
Loop
over
particles
to
be
the
first
one
in
the
set.
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
fd473eea
...
...
@@ -42,14 +42,14 @@ __kernel void computeInteractionGroups(
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
; // index within the warp
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
; // block warpIndex
real
energy
=
0
.0f
;
mixed
energy
=
0
;
__local
AtomData
localData[LOCAL_MEMORY_SIZE]
;
const
unsigned
int
startTile
=
FIRST_TILE+warp*
(
LAST_TILE-FIRST_TILE
)
/totalWarps
;
...
...
platforms/opencl/src/kernels/ewald.cl
View file @
fd473eea
...
...
@@ -6,13 +6,13 @@ real2 multofReal2(real2 a, real2 b) {
*
Precompute
the
cosine
and
sine
sums
which
appear
in
each
force
term.
*/
__kernel
void
calculateEwaldCosSinSums
(
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real2*
restrict
cosSinSum,
real4
reciprocalPeriodicBoxSize,
real
reciprocalCoefficient
)
{
__kernel
void
calculateEwaldCosSinSums
(
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real2*
restrict
cosSinSum,
real4
reciprocalPeriodicBoxSize,
real
reciprocalCoefficient
)
{
const
unsigned
int
ksizex
=
2*KMAX_X-1
;
const
unsigned
int
ksizey
=
2*KMAX_Y-1
;
const
unsigned
int
ksizez
=
2*KMAX_Z-1
;
const
unsigned
int
totalK
=
ksizex*ksizey*ksizez
;
unsigned
int
index
=
get_global_id
(
0
)
;
real
energy
=
0
.0f
;
mixed
energy
=
0
;
while
(
index
<
(
KMAX_Y-1
)
*ksizez+KMAX_Z
)
index
+=
get_global_size
(
0
)
;
while
(
index
<
totalK
)
{
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
fd473eea
...
...
@@ -387,7 +387,7 @@ __kernel void computeGBSAForce1(
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
__global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...
...
@@ -400,7 +400,7 @@ __kernel void computeGBSAForce1(
const unsigned int warp = get_global_id(0)/TILE_SIZE;
const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1);
const unsigned int tbx = get_local_id(0) - tgx;
real
energy = 0
.0f
;
mixed
energy = 0;
__local AtomData2 localData[FORCE_WORK_GROUP_SIZE];
// First loop: process tiles that contain exclusions.
...
...
platforms/opencl/src/kernels/gbsaObcReductions.cl
View file @
fd473eea
...
...
@@ -50,8 +50,8 @@ __kernel void reduceBornForce(int bufferSize, int numBuffers, __global real* bor
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
const
long*
restrict
bornForceIn,
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
float2*
restrict
params,
__global
const
real*
restrict
bornRadii,
__global
const
real*
restrict
obcChain
)
{
real
energy
=
0
.0f
;
__global
mixed
*
restrict
energyBuffer,
__global
const
float2*
restrict
params,
__global
const
real*
restrict
bornRadii,
__global
const
real*
restrict
obcChain
)
{
mixed
energy
=
0
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
//
Sum
the
Born
force
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
fd473eea
...
...
@@ -409,7 +409,7 @@ __kernel void computeGBSAForce1(
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
__global
real
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global
mixed
* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...
...
@@ -418,7 +418,7 @@ __kernel void computeGBSAForce1(
unsigned int numTiles,
#endif
__global const ushort2* exclusionTiles) {
real
energy = 0
.0f
;
mixed
energy = 0;
__local AtomData2 localData[TILE_SIZE];
// First loop: process tiles that contain exclusions.
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
fd473eea
...
...
@@ -22,10 +22,10 @@ __kernel void computeNonbonded(
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
...
...
@@ -34,11 +34,11 @@ __kernel void computeNonbonded(
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
const
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
const
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
real
energy
=
0
;
mixed
energy
=
0
;
__local
AtomData
localData[FORCE_WORK_GROUP_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+warp*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
warp+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/totalWarps
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
...
@@ -87,11 +87,13 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += 0.5f*tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
force.xyz -= delta.xyz*dEdR;
#else
force.xyz -= dEdR1.xyz;
#endif
#endif
#ifdef USE_EXCLUSIONS
excl >>= 1;
#endif
...
...
@@ -100,7 +102,7 @@ __kernel void computeNonbonded(
}
else {
// This is an off-diagonal tile.
const unsigned int localAtomIndex = get_local_id(0);
unsigned int j = y*TILE_SIZE + tgx;
real4 tempPosq = posq[j];
...
...
@@ -126,7 +128,7 @@ __kernel void computeNonbonded(
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
PRUNE_BY_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
...
...
@@ -144,6 +146,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
...
...
@@ -156,6 +159,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
...
...
@@ -169,6 +173,7 @@ __kernel void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset], (long) (force.x*0x100000000));
...
...
@@ -186,6 +191,7 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz += force.xyz;
if (x != y)
forceBuffers[offset2] += (real4) (localData[get_local_id(0)].fx, localData[get_local_id(0)].fy, localData[get_local_id(0)].fz, 0.0f);
#endif
#endif
}
...
...
@@ -213,7 +219,7 @@ __kernel void computeNonbonded(
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
...
...
@@ -245,7 +251,7 @@ __kernel void computeNonbonded(
}
else
skipTiles[get_local_id(0)] = end;
skipBase += TILE_SIZE;
skipBase += TILE_SIZE;
currentSkipIndex = tbx;
SYNC_WARPS;
}
...
...
@@ -300,7 +306,7 @@ __kernel void computeNonbonded(
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
@@ -318,6 +324,7 @@ __kernel void computeNonbonded(
real tempEnergy = 0;
COMPUTE_INTERACTION
energy += tempEnergy;
#ifdef INCLUDE_FORCES
#ifdef USE_SYMMETRIC
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
...
...
@@ -330,6 +337,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
#endif
...
...
@@ -352,7 +360,7 @@ __kernel void computeNonbonded(
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
@@ -370,6 +378,7 @@ __kernel void computeNonbonded(
real
tempEnergy
=
0
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_SYMMETRIC
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
...
...
@@ -382,6 +391,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fz
+=
dEdR2.z
;
#
endif
#
endif
#
ifdef
PRUNE_BY_CUTOFF
}
#
endif
...
...
@@ -392,6 +402,7 @@ __kernel void computeNonbonded(
//
Write
results.
#
ifdef
INCLUDE_FORCES
#
ifdef
USE_CUTOFF
unsigned
int
atom2
=
atomIndices[get_local_id
(
0
)
]
;
#
else
...
...
@@ -412,9 +423,12 @@ __kernel void computeNonbonded(
forceBuffers[offset1].xyz
+=
force.xyz
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
forceBuffers[offset2]
+=
(
real4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
#
endif
#
endif
}
pos++
;
}
#
ifdef
INCLUDE_ENERGY
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
#
endif
}
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
fd473eea
...
...
@@ -19,19 +19,19 @@ __kernel void computeNonbonded(
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
mixed
*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
restrict
exclusionTiles,
unsigned
int
startTileIndex,
unsigned
int
numTileIndices
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
endif
PARAMETER_ARGUMENTS
)
{
real
energy
=
0
;
mixed
energy
=
0
;
__local
AtomData
localData[TILE_SIZE]
;
//
First
loop:
process
tiles
that
contain
exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE+get_group_id
(
0
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE+
(
get_group_id
(
0
)
+1
)
*
(
LAST_EXCLUSION_TILE-FIRST_EXCLUSION_TILE
)
/get_num_groups
(
0
)
;
for
(
int
pos
=
firstExclusionTile
; pos < lastExclusionTile; pos++) {
...
...
@@ -70,7 +70,7 @@ __kernel void computeNonbonded(
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF
_SQUARED
)
{
if
(
r2
<
MAX_
CUTOFF
*MAX_CUTOFF
)
{
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
r
=
r2*invR
;
...
...
@@ -138,7 +138,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
@@ -228,18 +228,18 @@ __kernel void computeNonbonded(
while (pos < end) {
const bool hasExclusions = false;
bool includeTile = true;
// Extract the coordinates of this tile.
int x, y;
bool singlePeriodicCopy = false;
#ifdef USE_CUTOFF
if (numTiles <= maxTiles) {
x = tiles[pos];
real4 blockSizeX = blockSize[x];
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >= CUTOFF);
singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >=
MAX_
CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >=
MAX_
CUTOFF &&
0.5f*periodicBoxSize.z-blockSizeX.z >=
MAX_
CUTOFF);
}
else
#endif
...
...
@@ -304,7 +304,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = dot(delta.xyz, delta.xyz);
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
real invR = RSQRT(r2);
real r = r2*invR;
unsigned int atom2 = j;
...
...
@@ -367,7 +367,7 @@ __kernel void computeNonbonded(
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
if (r2 < CUTOFF
_SQUARED
) {
if (r2 <
MAX_
CUTOFF
*MAX_CUTOFF
) {
#endif
real invR = RSQRT(r2);
real r = r2*invR;
...
...
platforms/opencl/src/kernels/pme.cl
View file @
fd473eea
...
...
@@ -325,14 +325,14 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global c
}
}
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global
real
* restrict energyBuffer,
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global
mixed
* restrict energyBuffer,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real
energy = 0;
mixed
energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
...
...
platforms/opencl/staticTarget/CMakeLists.txt
View file @
fd473eea
...
...
@@ -15,6 +15,6 @@ ADD_LIBRARY(${STATIC_TARGET} STATIC ${SOURCE_FILES} ${SOURCE_INCLUDE_FILES} ${AP
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
OPENMM_LIBRARY_NAME
}
${
OPENCL_LIBRARIES
}
${
PTHREADS_LIB_STATIC
}
)
#-DPTW32_STATIC_LIB only works for the windows pthreads.
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
COMPILE
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
SET_TARGET_PROPERTIES
(
${
STATIC_TARGET
}
PROPERTIES LINK_FLAGS
"
${
EXTRA_
LINK
_FLAGS
}
"
COMPILE_FLAGS
"
${
EXTRA_COMPILE_FLAGS
}
-DOPENMM_OPENCL_BUILDING_STATIC_LIBRARY -DPTW32_STATIC_LIB"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
STATIC_TARGET
}
)
Prev
1
…
5
6
7
8
9
10
11
12
13
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment