Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
61a8b779
Commit
61a8b779
authored
Oct 09, 2014
by
peastman
Browse files
Merge pull request #649 from peastman/opt
Further optimizations to NonbondedForce
parents
fea6a22f
3d1b2186
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
136 additions
and
139 deletions
+136
-139
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+7
-8
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+90
-104
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+16
-9
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+2
-0
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+12
-13
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+8
-4
No files found.
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
61a8b779
{
#if USE_EWALD
#if USE_EWALD
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
if
((
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
{
unsigned
int
includeInteraction
=
((
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
;
const
real
alphaR
=
EWALD_ALPHA
*
r
;
const
real
alphaR
=
EWALD_ALPHA
*
r
;
const
real
expAlphaRSqr
=
EXP
(
-
alphaR
*
alphaR
);
const
real
expAlphaRSqr
=
EXP
(
-
alphaR
*
alphaR
);
const
real
prefactor
=
138.935456
f
*
posq1
.
w
*
posq2
.
w
*
invR
;
const
real
prefactor
=
138.935456
f
*
posq1
.
w
*
posq2
.
w
*
invR
;
...
@@ -44,16 +45,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -44,16 +45,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
}
}
#endif
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
ljEnergy
+
prefactor
*
erfcAlphaR
;
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
#else
tempForce
=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempForce
=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
prefactor
*
erfcAlphaR
;
tempEnergy
+=
includeInteraction
?
prefactor
*
erfcAlphaR
:
0
;
#endif
#endif
}
}
dEdR
+=
tempForce
*
invR
*
invR
;
dEdR
+=
includeInteraction
?
tempForce
*
invR
*
invR
:
0
;
}
#else
#else
{
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
includeInteraction
=
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
);
unsigned
int
includeInteraction
=
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
);
#else
#else
...
@@ -91,5 +90,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -91,5 +90,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
#endif
#endif
#endif
#endif
dEdR
+=
includeInteraction
?
tempForce
*
invR
*
invR
:
0
;
dEdR
+=
includeInteraction
?
tempForce
*
invR
*
invR
:
0
;
}
#endif
#endif
}
platforms/cuda/src/kernels/nonbonded.cu
View file @
61a8b779
...
@@ -228,9 +228,6 @@ extern "C" __global__ void computeNonbonded(
...
@@ -228,9 +228,6 @@ extern "C" __global__ void computeNonbonded(
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
real
r
=
r2
*
invR
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -276,9 +273,6 @@ extern "C" __global__ void computeNonbonded(
...
@@ -276,9 +273,6 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif
#endif // end USE_SYMMETRIC
#endif // end USE_SYMMETRIC
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
excl
>>=
1
;
#endif
#endif
...
@@ -431,7 +425,6 @@ extern "C" __global__ void computeNonbonded(
...
@@ -431,7 +425,6 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
real
r
=
r2
*
invR
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -477,7 +470,6 @@ extern "C" __global__ void computeNonbonded(
...
@@ -477,7 +470,6 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif
#endif // end USE_SYMMETRIC
#endif // end USE_SYMMETRIC
}
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
SHUFFLE_WARP_DATA
#endif
#endif
...
@@ -503,9 +495,6 @@ extern "C" __global__ void computeNonbonded(
...
@@ -503,9 +495,6 @@ extern "C" __global__ void computeNonbonded(
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
real
r
=
r2
*
invR
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -551,9 +540,6 @@ extern "C" __global__ void computeNonbonded(
...
@@ -551,9 +540,6 @@ extern "C" __global__ void computeNonbonded(
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif
#endif // end USE_SYMMETRIC
#endif // end USE_SYMMETRIC
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
SHUFFLE_WARP_DATA
#endif
#endif
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
61a8b779
...
@@ -633,7 +633,7 @@ private:
...
@@ -633,7 +633,7 @@ private:
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
bool
hasCoulomb
,
hasLJ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
61a8b779
...
@@ -1609,12 +1609,16 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1609,12 +1609,16 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
usePmeQueue
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
if
(
usePmeQueue
)
{
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
if
(
recipForceGroup
<
0
)
if
(
recipForceGroup
<
0
)
recipForceGroup
=
force
.
getForceGroup
();
recipForceGroup
=
force
.
getForceGroup
();
cl
.
addPreComputation
(
new
SyncQueuePreComputation
(
cl
,
pmeQueue
,
recipForceGroup
));
cl
.
addPreComputation
(
new
SyncQueuePreComputation
(
cl
,
pmeQueue
,
recipForceGroup
));
cl
.
addPostComputation
(
new
SyncQueuePostComputation
(
cl
,
pmeSyncEvent
,
recipForceGroup
));
cl
.
addPostComputation
(
new
SyncQueuePostComputation
(
cl
,
pmeSyncEvent
,
recipForceGroup
));
}
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
@@ -1794,6 +1798,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1794,6 +1798,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
}
}
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
usePmeQueue
)
cl
.
setQueue
(
pmeQueue
);
cl
.
setQueue
(
pmeQueue
);
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
...
@@ -1837,9 +1842,11 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1837,9 +1842,11 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
else
else
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
());
if
(
usePmeQueue
)
{
pmeQueue
.
enqueueMarker
(
&
pmeSyncEvent
);
pmeQueue
.
enqueueMarker
(
&
pmeSyncEvent
);
cl
.
restoreDefaultQueue
();
cl
.
restoreDefaultQueue
();
}
}
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
61a8b779
...
@@ -573,6 +573,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -573,6 +573,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
isSymmetric
)
if
(
isSymmetric
)
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useCutoff
&&
context
.
getSIMDWidth
()
<
32
)
defines
[
"PRUNE_BY_CUTOFF"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF"
]
=
context
.
doubleToString
(
cutoff
);
defines
[
"CUTOFF"
]
=
context
.
doubleToString
(
cutoff
);
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
61a8b779
{
#
ifdef
USE_DOUBLE_PRECISION
unsigned
long
includeInteraction
;
#
else
unsigned
int
includeInteraction
;
#
endif
#
if
USE_EWALD
#
if
USE_EWALD
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
if
((
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
{
includeInteraction
=
((
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
...
@@ -44,21 +50,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -44,21 +50,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
}
}
#
endif
#
endif
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
ljEnergy
+
prefactor*erfcAlphaR
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR
,
includeInteraction
)
;
#
else
#
else
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
prefactor*erfcAlphaR
;
tempEnergy
+=
select
((
real
)
0
,
prefactor*erfcAlphaR
,
includeInteraction
)
;
#
endif
#
endif
}
}
dEdR
+=
tempForce*invR*invR
;
dEdR
+=
select
((
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
}
#
else
{
#
ifdef
USE_DOUBLE_PRECISION
unsigned
long
includeInteraction
;
#
else
#
else
unsigned
int
includeInteraction
;
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
#
else
#
else
...
@@ -97,5 +96,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -97,5 +96,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
#
endif
#
endif
#
endif
#
endif
dEdR
+=
select
((
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
dEdR
+=
select
((
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
}
#
endif
#
endif
}
platforms/opencl/src/kernels/nonbonded.cl
View file @
61a8b779
...
@@ -124,7 +124,7 @@ __kernel void computeNonbonded(
...
@@ -124,7 +124,7 @@ __kernel void computeNonbonded(
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE
_CUTOFF
#
ifdef
PRUNE_BY
_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
...
@@ -155,7 +155,7 @@ __kernel void computeNonbonded(
...
@@ -155,7 +155,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef
USE
_CUTOFF
#ifdef
PRUNE_BY
_CUTOFF
}
}
#endif
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
...
@@ -295,7 +295,9 @@ __kernel void computeNonbonded(
...
@@ -295,7 +295,9 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -324,7 +326,9 @@ __kernel void computeNonbonded(
...
@@ -324,7 +326,9 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
}
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
tj = (tj + 1) & (TILE_SIZE - 1);
SYNC_WARPS;
SYNC_WARPS;
}
}
...
@@ -343,7 +347,7 @@ __kernel void computeNonbonded(
...
@@ -343,7 +347,7 @@ __kernel void computeNonbonded(
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef
USE
_CUTOFF
#ifdef
PRUNE_BY
_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < CUTOFF_SQUARED) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
...
@@ -374,7 +378,7 @@ __kernel void computeNonbonded(
...
@@ -374,7 +378,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fz
+=
dEdR2.z
;
localData[tbx+tj].fz
+=
dEdR2.z
;
#
endif
#
endif
#
ifdef
USE
_CUTOFF
#
ifdef
PRUNE_BY
_CUTOFF
}
}
#
endif
#
endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment