Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3d1b2186
Commit
3d1b2186
authored
Oct 09, 2014
by
peastman
Browse files
Further optimizations to NonbondedForce
parent
86d09347
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
136 additions
and
139 deletions
+136
-139
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+7
-8
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+90
-104
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+1
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+16
-9
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+2
-0
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+12
-13
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+8
-4
No files found.
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
3d1b2186
{
#if USE_EWALD
#if USE_EWALD
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
if
((
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
{
unsigned
int
includeInteraction
=
((
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
;
const
real
alphaR
=
EWALD_ALPHA
*
r
;
const
real
alphaR
=
EWALD_ALPHA
*
r
;
const
real
expAlphaRSqr
=
EXP
(
-
alphaR
*
alphaR
);
const
real
expAlphaRSqr
=
EXP
(
-
alphaR
*
alphaR
);
const
real
prefactor
=
138.935456
f
*
posq1
.
w
*
posq2
.
w
*
invR
;
const
real
prefactor
=
138.935456
f
*
posq1
.
w
*
posq2
.
w
*
invR
;
...
@@ -44,16 +45,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -44,16 +45,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
}
}
#endif
#endif
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempForce
+=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
ljEnergy
+
prefactor
*
erfcAlphaR
;
tempEnergy
+=
includeInteraction
?
ljEnergy
+
prefactor
*
erfcAlphaR
:
0
;
#else
#else
tempForce
=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempForce
=
prefactor
*
(
erfcAlphaR
+
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
prefactor
*
erfcAlphaR
;
tempEnergy
+=
includeInteraction
?
prefactor
*
erfcAlphaR
:
0
;
#endif
#endif
}
}
dEdR
+=
tempForce
*
invR
*
invR
;
dEdR
+=
includeInteraction
?
tempForce
*
invR
*
invR
:
0
;
}
#else
#else
{
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
includeInteraction
=
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
);
unsigned
int
includeInteraction
=
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
);
#else
#else
...
@@ -91,5 +90,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -91,5 +90,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
#endif
#endif
#endif
#endif
dEdR
+=
includeInteraction
?
tempForce
*
invR
*
invR
:
0
;
dEdR
+=
includeInteraction
?
tempForce
*
invR
*
invR
:
0
;
#endif
}
}
#endif
\ No newline at end of file
platforms/cuda/src/kernels/nonbonded.cu
View file @
3d1b2186
...
@@ -228,57 +228,51 @@ extern "C" __global__ void computeNonbonded(
...
@@ -228,57 +228,51 @@ extern "C" __global__ void computeNonbonded(
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
real
invR
=
RSQRT
(
r2
);
if
(
r2
<
CUTOFF_SQUARED
)
{
real
r
=
r2
*
invR
;
#endif
LOAD_ATOM2_PARAMETERS
real
invR
=
RSQRT
(
r2
);
atom2
=
y
*
TILE_SIZE
+
tj
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
y
*
TILE_SIZE
+
tj
;
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
real
dEdR
=
0.0
f
;
real
dEdR
=
0.0
f
;
#else
#else
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
#endif
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
||
!
(
excl
&
0x1
));
#endif
#endif
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
force
.
z
-=
delta
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
delta
.
x
;
shflForce
.
x
+=
delta
.
x
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
#endif
#endif
#else // !USE_SYMMETRIC
#else // !USE_SYMMETRIC
force
.
x
-=
dEdR1
.
x
;
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
force
.
z
-=
dEdR1
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif
#endif // end USE_SYMMETRIC
#endif // end USE_SYMMETRIC
#ifdef USE_CUTOFF
}
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
excl
>>=
1
;
excl
>>=
1
;
#endif
#endif
...
@@ -431,53 +425,51 @@ extern "C" __global__ void computeNonbonded(
...
@@ -431,53 +425,51 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
real
r
=
r2
*
invR
;
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
atom2
=
atomIndices
[
tbx
+
tj
];
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
real
dEdR
=
0.0
f
;
real
dEdR
=
0.0
f
;
#else
#else
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
#endif
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
#endif
#endif
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
force
.
z
-=
delta
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
delta
.
x
;
shflForce
.
x
+=
delta
.
x
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
#endif
#endif
#else // !USE_SYMMETRIC
#else // !USE_SYMMETRIC
force
.
x
-=
dEdR1
.
x
;
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
force
.
z
-=
dEdR1
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif
#endif // end USE_SYMMETRIC
#endif // end USE_SYMMETRIC
}
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
SHUFFLE_WARP_DATA
#endif
#endif
...
@@ -503,57 +495,51 @@ extern "C" __global__ void computeNonbonded(
...
@@ -503,57 +495,51 @@ extern "C" __global__ void computeNonbonded(
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
real
invR
=
RSQRT
(
r2
);
if
(
r2
<
CUTOFF_SQUARED
)
{
real
r
=
r2
*
invR
;
#endif
LOAD_ATOM2_PARAMETERS
real
invR
=
RSQRT
(
r2
);
atom2
=
atomIndices
[
tbx
+
tj
];
real
r
=
r2
*
invR
;
LOAD_ATOM2_PARAMETERS
atom2
=
atomIndices
[
tbx
+
tj
];
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
real
dEdR
=
0.0
f
;
real
dEdR
=
0.0
f
;
#else
#else
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR1
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
real3
dEdR2
=
make_real3
(
0
);
#endif
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
bool
isExcluded
=
(
atom1
>=
NUM_ATOMS
||
atom2
>=
NUM_ATOMS
);
#endif
#endif
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
#ifdef USE_SYMMETRIC
#ifdef USE_SYMMETRIC
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
force
.
z
-=
delta
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
delta
.
x
;
shflForce
.
x
+=
delta
.
x
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
y
+=
delta
.
y
;
shflForce
.
z
+=
delta
.
z
;
shflForce
.
z
+=
delta
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fx
+=
delta
.
x
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fy
+=
delta
.
y
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
localData
[
tbx
+
tj
].
fz
+=
delta
.
z
;
#endif
#endif
#else // !USE_SYMMETRIC
#else // !USE_SYMMETRIC
force
.
x
-=
dEdR1
.
x
;
force
.
x
-=
dEdR1
.
x
;
force
.
y
-=
dEdR1
.
y
;
force
.
y
-=
dEdR1
.
y
;
force
.
z
-=
dEdR1
.
z
;
force
.
z
-=
dEdR1
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
x
+=
dEdR2
.
x
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
y
+=
dEdR2
.
y
;
shflForce
.
z
+=
dEdR2
.
z
;
shflForce
.
z
+=
dEdR2
.
z
;
#else
#else
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fx
+=
dEdR2
.
x
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fy
+=
dEdR2
.
y
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
localData
[
tbx
+
tj
].
fz
+=
dEdR2
.
z
;
#endif
#endif
#endif // end USE_SYMMETRIC
#endif // end USE_SYMMETRIC
#ifdef USE_CUTOFF
}
#endif
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
SHUFFLE_WARP_DATA
SHUFFLE_WARP_DATA
#endif
#endif
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
3d1b2186
...
@@ -633,7 +633,7 @@ private:
...
@@ -633,7 +633,7 @@ private:
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
double
ewaldSelfEnergy
,
dispersionCoefficient
,
alpha
;
bool
hasCoulomb
,
hasLJ
;
bool
hasCoulomb
,
hasLJ
,
usePmeQueue
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
};
};
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
3d1b2186
...
@@ -1609,12 +1609,16 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1609,12 +1609,16 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
);
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
usePmeQueue
=
(
vendor
.
size
()
>=
6
&&
vendor
.
substr
(
0
,
6
)
==
"NVIDIA"
);
if
(
recipForceGroup
<
0
)
if
(
usePmeQueue
)
{
recipForceGroup
=
force
.
getForceGroup
();
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
cl
.
addPreComputation
(
new
SyncQueuePreComputation
(
cl
,
pmeQueue
,
recipForceGroup
));
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
cl
.
addPostComputation
(
new
SyncQueuePostComputation
(
cl
,
pmeSyncEvent
,
recipForceGroup
));
if
(
recipForceGroup
<
0
)
recipForceGroup
=
force
.
getForceGroup
();
cl
.
addPreComputation
(
new
SyncQueuePreComputation
(
cl
,
pmeQueue
,
recipForceGroup
));
cl
.
addPostComputation
(
new
SyncQueuePostComputation
(
cl
,
pmeSyncEvent
,
recipForceGroup
));
}
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
@@ -1794,7 +1798,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1794,7 +1798,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
ewaldForcesKernel
,
cl
.
getNumAtoms
());
}
}
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
if
(
pmeGrid
!=
NULL
&&
includeReciprocal
)
{
cl
.
setQueue
(
pmeQueue
);
if
(
usePmeQueue
)
cl
.
setQueue
(
pmeQueue
);
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
4
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
setInvPeriodicBoxSizeArg
(
cl
,
pmeUpdateBsplinesKernel
,
5
);
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeUpdateBsplinesKernel
,
cl
.
getNumAtoms
());
...
@@ -1837,8 +1842,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1837,8 +1842,10 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
2
*
cl
.
getDevice
().
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
(),
1
);
else
else
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
());
cl
.
executeKernel
(
pmeInterpolateForceKernel
,
cl
.
getNumAtoms
());
pmeQueue
.
enqueueMarker
(
&
pmeSyncEvent
);
if
(
usePmeQueue
)
{
cl
.
restoreDefaultQueue
();
pmeQueue
.
enqueueMarker
(
&
pmeSyncEvent
);
cl
.
restoreDefaultQueue
();
}
}
}
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
if
(
dispersionCoefficient
!=
0.0
&&
includeDirect
)
{
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
3d1b2186
...
@@ -573,6 +573,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -573,6 +573,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
isSymmetric
)
if
(
isSymmetric
)
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
defines
[
"USE_SYMMETRIC"
]
=
"1"
;
if
(
useCutoff
&&
context
.
getSIMDWidth
()
<
32
)
defines
[
"PRUNE_BY_CUTOFF"
]
=
"1"
;
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"FORCE_WORK_GROUP_SIZE"
]
=
context
.
intToString
(
forceThreadBlockSize
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF_SQUARED"
]
=
context
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"CUTOFF"
]
=
context
.
doubleToString
(
cutoff
);
defines
[
"CUTOFF"
]
=
context
.
doubleToString
(
cutoff
);
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
3d1b2186
{
#
ifdef
USE_DOUBLE_PRECISION
unsigned
long
includeInteraction
;
#
else
unsigned
int
includeInteraction
;
#
endif
#
if
USE_EWALD
#
if
USE_EWALD
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
bool
needCorrection
=
hasExclusions
&&
isExcluded
&&
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
;
if
((
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
{
includeInteraction
=
((
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
||
needCorrection
)
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
alphaR
=
EWALD_ALPHA*r
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
expAlphaRSqr
=
EXP
(
-alphaR*alphaR
)
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
const
real
prefactor
=
138.935456f*posq1.w*posq2.w*invR
;
...
@@ -44,21 +50,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -44,21 +50,14 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
}
}
#
endif
#
endif
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
+=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
ljEnergy
+
prefactor*erfcAlphaR
;
tempEnergy
+=
select
((
real
)
0
,
ljEnergy
+
prefactor*erfcAlphaR
,
includeInteraction
)
;
#
else
#
else
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempForce
=
prefactor*
(
erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI
)
;
tempEnergy
+=
prefactor*erfcAlphaR
;
tempEnergy
+=
select
((
real
)
0
,
prefactor*erfcAlphaR
,
includeInteraction
)
;
#
endif
#
endif
}
}
dEdR
+=
tempForce*invR*invR
;
dEdR
+=
select
((
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
}
#
else
{
#
ifdef
USE_DOUBLE_PRECISION
unsigned
long
includeInteraction
;
#
else
#
else
unsigned
int
includeInteraction
;
#
endif
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
includeInteraction
=
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
;
#
else
#
else
...
@@ -97,5 +96,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -97,5 +96,5 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
#
endif
#
endif
#
endif
#
endif
dEdR
+=
select
((
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
dEdR
+=
select
((
real
)
0
,
tempForce*invR*invR,
includeInteraction
)
;
#
endif
}
}
#
endif
\ No newline at end of file
platforms/opencl/src/kernels/nonbonded.cl
View file @
3d1b2186
...
@@ -124,7 +124,7 @@ __kernel void computeNonbonded(
...
@@ -124,7 +124,7 @@ __kernel void computeNonbonded(
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE
_CUTOFF
#
ifdef
PRUNE_BY
_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
real
invR
=
RSQRT
(
r2
)
;
real
invR
=
RSQRT
(
r2
)
;
...
@@ -155,7 +155,7 @@ __kernel void computeNonbonded(
...
@@ -155,7 +155,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef
USE
_CUTOFF
#ifdef
PRUNE_BY
_CUTOFF
}
}
#endif
#endif
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
...
@@ -295,7 +295,9 @@ __kernel void computeNonbonded(
...
@@ -295,7 +295,9 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 posq2 = (real4) (localData[atom2].x, localData[atom2].y, localData[atom2].z, localData[atom2].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef PRUNE_BY_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < CUTOFF_SQUARED) {
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
real r = r2*invR;
real r = r2*invR;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -324,7 +326,9 @@ __kernel void computeNonbonded(
...
@@ -324,7 +326,9 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fy += dEdR2.y;
localData[tbx+tj].fz += dEdR2.z;
localData[tbx+tj].fz += dEdR2.z;
#endif
#endif
#ifdef PRUNE_BY_CUTOFF
}
}
#endif
tj = (tj + 1) & (TILE_SIZE - 1);
tj = (tj + 1) & (TILE_SIZE - 1);
SYNC_WARPS;
SYNC_WARPS;
}
}
...
@@ -343,7 +347,7 @@ __kernel void computeNonbonded(
...
@@ -343,7 +347,7 @@ __kernel void computeNonbonded(
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef
USE
_CUTOFF
#ifdef
PRUNE_BY
_CUTOFF
if (r2 < CUTOFF_SQUARED) {
if (r2 < CUTOFF_SQUARED) {
#endif
#endif
real invR = RSQRT(r2);
real invR = RSQRT(r2);
...
@@ -374,7 +378,7 @@ __kernel void computeNonbonded(
...
@@ -374,7 +378,7 @@ __kernel void computeNonbonded(
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fy
+=
dEdR2.y
;
localData[tbx+tj].fz
+=
dEdR2.z
;
localData[tbx+tj].fz
+=
dEdR2.z
;
#
endif
#
endif
#
ifdef
USE
_CUTOFF
#
ifdef
PRUNE_BY
_CUTOFF
}
}
#
endif
#
endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment