Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d65992ea
Commit
d65992ea
authored
Oct 19, 2009
by
Peter Eastman
Browse files
Optimized nonbonded kernels by inlining lots of constant quantities
parent
d3ec1d0c
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
186 additions
and
160 deletions
+186
-160
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+71
-59
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+33
-19
platforms/opencl/src/kernels/coulombLennardJones.cl
platforms/opencl/src/kernels/coulombLennardJones.cl
+1
-1
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+57
-57
platforms/opencl/src/kernels/gbsaObc2.cl
platforms/opencl/src/kernels/gbsaObc2.cl
+2
-2
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+22
-22
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
d65992ea
...
@@ -542,11 +542,13 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -542,11 +542,13 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if
(
useCutoff
)
{
if
(
useCutoff
)
{
double
reactionFieldK
=
pow
(
force
.
getCutoffDistance
(),
-
3.0
)
*
(
force
.
getReactionFieldDielectric
()
-
1.0
)
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
double
reactionFieldK
=
pow
(
force
.
getCutoffDistance
(),
-
3.0
)
*
(
force
.
getReactionFieldDielectric
()
-
1.0
)
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
double
reactionFieldC
=
(
1.0
/
force
.
getCutoffDistance
())
*
(
3.0
*
force
.
getReactionFieldDielectric
())
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
double
reactionFieldC
=
(
1.0
/
force
.
getCutoffDistance
())
*
(
3.0
*
force
.
getReactionFieldDielectric
())
/
(
2.0
*
force
.
getReactionFieldDielectric
()
+
1.0
);
char
k
[
50
],
c
[
50
];
stringstream
k
,
c
;
sprintf
(
k
,
"%.8ef"
,
reactionFieldK
);
k
.
precision
(
8
);
sprintf
(
c
,
"%.8ef"
,
reactionFieldC
);
c
.
precision
(
8
);
defines
[
"REACTION_FIELD_K"
]
=
string
(
k
);
k
<<
scientific
<<
reactionFieldK
<<
"f"
;
defines
[
"REACTION_FIELD_C"
]
=
string
(
c
);
c
<<
scientific
<<
reactionFieldC
<<
"f"
;
defines
[
"REACTION_FIELD_K"
]
=
k
.
str
();
defines
[
"REACTION_FIELD_C"
]
=
c
.
str
();
}
}
// if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
// if (force.getNonbondedMethod() != NonbondedForce::NoCutoff) {
// method = CUTOFF;
// method = CUTOFF;
...
@@ -838,73 +840,83 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
...
@@ -838,73 +840,83 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
defines
[
"USE_CUTOFF"
]
=
"1"
;
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
nb
.
getUsePeriodic
())
if
(
nb
.
getUsePeriodic
())
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
stringstream
xsize
,
ysize
,
zsize
,
cutoffSquared
,
prefac
;
xsize
.
precision
(
8
);
ysize
.
precision
(
8
);
zsize
.
precision
(
8
);
cutoffSquared
.
precision
(
8
);
prefac
.
precision
(
8
);
xsize
<<
scientific
<<
nb
.
getPeriodicBoxSize
().
x
<<
"f"
;
ysize
<<
scientific
<<
nb
.
getPeriodicBoxSize
().
y
<<
"f"
;
zsize
<<
scientific
<<
nb
.
getPeriodicBoxSize
().
z
<<
"f"
;
cutoffSquared
<<
scientific
<<
(
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
())
<<
"f"
;
prefac
<<
scientific
<<
prefactor
<<
"f"
;
defines
[
"PERIODIC_BOX_SIZE_X"
]
=
xsize
.
str
();
defines
[
"PERIODIC_BOX_SIZE_Y"
]
=
ysize
.
str
();
defines
[
"PERIODIC_BOX_SIZE_Z"
]
=
zsize
.
str
();
defines
[
"CUTOFF_SQUARED"
]
=
cutoffSquared
.
str
();
defines
[
"PREFACTOR"
]
=
prefac
.
str
();
stringstream
natom
,
padded
;
natom
<<
cl
.
getNumAtoms
();
padded
<<
cl
.
getPaddedNumAtoms
();
defines
[
"NUM_ATOMS"
]
=
natom
.
str
();
defines
[
"PADDED_NUM_ATOMS"
]
=
padded
.
str
();
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
loadSourceFromFile
(
"gbsaObc.cl"
),
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
loadSourceFromFile
(
"gbsaObc.cl"
),
defines
);
computeBornSumKernel
=
cl
::
Kernel
(
program
,
"computeBornSum"
);
computeBornSumKernel
=
cl
::
Kernel
(
program
,
"computeBornSum"
);
computeBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getNumAtoms
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
bornSum
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getPaddedNumAtoms
());
computeBornSumKernel
.
setArg
(
1
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
bornSum
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getPosq
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
3
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
3
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getPosq
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
params
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
computeBornSumKernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float2
),
NULL
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
params
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float2
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl_float
>
(
9
,
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
mm_float4
>
(
10
,
nb
.
getPeriodicBoxSize
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
11
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
9
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
13
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
}
}
else
{
else
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
nb
.
getTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
nb
.
getTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl_uint
>
(
9
,
nb
.
getTiles
().
getSize
());
computeBornSumKernel
.
setArg
<
cl_uint
>
(
7
,
nb
.
getTiles
().
getSize
());
}
}
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getNumAtoms
());
reduceBornSumKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
reduceBornSumKernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getPaddedNumAtoms
());
reduceBornSumKernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getNumForceBuffers
());
reduceBornSumKernel
.
setArg
<
cl_int
>
(
2
,
cl
.
getNumForceBuffers
());
reduceBornSumKernel
.
setArg
<
cl_float
>
(
2
,
1.0
f
);
reduceBornSumKernel
.
setArg
<
cl_float
>
(
3
,
1.0
f
);
reduceBornSumKernel
.
setArg
<
cl_float
>
(
3
,
0.8
f
);
reduceBornSumKernel
.
setArg
<
cl_float
>
(
4
,
0.8
f
);
reduceBornSumKernel
.
setArg
<
cl_float
>
(
4
,
4.85
f
);
reduceBornSumKernel
.
setArg
<
cl_float
>
(
5
,
4.85
f
);
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bornSum
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
bornSum
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
params
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
params
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
bornRadii
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
bornRadii
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
obcChain
->
getDeviceBuffer
());
reduceBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
obcChain
->
getDeviceBuffer
());
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
force1Kernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getNumAtoms
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getPaddedNumAtoms
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl_float
>
(
2
,
prefactor
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getPosq
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
force1Kernel
.
setArg
(
3
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
force1Kernel
.
setArg
(
4
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
5
,
cl
.
getPosq
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bornRadii
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
force1Kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
7
,
bornForce
->
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
8
,
bornRadii
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
8
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
9
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
10
,
bornForce
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
11
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
12
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl_float
>
(
13
,
nb
.
getCutoffDistance
()
*
nb
.
getCutoffDistance
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
10
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
mm_float4
>
(
14
,
nb
.
getPeriodicBoxSize
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
11
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
15
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
(
12
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
mm_float4
),
NULL
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
16
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
force1Kernel
.
setArg
(
17
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
mm_float4
),
NULL
);
}
}
else
{
else
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
12
,
nb
.
getTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
nb
.
getTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl_uint
>
(
1
3
,
nb
.
getTiles
().
getSize
());
force1Kernel
.
setArg
<
cl_uint
>
(
1
0
,
nb
.
getTiles
().
getSize
());
}
}
reduceBornForceKernel
=
cl
::
Kernel
(
program
,
"reduceBornForce"
);
reduceBornForceKernel
=
cl
::
Kernel
(
program
,
"reduceBornForce"
);
reduceBornForceKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getNumAtoms
());
reduceBornForceKernel
.
setArg
<
cl_int
>
(
0
,
cl
.
getPaddedNumAtoms
());
reduceBornForceKernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getPaddedNumAtoms
());
reduceBornForceKernel
.
setArg
<
cl_int
>
(
1
,
cl
.
getNumForceBuffers
());
reduceBornForceKernel
.
setArg
<
cl_int
>
(
2
,
cl
.
getNumForceBuffers
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
bornForce
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
bornForce
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
params
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
params
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bornRadii
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
bornRadii
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
obcChain
->
getDeviceBuffer
());
reduceBornForceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
obcChain
->
getDeviceBuffer
());
}
}
cl
.
clearBuffer
(
*
bornSum
);
cl
.
clearBuffer
(
*
bornSum
);
cl
.
clearBuffer
(
*
bornForce
);
cl
.
clearBuffer
(
*
bornForce
);
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
d65992ea
...
@@ -327,33 +327,47 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -327,33 +327,47 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
useExclusions
)
if
(
useExclusions
)
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
defines
[
"USE_EXCLUSIONS"
]
=
"1"
;
stringstream
xsize
,
ysize
,
zsize
,
cutoffSquared
;
xsize
.
precision
(
8
);
ysize
.
precision
(
8
);
zsize
.
precision
(
8
);
cutoffSquared
.
precision
(
8
);
xsize
<<
scientific
<<
periodicBoxSize
.
x
<<
"f"
;
ysize
<<
scientific
<<
periodicBoxSize
.
y
<<
"f"
;
zsize
<<
scientific
<<
periodicBoxSize
.
z
<<
"f"
;
cutoffSquared
<<
scientific
<<
(
cutoff
*
cutoff
)
<<
"f"
;
defines
[
"PERIODIC_BOX_SIZE_X"
]
=
xsize
.
str
();
defines
[
"PERIODIC_BOX_SIZE_Y"
]
=
ysize
.
str
();
defines
[
"PERIODIC_BOX_SIZE_Z"
]
=
zsize
.
str
();
defines
[
"CUTOFF_SQUARED"
]
=
cutoffSquared
.
str
();
stringstream
natom
,
padded
;
natom
<<
context
.
getNumAtoms
();
padded
<<
context
.
getPaddedNumAtoms
();
defines
[
"NUM_ATOMS"
]
=
natom
.
str
();
defines
[
"PADDED_NUM_ATOMS"
]
=
padded
.
str
();
cl
::
Program
program
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
"nonbonded.cl"
,
replacements
),
defines
);
cl
::
Program
program
=
context
.
createProgram
(
context
.
loadSourceFromFile
(
"nonbonded.cl"
,
replacements
),
defines
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
cl
::
Kernel
kernel
(
program
,
"computeNonbonded"
);
// Set arguments to the Kernel.
// Set arguments to the Kernel.
kernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl_int
>
(
1
,
context
.
getPaddedNumAtoms
());
kernel
.
setArg
<
cl
::
Buffer
>
(
1
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
2
,
context
.
getPosq
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
3
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
4
,
context
.
getPosq
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
4
,
exclusionIndex
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
5
,
exclusions
->
getDeviceBuffer
());
kernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
<
cl
::
Buffer
>
(
6
,
exclusionIndex
->
getDeviceBuffer
());
kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
(
7
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
int
paramBase
=
9
;
kernel
.
setArg
(
8
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
int
paramBase
=
11
;
if
(
useCutoff
)
{
if
(
useCutoff
)
{
paramBase
=
15
;
paramBase
=
11
;
kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl_float
>
(
10
,
cutoff
*
cutoff
);
kernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
<
mm_float4
>
(
11
,
periodicBoxSize
);
kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
12
,
interactionFlags
->
getDeviceBuffer
());
kernel
.
setArg
(
10
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
kernel
.
setArg
<
cl
::
Buffer
>
(
13
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
(
14
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
}
}
else
{
else
{
kernel
.
setArg
<
cl
::
Buffer
>
(
9
,
tiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
7
,
tiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl_uint
>
(
10
,
tiles
->
getSize
());
kernel
.
setArg
<
cl_uint
>
(
8
,
tiles
->
getSize
());
}
}
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
i
*
2
+
paramBase
,
params
[
i
].
getBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
i
*
2
+
paramBase
,
params
[
i
].
getBuffer
());
...
...
platforms/opencl/src/kernels/coulombLennardJones.cl
View file @
d65992ea
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
!isExcluded
&&
r2
<
cutoffSquared
)
{
if
(
!isExcluded
&&
r2
<
CUTOFF_SQUARED
)
{
#
else
#
else
if
(
!isExcluded
)
{
if
(
!isExcluded
)
{
#
endif
#
endif
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
d65992ea
...
@@ -7,10 +7,10 @@ const float surfaceAreaFactor = -6.0f*3.14159265358979323846f*0.0216f*1000.0f*0.
...
@@ -7,10 +7,10 @@ const float surfaceAreaFactor = -6.0f*3.14159265358979323846f*0.0216f*1000.0f*0.
*
Compute
the
Born
sum.
*
Compute
the
Born
sum.
*/
*/
__kernel
void
computeBornSum
(
int
numAtoms,
int
paddedNumAtoms,
__global
float*
global_bornSum,
__local
float*
local_bornSum,
__global
float4*
posq,
__kernel
void
computeBornSum
(
__global
float*
global_bornSum,
__local
float*
local_bornSum,
__global
float4*
posq,
__local
float4*
local_posq,
__global
float2*
global_params,
__local
float2*
local_params,
__global
unsigned
int*
tiles,
__local
float4*
local_posq,
__global
float2*
global_params,
__local
float2*
local_params,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
float
cutoffSquared,
float4
periodicBoxSize,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
float*
tempBuffer
)
{
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
float*
tempBuffer
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -42,19 +42,19 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
...
@@ -42,19 +42,19 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_params[get_local_id
(
0
)
]
=
params1
;
local_params[get_local_id
(
0
)
]
=
params1
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
tile
=
xi+xi*
paddedNumAtoms
/TileSize-xi*
(
xi+1
)
/2
;
unsigned
int
tile
=
xi+xi*
PADDED_NUM_ATOMS
/TileSize-xi*
(
xi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
atom1
<
numAtoms
&&
y+j
<
numAtoms
&&
r2
<
cutoffSquared
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
&&
r2
<
CUTOFF_SQUARED
)
{
#
else
#
else
if
(
atom1
<
numAtoms
&&
y+j
<
numAtoms
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
#
endif
#
endif
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float
invR
=
1.0f/r
;
...
@@ -76,9 +76,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
...
@@ -76,9 +76,9 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
//
Write
results
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*
PADDED_NUM_ATOMS
;
#
else
#
else
unsigned
int
offset
=
x
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset
=
x
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
#
endif
#
endif
global_bornSum[offset]
+=
bornSum
;
global_bornSum[offset]
+=
bornSum
;
}
}
...
@@ -104,16 +104,16 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
...
@@ -104,16 +104,16 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
if
((
flags&
(
1<<j
))
!=
0
)
{
if
((
flags&
(
1<<j
))
!=
0
)
{
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
tempBuffer[get_local_id
(
0
)
]
=
0.0f
;
tempBuffer[get_local_id
(
0
)
]
=
0.0f
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
atom1
<
numAtoms
&&
y+j
<
numAtoms
&&
r2
<
cutoffSquared
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
&&
r2
<
CUTOFF_SQUARED
)
{
#
else
#
else
if
(
atom1
<
numAtoms
&&
y+j
<
numAtoms
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
#
endif
#
endif
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float
invR
=
1.0f/r
;
...
@@ -168,19 +168,19 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
...
@@ -168,19 +168,19 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
unsigned
int
xi
=
x/TileSize
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
tile
=
xi+yi*
paddedNumAtoms
/TileSize-yi*
(
yi+1
)
/2
;
unsigned
int
tile
=
xi+yi*
PADDED_NUM_ATOMS
/TileSize-yi*
(
yi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
atom1
<
numAtoms
&&
y+tj
<
numAtoms
&&
r2
<
cutoffSquared
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+tj
<
NUM_ATOMS
&&
r2
<
CUTOFF_SQUARED
)
{
#
else
#
else
if
(
atom1
<
numAtoms
&&
y+tj
<
numAtoms
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+tj
<
NUM_ATOMS
)
{
#
endif
#
endif
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f/r
;
float
invR
=
1.0f/r
;
...
@@ -218,11 +218,11 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
...
@@ -218,11 +218,11 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
//
Write
results
//
Write
results
float4
of
;
float4
of
;
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*
PADDED_NUM_ATOMS
;
#
else
#
else
unsigned
int
offset1
=
x
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset1
=
x
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
#
endif
#
endif
global_bornSum[offset1]
+=
bornSum
;
global_bornSum[offset1]
+=
bornSum
;
global_bornSum[offset2]
+=
local_bornSum[get_local_id
(
0
)
]
;
global_bornSum[offset2]
+=
local_bornSum[get_local_id
(
0
)
]
;
...
@@ -236,10 +236,10 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
...
@@ -236,10 +236,10 @@ __kernel void computeBornSum(int numAtoms, int paddedNumAtoms, __global float* g
*
Reduce
the
Born
sums
to
compute
the
Born
radii.
*
Reduce
the
Born
sums
to
compute
the
Born
radii.
*/
*/
__kernel
void
reduceBornSum
(
int
numAtoms,
int
bufferSize,
int
numBuffers,
float
alpha,
float
beta,
float
gamma,
__kernel
void
reduceBornSum
(
int
bufferSize,
int
numBuffers,
float
alpha,
float
beta,
float
gamma,
__global
float*
bornSum,
__global
float2*
params,
__global
float*
bornRadii,
__global
float*
obcChain
)
{
__global
float*
bornSum,
__global
float2*
params,
__global
float*
bornRadii,
__global
float*
obcChain
)
{
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
while
(
index
<
NUM_ATOMS
)
{
//
Get
summed
Born
data
//
Get
summed
Born
data
int
totalSize
=
bufferSize*numBuffers
;
int
totalSize
=
bufferSize*numBuffers
;
...
@@ -268,11 +268,11 @@ __kernel void reduceBornSum(int numAtoms, int bufferSize, int numBuffers, float
...
@@ -268,11 +268,11 @@ __kernel void reduceBornSum(int numAtoms, int bufferSize, int numBuffers, float
*
First
part
of
computing
the
GBSA
interaction.
*
First
part
of
computing
the
GBSA
interaction.
*/
*/
__kernel
void
computeGBSAForce1
(
int
numAtoms,
int
paddedNumAtoms,
float
prefactor,
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__kernel
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__local
float4*
local_posq,
__local
float4*
local_force,
__global
float*
global_bornRadii,
__local
float*
local_bornRadii,
__global
float4*
posq,
__local
float4*
local_posq,
__local
float4*
local_force,
__global
float*
global_bornRadii,
__local
float*
local_bornRadii,
__global
float*
global_bornForce,
__local
float*
local_bornForce,
__global
unsigned
int*
tiles,
__global
float*
global_bornForce,
__local
float*
local_bornForce,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
float
cutoffSquared,
float4
periodicBoxSize,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
float4*
tempBuffer
)
{
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
float4*
tempBuffer
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -304,14 +304,14 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -304,14 +304,14 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_bornRadii[get_local_id
(
0
)
]
=
bornRadius1
;
local_bornRadii[get_local_id
(
0
)
]
=
bornRadius1
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
tile
=
xi+xi*
paddedNumAtoms
/TileSize-xi*
(
xi+1
)
/2
;
unsigned
int
tile
=
xi+xi*
PADDED_NUM_ATOMS
/TileSize-xi*
(
xi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
if
(
atom1
<
numAtoms
&&
y+j
<
numAtoms
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
...
@@ -323,13 +323,13 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -323,13 +323,13 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
sqrt
(
denominator2
)
;
float
denominator
=
sqrt
(
denominator2
)
;
float
tempEnergy
=
(
prefactor
*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR
*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
force.w
+=
dGpol_dalpha2_ij*bornRadius2
;
force.w
+=
dGpol_dalpha2_ij*bornRadius2
;
float
dEdR
=
Gpol*
(
1.0f
-
0.25f*expTerm
)
;
float
dEdR
=
Gpol*
(
1.0f
-
0.25f*expTerm
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
>
cutoffSquared
)
{
if
(
r2
>
CUTOFF_SQUARED
)
{
dEdR
=
0.0f
;
dEdR
=
0.0f
;
tempEnergy
=
0.0f
;
tempEnergy
=
0.0f
;
}
}
...
@@ -342,9 +342,9 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -342,9 +342,9 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
//
Write
results
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*
PADDED_NUM_ATOMS
;
#
else
#
else
unsigned
int
offset
=
x
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset
=
x
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
#
endif
#
endif
forceBuffers[offset].xyz
+=
force.xyz
;
forceBuffers[offset].xyz
+=
force.xyz
;
global_bornForce[offset]
+=
force.w
;
global_bornForce[offset]
+=
force.w
;
...
@@ -371,9 +371,9 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -371,9 +371,9 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
if
((
flags&
(
1<<j
))
!=
0
)
{
if
((
flags&
(
1<<j
))
!=
0
)
{
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
...
@@ -385,15 +385,15 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -385,15 +385,15 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
sqrt
(
denominator2
)
;
float
denominator
=
sqrt
(
denominator2
)
;
float
tempEnergy
=
(
prefactor
*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR
*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
force.w
+=
dGpol_dalpha2_ij*bornRadius2
;
force.w
+=
dGpol_dalpha2_ij*bornRadius2
;
float
dEdR
=
Gpol*
(
1.0f
-
0.25f*expTerm
)
;
float
dEdR
=
Gpol*
(
1.0f
-
0.25f*expTerm
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
atom1
>=
numAtoms
|
| y+j >=
numAtoms || r2 > cutoffSquared
) {
if
(
atom1
>=
NUM_ATOMS
|
| y+j >=
NUM_ATOMS || r2 > CUTOFF_SQUARED
) {
#else
#else
if (atom1 >=
numAtoms
|
|
y+j
>=
numAtoms
)
{
if (atom1 >=
NUM_ATOMS
|
|
y+j
>=
NUM_ATOMS
)
{
#
endif
#
endif
dEdR
=
0.0f
;
dEdR
=
0.0f
;
tempEnergy
=
0.0f
;
tempEnergy
=
0.0f
;
...
@@ -426,14 +426,14 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -426,14 +426,14 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
unsigned
int
xi
=
x/TileSize
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
tile
=
xi+yi*
paddedNumAtoms
/TileSize-yi*
(
yi+1
)
/2
;
unsigned
int
tile
=
xi+yi*
PADDED_NUM_ATOMS
/TileSize-yi*
(
yi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
for
(
unsigned
int
j
=
0
; j < TileSize; j++) {
if
(
atom1
<
numAtoms
&&
y+tj
<
numAtoms
)
{
if
(
atom1
<
NUM_ATOMS
&&
y+tj
<
NUM_ATOMS
)
{
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
...
@@ -445,13 +445,13 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -445,13 +445,13 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
float
expTerm
=
exp
(
-D_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator2
=
r2
+
alpha2_ij*expTerm
;
float
denominator
=
sqrt
(
denominator2
)
;
float
denominator
=
sqrt
(
denominator2
)
;
float
tempEnergy
=
(
prefactor
*posq1.w*posq2.w
)
/denominator
;
float
tempEnergy
=
(
PREFACTOR
*posq1.w*posq2.w
)
/denominator
;
float
Gpol
=
tempEnergy/denominator2
;
float
Gpol
=
tempEnergy/denominator2
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
float
dGpol_dalpha2_ij
=
-0.5f*Gpol*expTerm*
(
1.0f+D_ij
)
;
force.w
+=
dGpol_dalpha2_ij*bornRadius2
;
force.w
+=
dGpol_dalpha2_ij*bornRadius2
;
float
dEdR
=
Gpol*
(
1.0f
-
0.25f*expTerm
)
;
float
dEdR
=
Gpol*
(
1.0f
-
0.25f*expTerm
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
r2
>
cutoffSquared
)
{
if
(
r2
>
CUTOFF_SQUARED
)
{
dEdR
=
0.0f
;
dEdR
=
0.0f
;
tempEnergy
=
0.0f
;
tempEnergy
=
0.0f
;
}
}
...
@@ -467,11 +467,11 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -467,11 +467,11 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
//
Write
results
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*
PADDED_NUM_ATOMS
;
#
else
#
else
unsigned
int
offset1
=
x
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset1
=
x
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
#
endif
#
endif
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
...
@@ -488,11 +488,11 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
...
@@ -488,11 +488,11 @@ __kernel void computeGBSAForce1(int numAtoms, int paddedNumAtoms, float prefacto
*
Reduce
the
Born
force.
*
Reduce
the
Born
force.
*/
*/
__kernel
void
reduceBornForce
(
int
numAtoms,
int
bufferSize,
int
numBuffers,
__global
float*
bornForce,
__global
float*
energyBuffer,
__kernel
void
reduceBornForce
(
int
bufferSize,
int
numBuffers,
__global
float*
bornForce,
__global
float*
energyBuffer,
__global
float2*
params,
__global
float*
bornRadii,
__global
float*
obcChain
)
{
__global
float2*
params,
__global
float*
bornRadii,
__global
float*
obcChain
)
{
float
energy
=
0.0f
;
float
energy
=
0.0f
;
unsigned
int
index
=
get_global_id
(
0
)
;
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
numAtoms
)
{
while
(
index
<
NUM_ATOMS
)
{
//
Sum
the
Born
force
//
Sum
the
Born
force
int
totalSize
=
bufferSize*numBuffers
;
int
totalSize
=
bufferSize*numBuffers
;
...
...
platforms/opencl/src/kernels/gbsaObc2.cl
View file @
d65992ea
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
atom1
<
numAtoms
&&
atom2
<
numAtoms
&&
atom1
!=
atom2
&&
r2
<
cutoffSquared
)
{
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
&&
r2
<
CUTOFF_SQUARED
)
{
#
else
#
else
if
(
atom1
<
numAtoms
&&
atom2
<
numAtoms
&&
atom1
!=
atom2
)
{
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
)
{
#
endif
#
endif
float
invRSquared
=
1.0f/r2
;
float
invRSquared
=
1.0f/r2
;
float
rScaledRadiusJ
=
r+obcParams2.y
;
float
rScaledRadiusJ
=
r+obcParams2.y
;
...
...
platforms/opencl/src/kernels/nonbonded.cl
View file @
d65992ea
...
@@ -4,10 +4,10 @@ const unsigned int TileSize = 32;
...
@@ -4,10 +4,10 @@ const unsigned int TileSize = 32;
*
Compute
nonbonded
interactions.
*
Compute
nonbonded
interactions.
*/
*/
__kernel
void
computeNonbonded
(
int
numAtoms,
int
paddedNumAtoms,
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__local
float4*
local_posq,
__local
float4*
local_force,
__global
unsigned
int*
tiles,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__local
float4*
local_posq,
__local
float4*
local_force,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
float
cutoffSquared,
float4
periodicBoxSize,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
float4*
tempBuffer
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
float4*
tempBuffer
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -41,7 +41,7 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
...
@@ -41,7 +41,7 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_posq[get_local_id
(
0
)
]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned
int
xi
=
x/TileSize
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
tile
=
xi+xi*
paddedNumAtoms
/TileSize-xi*
(
xi+1
)
/2
;
unsigned
int
tile
=
xi+xi*
PADDED_NUM_ATOMS
/TileSize-xi*
(
xi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
;
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
;
#
endif
#
endif
...
@@ -51,13 +51,13 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
...
@@ -51,13 +51,13 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
#
endif
#
endif
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f
/
r
;
float
invR
=
1.0f
/
r
;
int
atom2
=
tbx+j
;
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
float4
posq2
=
local_posq[atom2]
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -73,9 +73,9 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
...
@@ -73,9 +73,9 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
//
Write
results
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset
=
x
+
tgx
+
(
x/TileSize
)
*
PADDED_NUM_ATOMS
;
#
else
#
else
unsigned
int
offset
=
x
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset
=
x
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
#
endif
#
endif
forceBuffers[offset].xyz
+=
force.xyz
;
forceBuffers[offset].xyz
+=
force.xyz
;
}
}
...
@@ -102,13 +102,13 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
...
@@ -102,13 +102,13 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
bool
isExcluded
=
false
;
bool
isExcluded
=
false
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+j].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f
/
r
;
float
invR
=
1.0f
/
r
;
int
atom2
=
tbx+j
;
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
float4
posq2
=
local_posq[atom2]
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -144,7 +144,7 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
...
@@ -144,7 +144,7 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
unsigned
int
xi
=
x/TileSize
;
unsigned
int
xi
=
x/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
yi
=
y/TileSize
;
unsigned
int
tile
=
xi+yi*
paddedNumAtoms
/TileSize-yi*
(
yi+1
)
/2
;
unsigned
int
tile
=
xi+yi*
PADDED_NUM_ATOMS
/TileSize-yi*
(
yi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TileSize
-
tgx
))
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TileSize
-
tgx
))
;
...
@@ -155,13 +155,13 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
...
@@ -155,13 +155,13 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
#
endif
#
endif
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
float4
delta
=
(
float4
)
(
local_posq[tbx+tj].xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/
periodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.x
-=
floor
(
delta.x/
PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/
periodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.y
-=
floor
(
delta.y/
PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/
periodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
delta.z
-=
floor
(
delta.z/
PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
float
r
=
sqrt
(
r2
)
;
float
invR
=
1.0f
/
r
;
float
invR
=
1.0f
/
r
;
int
atom2
=
tbx+tj
;
int
atom2
=
tbx+tj
;
float4
posq2
=
local_posq[atom2]
;
float4
posq2
=
local_posq[atom2]
;
LOAD_ATOM2_PARAMETERS
LOAD_ATOM2_PARAMETERS
...
@@ -180,11 +180,11 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
...
@@ -180,11 +180,11 @@ __kernel void computeNonbonded(int numAtoms, int paddedNumAtoms, __global float4
//
Write
results
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset1
=
x
+
tgx
+
(
y/TileSize
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*
paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TileSize
)
*
PADDED_NUM_ATOMS
;
#
else
#
else
unsigned
int
offset1
=
x
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset1
=
x
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
warp*
paddedNumAtoms
;
unsigned
int
offset2
=
y
+
tgx
+
warp*
PADDED_NUM_ATOMS
;
#
endif
#
endif
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment