Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bb7cba08
Commit
bb7cba08
authored
Sep 06, 2016
by
Peter Eastman
Browse files
Improved GB performance in mixed precision
parent
73c4302d
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
56 additions
and
32 deletions
+56
-32
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+8
-4
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+9
-5
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+9
-5
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+12
-8
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+9
-5
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+9
-5
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
bb7cba08
...
@@ -2794,6 +2794,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
...
@@ -2794,6 +2794,7 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getEnergyBuffer().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&cu.getPosq().getDevicePointer());
force1Args.push_back(&bornRadii->getDevicePointer());
force1Args.push_back(&bornRadii->getDevicePointer());
force1Args.push_back(NULL);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
force1Args.push_back(&nb.getInteractingTiles().getDevicePointer());
force1Args.push_back(&nb.getInteractionCount().getDevicePointer());
force1Args.push_back(&nb.getInteractionCount().getDevicePointer());
...
@@ -2813,13 +2814,14 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
...
@@ -2813,13 +2814,14 @@ double CudaCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeFor
reduceBornSumKernel = cu.getKernel(module, "reduceBornSum");
reduceBornSumKernel = cu.getKernel(module, "reduceBornSum");
reduceBornForceKernel = cu.getKernel(module, "reduceBornForce");
reduceBornForceKernel = cu.getKernel(module, "reduceBornForce");
}
}
force1Args[5] = &includeEnergy;
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
maxTiles = nb.getInteractingTiles().getSize();
computeSumArgs[3] = &nb.getInteractingTiles().getDevicePointer();
computeSumArgs[3] = &nb.getInteractingTiles().getDevicePointer();
force1Args[
5
] = &nb.getInteractingTiles().getDevicePointer();
force1Args[
6
] = &nb.getInteractingTiles().getDevicePointer();
computeSumArgs[13] = &nb.getInteractingAtoms().getDevicePointer();
computeSumArgs[13] = &nb.getInteractingAtoms().getDevicePointer();
force1Args[1
5
] = &nb.getInteractingAtoms().getDevicePointer();
force1Args[1
6
] = &nb.getInteractingAtoms().getDevicePointer();
}
}
}
}
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cu.executeKernel(computeBornSumKernel, &computeSumArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...
@@ -3754,6 +3756,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3754,6 +3756,7 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
pairEnergyArgs.push_back(&cu.getPosq().getDevicePointer());
pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusions().getDevicePointer());
pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
pairEnergyArgs.push_back(&cu.getNonbondedUtilities().getExclusionTiles().getDevicePointer());
pairEnergyArgs.push_back(NULL);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
pairEnergyArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairEnergyArgs.push_back(&nb.getInteractingTiles().getDevicePointer());
pairEnergyArgs.push_back(&nb.getInteractionCount().getDevicePointer());
pairEnergyArgs.push_back(&nb.getInteractionCount().getDevicePointer());
...
@@ -3832,13 +3835,14 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
...
@@ -3832,13 +3835,14 @@ double CudaCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFo
if (changed)
if (changed)
globals->upload(globalParamValues);
globals->upload(globalParamValues);
}
}
pairEnergyArgs[5] = &includeEnergy;
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
maxTiles = nb.getInteractingTiles().getSize();
pairValueArgs[4] = &nb.getInteractingTiles().getDevicePointer();
pairValueArgs[4] = &nb.getInteractingTiles().getDevicePointer();
pairEnergyArgs[
5
] = &nb.getInteractingTiles().getDevicePointer();
pairEnergyArgs[
6
] = &nb.getInteractingTiles().getDevicePointer();
pairValueArgs[14] = &nb.getInteractingAtoms().getDevicePointer();
pairValueArgs[14] = &nb.getInteractingAtoms().getDevicePointer();
pairEnergyArgs[1
5
] = &nb.getInteractingAtoms().getDevicePointer();
pairEnergyArgs[1
6
] = &nb.getInteractingAtoms().getDevicePointer();
}
}
}
}
cu.executeKernel(pairValueKernel, &pairValueArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cu.executeKernel(pairValueKernel, &pairValueArgs[0], nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
bb7cba08
...
@@ -14,7 +14,7 @@ typedef struct {
...
@@ -14,7 +14,7 @@ typedef struct {
* Compute a force based on pair interactions.
* Compute a force based on pair interactions.
*/
*/
extern
"C"
__global__
void
computeN2Energy
(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
extern
"C"
__global__
void
computeN2Energy
(
unsigned
long
long
*
__restrict__
forceBuffers
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
const
real4
*
__restrict__
posq
,
const
unsigned
int
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
bool
needEnergy
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
...
@@ -78,7 +78,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -78,7 +78,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR
/=
-
r
;
dEdR
/=
-
r
;
}
}
energy
+=
0.5
f
*
tempEnergy
;
if
(
needEnergy
)
energy
+=
0.5
f
*
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
@@ -130,7 +131,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -130,7 +131,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR
/=
-
r
;
dEdR
/=
-
r
;
}
}
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
@@ -274,7 +276,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -274,7 +276,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR
/=
-
r
;
dEdR
/=
-
r
;
}
}
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
@@ -318,7 +321,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -318,7 +321,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR
/=
-
r
;
dEdR
/=
-
r
;
}
}
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
bb7cba08
...
@@ -400,7 +400,7 @@ typedef struct {
...
@@ -400,7 +400,7 @@ typedef struct {
*/
*/
extern
"C"
__global__
void
computeGBSAForce1
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
global_bornForce
,
extern
"C"
__global__
void
computeGBSAForce1
(
unsigned
long
long
*
__restrict__
forceBuffers
,
unsigned
long
long
*
__restrict__
global_bornForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
bool
needEnergy
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
...
@@ -465,7 +465,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -465,7 +465,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
if
(
atom1
!=
y
*
TILE_SIZE
+
j
)
if
(
atom1
!=
y
*
TILE_SIZE
+
j
)
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
#endif
#endif
energy
+=
0.5
f
*
tempEnergy
;
if
(
needEnergy
)
energy
+=
0.5
f
*
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
@@ -519,7 +520,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -519,7 +520,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
#endif
#endif
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
@@ -667,7 +669,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -667,7 +669,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
#endif
#endif
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
@@ -716,7 +719,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -716,7 +719,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
tempEnergy
-=
scaledChargeProduct
/
CUTOFF
;
#endif
#endif
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta
*=
dEdR
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
y
-=
delta
.
y
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
bb7cba08
...
@@ -2872,6 +2872,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2872,6 +2872,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, bornRadii->getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, bornRadii->getDeviceBuffer());
index++; // Whether to include energy.
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
...
@@ -2907,17 +2908,18 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -2907,17 +2908,18 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii->getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, bornRadii->getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain->getDeviceBuffer());
reduceBornForceKernel.setArg<cl::Buffer>(index++, obcChain->getDeviceBuffer());
}
}
force1Kernel.setArg<cl_int>(5, includeEnergy);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
setPeriodicBoxArgs(cl, computeBornSumKernel, 5);
setPeriodicBoxArgs(cl, computeBornSumKernel, 5);
setPeriodicBoxArgs(cl, force1Kernel,
7
);
setPeriodicBoxArgs(cl, force1Kernel,
8
);
if (maxTiles < nb.getInteractingTiles().getSize()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
maxTiles = nb.getInteractingTiles().getSize();
computeBornSumKernel.setArg<cl::Buffer>(3, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(3, nb.getInteractingTiles().getDeviceBuffer());
computeBornSumKernel.setArg<cl_uint>(10, maxTiles);
computeBornSumKernel.setArg<cl_uint>(10, maxTiles);
computeBornSumKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer());
computeBornSumKernel.setArg<cl::Buffer>(13, nb.getInteractingAtoms().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(
5
, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(
6
, nb.getInteractingTiles().getDeviceBuffer());
force1Kernel.setArg<cl_uint>(1
2
, maxTiles);
force1Kernel.setArg<cl_uint>(1
3
, maxTiles);
force1Kernel.setArg<cl::Buffer>(1
5
, nb.getInteractingAtoms().getDeviceBuffer());
force1Kernel.setArg<cl::Buffer>(1
6
, nb.getInteractingAtoms().getDeviceBuffer());
}
}
}
}
cl.executeKernel(computeBornSumKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cl.executeKernel(computeBornSumKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...
@@ -3933,6 +3935,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -3933,6 +3935,7 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*4*elementSize, NULL);
pairEnergyKernel.setArg(index++, (deviceIsCpu ? OpenCLContext::TileSize : nb.getForceThreadBlockSize())*4*elementSize, NULL);
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusions().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusionTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, cl.getNonbondedUtilities().getExclusionTiles().getDeviceBuffer());
index++; // Whether to include energy.
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(index++, nb.getInteractionCount().getDeviceBuffer());
...
@@ -4029,17 +4032,18 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -4029,17 +4032,18 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
if (changed)
if (changed)
globals->upload(globalParamValues);
globals->upload(globalParamValues);
}
}
pairEnergyKernel.setArg<cl_int>(7, includeEnergy);
if (nb.getUseCutoff()) {
if (nb.getUseCutoff()) {
setPeriodicBoxArgs(cl, pairValueKernel, 8);
setPeriodicBoxArgs(cl, pairValueKernel, 8);
setPeriodicBoxArgs(cl, pairEnergyKernel,
9
);
setPeriodicBoxArgs(cl, pairEnergyKernel,
10
);
if (maxTiles < nb.getInteractingTiles().getSize()) {
if (maxTiles < nb.getInteractingTiles().getSize()) {
maxTiles = nb.getInteractingTiles().getSize();
maxTiles = nb.getInteractingTiles().getSize();
pairValueKernel.setArg<cl::Buffer>(6, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(6, nb.getInteractingTiles().getDeviceBuffer());
pairValueKernel.setArg<cl_uint>(13, maxTiles);
pairValueKernel.setArg<cl_uint>(13, maxTiles);
pairValueKernel.setArg<cl::Buffer>(16, nb.getInteractingAtoms().getDeviceBuffer());
pairValueKernel.setArg<cl::Buffer>(16, nb.getInteractingAtoms().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(
7
, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(
8
, nb.getInteractingTiles().getDeviceBuffer());
pairEnergyKernel.setArg<cl_uint>(1
4
, maxTiles);
pairEnergyKernel.setArg<cl_uint>(1
5
, maxTiles);
pairEnergyKernel.setArg<cl::Buffer>(1
7
, nb.getInteractingAtoms().getDeviceBuffer());
pairEnergyKernel.setArg<cl::Buffer>(1
8
, nb.getInteractingAtoms().getDeviceBuffer());
}
}
}
}
cl.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
cl.executeKernel(pairValueKernel, nb.getNumForceThreadBlocks()*nb.getForceThreadBlockSize(), nb.getForceThreadBlockSize());
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
bb7cba08
...
@@ -18,7 +18,7 @@ __kernel void computeN2Energy(
...
@@ -18,7 +18,7 @@ __kernel void computeN2Energy(
#
endif
#
endif
__global
mixed*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
mixed*
restrict
energyBuffer,
__local
real4*
restrict
local_force,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
int
needEnergy,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
...
@@ -82,7 +82,8 @@ __kernel void computeN2Energy(
...
@@ -82,7 +82,8 @@ __kernel void computeN2Energy(
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR
/=
-r
;
dEdR
/=
-r
;
}
}
energy
+=
0.5f*tempEnergy
;
if
(
needEnergy
)
energy
+=
0.5f*tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -133,7 +134,8 @@ __kernel void computeN2Energy(
...
@@ -133,7 +134,8 @@ __kernel void computeN2Energy(
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR /= -r;
dEdR /= -r;
}
}
energy += tempEnergy;
if (needEnergy)
energy += tempEnergy;
delta.xyz *= dEdR;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
force.xyz -= delta.xyz;
atom2 = tbx+tj;
atom2 = tbx+tj;
...
@@ -289,7 +291,8 @@ __kernel void computeN2Energy(
...
@@ -289,7 +291,8 @@ __kernel void computeN2Energy(
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR
/=
-r
;
dEdR
/=
-r
;
}
}
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
atom2
=
tbx+tj
;
atom2
=
tbx+tj
;
...
@@ -328,7 +331,8 @@ __kernel void computeN2Energy(
...
@@ -328,7 +331,8 @@ __kernel void computeN2Energy(
COMPUTE_INTERACTION
COMPUTE_INTERACTION
dEdR
/=
-r
;
dEdR
/=
-r
;
}
}
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
atom2
=
tbx+tj
;
atom2
=
tbx+tj
;
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
bb7cba08
...
@@ -385,7 +385,7 @@ __kernel void computeGBSAForce1(
...
@@ -385,7 +385,7 @@ __kernel void computeGBSAForce1(
#else
#else
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
__global real4* restrict forceBuffers, __global real* restrict global_bornForce,
#endif
#endif
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
int needEnergy,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
...
@@ -452,7 +452,8 @@ __kernel void computeGBSAForce1(
...
@@ -452,7 +452,8 @@ __kernel void computeGBSAForce1(
if (atom1 != y*TILE_SIZE+j)
if (atom1 != y*TILE_SIZE+j)
tempEnergy -= scaledChargeProduct/CUTOFF;
tempEnergy -= scaledChargeProduct/CUTOFF;
#endif
#endif
energy += 0.5f*tempEnergy;
if (needEnergy)
energy += 0.5f*tempEnergy;
delta.xyz *= dEdR;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
force.xyz -= delta.xyz;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -506,7 +507,8 @@ __kernel void computeGBSAForce1(
...
@@ -506,7 +507,8 @@ __kernel void computeGBSAForce1(
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
tempEnergy -= scaledChargeProduct/CUTOFF;
tempEnergy -= scaledChargeProduct/CUTOFF;
#endif
#endif
energy += tempEnergy;
if (needEnergy)
energy += tempEnergy;
delta.xyz *= dEdR;
delta.xyz *= dEdR;
force.xyz -= delta.xyz;
force.xyz -= delta.xyz;
localData[tbx+tj].fx += delta.x;
localData[tbx+tj].fx += delta.x;
...
@@ -669,7 +671,8 @@ __kernel void computeGBSAForce1(
...
@@ -669,7 +671,8 @@ __kernel void computeGBSAForce1(
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
tempEnergy
-=
scaledChargeProduct/CUTOFF
;
tempEnergy
-=
scaledChargeProduct/CUTOFF
;
#
endif
#
endif
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
localData[tbx+tj].fx
+=
delta.x
;
localData[tbx+tj].fx
+=
delta.x
;
...
@@ -717,7 +720,8 @@ __kernel void computeGBSAForce1(
...
@@ -717,7 +720,8 @@ __kernel void computeGBSAForce1(
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
tempEnergy
-=
scaledChargeProduct/CUTOFF
;
tempEnergy
-=
scaledChargeProduct/CUTOFF
;
#
endif
#
endif
energy
+=
tempEnergy
;
if
(
needEnergy
)
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
force.xyz
-=
delta.xyz
;
localData[tbx+tj].fx
+=
delta.x
;
localData[tbx+tj].fx
+=
delta.x
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment