Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
408469c3
Commit
408469c3
authored
Oct 07, 2010
by
Peter Eastman
Browse files
Optimizations to PME
parent
45b0302d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
258 additions
and
265 deletions
+258
-265
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
...src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
+82
-87
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
.../src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
+20
-20
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+92
-93
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
.../src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
+63
-64
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
...a/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
+1
-1
No files found.
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.cu
View file @
408469c3
...
...
@@ -15,9 +15,9 @@ void SetCalculateAmoebaPmeDirectElectrostaticSim(amoebaGpuContext amoebaGpu)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
status
=
cudaMemcpyToSymbol
(
cAmoebaSim
,
&
amoebaGpu
->
amoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
status
=
cudaMemcpyToSymbol
(
cAmoebaSim
,
&
amoebaGpu
->
amoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"SetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed"
);
}
...
...
@@ -25,15 +25,15 @@ void GetCalculateAmoebaPmeDirectElectrostaticSim(amoebaGpuContext amoebaGpu)
{
cudaError_t
status
;
gpuContext
gpu
=
amoebaGpu
->
gpuContext
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
status
=
cudaMemcpyFromSymbol
(
&
amoebaGpu
->
amoebaSim
,
cAmoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
status
=
cudaMemcpyFromSymbol
(
&
amoebaGpu
->
amoebaSim
,
cAmoebaSim
,
sizeof
(
cudaAmoebaGmxSimulation
));
RTERROR
(
status
,
"GetCalculateAmoebaPmeDirectElectrostaticSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"
);
}
static
int
const
PScaleIndex
=
0
;
static
int
const
DScaleIndex
=
1
;
static
int
const
UScaleIndex
=
2
;
static
int
const
PScaleIndex
=
0
;
static
int
const
DScaleIndex
=
1
;
static
int
const
UScaleIndex
=
2
;
static
int
const
MScaleIndex
=
3
;
static
int
const
LastScalingIndex
=
4
;
...
...
@@ -105,7 +105,7 @@ __device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = pullBack[pullIndex].w;
}
}
}
*/
...
...
@@ -152,43 +152,16 @@ __device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDir
}
__device__
void
calculatePmeDirectElectrostaticPairIxn_kernel
(
PmeDirectElectrostaticParticle
&
atomI
,
PmeDirectElectrostaticParticle
&
atomJ
,
float
*
scalingFactors
,
float
*
outputForce
,
float
outputTorque
[
2
][
3
],
float
*
energy
float
*
scalingFactors
,
float
*
outputForce
,
float
3
outputTorque
[
3
],
float
*
energy
#ifdef AMOEBA_DEBUG
,
float4
*
debugArray
,
float4
*
debugArray
#endif
){
float
e
,
ei
;
float
erl
,
erli
;
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
// set the permanent multipole and induced dipole values;
float
pdi
=
atomI
.
damp
;
float
pti
=
atomI
.
thole
;
float
ci
=
atomI
.
q
;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi4
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
5
];
float
qi7
=
atomI
.
labFrameQuadrupole
[
6
];
float
qi8
=
atomI
.
labFrameQuadrupole
[
7
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
8
];
float
xr
=
atomJ
.
x
-
atomI
.
x
;
float
yr
=
atomJ
.
y
-
atomI
.
y
;
float
zr
=
atomJ
.
z
-
atomI
.
z
;
// periodic box
// periodic box
xr
-=
floor
(
xr
*
cSim
.
invPeriodicBoxSizeX
+
0.5
f
)
*
cSim
.
periodicBoxSizeX
;
yr
-=
floor
(
yr
*
cSim
.
invPeriodicBoxSizeY
+
0.5
f
)
*
cSim
.
periodicBoxSizeY
;
...
...
@@ -199,11 +172,33 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float
r
=
sqrt
(
r2
);
float
ck
=
atomJ
.
q
;
float
conversionFactor
=
(
-
cAmoebaSim
.
electric
/
cAmoebaSim
.
dielec
);
// set the permanent multipole and induced dipole values;
float
pdi
=
atomI
.
damp
;
float
pti
=
atomI
.
thole
;
float
ci
=
atomI
.
q
;
float
di1
=
atomI
.
labFrameDipole
[
0
];
float
di2
=
atomI
.
labFrameDipole
[
1
];
float
di3
=
atomI
.
labFrameDipole
[
2
];
float
qi1
=
atomI
.
labFrameQuadrupole
[
0
];
float
qi2
=
atomI
.
labFrameQuadrupole
[
1
];
float
qi3
=
atomI
.
labFrameQuadrupole
[
2
];
float
qi4
=
atomI
.
labFrameQuadrupole
[
3
];
float
qi5
=
atomI
.
labFrameQuadrupole
[
4
];
float
qi6
=
atomI
.
labFrameQuadrupole
[
5
];
float
qi7
=
atomI
.
labFrameQuadrupole
[
6
];
float
qi8
=
atomI
.
labFrameQuadrupole
[
7
];
float
qi9
=
atomI
.
labFrameQuadrupole
[
8
];
float
dk1
=
atomJ
.
labFrameDipole
[
0
];
float
dk2
=
atomJ
.
labFrameDipole
[
1
];
float
dk3
=
atomJ
.
labFrameDipole
[
2
];
float
qk1
=
atomJ
.
labFrameQuadrupole
[
0
];
float
qk2
=
atomJ
.
labFrameQuadrupole
[
1
];
float
qk3
=
atomJ
.
labFrameQuadrupole
[
2
];
...
...
@@ -213,7 +208,7 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
float
qk7
=
atomJ
.
labFrameQuadrupole
[
6
];
float
qk8
=
atomJ
.
labFrameQuadrupole
[
7
];
float
qk9
=
atomJ
.
labFrameQuadrupole
[
8
];
// calculate the real space error function terms;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
...
...
@@ -497,18 +492,18 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// compute the energy contributions for this interaction
e
=
bn0
*
gl0
+
bn1
*
(
gl1
+
gl6
)
float
e
=
bn0
*
gl0
+
bn1
*
(
gl1
+
gl6
)
+
bn2
*
(
gl2
+
gl7
+
gl8
)
+
bn3
*
(
gl3
+
gl5
)
+
bn4
*
gl4
;
ei
=
0.5
f
*
(
bn1
*
(
gli1
+
gli6
)
float
ei
=
0.5
f
*
(
bn1
*
(
gli1
+
gli6
)
+
bn2
*
(
gli2
+
gli7
)
+
bn3
*
gli3
);
// get the real energy without any screening function
erl
=
rr1
*
gl0
+
rr3
*
(
gl1
+
gl6
)
float
erl
=
rr1
*
gl0
+
rr3
*
(
gl1
+
gl6
)
+
rr5
*
(
gl2
+
gl7
+
gl8
)
+
rr7
*
(
gl3
+
gl5
)
+
rr9
*
gl4
;
erli
=
0.5
f
*
(
rr3
*
(
gli1
+
gli6
)
*
psc3
float
erli
=
0.5
f
*
(
rr3
*
(
gli1
+
gli6
)
*
psc3
+
rr5
*
(
gli2
+
gli7
)
*
psc5
+
rr7
*
gli3
*
psc7
);
e
=
e
-
(
1.0
f
-
scalingFactors
[
MScaleIndex
])
*
erl
;
...
...
@@ -552,23 +547,23 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// intermediate variables for induced force terms
float
gfi1
=
0.5
f
*
bn2
*
(
gli1
+
glip1
+
gli6
+
glip6
)
+
0.5
f
*
bn2
*
scip2
+
0.5
f
*
bn3
*
(
gli2
+
glip2
+
gli7
+
glip7
)
-
0.5
f
*
bn3
*
(
sci3
*
scip4
+
scip3
*
sci4
)
+
0.5
f
*
bn4
*
(
gli3
+
glip3
);
float
gfi1
=
0.5
f
*
(
bn2
*
(
gli1
+
glip1
+
gli6
+
glip6
)
+
bn2
*
scip2
+
bn3
*
(
gli2
+
glip2
+
gli7
+
glip7
)
-
bn3
*
(
sci3
*
scip4
+
scip3
*
sci4
)
+
bn4
*
(
gli3
+
glip3
)
)
;
float
gfi2
=
-
ck
*
bn1
+
sc4
*
bn2
-
sc6
*
bn3
;
float
gfi3
=
ci
*
bn1
+
sc3
*
bn2
+
sc5
*
bn3
;
float
gfi4
=
2.0
f
*
bn2
;
float
gfi5
=
bn3
*
(
sci4
+
scip4
);
float
gfi6
=
-
bn3
*
(
sci3
+
scip3
);
float
gfri1
=
0.5
f
*
rr5
*
((
gli1
+
gli6
)
*
psc3
float
gfri1
=
0.5
f
*
(
rr5
*
((
gli1
+
gli6
)
*
psc3
+
(
glip1
+
glip6
)
*
dsc3
+
scip2
*
usc3
)
+
0.5
f
*
rr7
*
((
gli7
+
gli2
)
*
psc5
+
rr7
*
((
gli7
+
gli2
)
*
psc5
+
(
glip7
+
glip2
)
*
dsc5
-
(
sci3
*
scip4
+
scip3
*
sci4
)
*
usc5
)
+
0.5
f
*
rr9
*
(
gli3
*
psc7
+
glip3
*
dsc7
);
+
rr9
*
(
gli3
*
psc7
+
glip3
*
dsc7
)
)
;
float
gfri4
=
2.0
f
*
rr5
;
float
gfri5
=
rr7
*
(
sci4
*
psc7
+
scip4
*
dsc7
);
float
gfri6
=
-
rr7
*
(
sci3
*
psc7
+
scip3
*
dsc7
);
...
...
@@ -856,15 +851,15 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
outputForce
[
0
]
=
conversionFactor
*
(
ftm21
+
ftm2i1
);
outputForce
[
1
]
=
conversionFactor
*
(
ftm22
+
ftm2i2
);
outputForce
[
2
]
=
conversionFactor
*
(
ftm23
+
ftm2i3
);
conversionFactor
*=
-
1.0
;
outputTorque
[
0
]
[
0
]
=
conversionFactor
*
(
ttm21
+
ttm2i1
);
outputTorque
[
0
][
1
]
=
conversionFactor
*
(
ttm22
+
ttm2i2
);
outputTorque
[
0
][
2
]
=
conversionFactor
*
(
ttm23
+
ttm2i3
);
outputTorque
[
1
]
[
0
]
=
conversionFactor
*
(
ttm31
+
ttm3i1
);
outputTorque
[
1
]
[
1
]
=
conversionFactor
*
(
ttm32
+
ttm3i2
);
outputTorque
[
1
]
[
2
]
=
conversionFactor
*
(
ttm33
+
ttm3i3
);
outputTorque
[
0
]
.
x
=
conversionFactor
*
(
ttm21
+
ttm2i1
);
outputTorque
[
1
]
.
x
=
conversionFactor
*
(
ttm22
+
ttm2i2
);
outputTorque
[
2
]
.
x
=
conversionFactor
*
(
ttm23
+
ttm2i3
);
outputTorque
[
1
]
.
x
=
conversionFactor
*
(
ttm31
+
ttm3i1
);
outputTorque
[
1
]
.
y
=
conversionFactor
*
(
ttm32
+
ttm3i2
);
outputTorque
[
1
]
.
z
=
conversionFactor
*
(
ttm33
+
ttm3i3
);
#ifdef AMOEBA_DEBUG
int
debugIndex
=
0
;
...
...
@@ -958,14 +953,14 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
outputForce
[
0
]
=
0.0
f
;
outputForce
[
1
]
=
0.0
f
;
outputForce
[
2
]
=
0.0
f
;
outputTorque
[
0
]
[
0
]
=
0.0
f
;
outputTorque
[
0
]
[
1
]
=
0.0
f
;
outputTorque
[
0
]
[
2
]
=
0.0
f
;
outputTorque
[
1
]
[
0
]
=
0.0
f
;
outputTorque
[
1
]
[
1
]
=
0.0
f
;
outputTorque
[
1
]
[
2
]
=
0.0
f
;
outputTorque
[
0
]
.
x
=
0.0
f
;
outputTorque
[
0
]
.
y
=
0.0
f
;
outputTorque
[
0
]
.
z
=
0.0
f
;
outputTorque
[
1
]
.
x
=
0.0
f
;
outputTorque
[
1
]
.
y
=
0.0
f
;
outputTorque
[
1
]
.
z
=
0.0
f
;
*
energy
=
0.0
f
;
...
...
@@ -977,7 +972,7 @@ for( int ii = 0; ii < 5; ii++ ){
debugArray
[
ii
].
w
=
(
float
)
(
11
*
ii
);
}
#endif
}
return
;
...
...
@@ -1070,7 +1065,7 @@ static void kReduceForceTorque(amoebaGpuContext amoebaGpu )
void
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpuContext
amoebaGpu
)
{
// ---------------------------------------------------------------------------------------
static
unsigned
int
threadsPerBlock
=
0
;
...
...
@@ -1098,7 +1093,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
methodName
,
gpu
->
natoms
,
amoebaGpu
->
maxCovalentDegreeSz
,
amoebaGpu
->
pGamma
,
amoebaGpu
->
scalingDistanceCutoff
);
}
}
int
paddedNumberOfAtoms
=
amoebaGpu
->
gpuContext
->
sim
.
paddedNumberOfAtoms
;
CUDAStream
<
float4
>*
debugArray
=
new
CUDAStream
<
float4
>
(
paddedNumberOfAtoms
*
paddedNumberOfAtoms
,
1
,
"DebugArray"
);
memset
(
debugArray
->
_pSysStream
[
0
],
0
,
sizeof
(
float
)
*
4
*
paddedNumberOfAtoms
*
paddedNumberOfAtoms
);
...
...
@@ -1120,8 +1115,8 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
// (void) fprintf( amoebaGpu->log, " %u %s %s\n", ii, lineTokens[0].c_str(), lineTokens[lineTokenIndex].c_str() ); fflush( amoebaGpu->log );
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psInducedDipole
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
offset
-=
3
;
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
amoebaGpu
->
psInducedDipolePolar
->
_pSysStream
[
0
][
offset
++
]
=
static_cast
<
float
>
(
atof
(
lineTokens
[
lineTokenIndex
++
].
c_str
()));
...
...
@@ -1153,7 +1148,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
kClearFields_3
(
amoebaGpu
,
2
);
#ifdef AMOEBA_DEBUG
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticN2Forces: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u
\n
"
,
(
void
)
fprintf
(
amoebaGpu
->
log
,
"kCalculateAmoebaPmeDirectElectrostaticN2Forces: threadsPerBlock=%u getThreadsPerBlock=%d sizeof=%u
\n
"
,
threadsPerBlock
,
getThreadsPerBlock
(
amoebaGpu
,
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
)),
(
sizeof
(
PmeDirectElectrostaticParticle
)
+
sizeof
(
float3
))
);
...
...
@@ -1200,19 +1195,19 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
amoebaGpu
->
psForce
->
Download
();
amoebaGpu
->
psTorque
->
Download
();
debugArray
->
Download
();
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Finished PmeDirectElectrostatic kernel execution
\n
"
);
(
void
)
fflush
(
amoebaGpu
->
log
);
int
maxPrint
=
5
;
float
conversion
=
1.0
f
/
41.84
f
;
float
forceSum
[
3
]
=
{
0.0
f
,
0.0
f
,
0.0
f
};
for
(
int
ii
=
0
;
ii
<
gpu
->
natoms
;
ii
++
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%5d "
,
ii
);
int
indexOffset
=
ii
*
3
;
// force
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PmeDirectElectrostaticF [%16.9e %16.9e %16.9e] "
,
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
],
conversion
*
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
1
],
...
...
@@ -1223,12 +1218,12 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
forceSum
[
2
]
+=
amoebaGpu
->
psForce
->
_pSysStream
[
0
][
indexOffset
+
2
];
// torque
(
void
)
fprintf
(
amoebaGpu
->
log
,
"PmeDirectElectrostaticT [%16.9e %16.9e %16.9e] "
,
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
],
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
1
],
conversion
*
amoebaGpu
->
psTorque
->
_pSysStream
[
0
][
indexOffset
+
2
]
);
(
void
)
fprintf
(
amoebaGpu
->
log
,
"
\n
"
);
if
(
ii
==
maxPrint
&&
(
gpu
->
natoms
-
maxPrint
)
>
ii
){
ii
=
gpu
->
natoms
-
maxPrint
;
...
...
@@ -1243,7 +1238,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
else
{
energy
+=
(
*
gpu
->
psEnergy
)[
ii
];
}
}
}
(
void
)
fprintf
(
amoebaGpu
->
log
,
"Force sums: [%16.9e %16.9e %16.9e] Energy=%16.9e
\n
"
,
forceSum
[
0
],
forceSum
[
1
],
forceSum
[
2
],
energy
);
if
(
0
){
...
...
@@ -1279,7 +1274,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
}
}
(
void
)
fflush
(
amoebaGpu
->
log
);
if
(
1
){
std
::
vector
<
int
>
fileId
;
//fileId.push_back( 0 );
...
...
@@ -1290,7 +1285,7 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
cudaWriteVectorOfDoubleVectorsToFile
(
"CudaPmeDirectForceTorque"
,
fileId
,
outputVector
);
}
}
}
delete
debugArray
;
#endif
...
...
@@ -1310,5 +1305,5 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{
cudaComputeAmoebaPmeDirectElectrostatic
(
amoebaGpu
);
kCalculateAmoebaPMEInducedDipoleForces
(
amoebaGpu
);
}
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeDirectElectrostatic.h
View file @
408469c3
...
...
@@ -117,7 +117,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
{
float
force
[
3
];
float
torque
[
2
]
[
3
]
;
float
3
torque
[
2
];
unsigned
int
atomJ
=
y
+
j
;
...
...
@@ -151,9 +151,9 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
totalEnergy
+=
mask
?
0
.
5
*
energy
:
0
.
0
f
;
...
...
@@ -181,15 +181,15 @@ if( atomI == targetAtom ){
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
mask
?
energy
:
0
.
0
f
;
index
+=
cAmoebaSim
.
paddedNumberOfAtoms
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
debugArray
[
index
].
x
=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
debugArray
[
index
].
y
=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
debugArray
[
index
].
z
=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
debugArray
[
index
].
w
=
(
float
)
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
);
for
(
int
pullIndex
=
0
;
pullIndex
<
maxPullIndex
;
pullIndex
++
){
...
...
@@ -304,7 +304,7 @@ if( atomI == targetAtom ){
unsigned
int
atomJ
=
y
+
jIdx
;
float
force
[
3
];
float
torque
[
2
]
[
3
]
;
float
3
torque
[
2
];
// set scale factors
...
...
@@ -335,9 +335,9 @@ if( atomI == targetAtom ){
localParticle
.
force
[
1
]
+=
mask
?
force
[
1
]
:
0
.
0
f
;
localParticle
.
force
[
2
]
+=
mask
?
force
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
[
0
]
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
[
1
]
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
[
2
]
:
0
.
0
f
;
localParticle
.
torque
[
0
]
+=
mask
?
torque
[
0
]
.
x
:
0
.
0
f
;
localParticle
.
torque
[
1
]
+=
mask
?
torque
[
0
]
.
y
:
0
.
0
f
;
localParticle
.
torque
[
2
]
+=
mask
?
torque
[
0
]
.
z
:
0
.
0
f
;
totalEnergy
+=
mask
?
energy
:
0
.
0
f
;
...
...
@@ -349,9 +349,9 @@ if( atomI == targetAtom ){
psA
[
jIdx
].
force
[
1
]
-=
mask
?
force
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
force
[
2
]
-=
mask
?
force
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
0
]
+=
mask
?
torque
[
1
]
[
0
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
1
]
+=
mask
?
torque
[
1
]
[
1
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
2
]
+=
mask
?
torque
[
1
]
[
2
]
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
0
]
+=
mask
?
torque
[
1
]
.
x
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
1
]
+=
mask
?
torque
[
1
]
.
y
:
0
.
0
f
;
psA
[
jIdx
].
torque
[
2
]
+=
mask
?
torque
[
1
]
.
z
:
0
.
0
f
;
}
else
{
...
...
@@ -359,9 +359,9 @@ if( atomI == targetAtom ){
sA
[
threadIdx
.
x
].
tempForce
[
1
]
=
mask
?
0
.
0
f
:
force
[
1
];
sA
[
threadIdx
.
x
].
tempForce
[
2
]
=
mask
?
0
.
0
f
:
force
[
2
];
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
]
[
0
]
;
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
]
[
1
]
;
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
]
[
2
]
;
sA
[
threadIdx
.
x
].
tempTorque
[
0
]
=
mask
?
0
.
0
f
:
torque
[
1
]
.
x
;
sA
[
threadIdx
.
x
].
tempTorque
[
1
]
=
mask
?
0
.
0
f
:
torque
[
1
]
.
y
;
sA
[
threadIdx
.
x
].
tempTorque
[
2
]
=
mask
?
0
.
0
f
:
torque
[
1
]
.
z
;
if
(
tgx
%
2
==
0
){
sumTempBuffer
(
sA
[
threadIdx
.
x
],
sA
[
threadIdx
.
x
+
1
]
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
View file @
408469c3
...
...
@@ -187,137 +187,136 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
float
r
=
sqrtf
(
r2
);
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
float
r
=
sqrtf
(
r2
);
// calculate the error function damping terms
// calculate the error function damping terms
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
float
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
/
r2
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
float
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
float
bn2
=
(
3.0
f
*
bn1
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
float
bn2
=
(
3.0
f
*
bn1
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
float
bn3
=
(
5.0
f
*
bn2
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
float
bn3
=
(
5.0
f
*
bn2
+
alsq2n
*
exp2a
)
/
r2
;
// compute the error function scaled and unscaled terms
// compute the error function scaled and unscaled terms
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
scale7
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
scale7
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
ratio
=
(
r
/
damp
);
ratio
=
ratio
*
ratio
*
ratio
;
float
ratio
=
(
r
/
damp
);
ratio
=
ratio
*
ratio
*
ratio
;
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
damp
=
-
pgamma
*
ratio
;
damp
=
-
pgamma
*
ratio
;
if
(
damp
>
-
50.0
f
)
{
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
);
scale7
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
+
(
0.6
f
*
damp
*
damp
));
if
(
damp
>
-
50.0
f
)
{
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
);
scale7
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
+
(
0.6
f
*
damp
*
damp
));
}
}
}
float
dsc3
=
dscale
*
scale3
;
float
dsc5
=
dscale
*
scale5
;
float
dsc7
=
dscale
*
scale7
;
float
dsc3
=
dscale
*
scale3
;
float
dsc5
=
dscale
*
scale5
;
float
dsc7
=
dscale
*
scale7
;
float
psc3
=
pscale
*
scale3
;
float
psc5
=
pscale
*
scale5
;
float
psc7
=
pscale
*
scale7
;
float
psc3
=
pscale
*
scale3
;
float
psc5
=
pscale
*
scale5
;
float
psc7
=
pscale
*
scale7
;
float
r3
=
(
r
*
r2
);
float
r5
=
(
r3
*
r2
);
float
r7
=
(
r5
*
r2
);
float
drr3
=
(
1.0
f
-
dsc3
)
/
r3
;
float
drr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
drr7
=
15.0
f
*
(
1.0
f
-
dsc7
)
/
r7
;
float
r3
=
(
r
*
r2
);
float
r5
=
(
r3
*
r2
);
float
r7
=
(
r5
*
r2
);
float
drr3
=
(
1.0
f
-
dsc3
)
/
r3
;
float
drr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
drr7
=
15.0
f
*
(
1.0
f
-
dsc7
)
/
r7
;
float
prr3
=
(
1.0
f
-
psc3
)
/
r3
;
float
prr5
=
3.0
f
*
(
1.0
f
-
psc5
)
/
r5
;
float
prr7
=
15.0
f
*
(
1.0
f
-
psc7
)
/
r7
;
float
prr3
=
(
1.0
f
-
psc3
)
/
r3
;
float
prr5
=
3.0
f
*
(
1.0
f
-
psc5
)
/
r5
;
float
prr7
=
15.0
f
*
(
1.0
f
-
psc7
)
/
r7
;
float
dir
=
atomI
.
labFrameDipole_X
*
xr
+
atomI
.
labFrameDipole_Y
*
yr
+
atomI
.
labFrameDipole_Z
*
zr
;
float
dir
=
atomI
.
labFrameDipole_X
*
xr
+
atomI
.
labFrameDipole_Y
*
yr
+
atomI
.
labFrameDipole_Z
*
zr
;
float
qix
=
atomI
.
labFrameQuadrupole_XX
*
xr
+
atomI
.
labFrameQuadrupole_XY
*
yr
+
atomI
.
labFrameQuadrupole_XZ
*
zr
;
float
qiy
=
atomI
.
labFrameQuadrupole_XY
*
xr
+
atomI
.
labFrameQuadrupole_YY
*
yr
+
atomI
.
labFrameQuadrupole_YZ
*
zr
;
float
qiz
=
atomI
.
labFrameQuadrupole_XZ
*
xr
+
atomI
.
labFrameQuadrupole_YZ
*
yr
+
atomI
.
labFrameQuadrupole_ZZ
*
zr
;
float
qix
=
atomI
.
labFrameQuadrupole_XX
*
xr
+
atomI
.
labFrameQuadrupole_XY
*
yr
+
atomI
.
labFrameQuadrupole_XZ
*
zr
;
float
qiy
=
atomI
.
labFrameQuadrupole_XY
*
xr
+
atomI
.
labFrameQuadrupole_YY
*
yr
+
atomI
.
labFrameQuadrupole_YZ
*
zr
;
float
qiz
=
atomI
.
labFrameQuadrupole_XZ
*
xr
+
atomI
.
labFrameQuadrupole_YZ
*
yr
+
atomI
.
labFrameQuadrupole_ZZ
*
zr
;
float
qir
=
qix
*
xr
+
qiy
*
yr
+
qiz
*
zr
;
float
qir
=
qix
*
xr
+
qiy
*
yr
+
qiz
*
zr
;
float
dkr
=
atomJ
.
labFrameDipole_X
*
xr
+
atomJ
.
labFrameDipole_Y
*
yr
+
atomJ
.
labFrameDipole_Z
*
zr
;
float
qkx
=
atomJ
.
labFrameQuadrupole_XX
*
xr
+
atomJ
.
labFrameQuadrupole_XY
*
yr
+
atomJ
.
labFrameQuadrupole_XZ
*
zr
;
float
qky
=
atomJ
.
labFrameQuadrupole_XY
*
xr
+
atomJ
.
labFrameQuadrupole_YY
*
yr
+
atomJ
.
labFrameQuadrupole_YZ
*
zr
;
float
qkz
=
atomJ
.
labFrameQuadrupole_XZ
*
xr
+
atomJ
.
labFrameQuadrupole_YZ
*
yr
+
atomJ
.
labFrameQuadrupole_ZZ
*
zr
;
float
qkr
=
qkx
*
xr
+
qky
*
yr
+
qkz
*
zr
;
float
dkr
=
atomJ
.
labFrameDipole_X
*
xr
+
atomJ
.
labFrameDipole_Y
*
yr
+
atomJ
.
labFrameDipole_Z
*
zr
;
float
qkx
=
atomJ
.
labFrameQuadrupole_XX
*
xr
+
atomJ
.
labFrameQuadrupole_XY
*
yr
+
atomJ
.
labFrameQuadrupole_XZ
*
zr
;
float
qky
=
atomJ
.
labFrameQuadrupole_XY
*
xr
+
atomJ
.
labFrameQuadrupole_YY
*
yr
+
atomJ
.
labFrameQuadrupole_YZ
*
zr
;
float
qkz
=
atomJ
.
labFrameQuadrupole_XZ
*
xr
+
atomJ
.
labFrameQuadrupole_YZ
*
yr
+
atomJ
.
labFrameQuadrupole_ZZ
*
zr
;
float
qkr
=
qkx
*
xr
+
qky
*
yr
+
qkz
*
zr
;
float
fim0
=
-
xr
*
(
bn1
*
atomJ
.
q
-
bn2
*
dkr
+
bn3
*
qkr
)
-
bn1
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
bn2
*
qkx
;
float
fim0
=
-
xr
*
(
bn1
*
atomJ
.
q
-
bn2
*
dkr
+
bn3
*
qkr
)
-
bn1
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
bn2
*
qkx
;
float
fim1
=
-
yr
*
(
bn1
*
atomJ
.
q
-
bn2
*
dkr
+
bn3
*
qkr
)
-
bn1
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
bn2
*
qky
;
float
fim1
=
-
yr
*
(
bn1
*
atomJ
.
q
-
bn2
*
dkr
+
bn3
*
qkr
)
-
bn1
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
bn2
*
qky
;
float
fim2
=
-
zr
*
(
bn1
*
atomJ
.
q
-
bn2
*
dkr
+
bn3
*
qkr
)
-
bn1
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
bn2
*
qkz
;
float
fim2
=
-
zr
*
(
bn1
*
atomJ
.
q
-
bn2
*
dkr
+
bn3
*
qkr
)
-
bn1
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
bn2
*
qkz
;
float
fkm0
=
xr
*
(
bn1
*
atomI
.
q
+
bn2
*
dir
+
bn3
*
qir
)
-
bn1
*
atomI
.
labFrameDipole_X
-
2.0
f
*
bn2
*
qix
;
float
fkm0
=
xr
*
(
bn1
*
atomI
.
q
+
bn2
*
dir
+
bn3
*
qir
)
-
bn1
*
atomI
.
labFrameDipole_X
-
2.0
f
*
bn2
*
qix
;
float
fkm1
=
yr
*
(
bn1
*
atomI
.
q
+
bn2
*
dir
+
bn3
*
qir
)
-
bn1
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
bn2
*
qiy
;
float
fkm1
=
yr
*
(
bn1
*
atomI
.
q
+
bn2
*
dir
+
bn3
*
qir
)
-
bn1
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
bn2
*
qiy
;
float
fkm2
=
zr
*
(
bn1
*
atomI
.
q
+
bn2
*
dir
+
bn3
*
qir
)
-
bn1
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
bn2
*
qiz
;
float
fkm2
=
zr
*
(
bn1
*
atomI
.
q
+
bn2
*
dir
+
bn3
*
qir
)
-
bn1
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
bn2
*
qiz
;
float
fid0
=
-
xr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
drr5
*
qkx
;
float
fid0
=
-
xr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
drr5
*
qkx
;
float
fid1
=
-
yr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
drr5
*
qky
;
float
fid1
=
-
yr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
drr5
*
qky
;
float
fid2
=
-
zr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
drr5
*
qkz
;
float
fid2
=
-
zr
*
(
drr3
*
atomJ
.
q
-
drr5
*
dkr
+
drr7
*
qkr
)
-
drr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
drr5
*
qkz
;
float
fkd0
=
xr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
drr5
*
qix
;
float
fkd0
=
xr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
drr5
*
qix
;
float
fkd1
=
yr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
drr5
*
qiy
;
float
fkd1
=
yr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
drr5
*
qiy
;
float
fkd2
=
zr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
drr5
*
qiz
;
float
fkd2
=
zr
*
(
drr3
*
atomI
.
q
+
drr5
*
dir
+
drr7
*
qir
)
-
drr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
drr5
*
qiz
;
float
fip0
=
-
xr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
prr5
*
qkx
;
float
fip0
=
-
xr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_X
+
2.0
f
*
prr5
*
qkx
;
float
fip1
=
-
yr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
prr5
*
qky
;
float
fip1
=
-
yr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_Y
+
2.0
f
*
prr5
*
qky
;
float
fip2
=
-
zr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
prr5
*
qkz
;
float
fip2
=
-
zr
*
(
prr3
*
atomJ
.
q
-
prr5
*
dkr
+
prr7
*
qkr
)
-
prr3
*
atomJ
.
labFrameDipole_Z
+
2.0
f
*
prr5
*
qkz
;
float
fkp0
=
xr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
prr5
*
qix
;
float
fkp0
=
xr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_X
-
2.0
f
*
prr5
*
qix
;
float
fkp1
=
yr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
prr5
*
qiy
;
float
fkp1
=
yr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_Y
-
2.0
f
*
prr5
*
qiy
;
float
fkp2
=
zr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
prr5
*
qiz
;
// increment the field at each site due to this interaction
float
fkp2
=
zr
*
(
prr3
*
atomI
.
q
+
prr5
*
dir
+
prr7
*
qir
)
-
prr3
*
atomI
.
labFrameDipole_Z
-
2.0
f
*
prr5
*
qiz
;
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
// increment the field at each site due to this interaction
fields
[
0
].
x
=
fim0
-
fid0
;
fields
[
1
].
x
=
fim1
-
fid1
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.cu
View file @
408469c3
...
...
@@ -75,92 +75,91 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
zr
-=
floor
(
zr
*
cSim
.
invPeriodicBoxSizeZ
+
0.5
f
)
*
cSim
.
periodicBoxSizeZ
;
float
r2
=
xr
*
xr
+
yr
*
yr
+
zr
*
zr
;
float
r
=
sqrtf
(
r2
);
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
float
r
=
sqrtf
(
r2
);
// calculate the error function damping terms
// calculate the error function damping terms
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
ralpha
=
cSim
.
alphaEwald
*
r
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
float
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
/
r2
;
float
bn0
=
erfc
(
ralpha
)
/
r
;
float
alsq2
=
2.0
f
*
cSim
.
alphaEwald
*
cSim
.
alphaEwald
;
float
alsq2n
=
1.0
f
/
(
cAmoebaSim
.
sqrtPi
*
cSim
.
alphaEwald
);
float
exp2a
=
exp
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
float
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
float
bn2
=
(
3.0
f
*
bn1
+
alsq2n
*
exp2a
)
/
r2
;
alsq2n
*=
alsq2
;
float
bn2
=
(
3.0
f
*
bn1
+
alsq2n
*
exp2a
)
/
r2
;
// compute the error function scaled and unscaled terms
// compute the error function scaled and unscaled terms
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
scale3
=
1.0
f
;
float
scale5
=
1.0
f
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
if
(
damp
!=
0.0
f
){
float
ratio
=
(
r
/
damp
);
ratio
=
ratio
*
ratio
*
ratio
;
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
damp
=
-
pgamma
*
ratio
;
float
ratio
=
(
r
/
damp
);
ratio
=
ratio
*
ratio
*
ratio
;
float
pgamma
=
atomI
.
thole
<
atomJ
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
damp
=
-
pgamma
*
ratio
;
if
(
damp
>
-
50.0
f
)
{
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
);
if
(
damp
>
-
50.0
f
)
{
float
expdamp
=
exp
(
damp
);
scale3
=
1.0
f
-
expdamp
;
scale5
=
1.0
f
-
expdamp
*
(
1.0
f
-
damp
);
}
}
}
float
dsc3
=
uscale
*
scale3
;
float
dsc5
=
uscale
*
scale5
;
float
dsc3
=
uscale
*
scale3
;
float
dsc5
=
uscale
*
scale5
;
float
r3
=
(
r
*
r2
);
float
r5
=
(
r3
*
r2
);
float
rr3
=
(
1.0
f
-
dsc3
)
/
r3
;
float
rr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
r3
=
(
r
*
r2
);
float
r5
=
(
r3
*
r2
);
float
rr3
=
(
1.0
f
-
dsc3
)
/
r3
;
float
rr5
=
3.0
f
*
(
1.0
f
-
dsc5
)
/
r5
;
float
duir
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
float
dukr
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
duir
=
atomI
.
inducedDipole
[
0
]
*
xr
+
atomI
.
inducedDipole
[
1
]
*
yr
+
atomI
.
inducedDipole
[
2
]
*
zr
;
float
dukr
=
atomJ
.
inducedDipole
[
0
]
*
xr
+
atomJ
.
inducedDipole
[
1
]
*
yr
+
atomJ
.
inducedDipole
[
2
]
*
zr
;
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
float
puir
=
atomI
.
inducedDipolePolar
[
0
]
*
xr
+
atomI
.
inducedDipolePolar
[
1
]
*
yr
+
atomI
.
inducedDipolePolar
[
2
]
*
zr
;
float
pukr
=
atomJ
.
inducedDipolePolar
[
0
]
*
xr
+
atomJ
.
inducedDipolePolar
[
1
]
*
yr
+
atomJ
.
inducedDipolePolar
[
2
]
*
zr
;
bn1
*=
-
1.0
f
;
bn1
*=
-
1.0
f
;
float
fimd0
=
bn1
*
atomJ
.
inducedDipole
[
0
]
+
bn2
*
dukr
*
xr
;
float
fimd1
=
bn1
*
atomJ
.
inducedDipole
[
1
]
+
bn2
*
dukr
*
yr
;
float
fimd2
=
bn1
*
atomJ
.
inducedDipole
[
2
]
+
bn2
*
dukr
*
zr
;
float
fimd0
=
bn1
*
atomJ
.
inducedDipole
[
0
]
+
bn2
*
dukr
*
xr
;
float
fimd1
=
bn1
*
atomJ
.
inducedDipole
[
1
]
+
bn2
*
dukr
*
yr
;
float
fimd2
=
bn1
*
atomJ
.
inducedDipole
[
2
]
+
bn2
*
dukr
*
zr
;
float
fkmd0
=
bn1
*
atomI
.
inducedDipole
[
0
]
+
bn2
*
duir
*
xr
;
float
fkmd1
=
bn1
*
atomI
.
inducedDipole
[
1
]
+
bn2
*
duir
*
yr
;
float
fkmd2
=
bn1
*
atomI
.
inducedDipole
[
2
]
+
bn2
*
duir
*
zr
;
float
fkmd0
=
bn1
*
atomI
.
inducedDipole
[
0
]
+
bn2
*
duir
*
xr
;
float
fkmd1
=
bn1
*
atomI
.
inducedDipole
[
1
]
+
bn2
*
duir
*
yr
;
float
fkmd2
=
bn1
*
atomI
.
inducedDipole
[
2
]
+
bn2
*
duir
*
zr
;
float
fimp0
=
bn1
*
atomJ
.
inducedDipolePolar
[
0
]
+
bn2
*
pukr
*
xr
;
float
fimp1
=
bn1
*
atomJ
.
inducedDipolePolar
[
1
]
+
bn2
*
pukr
*
yr
;
float
fimp2
=
bn1
*
atomJ
.
inducedDipolePolar
[
2
]
+
bn2
*
pukr
*
zr
;
float
fimp0
=
bn1
*
atomJ
.
inducedDipolePolar
[
0
]
+
bn2
*
pukr
*
xr
;
float
fimp1
=
bn1
*
atomJ
.
inducedDipolePolar
[
1
]
+
bn2
*
pukr
*
yr
;
float
fimp2
=
bn1
*
atomJ
.
inducedDipolePolar
[
2
]
+
bn2
*
pukr
*
zr
;
float
fkmp0
=
bn1
*
atomI
.
inducedDipolePolar
[
0
]
+
bn2
*
puir
*
xr
;
float
fkmp1
=
bn1
*
atomI
.
inducedDipolePolar
[
1
]
+
bn2
*
puir
*
yr
;
float
fkmp2
=
bn1
*
atomI
.
inducedDipolePolar
[
2
]
+
bn2
*
puir
*
zr
;
float
fkmp0
=
bn1
*
atomI
.
inducedDipolePolar
[
0
]
+
bn2
*
puir
*
xr
;
float
fkmp1
=
bn1
*
atomI
.
inducedDipolePolar
[
1
]
+
bn2
*
puir
*
yr
;
float
fkmp2
=
bn1
*
atomI
.
inducedDipolePolar
[
2
]
+
bn2
*
puir
*
zr
;
rr3
*=
-
1.0
f
;;
float
fid0
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
rr5
*
dukr
*
xr
;
float
fid1
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
rr5
*
dukr
*
yr
;
float
fid2
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
rr5
*
dukr
*
zr
;
rr3
*=
-
1.0
f
;;
float
fid0
=
rr3
*
atomJ
.
inducedDipole
[
0
]
+
rr5
*
dukr
*
xr
;
float
fid1
=
rr3
*
atomJ
.
inducedDipole
[
1
]
+
rr5
*
dukr
*
yr
;
float
fid2
=
rr3
*
atomJ
.
inducedDipole
[
2
]
+
rr5
*
dukr
*
zr
;
float
fkd0
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
rr5
*
duir
*
xr
;
float
fkd1
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
rr5
*
duir
*
yr
;
float
fkd2
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
rr5
*
duir
*
zr
;
float
fkd0
=
rr3
*
atomI
.
inducedDipole
[
0
]
+
rr5
*
duir
*
xr
;
float
fkd1
=
rr3
*
atomI
.
inducedDipole
[
1
]
+
rr5
*
duir
*
yr
;
float
fkd2
=
rr3
*
atomI
.
inducedDipole
[
2
]
+
rr5
*
duir
*
zr
;
float
fip0
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
rr5
*
pukr
*
xr
;
float
fip1
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
rr5
*
pukr
*
yr
;
float
fip2
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
rr5
*
pukr
*
zr
;
float
fip0
=
rr3
*
atomJ
.
inducedDipolePolar
[
0
]
+
rr5
*
pukr
*
xr
;
float
fip1
=
rr3
*
atomJ
.
inducedDipolePolar
[
1
]
+
rr5
*
pukr
*
yr
;
float
fip2
=
rr3
*
atomJ
.
inducedDipolePolar
[
2
]
+
rr5
*
pukr
*
zr
;
float
fkp0
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
rr5
*
puir
*
xr
;
float
fkp1
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
rr5
*
puir
*
yr
;
float
fkp2
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
rr5
*
puir
*
zr
;
float
fkp0
=
rr3
*
atomI
.
inducedDipolePolar
[
0
]
+
rr5
*
puir
*
xr
;
float
fkp1
=
rr3
*
atomI
.
inducedDipolePolar
[
1
]
+
rr5
*
puir
*
yr
;
float
fkp2
=
rr3
*
atomI
.
inducedDipolePolar
[
2
]
+
rr5
*
puir
*
zr
;
// increment the field at each site due to this interaction
if
(
r2
<=
cSim
.
nonbondedCutoffSqr
){
// increment the field at each site due to this interaction
fields
[
0
].
x
=
fimd0
-
fid0
;
fields
[
0
].
y
=
fkmd0
-
fkd0
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeMutualInducedField.h
View file @
408469c3
...
...
@@ -29,7 +29,7 @@
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__
(
GF1XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#elif (__CUDA_ARCH__ >= 1
3
0)
#elif (__CUDA_ARCH__ >= 1
2
0)
__launch_bounds__
(
GT2XX_NONBOND_THREADS_PER_BLOCK
,
1
)
#else
__launch_bounds__
(
G8X_NONBOND_THREADS_PER_BLOCK
,
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment