Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cf335495
Commit
cf335495
authored
Sep 03, 2010
by
Mark Friedrichs
Browse files
PME real space and self terms for fixed E-field
parent
acb46362
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1168 additions
and
227 deletions
+1168
-227
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+31
-6
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
...ins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
+6
-1
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.h
.../cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.h
+9
-30
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
...forms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
+0
-46
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.h
...tforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.h
+10
-49
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
...cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
+45
-90
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
+489
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
...rms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
+535
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRealSpaceEwald.cu
...ms/cuda/src/kernels/kCalculateAmoebaCudaRealSpaceEwald.cu
+3
-0
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
...platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
+6
-2
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
...amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
+34
-3
No files found.
plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
View file @
cf335495
...
...
@@ -1420,6 +1420,18 @@ void gpuKirkwoodAllocate( amoebaGpuContext amoebaGpu )
}
/**
 * Fill a 2048-entry table of erfc() samples and hand it to the GPU context.
 *
 * Entry i holds erfc( i*(alphaEwald*nonbondedCutoff)/tableSize ), so the table
 * samples the argument range [0, alphaEwald*nonbondedCutoff).
 *
 * Fields written:
 *   gpu->sim.tabulatedErfcSize   number of table entries
 *   gpu->sim.tabulatedErfcScale  tableSize/(alphaEwald*nonbondedCutoff)
 *   gpu->psTabulatedErfc         newly allocated stream holding the table
 *   gpu->sim.pTabulatedErfc      the stream's _pDevData pointer
 *
 * gpu->sim.alphaEwald and gpu->sim.nonbondedCutoff are read here, so they must
 * be set before this is called.
 *
 * @param gpu   context whose sim settings are read and whose table is created
 */
static void tabulateErfc( gpuContext gpu )
{
    const int numberOfEntries         = 2048;

    gpu->sim.tabulatedErfcSize        = numberOfEntries;
    gpu->sim.tabulatedErfcScale       = numberOfEntries/(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff);

    gpu->psTabulatedErfc              = new CUDAStream<float>( numberOfEntries, 1, "TabulatedErfc" );
    gpu->sim.pTabulatedErfc           = gpu->psTabulatedErfc->_pDevData;

    // sample erfc at evenly spaced arguments
    int entry = 0;
    while( entry < numberOfEntries ){
        (*gpu->psTabulatedErfc)[entry] = (float) erfc( entry*(gpu->sim.alphaEwald*gpu->sim.nonbondedCutoff)/numberOfEntries );
        ++entry;
    }

    gpu->psTabulatedErfc->Upload();
}
/**---------------------------------------------------------------------------------------
Create/initialize data structs associated w/ molecular -> lab frame calculation
...
...
@@ -1525,14 +1537,26 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
else
if
(
nonbondedMethod
==
1
){
amoebaGpu
->
multipoleNonbondedMethod
=
AMOEBA_PARTICLE_MESH_EWALD
;
}
else
{
throw
OpenMM
::
OpenMMException
(
"multipoleNonbondedMethod not recogn
z
ied.
\n
"
);
throw
OpenMM
::
OpenMMException
(
"multipoleNonbondedMethod not recogni
z
ed.
\n
"
);
}
amoebaGpu
->
amoebaSim
.
cutoffDistance2
=
cutoffDistance
*
cutoffDistance
;
amoebaGpu
->
amoebaSim
.
sqrtPi
=
sqrt
(
3.1415926535897932384626433832795
);
amoebaGpu
->
amoebaSim
.
aewald
=
aewald
;
amoebaGpu
->
amoebaSim
.
electric
=
electricConstant
;
if
(
amoebaGpu
->
log
){
(
void
)
fprintf
(
amoebaGpu
->
log
,
"%s Nonbonded method=%d %d [NoCutoff=%d PME=%d]
\n
"
,
methodName
.
c_str
(),
nonbondedMethod
,
amoebaGpu
->
multipoleNonbondedMethod
,
AMOEBA_NO_CUTOFF
,
AMOEBA_PARTICLE_MESH_EWALD
);
(
void
)
fflush
(
amoebaGpu
->
log
);
}
amoebaGpu
->
amoebaSim
.
cutoffDistance2
=
cutoffDistance
*
cutoffDistance
;
amoebaGpu
->
amoebaSim
.
sqrtPi
=
sqrt
(
3.1415926535897932384626433832795
);
amoebaGpu
->
amoebaSim
.
aewald
=
aewald
;
amoebaGpu
->
amoebaSim
.
electric
=
electricConstant
;
amoebaGpu
->
gpuContext
->
sim
.
alphaEwald
=
aewald
;
amoebaGpu
->
gpuContext
->
sim
.
nonbondedCutoff
=
cutoffDistance
;
tabulateErfc
(
amoebaGpu
->
gpuContext
);
if
(
amoebaGpu
->
amoebaSim
.
dielec
<
1.0e-05
){
amoebaGpu
->
amoebaSim
.
dielec
=
1.0
f
;
amoebaGpu
->
amoebaSim
.
dielec
=
1.0
f
;
}
for
(
int
ii
=
0
;
ii
<
static_cast
<
int
>
(
charges
.
size
());
ii
++
){
...
...
@@ -2593,6 +2617,7 @@ void amoebaGpuSetConstants(amoebaGpuContext amoebaGpu)
SetCalculateAmoebaCudaVdw14_7Sim
(
amoebaGpu
);
SetCalculateAmoebaCudaWcaDispersionSim
(
amoebaGpu
);
SetCalculateAmoebaCudaMutualInducedFieldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpu
);
SetCalculateAmoebaElectrostaticSim
(
amoebaGpu
);
SetCalculateAmoebaRealSpaceEwaldSim
(
amoebaGpu
);
SetCalculateAmoebaCudaMapTorquesSim
(
amoebaGpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaCudaKernels.h
View file @
cf335495
...
...
@@ -65,12 +65,17 @@ extern void SetCalculateAmoebaCudaWcaDispersionSim(amoebaGpuContext gpu);
extern
void
GetCalculateAmoebaCudaWcaDispersionSim
(
amoebaGpuContext
gpu
);
extern
void
kCalculateAmoebaWcaDispersionForces
(
amoebaGpuContext
amoebaGpu
);
// fixed electric field
// fixed electric field
-- no cutoff
extern
void
SetCalculateAmoebaCudaFixedEFieldSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaFixedEFieldSim
(
amoebaGpuContext
gpu
);
extern
void
cudaComputeAmoebaFixedEField
(
amoebaGpuContext
gpu
);
// fixed electric field -- PME
extern
void
SetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpuContext
gpu
);
extern
void
GetCalculateAmoebaCudaPmeFixedEFieldSim
(
amoebaGpuContext
gpu
);
extern
void
cudaComputeAmoebaPmeFixedEField
(
amoebaGpuContext
gpu
);
// fixed electric field and Gk
extern
void
SetCalculateAmoebaCudaFixedEAndGKFieldsSim
(
amoebaGpuContext
gpu
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEAndGkFields.h
View file @
cf335495
...
...
@@ -83,6 +83,9 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
FixedFieldParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
FixedFieldParticle
localParticle
;
loadFixedFieldShared
(
&
localParticle
,
atomI
,
bornRadii
);
float4
iCoord
=
atomCoord
[
atomI
];
float
eFieldSum
[
3
];
...
...
@@ -106,9 +109,7 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
// load coordinates, charge, ...
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
cAmoebaSim
.
pDampingFactorAndThole
,
bornRadii
);
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
bornRadii
);
if
(
!
bExclusionFlag
)
{
...
...
@@ -125,12 +126,7 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
loadFixedFieldParticleData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
&
jBornRadius
);
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -182,12 +178,7 @@ void METHOD_NAME(kCalculateAmoebaFixedEAndGkField, _kernel)(
loadFixedFieldParticleData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
&
jBornRadius
);
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -366,9 +357,7 @@ if( atomI == targetAtom ){
{
// load coordinates, charge, ...
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
cAmoebaSim
.
pDampingFactorAndThole
,
bornRadii
);
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
bornRadii
);
}
...
...
@@ -387,12 +376,7 @@ if( atomI == targetAtom ){
loadFixedFieldParticleData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
&
jBornRadius
);
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -563,12 +547,7 @@ if( (atomI == targetAtom || (y + tj) == targetAtom) ){
loadFixedFieldParticleData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
,
&
jBornRadius
);
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.cu
View file @
cf335495
...
...
@@ -61,46 +61,6 @@ static void kReduceE_Fields_kernel(amoebaGpuContext amoebaGpu )
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaFixedEField.h"
#ifdef AMOEBA_DEBUG
#if 0
static void printEFieldBuffer( amoebaGpuContext amoebaGpu, unsigned int bufferIndex )
{
(void) fprintf( amoebaGpu->log, "EField Buffer %u\n", bufferIndex );
unsigned int start = bufferIndex*3*amoebaGpu->paddedNumberOfAtoms;
unsigned int stop = (bufferIndex+1)*3*amoebaGpu->paddedNumberOfAtoms;
for( unsigned int ii = start; ii < stop; ii += 3 ){
unsigned int ii3Index = ii/3;
unsigned int bufferIndex = ii3Index/(amoebaGpu->paddedNumberOfAtoms);
unsigned int particleIndex = ii3Index - bufferIndex*(amoebaGpu->paddedNumberOfAtoms);
(void) fprintf( amoebaGpu->log, " %6u %3u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii/3, bufferIndex, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][ii+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][ii+2] );
}
}
static void printEFieldAtomBuffers( amoebaGpuContext amoebaGpu, unsigned int targetAtom )
{
(void) fprintf( amoebaGpu->log, "EField atom %u\n", targetAtom );
for( unsigned int ii = 0; ii < amoebaGpu->outputBuffers; ii++ ){
unsigned int particleIndex = targetAtom + ii*3*amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log, " %2u %6u [%14.6e %14.6e %14.6e] [%14.6e %14.6e %14.6e]\n",
ii, particleIndex,
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][particleIndex+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][particleIndex+2] );
}
}
#endif
#endif
/**---------------------------------------------------------------------------------------
Compute fixed electric field
...
...
@@ -145,9 +105,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
(
void
)
fprintf
(
amoebaGpu
->
log
,
"N2 warp
\n
"
);
kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameQuadrupole
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
@@ -167,9 +124,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaFixedE_FieldN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
amoebaGpu
->
nonbondThreadsPerBlock
,
sizeof
(
FixedFieldParticle
)
*
amoebaGpu
->
nonbondThreadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameDipole
->
_pDevStream
[
0
],
amoebaGpu
->
psLabFrameQuadrupole
->
_pDevStream
[
0
],
amoebaGpu
->
psWorkArray_3_1
->
_pDevStream
[
0
],
#ifdef AMOEBA_DEBUG
amoebaGpu
->
psWorkArray_3_2
->
_pDevStream
[
0
],
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedEField.h
View file @
cf335495
...
...
@@ -36,9 +36,6 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateAmoebaFixedE_Field
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
float4
*
atomCoord
,
float
*
labFrameDipole
,
float
*
labFrameQuadrupole
,
float
*
outputEField
,
float
*
outputEFieldPolar
#ifdef AMOEBA_DEBUG
...
...
@@ -59,10 +56,6 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
lasty
=
0xFFFFFFFF
;
float4
jCoord
;
float
jDipole
[
3
];
float
jQuadrupole
[
9
];
while
(
pos
<
end
)
{
...
...
@@ -80,7 +73,9 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
FixedFieldParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
float4
iCoord
=
atomCoord
[
atomI
];
FixedFieldParticle
localParticle
;
loadFixedFieldShared
(
&
localParticle
,
atomI
);
float
fieldSum
[
3
];
float
fieldPolarSum
[
3
];
...
...
@@ -98,9 +93,7 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
// load coordinates, charge, ...
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
,
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
cAmoebaSim
.
pDampingFactorAndThole
);
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
atomI
);
if
(
!
bExclusionFlag
)
{
...
...
@@ -113,16 +106,7 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
float
ijField
[
2
][
3
];
// load coords, charge, ...
loadFixedFieldParticleData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
);
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -156,14 +140,9 @@ void METHOD_NAME(kCalculateAmoebaFixedE_Field, Forces_kernel)(
float
ijField
[
2
][
3
];
loadFixedFieldParticleData
(
&
(
psA
[
j
]),
&
jCoord
,
jDipole
,
jQuadrupole
);
//
loadFixedFieldParticleData( &(psA[j]), &jCoord, jDipole, jQuadrupole );
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
j
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
j
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
j
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -280,9 +259,7 @@ if( 0 && atomI == targetAtom ){
// load coordinates, charge, ...
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
),
atomCoord
,
labFrameDipole
,
labFrameQuadrupole
,
cAmoebaSim
.
pDampingFactorAndThole
);
loadFixedFieldShared
(
&
(
sA
[
threadIdx
.
x
]),
(
y
+
tgx
)
);
}
...
...
@@ -297,16 +274,7 @@ if( 0 && atomI == targetAtom ){
float
ijField
[
2
][
3
];
// load coords, charge, ...
loadFixedFieldParticleData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
);
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
@@ -420,14 +388,7 @@ if( 0 && (atomI == targetAtom || (y + tj) == targetAtom) ){
float
ijField
[
2
][
3
];
loadFixedFieldParticleData
(
&
(
psA
[
tj
]),
&
jCoord
,
jDipole
,
jQuadrupole
);
calculateFixedEFieldPairIxn_kernel
(
iCoord
,
jCoord
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
x
,
psA
[
tj
].
damp
,
cAmoebaSim
.
pDampingFactorAndThole
[
atomI
].
y
,
psA
[
tj
].
thole
,
&
(
labFrameDipole
[
atomI
*
3
]),
jDipole
,
&
(
labFrameQuadrupole
[
atomI
*
9
]),
jQuadrupole
,
cAmoebaSim
.
scalingDistanceCutoff
,
ijField
calculateFixedEFieldPairIxn_kernel
(
localParticle
,
psA
[
tj
],
ijField
#ifdef AMOEBA_DEBUG
,
pullBack
#endif
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaFixedFieldParticle.h
View file @
cf335495
...
...
@@ -46,9 +46,7 @@ struct FixedFieldParticle {
#endif
};
__device__
void
loadFixedFieldShared
(
struct
FixedFieldParticle
*
sA
,
unsigned
int
atomI
,
float4
*
atomCoord
,
float
*
labDipole
,
float
*
labQuadrupole
,
float2
*
dampingFactorAndThole
__device__
static
void
loadFixedFieldShared
(
struct
FixedFieldParticle
*
sA
,
unsigned
int
atomI
#ifdef GK
,
float
*
bornR
#endif
...
...
@@ -56,28 +54,28 @@ __device__ void loadFixedFieldShared( struct FixedFieldParticle* sA, unsigned in
{
// coordinates & charge
sA
->
x
=
atomCoord
[
atomI
].
x
;
sA
->
y
=
atomCoord
[
atomI
].
y
;
sA
->
z
=
atomCoord
[
atomI
].
z
;
sA
->
q
=
atomCoord
[
atomI
].
w
;
sA
->
x
=
cSim
.
pPosq
[
atomI
].
x
;
sA
->
y
=
cSim
.
pPosq
[
atomI
].
y
;
sA
->
z
=
cSim
.
pPosq
[
atomI
].
z
;
sA
->
q
=
cSim
.
pPosq
[
atomI
].
w
;
// lab dipole
sA
->
labFrameDipole_X
=
lab
Dipole
[
atomI
*
3
];
sA
->
labFrameDipole_Y
=
lab
Dipole
[
atomI
*
3
+
1
];
sA
->
labFrameDipole_Z
=
lab
Dipole
[
atomI
*
3
+
2
];
sA
->
labFrameDipole_X
=
cAmoebaSim
.
pLabFrame
Dipole
[
atomI
*
3
];
sA
->
labFrameDipole_Y
=
cAmoebaSim
.
pLabFrame
Dipole
[
atomI
*
3
+
1
];
sA
->
labFrameDipole_Z
=
cAmoebaSim
.
pLabFrame
Dipole
[
atomI
*
3
+
2
];
// lab quadrupole
sA
->
labFrameQuadrupole_XX
=
lab
Quadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole_XY
=
lab
Quadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole_XZ
=
lab
Quadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole_YY
=
lab
Quadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole_YZ
=
lab
Quadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole_ZZ
=
lab
Quadrupole
[
atomI
*
9
+
8
];
sA
->
labFrameQuadrupole_XX
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
];
sA
->
labFrameQuadrupole_XY
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
1
];
sA
->
labFrameQuadrupole_XZ
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
2
];
sA
->
labFrameQuadrupole_YY
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
4
];
sA
->
labFrameQuadrupole_YZ
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
5
];
sA
->
labFrameQuadrupole_ZZ
=
cAmoebaSim
.
pLabFrame
Quadrupole
[
atomI
*
9
+
8
];
sA
->
damp
=
d
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
d
ampingFactorAndThole
[
atomI
].
y
;
sA
->
damp
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
x
;
sA
->
thole
=
cAmoebaSim
.
pD
ampingFactorAndThole
[
atomI
].
y
;
#ifdef GK
sA
->
bornR
=
bornR
[
atomI
];
#endif
...
...
@@ -86,8 +84,8 @@ __device__ void loadFixedFieldShared( struct FixedFieldParticle* sA, unsigned in
// load struct and arrays w/ shared data in sA
__device__
void
loadFixedFieldParticleData
(
struct
FixedFieldParticle
*
sA
,
float4
*
jCoord
,
float
*
jDipole
,
float
*
jQuadrupole
__device__
static
void
loadFixedFieldParticleData
(
struct
FixedFieldParticle
*
sA
,
float4
*
jCoord
,
float
*
jDipole
,
float
*
jQuadrupole
#ifdef GK
,
float
*
bornR
#endif
...
...
@@ -142,15 +140,11 @@ __device__ static void zeroFixedFieldParticleSharedField( struct FixedFieldParti
#endif
}
// body of fixed E-field calculation
__device__
static
void
calculateFixedEFieldPairIxn_kernel
(
float4
atomCoordinatesI
,
float4
atomCoordinatesJ
,
float
dampingFactorI
,
float
dampingFactorJ
,
float
tholeI
,
float
tholeJ
,
float
*
labDipoleI
,
float
*
labDipoleJ
,
float
*
labQuadrupoleI
,
float
*
labQuadrupoleJ
,
float
scalingDistanceCutoff
,
float
field
[
2
][
3
]
__device__
static
void
calculateFixedEFieldPairIxn_kernel
(
FixedFieldParticle
&
atomI
,
FixedFieldParticle
&
atomJ
,
float
field
[
2
][
3
]
#ifdef AMOEBA_DEBUG
,
float4
debugArray
[
12
]
#endif
...
...
@@ -163,9 +157,9 @@ __device__ static void calculateFixedEFieldPairIxn_kernel( float4 atomCoordinate
// get deltaR and r between 2 atoms
float
deltaR
[
3
];
deltaR
[
0
]
=
atom
CoordinatesJ
.
x
-
atomCoordinates
I
.
x
;
deltaR
[
1
]
=
atom
CoordinatesJ
.
y
-
atomCoordinates
I
.
y
;
deltaR
[
2
]
=
atom
CoordinatesJ
.
z
-
atomCoordinates
I
.
z
;
deltaR
[
0
]
=
atom
J
.
x
-
atom
I
.
x
;
deltaR
[
1
]
=
atom
J
.
y
-
atom
I
.
y
;
deltaR
[
2
]
=
atom
J
.
z
-
atom
I
.
z
;
float
r
=
SQRT
(
deltaR
[
0
]
*
deltaR
[
0
]
+
deltaR
[
1
]
*
deltaR
[
1
]
+
deltaR
[
2
]
*
deltaR
[
2
]
);
float
rI
=
1
.
0
f
/
r
;
...
...
@@ -177,14 +171,14 @@ __device__ static void calculateFixedEFieldPairIxn_kernel( float4 atomCoordinate
// get scaling factors, if needed
float
damp
=
dampingFactorI
*
dampingFactorJ
;
float
damp
=
atomI
.
damp
*
atomJ
.
damp
;
float
dampExp
;
if
(
damp
!=
0
.
0
f
&&
r
<
scalingDistanceCutoff
){
if
(
damp
!=
0
.
0
f
&&
r
<
cAmoebaSim
.
scalingDistanceCutoff
){
// get scaling factors
float
ratio
=
r
/
damp
;
float
pGamma
=
thole
J
>
thole
I
?
thole
I
:
thole
J
;
float
pGamma
=
atomJ
.
thole
>
atomI
.
thole
?
atomI
.
thole
:
atomJ
.
thole
;
damp
=
ratio
*
ratio
*
ratio
*
pGamma
;
dampExp
=
EXP
(
-
damp
);
}
else
{
...
...
@@ -197,69 +191,30 @@ __device__ static void calculateFixedEFieldPairIxn_kernel( float4 atomCoordinate
float
rr5_2
=
rr5
*
2
.
0
f
;
#ifdef AMOEBA_DEBUG
int
index
=
0
;
// 0-2
debugArray
[
index
].
x
=
r
;
debugArray
[
index
].
y
=
rr3
;
debugArray
[
index
].
z
=
rr5
;
index
++
;
#endif
float
*
dipole
=
labDipoleJ
;
float
*
quadrupole
=
labQuadrupoleJ
;
float
qDotDelta
[
3
];
qDotDelta
[
0
]
=
deltaR
[
0
]
*
quadrupole
[
0
]
+
deltaR
[
1
]
*
quadrupole
[
1
]
+
deltaR
[
2
]
*
quadrupole
[
2
];
qDotDelta
[
1
]
=
deltaR
[
0
]
*
quadrupole
[
3
]
+
deltaR
[
1
]
*
quadrupole
[
4
]
+
deltaR
[
2
]
*
quadrupole
[
5
];
qDotDelta
[
2
]
=
deltaR
[
0
]
*
quadrupole
[
6
]
+
deltaR
[
1
]
*
quadrupole
[
7
]
+
deltaR
[
2
]
*
quadrupole
[
8
];
float
dotdd
=
deltaR
[
0
]
*
dipole
[
0
]
+
deltaR
[
1
]
*
dipole
[
1
]
+
deltaR
[
2
]
*
dipole
[
2
];
float
dotqd
=
deltaR
[
0
]
*
qDotDelta
[
0
]
+
deltaR
[
1
]
*
qDotDelta
[
1
]
+
deltaR
[
2
]
*
qDotDelta
[
2
];
float
factor
=
-
rr3
*
atomCoordinatesJ
.
w
+
rr5
*
dotdd
-
rr7
*
dotqd
;
qDotDelta
[
0
]
=
deltaR
[
0
]
*
atomJ
.
labFrameQuadrupole_XX
+
deltaR
[
1
]
*
atomJ
.
labFrameQuadrupole_XY
+
deltaR
[
2
]
*
atomJ
.
labFrameQuadrupole_XZ
;
qDotDelta
[
1
]
=
deltaR
[
0
]
*
atomJ
.
labFrameQuadrupole_XY
+
deltaR
[
1
]
*
atomJ
.
labFrameQuadrupole_YY
+
deltaR
[
2
]
*
atomJ
.
labFrameQuadrupole_YZ
;
qDotDelta
[
2
]
=
deltaR
[
0
]
*
atomJ
.
labFrameQuadrupole_XZ
+
deltaR
[
1
]
*
atomJ
.
labFrameQuadrupole_YZ
+
deltaR
[
2
]
*
atomJ
.
labFrameQuadrupole_ZZ
;
#ifdef AMOEBA_DEBUG
// 3-5
debugArray
[
index
].
x
=
dotdd
;
debugArray
[
index
].
y
=
dotqd
;
debugArray
[
index
].
z
=
factor
;
index
++
;
#endif
float
dotdd
=
deltaR
[
0
]
*
atomJ
.
labFrameDipole_X
+
deltaR
[
1
]
*
atomJ
.
labFrameDipole_Y
+
deltaR
[
2
]
*
atomJ
.
labFrameDipole_Z
;
float
dotqd
=
deltaR
[
0
]
*
qDotDelta
[
0
]
+
deltaR
[
1
]
*
qDotDelta
[
1
]
+
deltaR
[
2
]
*
qDotDelta
[
2
];
field
[
0
][
0
]
=
deltaR
[
0
]
*
factor
-
rr3
*
dipole
[
0
]
+
rr5_2
*
qDotDelta
[
0
];
field
[
0
][
1
]
=
deltaR
[
1
]
*
factor
-
rr3
*
dipole
[
1
]
+
rr5_2
*
qDotDelta
[
1
];
field
[
0
][
2
]
=
deltaR
[
2
]
*
factor
-
rr3
*
dipole
[
2
]
+
rr5_2
*
qDotDelta
[
2
];
float
factor
=
-
rr3
*
atomJ
.
q
+
rr5
*
dotdd
-
rr7
*
dotqd
;
field
[
0
][
0
]
=
deltaR
[
0
]
*
factor
-
rr3
*
atomJ
.
labFrameDipole_X
+
rr5_2
*
qDotDelta
[
0
];
field
[
0
][
1
]
=
deltaR
[
1
]
*
factor
-
rr3
*
atomJ
.
labFrameDipole_Y
+
rr5_2
*
qDotDelta
[
1
];
field
[
0
][
2
]
=
deltaR
[
2
]
*
factor
-
rr3
*
atomJ
.
labFrameDipole_Z
+
rr5_2
*
qDotDelta
[
2
];
dipole
=
labDipoleI
;
quadrupole
=
labQuadrupoleI
;
qDotDelta
[
0
]
=
deltaR
[
0
]
*
quadrupole
[
0
]
+
deltaR
[
1
]
*
quadrupole
[
1
]
+
deltaR
[
2
]
*
quadrupole
[
2
];
qDotDelta
[
1
]
=
deltaR
[
0
]
*
quadrupole
[
3
]
+
deltaR
[
1
]
*
quadrupole
[
4
]
+
deltaR
[
2
]
*
quadrupole
[
5
];
qDotDelta
[
2
]
=
deltaR
[
0
]
*
quadrupole
[
6
]
+
deltaR
[
1
]
*
quadrupole
[
7
]
+
deltaR
[
2
]
*
quadrupole
[
8
];
qDotDelta
[
0
]
=
deltaR
[
0
]
*
atomI
.
labFrameQuadrupole_XX
+
deltaR
[
1
]
*
atomI
.
labFrameQuadrupole_XY
+
deltaR
[
2
]
*
atomI
.
labFrameQuadrupole_XZ
;
qDotDelta
[
1
]
=
deltaR
[
0
]
*
atomI
.
labFrameQuadrupole_XY
+
deltaR
[
1
]
*
atomI
.
labFrameQuadrupole_YY
+
deltaR
[
2
]
*
atomI
.
labFrameQuadrupole_YZ
;
qDotDelta
[
2
]
=
deltaR
[
0
]
*
atomI
.
labFrameQuadrupole_XZ
+
deltaR
[
1
]
*
atomI
.
labFrameQuadrupole_YZ
+
deltaR
[
2
]
*
atomI
.
labFrameQuadrupole_ZZ
;
dotdd
=
deltaR
[
0
]
*
d
ipole
[
0
]
+
deltaR
[
1
]
*
d
ipole
[
1
]
+
deltaR
[
2
]
*
d
ipole
[
2
]
;
dotdd
=
deltaR
[
0
]
*
atomI
.
labFrameD
ipole
_X
+
deltaR
[
1
]
*
atomI
.
labFrameD
ipole
_Y
+
deltaR
[
2
]
*
atomI
.
labFrameD
ipole
_Z
;
dotqd
=
deltaR
[
0
]
*
qDotDelta
[
0
]
+
deltaR
[
1
]
*
qDotDelta
[
1
]
+
deltaR
[
2
]
*
qDotDelta
[
2
];
factor
=
rr3
*
atom
Coordinates
I
.
w
+
rr5
*
dotdd
+
rr7
*
dotqd
;
factor
=
rr3
*
atomI
.
q
+
rr5
*
dotdd
+
rr7
*
dotqd
;
#ifdef AMOEBA_DEBUG
// 6-8
debugArray
[
index
].
x
=
dotdd
;
debugArray
[
index
].
y
=
dotqd
;
debugArray
[
index
].
z
=
factor
;
index
++
;
#endif
field
[
1
][
0
]
=
deltaR
[
0
]
*
factor
-
rr3
*
atomI
.
labFrameDipole_X
-
rr5_2
*
qDotDelta
[
0
];
field
[
1
][
1
]
=
deltaR
[
1
]
*
factor
-
rr3
*
atomI
.
labFrameDipole_Y
-
rr5_2
*
qDotDelta
[
1
];
field
[
1
][
2
]
=
deltaR
[
2
]
*
factor
-
rr3
*
atomI
.
labFrameDipole_Z
-
rr5_2
*
qDotDelta
[
2
];
field
[
1
][
0
]
=
deltaR
[
0
]
*
factor
-
rr3
*
dipole
[
0
]
-
rr5_2
*
qDotDelta
[
0
];
field
[
1
][
1
]
=
deltaR
[
1
]
*
factor
-
rr3
*
dipole
[
1
]
-
rr5_2
*
qDotDelta
[
1
];
field
[
1
][
2
]
=
deltaR
[
2
]
*
factor
-
rr3
*
dipole
[
2
]
-
rr5_2
*
qDotDelta
[
2
];
#if 0
float testValue = 1.0f;
field[0][0] = testValue;
field[0][1] = testValue;
field[0][2] = testValue;
field[1][0] = testValue;
field[1][1] = testValue;
field[1][2] = testValue;
#endif
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.cu
0 → 100644
View file @
cf335495
//-----------------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------------
#include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h"
//#define AMOEBA_DEBUG
// File-local __constant__ copies of the base and AMOEBA simulation structs;
// SetCalculateAmoebaCudaPmeFixedEFieldSim() fills them via cudaMemcpyToSymbol and
// GetCalculateAmoebaCudaPmeFixedEFieldSim() reads them back.
static
__constant__
cudaGmxSimulation
cSim
;
static
__constant__
cudaAmoebaGmxSimulation
cAmoebaSim
;
/**
 * Copy the host-side simulation structs into this file's __constant__ symbols.
 *
 * amoebaGpu->gpuContext->sim is copied to cSim and amoebaGpu->amoebaSim to
 * cAmoebaSim; each copy's status is passed to RTERROR.
 *
 * @param amoebaGpu   context supplying both host structs
 */
void SetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpuContext amoebaGpu )
{
    // base simulation settings
    cudaError_t copyResult = cudaMemcpyToSymbol( cSim, &amoebaGpu->gpuContext->sim, sizeof( cudaGmxSimulation ) );
    RTERROR( copyResult, "SetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cSim failed" );

    // AMOEBA-specific settings
    copyResult = cudaMemcpyToSymbol( cAmoebaSim, &amoebaGpu->amoebaSim, sizeof( cudaAmoebaGmxSimulation ) );
    RTERROR( copyResult, "SetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyToSymbol: SetSim copy to cAmoebaSim failed" );
}
/**
 * Copy this file's __constant__ symbols back into the host-side simulation structs.
 *
 * cSim is copied into amoebaGpu->gpuContext->sim and cAmoebaSim into
 * amoebaGpu->amoebaSim; each copy's status is passed to RTERROR.
 *
 * @param amoebaGpu   context whose host structs are overwritten
 */
void GetCalculateAmoebaCudaPmeFixedEFieldSim( amoebaGpuContext amoebaGpu )
{
    // base simulation settings
    cudaError_t copyResult = cudaMemcpyFromSymbol( &amoebaGpu->gpuContext->sim, cSim, sizeof( cudaGmxSimulation ) );
    RTERROR( copyResult, "GetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyFromSymbol: SetSim copy from cSim failed" );

    // AMOEBA-specific settings
    copyResult = cudaMemcpyFromSymbol( &amoebaGpu->amoebaSim, cAmoebaSim, sizeof( cudaAmoebaGmxSimulation ) );
    RTERROR( copyResult, "GetCalculateAmoebaCudaPmeFixedEFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed" );
}
/**
 * Reduce per-buffer field contributions into one value per field component,
 * adding a term proportional to the lab-frame dipole (the "self" term).
 *
 * For each component index c < fieldComponents the result is
 *
 *     fieldOut[c] = (4/3)*aewald^3/sqrtPi * cAmoebaSim.pLabFrameDipole[c]
 *                 + sum over b in [0, outputBuffers) of fieldIn[c + b*fieldComponents]
 *
 * Each thread starts at its global thread index and advances by the total
 * number of launched threads (gridDim.x*blockDim.x) until all components are done.
 *
 * @param fieldComponents   number of field components per buffer
 * @param outputBuffers     number of buffers stacked in fieldIn
 * @param fieldIn           input: outputBuffers consecutive buffers of fieldComponents floats
 * @param fieldOut          output: fieldComponents reduced values
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_THREADS_PER_BLOCK, 1)
#endif
void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int outputBuffers, float* fieldIn, float* fieldOut )
{
    // coefficient multiplying the lab-frame dipole
    const float selfCoefficient = (4.0f/3.0f)*(cAmoebaSim.aewald*cAmoebaSim.aewald*cAmoebaSim.aewald)/cAmoebaSim.sqrtPi;

    for( unsigned int component = blockIdx.x * blockDim.x + threadIdx.x;
         component < fieldComponents;
         component += gridDim.x * blockDim.x ){

        // self-term included here
        float sum              = selfCoefficient*cAmoebaSim.pLabFrameDipole[component];

        float* bufferEntry     = fieldIn + component;
        unsigned int remaining = outputBuffers;

        // four buffers at a time
        for( ; remaining >= 4; remaining -= 4 ){
            sum         += bufferEntry[0] + bufferEntry[fieldComponents] + bufferEntry[2*fieldComponents] + bufferEntry[3*fieldComponents];
            bufferEntry += fieldComponents*4;
        }

        // then a pair, if at least two are left
        if( remaining >= 2 ){
            sum         += bufferEntry[0] + bufferEntry[fieldComponents];
            bufferEntry += fieldComponents*2;
            remaining   -= 2;
        }

        // then the last odd buffer
        if( remaining > 0 ){
            sum += bufferEntry[0];
        }

        fieldOut[component] = sum;
    }
}
/**
 * Launch kReduceDirectSelfFields_kernel twice:
 *
 *     psWorkArray_3_1 -> psE_Field
 *     psWorkArray_3_2 -> psE_FieldPolar
 *
 * Both launches use nonbondBlocks blocks of fieldReduceThreadsPerBlock threads,
 * reduce outputBuffers buffers of 3*paddedNumberOfAtoms components each,
 * and are followed by a LAUNCHERROR check.
 *
 * @param amoebaGpu   context supplying launch dimensions and device arrays
 */
static void kReducePmeDirectE_Fields( amoebaGpuContext amoebaGpu )
{
    // reduce psWorkArray_3_1 -> EField
    kReduceDirectSelfFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>(
                                      amoebaGpu->paddedNumberOfAtoms*3,
                                      amoebaGpu->outputBuffers,
                                      amoebaGpu->psWorkArray_3_1->_pDevStream[0],
                                      amoebaGpu->psE_Field->_pDevStream[0] );
    LAUNCHERROR("kReducePmeE_Fields1");

    // reduce psWorkArray_3_2 -> EFieldPolar
    kReduceDirectSelfFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>(
                                      amoebaGpu->paddedNumberOfAtoms*3,
                                      amoebaGpu->outputBuffers,
                                      amoebaGpu->psWorkArray_3_2->_pDevStream[0],
                                      amoebaGpu->psE_FieldPolar->_pDevStream[0] );
    LAUNCHERROR("kReducePmeE_Fields2");
}
// file includes FixedFieldParticle struct definition/load/unload struct and body kernel for fixed E-field
#undef GK
#include "kCalculateAmoebaCudaFixedFieldParticle.h"
/**
 * Real-space (direct) Ewald contribution to the fixed electric field for one atom pair.
 *
 * @param atomI     first particle of the pair
 * @param atomJ     second particle of the pair
 * @param dscale    d-scale factor applied to the Thole-damped correction of the direct field
 * @param pscale    p-scale factor applied to the Thole-damped correction of the polar field
 * @param fields    output, overwritten (not accumulated):
 *                    fields[0] = direct field at atomI due to atomJ
 *                    fields[1] = direct field at atomJ due to atomI
 *                    fields[2] = polar  field at atomI due to atomJ
 *                    fields[3] = polar  field at atomJ due to atomI
 *                  all four rows are set to zero when the pair is beyond the cutoff
 * @param pullBack  (AMOEBA_DEBUG only) receives the wrapped separation and r2 in entry 0 and
 *                  the unwrapped separation and its squared length in entry 1
 *
 * When atomI and atomJ coincide the separation is zero and the divisions below do not
 * produce finite values; callers are expected to discard that result.
 */
__device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& atomI, FixedFieldParticle& atomJ,
                                                            float dscale, float pscale, float fields[4][3]
#ifdef AMOEBA_DEBUG
                                                            , float4* pullBack
#endif
 ){

    // unwrapped separation (J - I)

    const float rawDx = atomJ.x - atomI.x;
    const float rawDy = atomJ.y - atomI.y;
    const float rawDz = atomJ.z - atomI.z;

    // separation after applying periodic boundary conditions

    float sep[3];
    sep[0] = rawDx - floor( rawDx*cSim.invPeriodicBoxSizeX + 0.5f )*cSim.periodicBoxSizeX;
    sep[1] = rawDy - floor( rawDy*cSim.invPeriodicBoxSizeY + 0.5f )*cSim.periodicBoxSizeY;
    sep[2] = rawDz - floor( rawDz*cSim.invPeriodicBoxSizeZ + 0.5f )*cSim.periodicBoxSizeZ;

    const float distSq = sep[0]*sep[0] + sep[1]*sep[1] + sep[2]*sep[2];
    const float dist   = sqrtf( distSq );

    // error-function damping terms bn0..bn3 (bn0 only feeds the recursion)

    const float alphaR     = cAmoebaSim.aewald*dist;
    const float bn0        = erfc( alphaR )/dist;
    const float twoAlphaSq = 2.0f*cAmoebaSim.aewald*cAmoebaSim.aewald;
    float alphaPower       = 1.0f/(cAmoebaSim.sqrtPi*cAmoebaSim.aewald);
    const float gaussian   = exp( -(alphaR*alphaR) );

    alphaPower            *= twoAlphaSq;
    const float bn1        = (bn0 + alphaPower*gaussian)/distSq;

    alphaPower            *= twoAlphaSq;
    const float bn2        = (3.0f*bn1 + alphaPower*gaussian)/distSq;

    alphaPower            *= twoAlphaSq;
    const float bn3        = (5.0f*bn2 + alphaPower*gaussian)/distSq;

    // Thole damping factors; left at 1 when either damp parameter is zero or the
    // exponent is so negative that the exponential is negligible

    float thole3 = 1.0f;
    float thole5 = 1.0f;
    float thole7 = 1.0f;

    const float dampProduct = atomI.damp*atomJ.damp;
    if( dampProduct != 0.0f ){

        const float reduced = (dist/dampProduct);

        float pgamma = atomJ.thole;
        if( atomI.thole < atomJ.thole ){
            pgamma = atomI.thole;
        }

        const float exponent = -pgamma*(reduced*reduced*reduced);
        if( exponent > -50.0f ){
            const float expdamp = exp( exponent );
            thole3 = 1.0f - expdamp;
            thole5 = 1.0f - expdamp*(1.0f - exponent);
            thole7 = 1.0f - expdamp*(1.0f - exponent + (0.6f*exponent*exponent));
        }
    }

    // scaled inverse-power coefficients for the direct (d) and polar (p) corrections

    const float r3   = (dist*distSq);
    const float r5   = (r3*distSq);
    const float r7   = (r5*distSq);

    const float drr3 =       (1.0f - dscale*thole3)/r3;
    const float drr5 =  3.0f*(1.0f - dscale*thole5)/r5;
    const float drr7 = 15.0f*(1.0f - dscale*thole7)/r7;

    const float prr3 =       (1.0f - pscale*thole3)/r3;
    const float prr5 =  3.0f*(1.0f - pscale*thole5)/r5;
    const float prr7 = 15.0f*(1.0f - pscale*thole7)/r7;

    // multipole contractions with the separation vector, atom I

    const float dipI[3] = { atomI.labFrameDipole_X, atomI.labFrameDipole_Y, atomI.labFrameDipole_Z };
    const float dir     = dipI[0]*sep[0] + dipI[1]*sep[1] + dipI[2]*sep[2];

    float quadI[3];
    quadI[0] = atomI.labFrameQuadrupole_XX*sep[0] + atomI.labFrameQuadrupole_XY*sep[1] + atomI.labFrameQuadrupole_XZ*sep[2];
    quadI[1] = atomI.labFrameQuadrupole_XY*sep[0] + atomI.labFrameQuadrupole_YY*sep[1] + atomI.labFrameQuadrupole_YZ*sep[2];
    quadI[2] = atomI.labFrameQuadrupole_XZ*sep[0] + atomI.labFrameQuadrupole_YZ*sep[1] + atomI.labFrameQuadrupole_ZZ*sep[2];
    const float qir = quadI[0]*sep[0] + quadI[1]*sep[1] + quadI[2]*sep[2];

    // multipole contractions with the separation vector, atom J

    const float dipJ[3] = { atomJ.labFrameDipole_X, atomJ.labFrameDipole_Y, atomJ.labFrameDipole_Z };
    const float dkr     = dipJ[0]*sep[0] + dipJ[1]*sep[1] + dipJ[2]*sep[2];

    float quadJ[3];
    quadJ[0] = atomJ.labFrameQuadrupole_XX*sep[0] + atomJ.labFrameQuadrupole_XY*sep[1] + atomJ.labFrameQuadrupole_XZ*sep[2];
    quadJ[1] = atomJ.labFrameQuadrupole_XY*sep[0] + atomJ.labFrameQuadrupole_YY*sep[1] + atomJ.labFrameQuadrupole_YZ*sep[2];
    quadJ[2] = atomJ.labFrameQuadrupole_XZ*sep[0] + atomJ.labFrameQuadrupole_YZ*sep[1] + atomJ.labFrameQuadrupole_ZZ*sep[2];
    const float qkr = quadJ[0]*sep[0] + quadJ[1]*sep[1] + quadJ[2]*sep[2];

    // component-independent factors multiplying the separation vector

    const float ewaldRadialJ = bn1*atomJ.q  - bn2*dkr  + bn3*qkr;
    const float ewaldRadialI = bn1*atomI.q  + bn2*dir  + bn3*qir;

    const float dRadialJ     = drr3*atomJ.q - drr5*dkr + drr7*qkr;
    const float dRadialI     = drr3*atomI.q + drr5*dir + drr7*qir;

    const float pRadialJ     = prr3*atomJ.q - prr5*dkr + prr7*qkr;
    const float pRadialI     = prr3*atomI.q + prr5*dir + prr7*qir;

    // fill the field at each site due to this interaction, one Cartesian component at a time

    const bool insideCutoff = (distSq <= cAmoebaSim.cutoffDistance2);

    for( int c = 0; c < 3; c++ ){

        if( insideCutoff ){

            const float fim = -sep[c]*ewaldRadialJ - bn1*dipJ[c]  + 2.0f*bn2*quadJ[c];
            const float fkm =  sep[c]*ewaldRadialI - bn1*dipI[c]  - 2.0f*bn2*quadI[c];

            const float fid = -sep[c]*dRadialJ     - drr3*dipJ[c] + 2.0f*drr5*quadJ[c];
            const float fkd =  sep[c]*dRadialI     - drr3*dipI[c] - 2.0f*drr5*quadI[c];

            const float fip = -sep[c]*pRadialJ     - prr3*dipJ[c] + 2.0f*prr5*quadJ[c];
            const float fkp =  sep[c]*pRadialI     - prr3*dipI[c] - 2.0f*prr5*quadI[c];

            fields[0][c]    = fim - fid;
            fields[1][c]    = fkm - fkd;
            fields[2][c]    = fim - fip;
            fields[3][c]    = fkm - fkp;

        } else {

            fields[0][c]    = 0.0f;
            fields[1][c]    = 0.0f;
            fields[2][c]    = 0.0f;
            fields[3][c]    = 0.0f;
        }
    }

#ifdef AMOEBA_DEBUG
    // entry 0: wrapped separation and squared distance
    pullBack[0].x = sep[0];
    pullBack[0].y = sep[1];
    pullBack[0].z = sep[2];
    pullBack[0].w = distSq;

    // entry 1: unwrapped separation and its squared length
    pullBack[1].x = rawDx;
    pullBack[1].y = rawDy;
    pullBack[1].z = rawDz;
    pullBack[1].w = (rawDx)*(rawDx) + (rawDy)*(rawDy) + (rawDz)*(rawDz);
#endif

}
// Include versions of the kernels for N^2 calculations.
//
// kCalculateAmoebaCudaPmeFixedEField.h is a kernel template: it names its kernel via
// METHOD_NAME( kCalculateAmoebaPmeDirectFixedE_Field, _kernel ) and is included twice.
//
// First pass:  METHOD_NAME pastes "N2" into the name, giving
//              kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel; USE_OUTPUT_BUFFER_PER_WARP is
//              not defined here by this file, so the header's #else output path is compiled.
#define METHOD_NAME(a, b) a##N2##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
// Second pass: USE_OUTPUT_BUFFER_PER_WARP selects the per-warp output offsets in the header and
//              METHOD_NAME is redefined to paste "N2ByWarp", giving
//              kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel.
#define USE_OUTPUT_BUFFER_PER_WARP
#undef METHOD_NAME
#define METHOD_NAME(a, b) a##N2ByWarp##b
#include "kCalculateAmoebaCudaPmeFixedEField.h"
/**---------------------------------------------------------------------------------------

   Compute fixed electric field using PME

   Clears the two work arrays, launches the direct-space fixed E-field kernel (the
   per-warp-buffer variant when gpu->bOutputBufferPerWarp is set, the plain N2 variant
   otherwise), reduces the per-buffer results into psE_Field / psE_FieldPolar via
   kReducePmeDirectE_Fields() and finally dumps positions and both fields through
   cudaWriteVectorOfDoubleVectorsToFile().

   amoebaGpu->log may be NULL: every write to it is guarded.

   @param amoebaGpu        amoebaGpu context

   --------------------------------------------------------------------------------------- */

void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{

   // ---------------------------------------------------------------------------------------

    gpuContext gpu = amoebaGpu->gpuContext;

#ifdef AMOEBA_DEBUG
    static const char* methodName = "computeCudaAmoebaPmeFixedEField";
    if( amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "\n%s\n", methodName );
        (void) fflush( amoebaGpu->log );
    }
    int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;

    // N2 debug array

    CUDAStream<float4>* debugArray = new CUDAStream<float4>( paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray" );
    memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms );
    debugArray->Upload();

    (*gpu->psInteractionCount)[0] = gpu->sim.workUnits;
    gpu->psInteractionCount->Upload();

    // print intermediate results for the targetAtom

    unsigned int targetAtom = 0;
#endif

    kClearFields_3( amoebaGpu, 2 );

    if( gpu->bOutputBufferPerWarp ){

        // the log is optional; writing through a NULL FILE* would crash
        if( amoebaGpu->log ){
            (void) fprintf( amoebaGpu->log, "N2 warp\n" );
        }

        kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
                                                                 amoebaGpu->psWorkUnit->_pDevStream[0],
                                                                 amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
                                                                 amoebaGpu->psWorkArray_3_2->_pDevStream[0],
                                                                 debugArray->_pDevStream[0], targetAtom );
#else
                                                                 amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif

    } else {

#ifdef AMOEBA_DEBUG
        if( amoebaGpu->log ){
            (void) fprintf( amoebaGpu->log, "N2 no warp\n" );
            // sizeof yields size_t; cast so the arguments match the %u conversions
            (void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
                            amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
                            static_cast<unsigned int>( sizeof(FixedFieldParticle) ),
                            static_cast<unsigned int>( sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock ),
                            amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
            (void) fflush( amoebaGpu->log );
        }
#endif

        kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
                                                                 amoebaGpu->psWorkUnit->_pDevStream[0],
                                                                 amoebaGpu->psWorkArray_3_1->_pDevStream[0],
#ifdef AMOEBA_DEBUG
                                                                 amoebaGpu->psWorkArray_3_2->_pDevStream[0],
                                                                 debugArray->_pDevStream[0], targetAtom );
#else
                                                                 amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif
    }
    LAUNCHERROR("kCalculateAmoebaPmeDirectFixedE_Field_kernel");

#if 0
        for( unsigned int ii = 0; ii < amoebaGpu->outputBuffers; ii++ ){
            //float index = 1.0f;
            float index = (float) ii;
            for( unsigned int jj = 0; jj < 3*amoebaGpu->paddedNumberOfAtoms; jj += 3 ){
                unsigned int kk = 3*ii*amoebaGpu->paddedNumberOfAtoms + jj;
                amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk]   = index;
                amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk+1] = index;
                amoebaGpu->psWorkArray_3_1->_pSysStream[0][kk+2] = index;
            }
        }
        amoebaGpu->psWorkArray_3_1->Upload();
#endif

    kReducePmeDirectE_Fields( amoebaGpu );

#ifdef AMOEBA_DEBUG
    if( amoebaGpu->log ){

        gpu->psInteractionCount->Download();
        (void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
                        amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
                        static_cast<unsigned int>( sizeof(FixedFieldParticle) ),
                        static_cast<unsigned int>( sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock ),
                        amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
        (void) fflush( amoebaGpu->log );

        amoebaGpu->psWorkArray_3_1->Download();
        amoebaGpu->psWorkArray_3_2->Download();
        amoebaGpu->psE_Field->Download();
        amoebaGpu->psE_FieldPolar->Download();

        (void) fprintf( amoebaGpu->log, "OutEFields\n" );

        // print the first and last maxPrint atoms; the index jumps over the middle
        int maxPrint = 32;
        for( int ii = 0; ii < gpu->natoms; ii++ ){

            (void) fprintf( amoebaGpu->log, "%5d ", ii );

            int indexOffset = ii*3;

            // E_Field

            (void) fprintf( amoebaGpu->log, "E[%16.9e %16.9e %16.9e] ",
                            amoebaGpu->psE_Field->_pSysStream[0][indexOffset],
                            amoebaGpu->psE_Field->_pSysStream[0][indexOffset+1],
                            amoebaGpu->psE_Field->_pSysStream[0][indexOffset+2] );

            // E_Field polar

            (void) fprintf( amoebaGpu->log, "Epol[%16.9e %16.9e %16.9e] ",
                            amoebaGpu->psE_FieldPolar->_pSysStream[0][indexOffset],
                            amoebaGpu->psE_FieldPolar->_pSysStream[0][indexOffset+1],
                            amoebaGpu->psE_FieldPolar->_pSysStream[0][indexOffset+2] );

            (void) fprintf( amoebaGpu->log, "\n" );
            if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
                ii = gpu->natoms - maxPrint;
            }
        }
        (void) fflush( amoebaGpu->log );
        (void) fprintf( amoebaGpu->log, "EFields End\n" );
        (void) fprintf( amoebaGpu->log, "DebugQ\n" );
        debugArray->Download();

        // each atom owns 7 debug records, spaced paddedNumberOfAtoms entries apart
        for( int jj = 0; jj < gpu->natoms; jj++ ){
            int debugIndex = jj;
            (void) fprintf( amoebaGpu->log, "%5d PmeFixedEField\n", jj );
            for( int kk = 0; kk < 7; kk++ ){
                (void) fprintf( amoebaGpu->log, "[%16.9e %16.9e %16.9e %16.9e]\n",
                                debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
                                debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
                debugIndex += paddedNumberOfAtoms;
            }
            (void) fprintf( amoebaGpu->log, "\n" );
        }

        // write results to file

        if( 1 ){
            std::vector<int> fileId;
            //fileId.push_back( 0 );
            VectorOfDoubleVectors outputVector;
            cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4,              outputVector );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_Field,      outputVector );
            cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_FieldPolar, outputVector );
            cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
        }
    }

    // allocated unconditionally above, so release it whether or not a log is attached
    delete debugArray;
#endif

    // NOTE(review): this dump runs on every call, also in non-debug builds; it looks like
    // leftover diagnostics -- confirm whether it should be moved under AMOEBA_DEBUG.
    if( 1 ){
        std::vector<int> fileId;
        fileId.push_back( 0 );
        VectorOfDoubleVectors outputVector;
        cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4,              outputVector );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_Field,      outputVector );
        cudaLoadCudaFloatArray( gpu->natoms,  3, amoebaGpu->psE_FieldPolar, outputVector );
        cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
    }

}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaPmeFixedEField.h
0 → 100644
View file @
cf335495
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "amoebaScaleFactors.h"
/**
 * Kernel template for the direct-space PME fixed E-field. The including file defines
 * METHOD_NAME (which forms the kernel name) and optionally USE_OUTPUT_BUFFER_PER_WARP
 * (which selects how output offsets are formed) before including this header.
 *
 * The threads are split into groups of GRID consecutive threads ("warp" below). Each group
 * takes a contiguous slice [pos, end) of the work-unit list. A work unit decodes to a tile
 * (x, y) plus an exclusion flag. Within a tile thread tgx owns atom x + tgx (localParticle)
 * and interacts it with the GRID atoms of the y side held in shared memory (psA):
 *
 *   - x == y : only the field at the x-side atom is accumulated; the self pair is masked out.
 *   - x != y : the field at the x-side atom is accumulated in fieldSum/fieldPolarSum and the
 *              reaction field on the y-side atom is accumulated in psA[tj].eField/eFieldP.
 *
 * ijField rows: [0] direct field at I, [1] direct field at J, [2] polar field at I,
 * [3] polar field at J.
 *
 * @param workUnit           encoded tiles, consumed through decodeCell()
 * @param outputEField       destination for the direct field (3 floats per atom per buffer)
 * @param outputEFieldPolar  destination for the polar field (3 floats per atom per buffer)
 * @param debugArray         (AMOEBA_DEBUG only) records written for pairs involving targetAtom,
 *                           successive records paddedNumberOfAtoms entries apart
 * @param targetAtom         (AMOEBA_DEBUG only) atom whose pair interactions are recorded
 *
 * NOTE(review): there is no explicit synchronization anywhere in this kernel, although threads
 * of a group read and update each other's shared-memory entries -- presumably it relies on the
 * GRID threads executing in lock step; confirm before changing statement order.
 */
__global__
#if (__CUDA_ARCH__ >= 200)
__launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
#elif (__CUDA_ARCH__ >= 130)
__launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
#else
__launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
                            unsigned int* workUnit,
                            float* outputEField,
                            float* outputEFieldPolar
#ifdef AMOEBA_DEBUG
                           , float4* debugArray, unsigned int targetAtom
#endif
){

#ifdef AMOEBA_DEBUG
    float4 pullBack[12];
    float dScaleVal;
    float pScaleVal;
#endif

    // dynamically sized shared array; the launch supplies the byte count
    extern __shared__ FixedFieldParticle sA[];

    // split the work units evenly over all thread groups of the launch
    unsigned int totalWarps     = gridDim.x*blockDim.x/GRID;
    unsigned int warp           = (blockIdx.x*blockDim.x+threadIdx.x)/GRID;
    unsigned int numWorkUnits   = cSim.pInteractionCount[0];
    unsigned int pos            = warp*numWorkUnits/totalWarps;
    unsigned int end            = (warp+1)*numWorkUnits/totalWarps;

    // y of the last off-diagonal tile whose atoms were loaded into shared memory
    // NOTE(review): the x == y branch below overwrites sA[threadIdx.x] without updating
    // lasty, so skipping the reload is only valid if an off-diagonal tile never follows a
    // diagonal tile while lasty still equals its y -- presumably guaranteed by the
    // work-unit ordering; confirm.
    unsigned int lasty          = 0xFFFFFFFF;

    while (pos < end)
    {
        unsigned int x;
        unsigned int y;
        bool bExclusionFlag;

        // extract cell coordinates

        decodeCell( workUnit[pos], &x, &y, &bExclusionFlag );

        // tgx: lane within the group, tbx: first thread of the group within the block,
        // tj:  index of the y-side atom this thread handles next (starts at its own lane)
        unsigned int tgx              = threadIdx.x & (GRID - 1);
        unsigned int tbx              = threadIdx.x - tgx;
        unsigned int tj               = tgx;

        // psA addresses this group's GRID entries of the shared array
        FixedFieldParticle* psA       = &sA[tbx];
        unsigned int atomI            = x + tgx;
        FixedFieldParticle localParticle;
        loadFixedFieldShared( &localParticle, atomI );

        float fieldSum[3];
        float fieldPolarSum[3];

        fieldSum[0]                   = 0.0f;
        fieldSum[1]                   = 0.0f;
        fieldSum[2]                   = 0.0f;

        fieldPolarSum[0]              = 0.0f;
        fieldPolarSum[1]              = 0.0f;
        fieldPolarSum[2]              = 0.0f;

        if (x == y)
        {

            // load coordinates, charge, ...

            loadFixedFieldShared( &(sA[threadIdx.x]), atomI );

            if (!bExclusionFlag)
            {

                // this branch is never exercised since it includes the
                // interaction between atomI and itself which is always excluded

                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[4][3];

                    // load coords, charge, ...

#ifdef AMOEBA_DEBUG
                    dScaleVal = 1.0f;
                    pScaleVal = 1.0f;
#endif
                    calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[j], 1.0f, 1.0f, ijField
#ifdef AMOEBA_DEBUG
                                                                , pullBack
#endif
                    );

                    // the self pair is dropped by selecting 0.0f instead of its result
                    unsigned int match = (atomI == (y + j)) ? 1 : 0;

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]             += match ? 0.0f : ijField[0][0];
                    fieldSum[1]             += match ? 0.0f : ijField[0][1];
                    fieldSum[2]             += match ? 0.0f : ijField[0][2];

                    fieldPolarSum[0]        += match ? 0.0f : ijField[2][0];
                    fieldPolarSum[1]        += match ? 0.0f : ijField[2][1];
                    fieldPolarSum[2]        += match ? 0.0f : ijField[2][2];

                }

            }
            else  // bExclusion
            {
                // index of the diagonal tile in the scale-index table; each thread reads
                // its own d-scale mask and p-scale mask pair for that tile
                unsigned int xi       = x >> GRIDBITS;
                unsigned int cell     = xi + xi*cAmoebaSim.paddedNumberOfAtoms/GRID-xi*(xi+1)/2;
                int  dScaleMask       = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 pScaleMask       = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

                for (unsigned int j = 0; j < GRID; j++)
                {

                    // load coords, charge, ...

                    float ijField[4][3];

                    float dScaleValue;
                    float pScaleValue;
                    getMaskedDScaleFactor( j, dScaleMask, &dScaleValue );
                    getMaskedPScaleFactor( j, pScaleMask, &pScaleValue );
#ifdef AMOEBA_DEBUG
                    dScaleVal = dScaleValue;
                    pScaleVal = pScaleValue;
#endif
                    calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[j], dScaleValue, pScaleValue, ijField
#ifdef AMOEBA_DEBUG
                                                                , pullBack
#endif
                    );

                    // nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
                    // by setting match flag

                    unsigned int match = (atomI == (y + j)) ? 1 : 0;

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]             += match ? 0.0f : ijField[0][0];
                    fieldSum[1]             += match ? 0.0f : ijField[0][1];
                    fieldSum[2]             += match ? 0.0f : ijField[0][2];

                    fieldPolarSum[0]        += match ? 0.0f : ijField[2][0];
                    fieldPolarSum[1]        += match ? 0.0f : ijField[2][1];
                    fieldPolarSum[2]        += match ? 0.0f : ijField[2][2];

#ifdef AMOEBA_DEBUG
                    // record this pair for the target atom: ids and scale factors, the two
                    // pullBack entries, then four ijField rows tagged with flag 7.0f
                    if( atomI == targetAtom ){
                        unsigned int index                 = atomI == targetAtom ? (y + j) : atomI;
                        unsigned int pullBackIndex         = 0;
                        unsigned int indexI                = 0;
                        unsigned int indexJ                = indexI ? 0 : 2;

                        debugArray[index].x                = (float) atomI;
                        debugArray[index].y                = (float) (y + j);
                        debugArray[index].z                = dScaleValue;
                        debugArray[index].w                = pScaleValue;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = pullBack[pullBackIndex].x;
                        debugArray[index].y                = pullBack[pullBackIndex].y;
                        debugArray[index].z                = pullBack[pullBackIndex].z;
                        debugArray[index].w                = pullBack[pullBackIndex].w;

                        pullBackIndex++;
                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = pullBack[pullBackIndex].x;
                        debugArray[index].y                = pullBack[pullBackIndex].y;
                        debugArray[index].z                = pullBack[pullBackIndex].z;
                        debugArray[index].w                = pullBack[pullBackIndex].w;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        float flag                         = 7.0f;
                        debugArray[index].x                = ijField[indexI][0];
                        debugArray[index].y                = ijField[indexI][1];
                        debugArray[index].z                = ijField[indexI][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexJ][0];
                        debugArray[index].y                = ijField[indexJ][1];
                        debugArray[index].z                = ijField[indexJ][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexI+1][0];
                        debugArray[index].y                = ijField[indexI+1][1];
                        debugArray[index].z                = ijField[indexI+1][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexJ+1][0];
                        debugArray[index].y                = ijField[indexJ+1][1];
                        debugArray[index].z                = ijField[indexJ+1][2];
                        debugArray[index].w                = flag;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = match ? 0.0f : ijField[indexI][0];
debugArray[index].y = match ? 0.0f : ijField[indexI][1];
debugArray[index].z = match ? 0.0f : ijField[indexI][2];
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
                    }
#endif
                }
            }

            // Write results
            // per-warp build: the buffer is selected by the group index (warp);
            // otherwise it is selected by the tile index x >> GRIDBITS

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, fieldSum,      outputEField );
            load3dArrayBufferPerWarp( offset, fieldPolarSum, outputEFieldPolar );
#else
            unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
            load3dArray( offset, fieldSum,      outputEField );
            load3dArray( offset, fieldPolarSum, outputEFieldPolar );
#endif

        }
        else        // 100% utilization
        {

            // Read fixed atom data into registers and GRF
            if (lasty != y)
            {
                // load coordinates, charge, ...

                loadFixedFieldShared( &(sA[threadIdx.x]), (y+tgx) );

            }

            // zero shared fields
            // (done for every off-diagonal tile, also when the atom data load was skipped)

            zeroFixedFieldParticleSharedField( &(sA[threadIdx.x]) );

            if (!bExclusionFlag)
            {
                // no exclusions in this tile: both scale factors are 1.
                // tj starts at the thread's own lane and advances modulo GRID, so at each
                // step the threads of a group address distinct psA entries.
                for (unsigned int j = 0; j < GRID; j++)
                {

                    float ijField[4][3];

                    // load coords, charge, ...

#ifdef AMOEBA_DEBUG
                    dScaleVal = 1.0f;
                    pScaleVal = 1.0f;
#endif
                    calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[tj], 1.0f, 1.0f, ijField
#ifdef AMOEBA_DEBUG
                                                                , pullBack
#endif
                    );

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]              += ijField[0][0];
                    fieldSum[1]              += ijField[0][1];
                    fieldSum[2]              += ijField[0][2];

                    fieldPolarSum[0]         += ijField[2][0];
                    fieldPolarSum[1]         += ijField[2][1];
                    fieldPolarSum[2]         += ijField[2][2];

                    // add to field at atomJ the field due atomI's charge/dipole/quadrupole

                    psA[tj].eField[0]        += ijField[1][0];
                    psA[tj].eField[1]        += ijField[1][1];
                    psA[tj].eField[2]        += ijField[1][2];

                    psA[tj].eFieldP[0]       += ijField[3][0];
                    psA[tj].eFieldP[1]       += ijField[3][1];
                    psA[tj].eFieldP[2]       += ijField[3][2];

#ifdef AMOEBA_DEBUG
                    // record pairs in which either side is the target atom; indexI/indexJ
                    // pick the ijField rows so the target's own field is written first
                    if( (atomI == targetAtom || (y + tj) == targetAtom) ){
                        unsigned int index                 = (atomI == targetAtom) ? (y + tj) : atomI;
                        unsigned int indexI                = (atomI == targetAtom) ? 0 : 2;
                        unsigned int indexJ                = (atomI == targetAtom) ? 2 : 0;

                        debugArray[index].x                = (float) atomI;
                        debugArray[index].y                = (float) (y + tj);
                        debugArray[index].z                = dScaleVal;
                        debugArray[index].w                = pScaleVal;

                        unsigned int pullBackIndex         = 0;
                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = pullBack[pullBackIndex].x;
                        debugArray[index].y                = pullBack[pullBackIndex].y;
                        debugArray[index].z                = pullBack[pullBackIndex].z;
                        debugArray[index].w                = pullBack[pullBackIndex].w;;

                        pullBackIndex++;
                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = pullBack[pullBackIndex].x;
                        debugArray[index].y                = pullBack[pullBackIndex].y;
                        debugArray[index].z                = pullBack[pullBackIndex].z;
                        debugArray[index].w                = pullBack[pullBackIndex].w;;

                        float flag                         = 8.0f;
                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexI][0];
                        debugArray[index].y                = ijField[indexI][1];
                        debugArray[index].z                = ijField[indexI][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexJ][0];
                        debugArray[index].y                = ijField[indexJ][1];
                        debugArray[index].z                = ijField[indexJ][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexI+1][0];
                        debugArray[index].y                = ijField[indexI+1][1];
                        debugArray[index].z                = ijField[indexI+1][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexJ+1][0];
                        debugArray[index].y                = ijField[indexJ+1][1];
                        debugArray[index].z                = ijField[indexJ+1][2];
                        debugArray[index].w                = flag;

#if 0
index += cAmoebaSim.paddedNumberOfAtoms;
unsigned int mask = 1 << j;
unsigned int pScaleIndex = (scaleMask.x & mask) ? 1 : 0;
pScaleIndex += (scaleMask.y & mask) ? 2 : 0;
debugArray[index].x = (float) pScaleIndex;
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = pScaleValue + 10.0f;
#endif
                    }
#endif
                    tj = (tj + 1) & (GRID - 1);

                }

            }
            else  // bExclusion
            {
                // Read fixed atom data into registers and GRF

                // same as the branch above, but the scale factors for each y-side atom are
                // looked up from this tile's masks (indexed by tj, the y-side lane)
                unsigned int xi       = x >> GRIDBITS;
                unsigned int yi       = y >> GRIDBITS;
                unsigned int cell     = xi+yi*cAmoebaSim.paddedNumberOfAtoms/GRID-yi*(yi+1)/2;
                int  dScaleMask       = cAmoebaSim.pD_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];
                int2 pScaleMask       = cAmoebaSim.pP_ScaleIndices[cAmoebaSim.pScaleIndicesIndex[cell]+tgx];

                for (unsigned int j = 0; j < GRID; j++)
                {

                    // load coords, charge, ...

                    float ijField[4][3];

                    float dScaleValue;
                    float pScaleValue;
                    getMaskedDScaleFactor( tj, dScaleMask, &dScaleValue );
                    getMaskedPScaleFactor( tj, pScaleMask, &pScaleValue );
#ifdef AMOEBA_DEBUG
                    dScaleVal = dScaleValue;
                    pScaleVal = pScaleValue;
#endif
                    calculateFixedFieldRealSpacePairIxn_kernel( localParticle, psA[tj], dScaleValue, pScaleValue, ijField
#ifdef AMOEBA_DEBUG
                                                                , pullBack
#endif
                    );

                    // add to field at atomI the field due atomJ's charge/dipole/quadrupole

                    fieldSum[0]              += ijField[0][0];
                    fieldSum[1]              += ijField[0][1];
                    fieldSum[2]              += ijField[0][2];

                    fieldPolarSum[0]         += ijField[2][0];
                    fieldPolarSum[1]         += ijField[2][1];
                    fieldPolarSum[2]         += ijField[2][2];

                    // add to field at atomJ the field due atomI's charge/dipole/quadrupole

                    psA[tj].eField[0]        += ijField[1][0];
                    psA[tj].eField[1]        += ijField[1][1];
                    psA[tj].eField[2]        += ijField[1][2];

                    psA[tj].eFieldP[0]       += ijField[3][0];
                    psA[tj].eFieldP[1]       += ijField[3][1];
                    psA[tj].eFieldP[2]       += ijField[3][2];

#ifdef AMOEBA_DEBUG
                    // same record layout as the non-excluded branch, tagged with flag 9.0f
                    if( (atomI == targetAtom || (y + tj) == targetAtom) ){
                        unsigned int index                 = (atomI == targetAtom) ? (y + tj) : atomI;
                        unsigned int indexI                = (atomI == targetAtom) ? 0 : 2;
                        unsigned int indexJ                = (atomI == targetAtom) ? 2 : 0;

                        debugArray[index].x                = (float) atomI;
                        debugArray[index].y                = (float) (y + tj);
                        debugArray[index].z                = dScaleVal;
                        debugArray[index].w                = pScaleVal;

                        unsigned int pullBackIndex         = 0;
                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = pullBack[pullBackIndex].x;
                        debugArray[index].y                = pullBack[pullBackIndex].y;
                        debugArray[index].z                = pullBack[pullBackIndex].z;
                        debugArray[index].w                = pullBack[pullBackIndex].w;;

                        pullBackIndex++;
                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = pullBack[pullBackIndex].x;
                        debugArray[index].y                = pullBack[pullBackIndex].y;
                        debugArray[index].z                = pullBack[pullBackIndex].z;
                        debugArray[index].w                = pullBack[pullBackIndex].w;;

                        float flag                         = 9.0f;
                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexI][0];
                        debugArray[index].y                = ijField[indexI][1];
                        debugArray[index].z                = ijField[indexI][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexJ][0];
                        debugArray[index].y                = ijField[indexJ][1];
                        debugArray[index].z                = ijField[indexJ][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexI+1][0];
                        debugArray[index].y                = ijField[indexI+1][1];
                        debugArray[index].z                = ijField[indexI+1][2];
                        debugArray[index].w                = flag;

                        index                             += cAmoebaSim.paddedNumberOfAtoms;
                        debugArray[index].x                = ijField[indexJ+1][0];
                        debugArray[index].y                = ijField[indexJ+1][1];
                        debugArray[index].z                = ijField[indexJ+1][2];
                        debugArray[index].w                = flag;

                    }
#endif
                    tj = (tj + 1) & (GRID - 1);

                }

            }

            // Write results
            // first the x-side sums held by this thread, then the y-side sums this
            // thread's shared entry accumulated (atom y + tgx)

#ifdef USE_OUTPUT_BUFFER_PER_WARP
            unsigned int offset = 3*(x + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, fieldSum,      outputEField );
            load3dArrayBufferPerWarp( offset, fieldPolarSum, outputEFieldPolar );

            offset = 3*(y + tgx + warp*cAmoebaSim.paddedNumberOfAtoms);
            load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eField,  outputEField );
            load3dArrayBufferPerWarp( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#else
            unsigned int offset = 3*(x + tgx + (y >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
            load3dArray( offset, fieldSum,      outputEField );
            load3dArray( offset, fieldPolarSum, outputEFieldPolar );

            offset = 3*(y + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
            load3dArray( offset, sA[threadIdx.x].eField,  outputEField );
            load3dArray( offset, sA[threadIdx.x].eFieldP, outputEFieldPolar );
#endif
            lasty = y;
        }

        pos++;
    }
}
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaRealSpaceEwald.cu
View file @
cf335495
...
...
@@ -1027,6 +1027,9 @@ void cudaComputeAmoebaRealSpaceEwald( amoebaGpuContext amoebaGpu )
(
void
)
fflush
(
amoebaGpu
->
log
);
#endif
cudaChannelFormatDesc
channelDesc
=
cudaCreateChannelDesc
<
float
>
();
cudaBindTexture
(
NULL
,
&
tabulatedErfcRef
,
gpu
->
psTabulatedErfc
->
_pDevData
,
&
channelDesc
,
gpu
->
psTabulatedErfc
->
_length
*
sizeof
(
float
));
kCalculateAmoebaCudaRealSpaceEwaldN2Forces_kernel
<<<
amoebaGpu
->
nonbondBlocks
,
threadsPerBlock
,
sizeof
(
RealSpaceEwaldParticle
)
*
threadsPerBlock
>>>
(
amoebaGpu
->
psWorkUnit
->
_pDevStream
[
0
],
gpu
->
psPosq4
->
_pDevStream
[
0
],
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaRotateFrame.cu
View file @
cf335495
...
...
@@ -368,8 +368,12 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
cudaComputeAmoebaFixedEAndGkFields
(
amoebaGpu
);
cudaComputeAmoebaMutualInducedAndGkField
(
amoebaGpu
);
}
else
{
cudaComputeAmoebaFixedEField
(
amoebaGpu
);
cudaComputeAmoebaMutualInducedField
(
amoebaGpu
);
if
(
amoebaGpu
->
multipoleNonbondedMethod
==
AMOEBA_NO_CUTOFF
){
cudaComputeAmoebaFixedEField
(
amoebaGpu
);
cudaComputeAmoebaMutualInducedField
(
amoebaGpu
);
}
else
{
cudaComputeAmoebaPmeFixedEField
(
amoebaGpu
);
}
}
// check if induce dipole calculation converged -- abort if it did not
...
...
plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
View file @
cf335495
...
...
@@ -1989,6 +1989,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
int
usePme
=
0
;
double
aewald
=
0.0
;
double
cutoffDistance
=
0.0
;
double
box
[
3
]
=
{
10.0
,
10.0
,
10.0
};
// usePme, aewald, cutoffDistance added w/ Version 1
...
...
@@ -1996,7 +1997,11 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
usePme
=
atoi
(
tokens
[
2
].
c_str
()
);
aewald
=
atof
(
tokens
[
3
].
c_str
()
);
cutoffDistance
=
atof
(
tokens
[
4
].
c_str
()
);
box
[
0
]
=
atof
(
tokens
[
5
].
c_str
()
);
box
[
1
]
=
atof
(
tokens
[
6
].
c_str
()
);
box
[
2
]
=
atof
(
tokens
[
7
].
c_str
()
);
}
if
(
usePme
){
multipoleForce
->
setNonbondedMethod
(
AmoebaMultipoleForce
::
PME
);
}
else
{
...
...
@@ -2004,6 +2009,7 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
}
multipoleForce
->
setAEwald
(
aewald
);
multipoleForce
->
setCutoffDistance
(
cutoffDistance
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
box
[
0
],
0.0
,
0.0
),
Vec3
(
0.0
,
box
[
1
],
0.0
),
Vec3
(
0.0
,
0.0
,
box
[
2
])
);
if
(
log
){
(
void
)
fprintf
(
log
,
"%s number of MultipoleParameter terms=%d usePme=%d aewald=%15.7e cutoffDistance=%12.4f
\n
"
,
methodName
.
c_str
(),
numberOfMultipoles
,
usePme
,
aewald
,
cutoffDistance
);
...
...
@@ -2106,10 +2112,26 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
double
polarityConversion
=
AngstromToNm
*
AngstromToNm
*
AngstromToNm
;
double
dampingFactorConversion
=
sqrt
(
AngstromToNm
);
float
scalingDistanceCutoff
=
static_cast
<
float
>
(
multipoleForce
->
getScalingDistanceCutoff
());
scalingDistanceCutoff
*=
static_cast
<
float
>
(
AngstromToNm
);
multipoleForce
->
setScalingDistanceCutoff
(
scalingDistanceCutoff
);
multipoleForce
->
setAEwald
(
multipoleForce
->
getAEwald
()
/
AngstromToNm
);
multipoleForce
->
setCutoffDistance
(
multipoleForce
->
getCutoffDistance
()
*
AngstromToNm
);
multipoleForce
->
setScalingDistanceCutoff
(
multipoleForce
->
getScalingDistanceCutoff
()
*
AngstromToNm
);
Vec3
a
,
b
,
c
;
system
.
getDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
a
[
0
]
*=
AngstromToNm
;
a
[
1
]
*=
AngstromToNm
;
a
[
2
]
*=
AngstromToNm
;
b
[
0
]
*=
AngstromToNm
;
b
[
1
]
*=
AngstromToNm
;
b
[
2
]
*=
AngstromToNm
;
c
[
0
]
*=
AngstromToNm
;
c
[
1
]
*=
AngstromToNm
;
c
[
2
]
*=
AngstromToNm
;
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
for
(
int
ii
=
0
;
ii
<
multipoleForce
->
getNumMultipoles
();
ii
++
){
...
...
@@ -2144,6 +2166,15 @@ static int readAmoebaMultipoleParameters( FILE* filePtr, int version, MapStringI
(
void
)
fprintf
(
log
,
"%s Sample of parameters using %s units.
\n
"
,
methodName
.
c_str
(),
(
useOpenMMUnits
?
"OpenMM"
:
"Amoeba"
)
);
std
::
string
nonbondedMethod
=
multipoleForce
->
getNonbondedMethod
(
)
==
AmoebaMultipoleForce
::
PME
?
"PME"
:
"NoCutoff"
;
(
void
)
fprintf
(
log
,
"NonbondedMethod=%s aEwald=%15.7e cutoff=%15.7e.
\n
"
,
nonbondedMethod
.
c_str
(),
multipoleForce
->
getAEwald
(),
multipoleForce
->
getCutoffDistance
()
);
Vec3
a
,
b
,
c
;
system
.
getDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
(
void
)
fprintf
(
log
,
"Box=[%12.3f %12.3f %12.3f] [%12.3f %12.3f %12.3f] [%12.3f %12.3f %12.3f]
\n
"
,
a
[
0
],
a
[
1
],
a
[
2
],
b
[
0
],
b
[
1
],
b
[
2
],
c
[
0
],
c
[
1
],
c
[
2
]
);
(
void
)
fprintf
(
log
,
"Supplementary fields %u: "
,
static_cast
<
unsigned
int
>
(
supplementary
.
size
())
);
for
(
MapStringVectorOfVectorsCI
ii
=
supplementary
.
begin
();
ii
!=
supplementary
.
end
();
ii
++
){
(
void
)
fprintf
(
log
,
"%s "
,
(
*
ii
).
first
.
c_str
()
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment