Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c0d1eb7b
Commit
c0d1eb7b
authored
Aug 23, 2012
by
Peter Eastman
Browse files
Continuing to convert AmoebaMultipoleForce: PME with mutual induced polarization now works
parent
3af4da86
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
128 additions
and
48 deletions
+128
-48
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
+41
-24
plugins/amoeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
...oeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
+70
-6
plugins/amoeba/platforms/cuda2/src/kernels/multipolePme.cu
plugins/amoeba/platforms/cuda2/src/kernels/multipolePme.cu
+10
-10
plugins/amoeba/platforms/cuda2/src/kernels/pmeElectrostaticPairForce.cu
.../platforms/cuda2/src/kernels/pmeElectrostaticPairForce.cu
+7
-8
No files found.
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
View file @
c0d1eb7b
...
...
@@ -1387,6 +1387,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
recordInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()};
cu
.
executeKernel
(
recordInducedDipolesKernel
,
recordInducedDipolesArgs
,
cu
.
getNumAtoms
());
// Iterate until the dipoles converge.
vector
<
float2
>
errors
;
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
...
...
@@ -1492,32 +1495,46 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
pmePhidp
->
getDevicePointer
(),
&
pmeIgrid
->
getDevicePointer
(),
&
pmeTheta1
->
getDevicePointer
(),
&
pmeTheta2
->
getDevicePointer
(),
&
pmeTheta3
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
// void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
// Iterate until the dipoles converge.
// vector<float2> errors;
// for (int i = 0; i < maxInducedIterations; i++) {
// cu.clearBuffer(*inducedField);
// cu.clearBuffer(*inducedFieldPolar);
// void* computeInducedFieldArgs[] = {&inducedField->getDevicePointer(), &inducedFieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
// &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &startTileIndex, &numTileIndices,
// &dampingAndThole->getDevicePointer()};
// cu.executeKernel(computeInducedFieldKernel, computeInducedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
// void* updateInducedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &inducedField->getDevicePointer(),
// &inducedFieldPolar->getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
// &polarizability->getDevicePointer(), &inducedDipoleErrors->getDevicePointer()};
// cu.executeKernel(updateInducedFieldKernel, updateInducedFieldArgs, cu.getNumThreadBlocks()*cu.ThreadBlockSize);
// inducedDipoleErrors->download(errors);
// double total1 = 0.0, total2 = 0.0;
// for (int j = 0; j < (int) errors.size(); j++) {
// total1 += errors[j].x;
// total2 += errors[j].y;
// }
// if (48.033324*sqrt(max(total1, total2)/cu.getNumAtoms()) < inducedEpsilon)
// break;
// }
vector
<
float2
>
errors
;
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
maxTiles
,
&
nb
.
getInteractionFlags
().
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
void
*
updateInducedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
);
inducedDipoleErrors
->
download
(
errors
);
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
(
int
)
errors
.
size
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
break
;
}
// Compute electrostatic force.
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipoleInducedField.cu
View file @
c0d1eb7b
...
...
@@ -21,6 +21,67 @@ inline __device__ void loadAtomData(AtomData& data, int atom, const real4* __res
data
.
thole
=
temp
.
y
;
}
#ifdef USE_EWALD
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
real3
deltaR
,
real3
*
fields
)
{
real
scale1
,
scale2
;
real
r2
=
dot
(
deltaR
,
deltaR
);
if
(
r2
<
CUTOFF_SQUARED
)
{
real
rI
=
RSQRT
(
r2
);
real
r
=
RECIP
(
rI
);
// calculate the error function damping terms
real
ralpha
=
EWALD_ALPHA
*
r
;
real
bn0
=
erfc
(
ralpha
)
*
rI
;
real
alsq2
=
2
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
alsq2n
=
RECIP
(
SQRT
(
M_PI
)
*
EWALD_ALPHA
);
real
exp2a
=
expf
(
-
(
ralpha
*
ralpha
));
alsq2n
*=
alsq2
;
real
bn1
=
(
bn0
+
alsq2n
*
exp2a
)
*
rI
*
rI
;
alsq2n
*=
alsq2
;
real
bn2
=
(
3
*
bn1
+
alsq2n
*
exp2a
)
*
rI
*
rI
;
// compute the error function scaled and unscaled terms
real
scale3
=
1
;
real
scale5
=
1
;
real
damp
=
atom1
.
damp
*
atom2
.
damp
;
if
(
damp
!=
0
)
{
real
ratio
=
(
r
/
damp
);
ratio
=
ratio
*
ratio
*
ratio
;
float
pgamma
=
atom1
.
thole
<
atom2
.
thole
?
atom1
.
thole
:
atom2
.
thole
;
damp
=
-
pgamma
*
ratio
;
if
(
damp
>
-
50
)
{
real
expdamp
=
EXP
(
damp
);
scale3
=
1
-
expdamp
;
scale5
=
1
-
expdamp
*
(
1
-
damp
);
}
}
real
dsc3
=
scale3
;
real
dsc5
=
scale5
;
real
r3
=
(
r
*
r2
);
real
r5
=
(
r3
*
r2
);
real
rr3
=
(
1
-
dsc3
)
/
r3
;
real
rr5
=
3
*
(
1
-
dsc5
)
/
r5
;
scale1
=
rr3
-
bn1
;
scale2
=
bn2
-
rr5
;
}
else
{
scale1
=
0
;
scale2
=
0
;
}
real
dDotDelta
=
scale2
*
dot
(
deltaR
,
atom2
.
inducedDipole
);
fields
[
0
]
=
scale1
*
atom2
.
inducedDipole
+
dDotDelta
*
deltaR
;
dDotDelta
=
scale2
*
dot
(
deltaR
,
atom2
.
inducedDipolePolar
);
fields
[
1
]
=
scale1
*
atom2
.
inducedDipolePolar
+
dDotDelta
*
deltaR
;
dDotDelta
=
scale2
*
dot
(
deltaR
,
atom1
.
inducedDipole
);
fields
[
2
]
=
scale1
*
atom1
.
inducedDipole
+
dDotDelta
*
deltaR
;
dDotDelta
=
scale2
*
dot
(
deltaR
,
atom1
.
inducedDipolePolar
);
fields
[
3
]
=
scale1
*
atom1
.
inducedDipolePolar
+
dDotDelta
*
deltaR
;
}
#else
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
real3
deltaR
,
real3
*
fields
)
{
real
rI
=
RSQRT
(
dot
(
deltaR
,
deltaR
));
real
r
=
RECIP
(
rI
);
...
...
@@ -43,6 +104,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, real3 de
dDotDelta
=
rr5
*
dot
(
deltaR
,
atom1
.
inducedDipolePolar
);
fields
[
3
]
=
rr3
*
atom1
.
inducedDipolePolar
+
dDotDelta
*
deltaR
;
}
#endif
/**
* Compute the mutual induced field.
...
...
@@ -139,10 +201,10 @@ extern "C" __global__ void computeInducedField(
else
{
// Compute only a subset of the interactions in this tile.
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
int
atom2
=
tbx
+
j
;
real3
delta
=
make_real3
(
localData
[
atom2
].
pos
q
.
x
-
data
.
pos
q
.
x
,
localData
[
atom2
].
posq
.
y
-
data
.
posq
.
y
,
localData
[
atom2
].
posq
.
z
-
data
.
posq
.
z
)
;
real3
delta
=
localData
[
atom2
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
@@ -254,18 +316,20 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
const
long
long
*
__restrict__
inducedField
,
const
long
long
*
__restrict__
inducedFieldPolar
,
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
)
{
extern
__shared__
real2
buffer
[];
float
polarSOR
=
0.55
f
;
const
float
polarSOR
=
0.55
f
;
const
real
term
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT
(
M_PI
);
const
real
fieldScale
=
1
/
(
real
)
0xFFFFFFFF
;
real
sumErrors
=
0
;
real
sumPolarErrors
=
0
;
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
scale
=
polarizability
[
atom
]
/
(
real
)
0xFFFFFFFF
;
real
scale
=
polarizability
[
atom
];
for
(
int
component
=
0
;
component
<
3
;
component
++
)
{
int
dipoleIndex
=
3
*
atom
+
component
;
int
fieldIndex
=
atom
+
component
*
PADDED_NUM_ATOMS
;
real
previousDipole
=
inducedDipole
[
dipoleIndex
];
real
previousDipolePolar
=
inducedDipolePolar
[
dipoleIndex
];
real
newDipole
=
scale
*
(
fixedField
[
fieldIndex
]
+
inducedField
[
fieldIndex
]);
real
newDipolePolar
=
scale
*
(
fixedFieldPolar
[
fieldIndex
]
+
inducedFieldPolar
[
fieldIndex
]);
real
newDipole
=
scale
*
(
(
fixedField
[
fieldIndex
]
+
inducedField
[
fieldIndex
])
*
fieldScale
+
term
*
previousDipole
)
;
real
newDipolePolar
=
scale
*
(
(
fixedFieldPolar
[
fieldIndex
]
+
inducedFieldPolar
[
fieldIndex
])
*
fieldScale
+
term
*
previousDipolePolar
)
;
newDipole
=
previousDipole
+
polarSOR
*
(
newDipole
-
previousDipole
);
newDipolePolar
=
previousDipolePolar
+
polarSOR
*
(
newDipolePolar
-
previousDipolePolar
);
inducedDipole
[
dipoleIndex
]
=
newDipole
;
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipolePme.cu
View file @
c0d1eb7b
...
...
@@ -864,16 +864,16 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
}
extern
"C"
__global__
void
recordInducedFieldDipoles
(
const
real
*
__restrict__
phid
,
real
*
const
__restrict__
phip
,
real
*
__restrict__
induced
Dipole
,
real
*
__restrict__
induced
Dipole
Polar
,
real4
invPeriodicBoxSize
)
{
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
long
long
*
__restrict__
induced
Field
,
long
long
*
__restrict__
induced
Field
Polar
,
real4
invPeriodicBoxSize
)
{
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0xFFFFFFFF
;
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0xFFFFFFFF
;
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0xFFFFFFFF
;
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
induced
Dipole
[
3
*
i
]
-=
xscale
*
phid
[
10
*
i
+
1
];
induced
Dipole
[
3
*
i
+
1
]
-=
yscale
*
phid
[
10
*
i
+
2
];
induced
Dipole
[
3
*
i
+
2
]
-=
zscale
*
phid
[
10
*
i
+
3
];
induced
Dipole
Polar
[
3
*
i
]
-=
xscale
*
phip
[
10
*
i
+
1
];
induced
DipolePolar
[
3
*
i
+
1
]
-=
yscale
*
phip
[
10
*
i
+
2
];
induced
DipolePolar
[
3
*
i
+
2
]
-=
zscale
*
phip
[
10
*
i
+
3
];
induced
Field
[
i
]
-=
(
long
long
)
(
xscale
*
phid
[
10
*
i
+
1
]
)
;
induced
Field
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
yscale
*
phid
[
10
*
i
+
2
]
)
;
induced
Field
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
zscale
*
phid
[
10
*
i
+
3
]
)
;
induced
Field
Polar
[
i
]
-=
(
long
long
)
(
xscale
*
phip
[
10
*
i
+
1
]
)
;
induced
FieldPolar
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
yscale
*
phip
[
10
*
i
+
2
]
)
;
induced
FieldPolar
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
zscale
*
phip
[
10
*
i
+
3
]
)
;
}
}
plugins/amoeba/platforms/cuda2/src/kernels/pmeElectrostaticPairForce.cu
View file @
c0d1eb7b
...
...
@@ -290,14 +290,13 @@ __device__ void computeOneInteractionF2(AtomData& atom1, volatile AtomData& atom
real
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
#ifndef DIRECT_POLARIZATION
#ifdef APPLY_SCALE
prefactor1
=
0.5
f
*
(
bn2
-
usc5
);
prefactor1
=
0.5
f
*
(
bn2
-
usc5
);
#else
prefactor1
=
0.5
f
*
(
bn2
-
psc5
);
prefactor1
=
0.5
f
*
(
bn2
-
psc5
);
#endif
ftm21
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
));
ftm22
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
));
ftm23
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
));
}
ftm21
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
x
+
scip4
*
atom1
.
inducedDipole
.
x
));
ftm22
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
y
+
scip4
*
atom1
.
inducedDipole
.
y
));
ftm23
+=
prefactor1
*
((
sci4
*
atom1
.
inducedDipolePolar
.
z
+
scip4
*
atom1
.
inducedDipole
.
z
));
#endif
#ifdef APPLY_SCALE
...
...
@@ -461,8 +460,8 @@ __device__ void computeOneInteractionF2(AtomData& atom1, volatile AtomData& atom
ftm23
+=
prefactor1
*
(
sci3
*
atom2
.
inducedDipolePolar
.
z
+
scip3
*
atom2
.
inducedDipole
.
z
);
real
sci34
;
real
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
real
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
sci4
=
atom2
.
inducedDipole
.
x
*
xr
+
atom2
.
inducedDipole
.
y
*
yr
+
atom2
.
inducedDipole
.
z
*
zr
;
scip4
=
atom2
.
inducedDipolePolar
.
x
*
xr
+
atom2
.
inducedDipolePolar
.
y
*
yr
+
atom2
.
inducedDipolePolar
.
z
*
zr
;
sci34
=
(
sci3
*
scip4
+
scip3
*
sci4
);
#ifdef APPLY_SCALE
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment