Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8db70d34
Commit
8db70d34
authored
Jul 21, 2014
by
peastman
Browse files
Merge pull request #547 from peastman/diis
Implemented DIIS for converging AMOEBA induced dipoles
parents
61595804
bbbc3160
Changes
5
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
1460 additions
and
1132 deletions
+1460
-1132
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+115
-31
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+10
-0
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+113
-2
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
...ence/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
+962
-855
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
...erence/src/SimTKReference/AmoebaReferenceMultipoleForce.h
+260
-244
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
8db70d34
...
@@ -40,6 +40,7 @@
...
@@ -40,6 +40,7 @@
#include "CudaForceInfo.h"
#include "CudaForceInfo.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "jama_svd.h"
#include <algorithm>
#include <algorithm>
#include <cmath>
#include <cmath>
...
@@ -796,8 +797,9 @@ private:
...
@@ -796,8 +797,9 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
}
}
...
@@ -832,6 +834,20 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -832,6 +834,20 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
inducedDipolePolar
;
delete
inducedDipolePolar
;
if
(
inducedDipoleErrors
!=
NULL
)
if
(
inducedDipoleErrors
!=
NULL
)
delete
inducedDipoleErrors
;
delete
inducedDipoleErrors
;
if
(
prevDipoles
!=
NULL
)
delete
prevDipoles
;
if
(
prevDipolesPolar
!=
NULL
)
delete
prevDipolesPolar
;
if
(
prevDipolesGk
!=
NULL
)
delete
prevDipolesGk
;
if
(
prevDipolesGkPolar
!=
NULL
)
delete
prevDipolesGkPolar
;
if
(
prevErrors
!=
NULL
)
delete
prevErrors
;
if
(
diisMatrix
!=
NULL
)
delete
diisMatrix
;
if
(
diisCoefficients
!=
NULL
)
delete
diisCoefficients
;
if
(
polarizability
!=
NULL
)
if
(
polarizability
!=
NULL
)
delete
polarizability
;
delete
polarizability
;
if
(
covalentFlags
!=
NULL
)
if
(
covalentFlags
!=
NULL
)
...
@@ -959,6 +975,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -959,6 +975,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
prevDipoles
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipoles"
);
prevDipolesPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesPolar"
);
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
torque
);
cu
.
addAutoclearBuffer
(
*
torque
);
...
@@ -1088,6 +1109,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1088,6 +1109,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines
[
"GK_FQ"
]
=
cu
.
doubleToString
(
3
*
(
1
-
solventDielectric
)
/
(
2
+
3
*
solventDielectric
));
defines
[
"GK_FQ"
]
=
cu
.
doubleToString
(
3
*
(
1
-
solventDielectric
)
/
(
2
+
3
*
solventDielectric
));
fixedThreadMemory
+=
4
*
elementSize
;
fixedThreadMemory
+=
4
*
elementSize
;
inducedThreadMemory
+=
13
*
elementSize
;
inducedThreadMemory
+=
13
*
elementSize
;
prevDipolesGk
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGk"
);
prevDipolesGkPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGkPolar"
);
}
}
int
maxThreads
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
int
maxThreads
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
fixedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
fixedThreadMemory
));
fixedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
fixedThreadMemory
));
...
@@ -1102,9 +1125,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1102,9 +1125,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
if
(
maxInducedIterations
>
0
)
{
if
(
maxInducedIterations
>
0
)
{
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"MAX_PREV_DIIS_DIPOLES"
]
=
cu
.
intToString
(
MaxPrevDIISDipoles
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldBySOR"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
}
}
stringstream
electrostaticsSource
;
stringstream
electrostaticsSource
;
if
(
usePME
)
{
if
(
usePME
)
{
...
@@ -1421,7 +1447,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1421,7 +1447,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
// Iterate until the dipoles converge.
vector
<
float2
>
errors
;
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
...
@@ -1440,23 +1465,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1440,23 +1465,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getBornRadii
()
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
&
gkKernel
->
getBornRadii
()
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
void
*
updateInducedGkFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
gkKernel
->
getField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedFieldPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedGkFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
}
}
void
*
updateInducedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
npt
,
&
inducedField
->
getDevicePointer
(),
double
maxEpsilon
=
iterateDipolesByDIIS
(
i
);
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
if
(
maxEpsilon
<
inducedEpsilon
)
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
inducedDipoleErrors
->
download
(
errors
);
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
(
int
)
errors
.
size
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
break
;
break
;
}
}
...
@@ -1568,17 +1579,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1568,17 +1579,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
void
*
updateInducedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
npt
,
&
inducedField
->
getDevicePointer
(),
double
maxEpsilon
=
iterateDipolesByDIIS
(
i
);
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
if
(
maxEpsilon
<
inducedEpsilon
)
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
inducedDipoleErrors
->
download
(
errors
);
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
(
int
)
errors
.
size
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
break
;
break
;
}
}
...
@@ -1612,6 +1614,88 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1612,6 +1614,88 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return
0.0
;
return
0.0
;
}
}
double
CudaCalcAmoebaMultipoleForceKernel
::
iterateDipolesByDIIS
(
int
iteration
)
{
void
*
npt
=
NULL
;
bool
trueValue
=
true
,
falseValue
=
false
;
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
// Record the dipole and errors into the lists of previous dipoles.
if
(
gkKernel
!=
NULL
)
{
void
*
recordDIISDipolesGkArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
gkKernel
->
getField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedFieldPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
(),
&
prevDipolesGk
->
getDevicePointer
(),
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
falseValue
,
&
diisMatrix
->
getDevicePointer
()};
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesGkArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
}
void
*
recordDIISDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
npt
,
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
(),
&
prevDipoles
->
getDevicePointer
(),
&
prevDipolesPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
trueValue
,
&
diisMatrix
->
getDevicePointer
()};
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
float2
*
errors
=
(
float2
*
)
cu
.
getPinnedBuffer
();
inducedDipoleErrors
->
download
(
errors
,
false
);
// Determine the coefficients for selecting the new dipoles.
int
numPrev
=
(
iteration
+
1
<
MaxPrevDIISDipoles
?
iteration
+
1
:
MaxPrevDIISDipoles
);
void
*
buildMatrixArgs
[]
=
{
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
diisMatrix
->
getDevicePointer
()};
int
threadBlocks
=
min
(
numPrev
,
cu
.
getNumThreadBlocks
());
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
threadBlocks
*
128
,
128
,
128
*
elementSize
);
vector
<
float
>
coefficients
(
MaxPrevDIISDipoles
);
if
(
iteration
==
0
)
coefficients
[
0
]
=
1
;
else
{
vector
<
float
>
matrix
;
diisMatrix
->
download
(
matrix
);
int
rank
=
numPrev
+
1
;
Array2D
<
double
>
b
(
rank
,
rank
);
b
[
0
][
0
]
=
0
;
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
b
[
i
][
0
]
=
b
[
0
][
i
]
=
-
1
;
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
for
(
int
j
=
0
;
j
<
numPrev
;
j
++
)
b
[
i
+
1
][
j
+
1
]
=
matrix
[
i
*
MaxPrevDIISDipoles
+
j
];
// Solve using SVD. Since the right hand side is (-1, 0, 0, 0, ...), this is simpler than the general case.
JAMA
::
SVD
<
double
>
svd
(
b
);
Array2D
<
double
>
u
,
v
;
svd
.
getU
(
u
);
svd
.
getV
(
v
);
Array1D
<
double
>
s
;
svd
.
getSingularValues
(
s
);
int
effectiveRank
=
svd
.
rank
();
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
{
double
d
=
0
;
for
(
int
j
=
0
;
j
<
effectiveRank
;
j
++
)
d
-=
u
[
0
][
j
]
*
v
[
i
][
j
]
/
s
[
j
];
coefficients
[
i
-
1
]
=
d
;
}
}
diisCoefficients
->
upload
(
&
coefficients
[
0
]);
// Compute the dipoles.
void
*
updateInducedFieldArgs
[]
=
{
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
prevDipoles
->
getDevicePointer
(),
&
prevDipolesPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
);
if
(
gkKernel
!=
NULL
)
{
void
*
updateInducedFieldGkArgs
[]
=
{
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
prevDipolesGk
->
getDevicePointer
(),
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldGkArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
);
}
// Compute the overall error for monitoring convergence.
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
inducedDipoleErrors
->
getSize
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
return
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
());
}
void
CudaCalcAmoebaMultipoleForceKernel
::
ensureMultipolesValid
(
ContextImpl
&
context
)
{
void
CudaCalcAmoebaMultipoleForceKernel
::
ensureMultipolesValid
(
ContextImpl
&
context
)
{
if
(
multipolesAreValid
)
{
if
(
multipolesAreValid
)
{
int
numParticles
=
cu
.
getNumAtoms
();
int
numParticles
=
cu
.
getNumAtoms
();
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
8db70d34
...
@@ -375,6 +375,7 @@ private:
...
@@ -375,6 +375,7 @@ private:
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
};
void
initializeScaleFactors
();
void
initializeScaleFactors
();
double
iterateDipolesByDIIS
(
int
iteration
);
void
ensureMultipolesValid
(
ContextImpl
&
context
);
void
ensureMultipolesValid
(
ContextImpl
&
context
);
template
<
class
T
,
class
T4
,
class
M4
>
void
computeSystemMultipoleMoments
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
outputMultipoleMoments
);
template
<
class
T
,
class
T4
,
class
M4
>
void
computeSystemMultipoleMoments
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
outputMultipoleMoments
);
int
numMultipoles
,
maxInducedIterations
;
int
numMultipoles
,
maxInducedIterations
;
...
@@ -399,6 +400,13 @@ private:
...
@@ -399,6 +400,13 @@ private:
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
inducedDipoleErrors
;
CudaArray
*
inducedDipoleErrors
;
CudaArray
*
prevDipoles
;
CudaArray
*
prevDipolesPolar
;
CudaArray
*
prevDipolesGk
;
CudaArray
*
prevDipolesGkPolar
;
CudaArray
*
prevErrors
;
CudaArray
*
diisMatrix
;
CudaArray
*
diisCoefficients
;
CudaArray
*
polarizability
;
CudaArray
*
polarizability
;
CudaArray
*
covalentFlags
;
CudaArray
*
covalentFlags
;
CudaArray
*
polarizationGroupFlags
;
CudaArray
*
polarizationGroupFlags
;
...
@@ -419,8 +427,10 @@ private:
...
@@ -419,8 +427,10 @@ private:
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
static
const
int
MaxPrevDIISDipoles
=
20
;
};
};
/**
/**
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
8db70d34
...
@@ -495,3 +495,114 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
...
@@ -495,3 +495,114 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
if
(
threadIdx
.
x
==
0
)
if
(
threadIdx
.
x
==
0
)
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
}
}
extern
"C"
__global__
void
recordInducedDipolesForDIIS
(
const
long
long
*
__restrict__
fixedField
,
const
long
long
*
__restrict__
fixedFieldPolar
,
const
long
long
*
__restrict__
fixedFieldS
,
const
long
long
*
__restrict__
inducedField
,
const
long
long
*
__restrict__
inducedFieldPolar
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
,
real
*
__restrict__
prevDipoles
,
real
*
__restrict__
prevDipolesPolar
,
real
*
__restrict__
prevErrors
,
int
iteration
,
bool
recordPrevErrors
,
real
*
__restrict__
matrix
)
{
extern
__shared__
real2
buffer
[];
#ifdef USE_EWALD
const
real
ewaldScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
#else
const
real
ewaldScale
=
0
;
#endif
const
real
fieldScale
=
1
/
(
real
)
0x100000000
;
real
sumErrors
=
0
;
real
sumPolarErrors
=
0
;
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
scale
=
polarizability
[
atom
];
for
(
int
component
=
0
;
component
<
3
;
component
++
)
{
int
dipoleIndex
=
3
*
atom
+
component
;
int
fieldIndex
=
atom
+
component
*
PADDED_NUM_ATOMS
;
if
(
iteration
>=
MAX_PREV_DIIS_DIPOLES
)
{
// We have filled up the buffer for previous dipoles, so shift them all over by one.
for
(
int
i
=
1
;
i
<
MAX_PREV_DIIS_DIPOLES
;
i
++
)
{
int
index1
=
dipoleIndex
+
(
i
-
1
)
*
NUM_ATOMS
*
3
;
int
index2
=
dipoleIndex
+
i
*
NUM_ATOMS
*
3
;
prevDipoles
[
index1
]
=
prevDipoles
[
index2
];
prevDipolesPolar
[
index1
]
=
prevDipolesPolar
[
index2
];
if
(
recordPrevErrors
)
prevErrors
[
index1
]
=
prevErrors
[
index2
];
}
}
// Compute the new dipole, and record it along with the error.
real
oldDipole
=
inducedDipole
[
dipoleIndex
];
real
oldDipolePolar
=
inducedDipolePolar
[
dipoleIndex
];
long
long
fixedS
=
(
fixedFieldS
==
NULL
?
(
long
long
)
0
:
fixedFieldS
[
fieldIndex
]);
real
newDipole
=
scale
*
((
fixedField
[
fieldIndex
]
+
fixedS
+
inducedField
[
fieldIndex
])
*
fieldScale
+
ewaldScale
*
oldDipole
);
real
newDipolePolar
=
scale
*
((
fixedFieldPolar
[
fieldIndex
]
+
fixedS
+
inducedFieldPolar
[
fieldIndex
])
*
fieldScale
+
ewaldScale
*
oldDipolePolar
);
int
storePrevIndex
=
dipoleIndex
+
min
(
iteration
,
MAX_PREV_DIIS_DIPOLES
-
1
)
*
NUM_ATOMS
*
3
;
prevDipoles
[
storePrevIndex
]
=
newDipole
;
prevDipolesPolar
[
storePrevIndex
]
=
newDipolePolar
;
if
(
recordPrevErrors
)
prevErrors
[
storePrevIndex
]
=
newDipole
-
oldDipole
;
sumErrors
+=
(
newDipole
-
oldDipole
)
*
(
newDipole
-
oldDipole
);
sumPolarErrors
+=
(
newDipolePolar
-
oldDipolePolar
)
*
(
newDipolePolar
-
oldDipolePolar
);
}
}
// Sum the errors over threads and store the total for this block.
buffer
[
threadIdx
.
x
]
=
make_real2
(
sumErrors
,
sumPolarErrors
);
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
buffer
[
threadIdx
.
x
].
x
+=
buffer
[
threadIdx
.
x
+
offset
].
x
;
buffer
[
threadIdx
.
x
].
y
+=
buffer
[
threadIdx
.
x
+
offset
].
y
;
}
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
if
(
iteration
>=
MAX_PREV_DIIS_DIPOLES
&&
recordPrevErrors
&&
blockIdx
.
x
==
0
)
{
// Shift over the existing matrix elements.
for
(
int
i
=
0
;
i
<
MAX_PREV_DIIS_DIPOLES
-
1
;
i
++
)
{
if
(
threadIdx
.
x
<
MAX_PREV_DIIS_DIPOLES
-
1
)
matrix
[
threadIdx
.
x
+
i
*
MAX_PREV_DIIS_DIPOLES
]
=
matrix
[(
threadIdx
.
x
+
1
)
+
(
i
+
1
)
*
MAX_PREV_DIIS_DIPOLES
];
__syncthreads
();
}
}
}
extern
"C"
__global__
void
computeDIISMatrix
(
real
*
__restrict__
prevErrors
,
int
iteration
,
real
*
__restrict__
matrix
)
{
extern
__shared__
real
sumBuffer
[];
int
j
=
min
(
iteration
,
MAX_PREV_DIIS_DIPOLES
-
1
);
for
(
int
i
=
blockIdx
.
x
;
i
<=
j
;
i
+=
gridDim
.
x
)
{
// All the threads in this thread block work together to compute a single matrix element.
real
sum
=
0
;
for
(
int
index
=
threadIdx
.
x
;
index
<
NUM_ATOMS
*
3
;
index
+=
blockDim
.
x
)
sum
+=
prevErrors
[
index
+
i
*
NUM_ATOMS
*
3
]
*
prevErrors
[
index
+
j
*
NUM_ATOMS
*
3
];
sumBuffer
[
threadIdx
.
x
]
=
sum
;
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
sumBuffer
[
threadIdx
.
x
]
+=
sumBuffer
[
threadIdx
.
x
+
offset
];
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
{
matrix
[
i
+
MAX_PREV_DIIS_DIPOLES
*
j
]
=
sumBuffer
[
0
];
if
(
i
!=
j
)
matrix
[
j
+
MAX_PREV_DIIS_DIPOLES
*
i
]
=
sumBuffer
[
0
];
}
}
}
extern
"C"
__global__
void
updateInducedFieldByDIIS
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
real
*
__restrict__
prevDipoles
,
const
real
*
__restrict__
prevDipolesPolar
,
const
float
*
__restrict__
coefficients
,
int
numPrev
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
sum
=
0
;
real
sumPolar
=
0
;
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
{
sum
+=
coefficients
[
i
]
*
prevDipoles
[
i
*
3
*
NUM_ATOMS
+
index
];
sumPolar
+=
coefficients
[
i
]
*
prevDipolesPolar
[
i
*
3
*
NUM_ATOMS
+
index
];
}
inducedDipole
[
index
]
=
sum
;
inducedDipolePolar
[
index
]
=
sumPolar
;
}
}
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.cpp
View file @
8db70d34
This diff is collapsed.
Click to expand it.
plugins/amoeba/platforms/reference/src/SimTKReference/AmoebaReferenceMultipoleForce.h
View file @
8db70d34
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment