Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a346a6db
Commit
a346a6db
authored
Jul 18, 2014
by
peastman
Browse files
Initial CUDA implementation of DIIS
parent
49c9817f
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
239 additions
and
33 deletions
+239
-33
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+111
-31
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+10
-0
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+118
-2
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
a346a6db
...
@@ -40,6 +40,7 @@
...
@@ -40,6 +40,7 @@
#include "CudaForceInfo.h"
#include "CudaForceInfo.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "jama_svd.h"
#include <algorithm>
#include <algorithm>
#include <cmath>
#include <cmath>
...
@@ -796,8 +797,9 @@ private:
...
@@ -796,8 +797,9 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipole
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
}
}
...
@@ -832,6 +834,20 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -832,6 +834,20 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
inducedDipolePolar
;
delete
inducedDipolePolar
;
if
(
inducedDipoleErrors
!=
NULL
)
if
(
inducedDipoleErrors
!=
NULL
)
delete
inducedDipoleErrors
;
delete
inducedDipoleErrors
;
if
(
prevDipoles
!=
NULL
)
delete
prevDipoles
;
if
(
prevDipolesPolar
!=
NULL
)
delete
prevDipolesPolar
;
if
(
prevDipolesGk
!=
NULL
)
delete
prevDipolesGk
;
if
(
prevDipolesGkPolar
!=
NULL
)
delete
prevDipolesGkPolar
;
if
(
prevErrors
!=
NULL
)
delete
prevErrors
;
if
(
diisMatrix
!=
NULL
)
delete
diisMatrix
;
if
(
diisCoefficients
!=
NULL
)
delete
diisCoefficients
;
if
(
polarizability
!=
NULL
)
if
(
polarizability
!=
NULL
)
delete
polarizability
;
delete
polarizability
;
if
(
covalentFlags
!=
NULL
)
if
(
covalentFlags
!=
NULL
)
...
@@ -959,6 +975,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -959,6 +975,11 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
prevDipoles
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipoles"
);
prevDipolesPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesPolar"
);
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
diisMatrix
=
new
CudaArray
(
cu
,
(
MaxPrevDIISDipoles
+
1
)
*
(
MaxPrevDIISDipoles
+
1
),
elementSize
,
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
torque
);
cu
.
addAutoclearBuffer
(
*
torque
);
...
@@ -1088,6 +1109,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1088,6 +1109,8 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines
[
"GK_FQ"
]
=
cu
.
doubleToString
(
3
*
(
1
-
solventDielectric
)
/
(
2
+
3
*
solventDielectric
));
defines
[
"GK_FQ"
]
=
cu
.
doubleToString
(
3
*
(
1
-
solventDielectric
)
/
(
2
+
3
*
solventDielectric
));
fixedThreadMemory
+=
4
*
elementSize
;
fixedThreadMemory
+=
4
*
elementSize
;
inducedThreadMemory
+=
13
*
elementSize
;
inducedThreadMemory
+=
13
*
elementSize
;
prevDipolesGk
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGk"
);
prevDipolesGkPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGkPolar"
);
}
}
int
maxThreads
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
int
maxThreads
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
fixedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
fixedThreadMemory
));
fixedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
fixedThreadMemory
));
...
@@ -1102,9 +1125,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1102,9 +1125,12 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
if
(
maxInducedIterations
>
0
)
{
if
(
maxInducedIterations
>
0
)
{
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"MAX_PREV_DIIS_DIPOLES"
]
=
cu
.
intToString
(
MaxPrevDIISDipoles
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleInducedField
,
defines
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
computeInducedFieldKernel
=
cu
.
getKernel
(
module
,
"computeInducedField"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldBySOR"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
}
}
stringstream
electrostaticsSource
;
stringstream
electrostaticsSource
;
if
(
usePME
)
{
if
(
usePME
)
{
...
@@ -1421,7 +1447,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1421,7 +1447,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
// Iterate until the dipoles converge.
vector
<
float2
>
errors
;
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
...
@@ -1440,23 +1465,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1440,23 +1465,9 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getBornRadii
()
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
&
gkKernel
->
getBornRadii
()
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
void
*
updateInducedGkFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
gkKernel
->
getField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedFieldPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedGkFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
}
void
*
updateInducedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
npt
,
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
inducedDipoleErrors
->
download
(
errors
);
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
(
int
)
errors
.
size
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
double
maxEpsilon
=
iterateDipolesByDIIS
(
i
);
if
(
maxEpsilon
<
inducedEpsilon
)
break
;
break
;
}
}
...
@@ -1568,17 +1579,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1568,17 +1579,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
void
*
updateInducedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
npt
,
&
inducedField
->
getDevicePointer
(),
double
maxEpsilon
=
iterateDipolesByDIIS
(
i
);
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
if
(
maxEpsilon
<
inducedEpsilon
)
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
()};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
inducedDipoleErrors
->
download
(
errors
);
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
(
int
)
errors
.
size
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
if
(
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
())
<
inducedEpsilon
)
break
;
break
;
}
}
...
@@ -1612,6 +1614,84 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1612,6 +1614,84 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return
0.0
;
return
0.0
;
}
}
double
CudaCalcAmoebaMultipoleForceKernel
::
iterateDipolesByDIIS
(
int
iteration
)
{
void
*
npt
=
NULL
;
bool
trueValue
=
true
,
falseValue
=
false
;
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
// Record the dipole and errors into the lists of previous dipoles.
if
(
gkKernel
!=
NULL
)
{
void
*
recordDIISDipolesGkArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
gkKernel
->
getField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedFieldPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
(),
&
prevDipolesGk
->
getDevicePointer
(),
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
falseValue
};
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesGkArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
}
void
*
recordDIISDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
npt
,
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
(),
&
inducedDipoleErrors
->
getDevicePointer
(),
&
prevDipoles
->
getDevicePointer
(),
&
prevDipolesPolar
->
getDevicePointer
(),
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
trueValue
};
cu
.
executeKernel
(
recordDIISDipolesKernel
,
recordDIISDipolesArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
elementSize
*
2
);
float2
*
errors
=
(
float2
*
)
cu
.
getPinnedBuffer
();
inducedDipoleErrors
->
download
(
errors
,
false
);
// Determine the coefficients for selecting the new dipoles.
vector
<
float
>
coefficients
(
MaxPrevDIISDipoles
);
int
numPrev
=
(
iteration
+
1
<
MaxPrevDIISDipoles
?
iteration
+
1
:
MaxPrevDIISDipoles
);
if
(
iteration
==
0
)
coefficients
[
0
]
=
1
;
else
{
void
*
buildMatrixArgs
[]
=
{
&
prevErrors
->
getDevicePointer
(),
&
iteration
,
&
diisMatrix
->
getDevicePointer
()};
cu
.
executeKernel
(
buildMatrixKernel
,
buildMatrixArgs
,
cu
.
getNumThreadBlocks
()
*
128
,
128
,
128
*
elementSize
);
vector
<
float
>
matrix
;
diisMatrix
->
download
(
matrix
);
int
rank
=
numPrev
+
1
;
Array2D
<
double
>
b
(
rank
,
rank
);
for
(
int
i
=
0
;
i
<
rank
;
i
++
)
for
(
int
j
=
0
;
j
<
rank
;
j
++
)
b
[
i
][
j
]
=
matrix
[
i
*
rank
+
j
];
// Solve using SVD. Since the right hand side is (-1, 0, 0, 0, ...), this is simpler than the general case.
JAMA
::
SVD
<
double
>
svd
(
b
);
Array2D
<
double
>
u
,
v
;
svd
.
getU
(
u
);
svd
.
getV
(
v
);
Array1D
<
double
>
s
;
svd
.
getSingularValues
(
s
);
int
effectiveRank
=
svd
.
rank
();
for
(
int
i
=
1
;
i
<
rank
;
i
++
)
{
double
d
=
0
;
for
(
int
j
=
0
;
j
<
effectiveRank
;
j
++
)
d
-=
u
[
0
][
j
]
*
v
[
i
][
j
]
/
s
[
j
];
coefficients
[
i
-
1
]
=
d
;
}
}
diisCoefficients
->
upload
(
&
coefficients
[
0
]);
// Compute the dipoles.
void
*
updateInducedFieldArgs
[]
=
{
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
prevDipoles
->
getDevicePointer
(),
&
prevDipolesPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
);
if
(
gkKernel
!=
NULL
)
{
void
*
updateInducedFieldGkArgs
[]
=
{
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
prevDipolesGk
->
getDevicePointer
(),
&
prevDipolesGkPolar
->
getDevicePointer
(),
&
diisCoefficients
->
getDevicePointer
(),
&
numPrev
};
cu
.
executeKernel
(
updateInducedFieldKernel
,
updateInducedFieldGkArgs
,
cu
.
getNumThreadBlocks
()
*
cu
.
ThreadBlockSize
);
}
// Compute the overall error for monitoring convergence.
double
total1
=
0.0
,
total2
=
0.0
;
for
(
int
j
=
0
;
j
<
inducedDipoleErrors
->
getSize
();
j
++
)
{
total1
+=
errors
[
j
].
x
;
total2
+=
errors
[
j
].
y
;
}
return
48.033324
*
sqrt
(
max
(
total1
,
total2
)
/
cu
.
getNumAtoms
());
}
void
CudaCalcAmoebaMultipoleForceKernel
::
ensureMultipolesValid
(
ContextImpl
&
context
)
{
void
CudaCalcAmoebaMultipoleForceKernel
::
ensureMultipolesValid
(
ContextImpl
&
context
)
{
if
(
multipolesAreValid
)
{
if
(
multipolesAreValid
)
{
int
numParticles
=
cu
.
getNumAtoms
();
int
numParticles
=
cu
.
getNumAtoms
();
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
a346a6db
...
@@ -375,6 +375,7 @@ private:
...
@@ -375,6 +375,7 @@ private:
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
};
void
initializeScaleFactors
();
void
initializeScaleFactors
();
double
iterateDipolesByDIIS
(
int
iteration
);
void
ensureMultipolesValid
(
ContextImpl
&
context
);
void
ensureMultipolesValid
(
ContextImpl
&
context
);
template
<
class
T
,
class
T4
,
class
M4
>
void
computeSystemMultipoleMoments
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
outputMultipoleMoments
);
template
<
class
T
,
class
T4
,
class
M4
>
void
computeSystemMultipoleMoments
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
outputMultipoleMoments
);
int
numMultipoles
,
maxInducedIterations
;
int
numMultipoles
,
maxInducedIterations
;
...
@@ -399,6 +400,13 @@ private:
...
@@ -399,6 +400,13 @@ private:
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipole
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
inducedDipolePolar
;
CudaArray
*
inducedDipoleErrors
;
CudaArray
*
inducedDipoleErrors
;
CudaArray
*
prevDipoles
;
CudaArray
*
prevDipolesPolar
;
CudaArray
*
prevDipolesGk
;
CudaArray
*
prevDipolesGkPolar
;
CudaArray
*
prevErrors
;
CudaArray
*
diisMatrix
;
CudaArray
*
diisCoefficients
;
CudaArray
*
polarizability
;
CudaArray
*
polarizability
;
CudaArray
*
covalentFlags
;
CudaArray
*
covalentFlags
;
CudaArray
*
polarizationGroupFlags
;
CudaArray
*
polarizationGroupFlags
;
...
@@ -419,8 +427,10 @@ private:
...
@@ -419,8 +427,10 @@ private:
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
static
const
int
MaxPrevDIISDipoles
=
20
;
};
};
/**
/**
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
a346a6db
...
@@ -485,7 +485,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
...
@@ -485,7 +485,7 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
buffer
[
threadIdx
.
x
]
=
make_real2
(
sumErrors
,
sumPolarErrors
);
buffer
[
threadIdx
.
x
]
=
make_real2
(
sumErrors
,
sumPolarErrors
);
__syncthreads
();
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
buffer
[
threadIdx
.
x
].
x
+=
buffer
[
threadIdx
.
x
+
offset
].
x
;
buffer
[
threadIdx
.
x
].
x
+=
buffer
[
threadIdx
.
x
+
offset
].
x
;
buffer
[
threadIdx
.
x
].
y
+=
buffer
[
threadIdx
.
x
+
offset
].
y
;
buffer
[
threadIdx
.
x
].
y
+=
buffer
[
threadIdx
.
x
+
offset
].
y
;
...
@@ -494,4 +494,120 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
...
@@ -494,4 +494,120 @@ extern "C" __global__ void updateInducedFieldBySOR(const long long* __restrict__
}
}
if
(
threadIdx
.
x
==
0
)
if
(
threadIdx
.
x
==
0
)
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
}
}
\ No newline at end of file
extern
"C"
__global__
void
recordInducedDipolesForDIIS
(
const
long
long
*
__restrict__
fixedField
,
const
long
long
*
__restrict__
fixedFieldPolar
,
const
long
long
*
__restrict__
fixedFieldS
,
const
long
long
*
__restrict__
inducedField
,
const
long
long
*
__restrict__
inducedFieldPolar
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float
*
__restrict__
polarizability
,
float2
*
__restrict__
errors
,
real
*
__restrict__
prevDipoles
,
real
*
__restrict__
prevDipolesPolar
,
real
*
__restrict__
prevErrors
,
int
iteration
,
bool
recordPrevErrors
)
{
extern
__shared__
real2
buffer
[];
#ifdef USE_EWALD
const
real
ewaldScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
#else
const
real
ewaldScale
=
0
;
#endif
const
real
fieldScale
=
1
/
(
real
)
0x100000000
;
real
sumErrors
=
0
;
real
sumPolarErrors
=
0
;
for
(
int
atom
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
atom
<
NUM_ATOMS
;
atom
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
scale
=
polarizability
[
atom
];
for
(
int
component
=
0
;
component
<
3
;
component
++
)
{
int
dipoleIndex
=
3
*
atom
+
component
;
int
fieldIndex
=
atom
+
component
*
PADDED_NUM_ATOMS
;
if
(
iteration
>=
MAX_PREV_DIIS_DIPOLES
)
{
// We have filled up the buffer for previous dipoles, so shift them all over by one.
for
(
int
i
=
1
;
i
<
MAX_PREV_DIIS_DIPOLES
;
i
++
)
{
int
index1
=
dipoleIndex
+
(
i
-
1
)
*
NUM_ATOMS
*
3
;
int
index2
=
dipoleIndex
+
i
*
NUM_ATOMS
*
3
;
prevDipoles
[
index1
]
=
prevDipoles
[
index2
];
prevDipolesPolar
[
index1
]
=
prevDipolesPolar
[
index2
];
if
(
recordPrevErrors
)
prevErrors
[
index1
]
=
prevErrors
[
index2
];
}
}
// Compute the new dipole, and record it along with the error.
real
oldDipole
=
inducedDipole
[
dipoleIndex
];
real
oldDipolePolar
=
inducedDipolePolar
[
dipoleIndex
];
long
long
fixedS
=
(
fixedFieldS
==
NULL
?
(
long
long
)
0
:
fixedFieldS
[
fieldIndex
]);
real
newDipole
=
scale
*
((
fixedField
[
fieldIndex
]
+
fixedS
+
inducedField
[
fieldIndex
])
*
fieldScale
+
ewaldScale
*
oldDipole
);
real
newDipolePolar
=
scale
*
((
fixedFieldPolar
[
fieldIndex
]
+
fixedS
+
inducedFieldPolar
[
fieldIndex
])
*
fieldScale
+
ewaldScale
*
oldDipolePolar
);
int
storePrevIndex
=
dipoleIndex
+
min
(
iteration
,
MAX_PREV_DIIS_DIPOLES
-
1
)
*
NUM_ATOMS
*
3
;
prevDipoles
[
storePrevIndex
]
=
newDipole
;
prevDipolesPolar
[
storePrevIndex
]
=
newDipolePolar
;
if
(
recordPrevErrors
)
prevErrors
[
storePrevIndex
]
=
newDipole
-
oldDipole
;
sumErrors
+=
(
newDipole
-
oldDipole
)
*
(
newDipole
-
oldDipole
);
sumPolarErrors
+=
(
newDipolePolar
-
oldDipolePolar
)
*
(
newDipolePolar
-
oldDipolePolar
);
}
}
// Sum the errors over threads and store the total for this block.
buffer
[
threadIdx
.
x
]
=
make_real2
(
sumErrors
,
sumPolarErrors
);
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
{
buffer
[
threadIdx
.
x
].
x
+=
buffer
[
threadIdx
.
x
+
offset
].
x
;
buffer
[
threadIdx
.
x
].
y
+=
buffer
[
threadIdx
.
x
+
offset
].
y
;
}
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
errors
[
blockIdx
.
x
]
=
make_float2
((
float
)
buffer
[
0
].
x
,
(
float
)
buffer
[
0
].
y
);
}
extern
"C"
__global__
void
computeDIISMatrix
(
real
*
__restrict__
prevErrors
,
int
iteration
,
real
*
__restrict__
matrix
)
{
extern
__shared__
real
sumBuffer
[];
int
rank
=
min
(
iteration
+
1
,
MAX_PREV_DIIS_DIPOLES
)
+
1
;
for
(
int
element
=
blockIdx
.
x
;
element
<
rank
*
rank
;
element
+=
gridDim
.
x
)
{
// All the threads in this thread block work together to compute a single matrix element.
int
i
=
element
/
rank
;
int
j
=
element
-
i
*
rank
;
if
(
i
>
j
)
continue
;
real
value
;
if
(
i
==
0
&&
j
==
0
)
value
=
0
;
else
if
(
i
==
0
||
j
==
0
)
value
=
-
1
;
else
{
// Compute the inner product of the two error vectors.
real
sum
=
0
;
for
(
int
index
=
threadIdx
.
x
;
index
<
NUM_ATOMS
*
3
;
index
+=
blockDim
.
x
)
sum
+=
prevErrors
[
index
+
(
i
-
1
)
*
NUM_ATOMS
*
3
]
*
prevErrors
[
index
+
(
j
-
1
)
*
NUM_ATOMS
*
3
];
sumBuffer
[
threadIdx
.
x
]
=
sum
;
__syncthreads
();
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
threadIdx
.
x
+
offset
<
blockDim
.
x
&&
(
threadIdx
.
x
&
(
2
*
offset
-
1
))
==
0
)
sumBuffer
[
threadIdx
.
x
]
+=
sumBuffer
[
threadIdx
.
x
+
offset
];
__syncthreads
();
}
value
=
sumBuffer
[
0
];
}
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
{
matrix
[
element
]
=
value
;
if
(
i
!=
j
)
matrix
[
j
*
rank
+
i
]
=
value
;
}
}
}
extern
"C"
__global__
void
updateInducedFieldByDIIS
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
const
real
*
__restrict__
prevDipoles
,
const
real
*
__restrict__
prevDipolesPolar
,
const
float
*
__restrict__
coefficients
,
int
numPrev
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
sum
=
0
;
real
sumPolar
=
0
;
for
(
int
i
=
0
;
i
<
numPrev
;
i
++
)
{
sum
+=
coefficients
[
i
]
*
prevDipoles
[
i
*
3
*
NUM_ATOMS
+
index
];
sumPolar
+=
coefficients
[
i
]
*
prevDipolesPolar
[
i
*
3
*
NUM_ATOMS
+
index
];
}
inducedDipole
[
index
]
=
sum
;
inducedDipolePolar
[
index
]
=
sumPolar
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment