Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a4e2d9a6
"wrappers/python/vscode:/vscode.git/clone" did not exist on "51f82c025619874753ed0b3c0c8f71bc61bd08da"
Commit
a4e2d9a6
authored
Jan 05, 2016
by
Peter Eastman
Browse files
Began CUDA implementation of extrapolated polarization
parent
e6804811
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
303 additions
and
62 deletions
+303
-62
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+194
-61
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+17
-1
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+92
-0
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
a4e2d9a6
...
@@ -810,7 +810,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
...
@@ -810,7 +810,10 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
sphericalDipoles
(
NULL
),
sphericalQuadrupoles
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
sphericalDipoles
(
NULL
),
sphericalQuadrupoles
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
extrapolatedDipole
(
NULL
),
extrapolatedDipolePolar
(
NULL
),
extrapolatedDipoleGk
(
NULL
),
extrapolatedDipoleGkPolar
(
NULL
),
inducedDipoleFieldGradient
(
NULL
),
inducedDipoleFieldGradientPolar
(
NULL
),
inducedDipoleFieldGradientGk
(
NULL
),
inducedDipoleFieldGradientGkPolar
(
NULL
),
extrapolatedDipoleFieldGradient
(
NULL
),
extrapolatedDipoleFieldGradientPolar
(
NULL
),
extrapolatedDipoleFieldGradientGk
(
NULL
),
extrapolatedDipoleFieldGradientGkPolar
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeCphi
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeCphi
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
}
}
...
@@ -867,6 +870,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -867,6 +870,30 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
diisMatrix
;
delete
diisMatrix
;
if
(
diisCoefficients
!=
NULL
)
if
(
diisCoefficients
!=
NULL
)
delete
diisCoefficients
;
delete
diisCoefficients
;
if
(
extrapolatedDipole
!=
NULL
)
delete
extrapolatedDipole
;
if
(
extrapolatedDipolePolar
!=
NULL
)
delete
extrapolatedDipolePolar
;
if
(
extrapolatedDipoleGk
!=
NULL
)
delete
extrapolatedDipoleGk
;
if
(
extrapolatedDipoleGkPolar
!=
NULL
)
delete
extrapolatedDipoleGkPolar
;
if
(
inducedDipoleFieldGradient
!=
NULL
)
delete
inducedDipoleFieldGradient
;
if
(
inducedDipoleFieldGradientPolar
!=
NULL
)
delete
inducedDipoleFieldGradientPolar
;
if
(
inducedDipoleFieldGradientGk
!=
NULL
)
delete
inducedDipoleFieldGradientGk
;
if
(
inducedDipoleFieldGradientGkPolar
!=
NULL
)
delete
inducedDipoleFieldGradientGkPolar
;
if
(
extrapolatedDipoleFieldGradient
!=
NULL
)
delete
extrapolatedDipoleFieldGradient
;
if
(
extrapolatedDipoleFieldGradientPolar
!=
NULL
)
delete
extrapolatedDipoleFieldGradientPolar
;
if
(
extrapolatedDipoleFieldGradientGk
!=
NULL
)
delete
extrapolatedDipoleFieldGradientGk
;
if
(
extrapolatedDipoleFieldGradientGkPolar
!=
NULL
)
delete
extrapolatedDipoleFieldGradientGkPolar
;
if
(
polarizability
!=
NULL
)
if
(
polarizability
!=
NULL
)
delete
polarizability
;
delete
polarizability
;
if
(
covalentFlags
!=
NULL
)
if
(
covalentFlags
!=
NULL
)
...
@@ -967,6 +994,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -967,6 +994,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Create workspace arrays.
// Create workspace arrays.
polarizationType
=
force
.
getPolarizationType
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
labFrameDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"labFrameDipoles"
);
labFrameDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"labFrameDipoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
5
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
5
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
...
@@ -979,12 +1007,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -979,12 +1007,23 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipole
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipole"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
inducedDipolePolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"inducedDipolePolar"
);
if
(
polarizationType
==
AmoebaMultipoleForce
::
Mutual
)
{
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
inducedDipoleErrors
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
(),
sizeof
(
float2
),
"inducedDipoleErrors"
);
prevDipoles
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipoles"
);
prevDipoles
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipoles"
);
prevDipolesPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesPolar"
);
prevDipolesPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesPolar"
);
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
prevErrors
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevErrors"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisMatrix
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
diisCoefficients
=
new
CudaArray
(
cu
,
MaxPrevDIISDipoles
+
1
,
sizeof
(
float
),
"diisMatrix"
);
}
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
int
numOrders
=
force
.
getExtrapolationCoefficients
().
size
();
extrapolatedDipole
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipole"
);
extrapolatedDipolePolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipolePolar"
);
inducedDipoleFieldGradient
=
new
CudaArray
(
cu
,
6
*
numMultipoles
,
elementSize
,
"inducedDipoleFieldGradient"
);
inducedDipoleFieldGradientPolar
=
new
CudaArray
(
cu
,
6
*
numMultipoles
,
elementSize
,
"inducedDipoleFieldGradientPolar"
);
extrapolatedDipoleFieldGradient
=
new
CudaArray
(
cu
,
6
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipoleFieldGradient"
);
extrapolatedDipoleFieldGradientPolar
=
new
CudaArray
(
cu
,
6
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipoleFieldGradientPolar"
);
}
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
field
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
fieldPolar
);
cu
.
addAutoclearBuffer
(
*
torque
);
cu
.
addAutoclearBuffer
(
*
torque
);
...
@@ -1036,14 +1075,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1036,14 +1075,16 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
// Record other options.
// Record other options.
if
(
force
.
getP
olarizationType
()
==
AmoebaMultipoleForce
::
Mutual
)
{
if
(
p
olarizationType
==
AmoebaMultipoleForce
::
Mutual
)
{
maxInducedIterations
=
force
.
getMutualInducedMaxIterations
();
maxInducedIterations
=
force
.
getMutualInducedMaxIterations
();
inducedEpsilon
=
force
.
getMutualInducedTargetEpsilon
();
inducedEpsilon
=
force
.
getMutualInducedTargetEpsilon
();
inducedField
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedField"
);
inducedFieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedFieldPolar"
);
}
}
else
else
maxInducedIterations
=
0
;
maxInducedIterations
=
0
;
if
(
polarizationType
!=
AmoebaMultipoleForce
::
Direct
)
{
inducedField
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedField"
);
inducedFieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"inducedFieldPolar"
);
}
usePME
=
(
force
.
getNonbondedMethod
()
==
AmoebaMultipoleForce
::
PME
);
usePME
=
(
force
.
getNonbondedMethod
()
==
AmoebaMultipoleForce
::
PME
);
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
// See whether there's an AmoebaGeneralizedKirkwoodForce in the System.
...
@@ -1066,7 +1107,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1066,7 +1107,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
cu
.
intToString
(
cu
.
getNumAtomBlocks
());
defines
[
"ENERGY_SCALE_FACTOR"
]
=
cu
.
doubleToString
(
138.9354558456
/
innerDielectric
);
defines
[
"ENERGY_SCALE_FACTOR"
]
=
cu
.
doubleToString
(
138.9354558456
/
innerDielectric
);
if
(
force
.
getP
olarizationType
()
==
AmoebaMultipoleForce
::
Direct
)
if
(
p
olarizationType
==
AmoebaMultipoleForce
::
Direct
)
defines
[
"DIRECT_POLARIZATION"
]
=
""
;
defines
[
"DIRECT_POLARIZATION"
]
=
""
;
if
(
useShuffle
)
if
(
useShuffle
)
defines
[
"USE_SHUFFLE"
]
=
""
;
defines
[
"USE_SHUFFLE"
]
=
""
;
...
@@ -1080,6 +1121,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1080,6 +1121,18 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int
endExclusionIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numExclusionTiles
/
numContexts
;
int
endExclusionIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numExclusionTiles
/
numContexts
;
defines
[
"FIRST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
startExclusionIndex
);
defines
[
"FIRST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
startExclusionIndex
);
defines
[
"LAST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
endExclusionIndex
);
defines
[
"LAST_EXCLUSION_TILE"
]
=
cu
.
intToString
(
endExclusionIndex
);
maxExtrapolationOrder
=
force
.
getExtrapolationCoefficients
().
size
();
defines
[
"MAX_EXTRAPOLATION_ORDER"
]
=
cu
.
intToString
(
maxExtrapolationOrder
);
stringstream
coefficients
;
for
(
int
i
=
0
;
i
<
maxExtrapolationOrder
;
i
++
)
{
if
(
i
>
0
)
coefficients
<<
","
;
double
sum
=
0
;
for
(
int
j
=
i
;
j
<
maxExtrapolationOrder
;
j
++
)
sum
=
force
.
getExtrapolationCoefficients
()[
j
];
coefficients
<<
cu
.
doubleToString
(
sum
);
}
defines
[
"EXTRAPOLATION_COEFFICIENTS_SUM"
]
=
coefficients
.
str
();
alpha
=
force
.
getAEwald
();
alpha
=
force
.
getAEwald
();
if
(
usePME
)
{
if
(
usePME
)
{
vector
<
int
>
pmeGridDimension
;
vector
<
int
>
pmeGridDimension
;
...
@@ -1113,9 +1166,20 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1113,9 +1166,20 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines
[
"GK_FQ"
]
=
cu
.
doubleToString
(
3
*
(
1
-
solventDielectric
)
/
(
2
+
3
*
solventDielectric
));
defines
[
"GK_FQ"
]
=
cu
.
doubleToString
(
3
*
(
1
-
solventDielectric
)
/
(
2
+
3
*
solventDielectric
));
fixedThreadMemory
+=
4
*
elementSize
;
fixedThreadMemory
+=
4
*
elementSize
;
inducedThreadMemory
+=
13
*
elementSize
;
inducedThreadMemory
+=
13
*
elementSize
;
if
(
polarizationType
==
AmoebaMultipoleForce
::
Mutual
)
{
prevDipolesGk
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGk"
);
prevDipolesGk
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGk"
);
prevDipolesGkPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGkPolar"
);
prevDipolesGkPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
MaxPrevDIISDipoles
,
elementSize
,
"prevDipolesGkPolar"
);
}
}
else
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
{
int
numOrders
=
force
.
getExtrapolationCoefficients
().
size
();
extrapolatedDipoleGk
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipoleGk"
);
extrapolatedDipoleGkPolar
=
new
CudaArray
(
cu
,
3
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipoleGkPolar"
);
inducedDipoleFieldGradientGk
=
new
CudaArray
(
cu
,
6
*
numMultipoles
,
elementSize
,
"inducedDipoleFieldGradientGk"
);
inducedDipoleFieldGradientGkPolar
=
new
CudaArray
(
cu
,
6
*
numMultipoles
,
elementSize
,
"inducedDipoleFieldGradientGkPolar"
);
extrapolatedDipoleFieldGradientGk
=
new
CudaArray
(
cu
,
6
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipoleFieldGradientGk"
);
extrapolatedDipoleFieldGradientGkPolar
=
new
CudaArray
(
cu
,
6
*
numMultipoles
*
numOrders
,
elementSize
,
"extrapolatedDipoleFieldGradientGkPolar"
);
}
}
int
maxThreads
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
int
maxThreads
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
fixedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
fixedThreadMemory
));
fixedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
fixedThreadMemory
));
inducedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
inducedThreadMemory
));
inducedFieldThreads
=
min
(
maxThreads
,
cu
.
computeThreadBlockSize
(
inducedThreadMemory
));
...
@@ -1127,7 +1191,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1127,7 +1191,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
fixedFieldThreads
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
fixedFieldThreads
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipoleFixedField
,
defines
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
computeFixedFieldKernel
=
cu
.
getKernel
(
module
,
"computeFixedField"
);
if
(
maxInducedIterations
>
0
)
{
if
(
polarizationType
!=
AmoebaMultipoleForce
::
Direct
)
{
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
inducedFieldThreads
);
defines
[
"MAX_PREV_DIIS_DIPOLES"
]
=
cu
.
intToString
(
MaxPrevDIISDipoles
);
defines
[
"MAX_PREV_DIIS_DIPOLES"
]
=
cu
.
intToString
(
MaxPrevDIISDipoles
);
defines
[
"USE_MUTUAL_POLARIZATION"
]
=
"1"
;
defines
[
"USE_MUTUAL_POLARIZATION"
]
=
"1"
;
...
@@ -1136,6 +1200,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1136,6 +1200,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
updateInducedFieldKernel
=
cu
.
getKernel
(
module
,
"updateInducedFieldByDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
recordDIISDipolesKernel
=
cu
.
getKernel
(
module
,
"recordInducedDipolesForDIIS"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
buildMatrixKernel
=
cu
.
getKernel
(
module
,
"computeDIISMatrix"
);
initExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"initExtrapolatedDipoles"
);
iterateExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"iterateExtrapolatedDipoles"
);
computeExtrapolatedKernel
=
cu
.
getKernel
(
module
,
"computeExtrapolatedDipoles"
);
}
}
stringstream
electrostaticsSource
;
stringstream
electrostaticsSource
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
electrostaticsSource
<<
CudaKernelSources
::
vectorOps
;
...
@@ -1166,7 +1233,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1166,7 +1233,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines
[
"GRID_SIZE_Z"
]
=
cu
.
intToString
(
gridSizeZ
);
pmeDefines
[
"GRID_SIZE_Z"
]
=
cu
.
intToString
(
gridSizeZ
);
pmeDefines
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
pmeDefines
[
"M_PI"
]
=
cu
.
doubleToString
(
M_PI
);
pmeDefines
[
"SQRT_PI"
]
=
cu
.
doubleToString
(
sqrt
(
M_PI
));
pmeDefines
[
"SQRT_PI"
]
=
cu
.
doubleToString
(
sqrt
(
M_PI
));
if
(
force
.
getP
olarizationType
()
==
AmoebaMultipoleForce
::
Direct
)
if
(
p
olarizationType
==
AmoebaMultipoleForce
::
Direct
)
pmeDefines
[
"DIRECT_POLARIZATION"
]
=
""
;
pmeDefines
[
"DIRECT_POLARIZATION"
]
=
""
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipolePme
,
pmeDefines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipolePme
,
pmeDefines
);
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
...
@@ -1407,7 +1474,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1407,7 +1474,6 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
int
numTileIndices
=
nb
.
getNumTiles
();
int
numTileIndices
=
nb
.
getNumTiles
();
int
numForceThreadBlocks
=
nb
.
getNumForceThreadBlocks
();
int
numForceThreadBlocks
=
nb
.
getNumForceThreadBlocks
();
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
void
*
npt
=
NULL
;
if
(
pmeGrid
==
NULL
)
{
if
(
pmeGrid
==
NULL
)
{
// Compute induced dipoles.
// Compute induced dipoles.
...
@@ -1436,25 +1502,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1436,25 +1502,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
// Iterate until the dipoles converge.
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
computeExtrapolatedDipoles
(
NULL
);
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
computeInducedField
(
NULL
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
if
(
gkKernel
==
NULL
)
{
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
}
else
{
cu
.
clearBuffer
(
*
gkKernel
->
getInducedField
());
cu
.
clearBuffer
(
*
gkKernel
->
getInducedFieldPolar
());
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
gkKernel
->
getInducedField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedFieldPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getBornRadii
()
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
}
bool
converged
=
iterateDipolesByDIIS
(
i
);
bool
converged
=
iterateDipolesByDIIS
(
i
);
if
(
converged
)
if
(
converged
)
break
;
break
;
...
@@ -1579,34 +1630,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1579,34 +1630,10 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
// Iterate until the dipoles converge.
// Iterate until the dipoles converge.
vector
<
float2
>
errors
;
if
(
polarizationType
==
AmoebaMultipoleForce
::
Extrapolated
)
computeExtrapolatedDipoles
(
recipBoxVectorPointer
);
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
for
(
int
i
=
0
;
i
<
maxInducedIterations
;
i
++
)
{
cu
.
clearBuffer
(
*
inducedField
);
computeInducedField
(
recipBoxVectorPointer
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
cu
.
clearBuffer
(
*
pmeGrid
);
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
pmeGrid
->
getSize
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
bool
converged
=
iterateDipolesByDIIS
(
i
);
bool
converged
=
iterateDipolesByDIIS
(
i
);
if
(
converged
)
if
(
converged
)
break
;
break
;
...
@@ -1646,6 +1673,73 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1646,6 +1673,73 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
return
0.0
;
return
0.0
;
}
}
void
CudaCalcAmoebaMultipoleForceKernel
::
computeInducedField
(
void
**
recipBoxVectorPointer
)
{
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
int
startTileIndex
=
nb
.
getStartTileIndex
();
int
numTileIndices
=
nb
.
getNumTiles
();
int
numForceThreadBlocks
=
nb
.
getNumForceThreadBlocks
();
if
(
pmeGrid
==
NULL
)
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
if
(
gkKernel
==
NULL
)
{
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
}
else
{
cu
.
clearBuffer
(
*
gkKernel
->
getInducedField
());
cu
.
clearBuffer
(
*
gkKernel
->
getInducedFieldPolar
());
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
gkKernel
->
getInducedField
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedFieldPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipoles
()
->
getDevicePointer
(),
&
gkKernel
->
getInducedDipolesPolar
()
->
getDevicePointer
(),
&
gkKernel
->
getBornRadii
()
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
}
}
else
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
unsigned
int
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
cu
.
clearBuffer
(
*
pmeGrid
);
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
{
void
*
finishSpreadArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
pmeGrid
->
getSize
());
}
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
void
*
pmeConvolutionArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
void
*
pmeInducedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
}
}
bool
CudaCalcAmoebaMultipoleForceKernel
::
iterateDipolesByDIIS
(
int
iteration
)
{
bool
CudaCalcAmoebaMultipoleForceKernel
::
iterateDipolesByDIIS
(
int
iteration
)
{
void
*
npt
=
NULL
;
void
*
npt
=
NULL
;
bool
trueValue
=
true
,
falseValue
=
false
;
bool
trueValue
=
true
,
falseValue
=
false
;
...
@@ -1744,6 +1838,45 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
...
@@ -1744,6 +1838,45 @@ bool CudaCalcAmoebaMultipoleForceKernel::iterateDipolesByDIIS(int iteration) {
return
false
;
return
false
;
}
}
void
CudaCalcAmoebaMultipoleForceKernel
::
computeExtrapolatedDipoles
(
void
**
recipBoxVectorPointer
)
{
// Start by storing the direct dipoles as PT0
void
*
initArgs
[]
=
{
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
extrapolatedDipole
->
getDevicePointer
(),
&
extrapolatedDipolePolar
->
getDevicePointer
(),
&
inducedDipoleFieldGradient
->
getDevicePointer
(),
&
inducedDipoleFieldGradientPolar
->
getDevicePointer
()};
cu
.
executeKernel
(
initExtrapolatedKernel
,
initArgs
,
extrapolatedDipole
->
getSize
());
// Recursively apply alpha.Tau to the µ_(n) components to generate µ_(n+1), and store the result
CudaNonbondedUtilities
&
nb
=
cu
.
getNonbondedUtilities
();
for
(
int
order
=
1
;
order
<
maxExtrapolationOrder
;
++
order
)
{
cu
.
clearBuffer
(
*
inducedField
);
cu
.
clearBuffer
(
*
inducedFieldPolar
);
computeInducedField
(
recipBoxVectorPointer
);
void
*
iterateArgs
[]
=
{
&
order
,
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
extrapolatedDipole
->
getDevicePointer
(),
&
extrapolatedDipolePolar
->
getDevicePointer
(),
&
inducedDipoleFieldGradient
->
getDevicePointer
(),
&
inducedDipoleFieldGradientPolar
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
extrapolatedDipoleFieldGradient
->
getDevicePointer
(),
&
extrapolatedDipoleFieldGradientPolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()};
cu
.
executeKernel
(
iterateExtrapolatedKernel
,
iterateArgs
,
extrapolatedDipole
->
getSize
());
}
cout
<<
"CUDA"
<<
endl
;
vector
<
float
>
d
;
extrapolatedDipole
->
download
(
d
);
for
(
int
i
=
0
;
i
<
maxExtrapolationOrder
;
i
++
)
{
cout
<<
"order "
<<
i
<<
endl
;
for
(
int
j
=
0
;
j
<
numMultipoles
;
j
++
)
{
int
k
=
3
*
(
j
+
i
*
numMultipoles
);
cout
<<
d
[
k
]
<<
" "
<<
d
[
k
+
1
]
<<
" "
<<
d
[
k
+
2
]
<<
endl
;
}
}
// Take a linear combination of the µ_(n) components to form the total dipole
void
*
computeArgs
[]
=
{
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
extrapolatedDipole
->
getDevicePointer
(),
&
extrapolatedDipolePolar
->
getDevicePointer
()};
cu
.
executeKernel
(
computeExtrapolatedKernel
,
computeArgs
,
extrapolatedDipole
->
getSize
());
}
void
CudaCalcAmoebaMultipoleForceKernel
::
ensureMultipolesValid
(
ContextImpl
&
context
)
{
void
CudaCalcAmoebaMultipoleForceKernel
::
ensureMultipolesValid
(
ContextImpl
&
context
)
{
if
(
multipolesAreValid
)
{
if
(
multipolesAreValid
)
{
int
numParticles
=
cu
.
getNumAtoms
();
int
numParticles
=
cu
.
getNumAtoms
();
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
a4e2d9a6
...
@@ -385,14 +385,17 @@ private:
...
@@ -385,14 +385,17 @@ private:
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
const
char
*
getSortKey
()
const
{
return
"value.y"
;}
};
};
void
initializeScaleFactors
();
void
initializeScaleFactors
();
void
computeInducedField
(
void
**
recipBoxVectorPointer
);
bool
iterateDipolesByDIIS
(
int
iteration
);
bool
iterateDipolesByDIIS
(
int
iteration
);
void
computeExtrapolatedDipoles
(
void
**
recipBoxVectorPointer
);
void
ensureMultipolesValid
(
ContextImpl
&
context
);
void
ensureMultipolesValid
(
ContextImpl
&
context
);
template
<
class
T
,
class
T4
,
class
M4
>
void
computeSystemMultipoleMoments
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
outputMultipoleMoments
);
template
<
class
T
,
class
T4
,
class
M4
>
void
computeSystemMultipoleMoments
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
outputMultipoleMoments
);
int
numMultipoles
,
maxInducedIterations
;
int
numMultipoles
,
maxInducedIterations
,
maxExtrapolationOrder
;
int
fixedFieldThreads
,
inducedFieldThreads
,
electrostaticsThreads
;
int
fixedFieldThreads
,
inducedFieldThreads
,
electrostaticsThreads
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
int
gridSizeX
,
gridSizeY
,
gridSizeZ
;
double
alpha
,
inducedEpsilon
;
double
alpha
,
inducedEpsilon
;
bool
usePME
,
hasQuadrupoles
,
hasInitializedScaleFactors
,
hasInitializedFFT
,
multipolesAreValid
;
bool
usePME
,
hasQuadrupoles
,
hasInitializedScaleFactors
,
hasInitializedFFT
,
multipolesAreValid
;
AmoebaMultipoleForce
::
PolarizationType
polarizationType
;
CudaContext
&
cu
;
CudaContext
&
cu
;
const
System
&
system
;
const
System
&
system
;
std
::
vector
<
int3
>
covalentFlagValues
;
std
::
vector
<
int3
>
covalentFlagValues
;
...
@@ -422,6 +425,18 @@ private:
...
@@ -422,6 +425,18 @@ private:
CudaArray
*
prevErrors
;
CudaArray
*
prevErrors
;
CudaArray
*
diisMatrix
;
CudaArray
*
diisMatrix
;
CudaArray
*
diisCoefficients
;
CudaArray
*
diisCoefficients
;
CudaArray
*
extrapolatedDipole
;
CudaArray
*
extrapolatedDipolePolar
;
CudaArray
*
extrapolatedDipoleGk
;
CudaArray
*
extrapolatedDipoleGkPolar
;
CudaArray
*
inducedDipoleFieldGradient
;
CudaArray
*
inducedDipoleFieldGradientPolar
;
CudaArray
*
inducedDipoleFieldGradientGk
;
CudaArray
*
inducedDipoleFieldGradientGkPolar
;
CudaArray
*
extrapolatedDipoleFieldGradient
;
CudaArray
*
extrapolatedDipoleFieldGradientPolar
;
CudaArray
*
extrapolatedDipoleFieldGradientGk
;
CudaArray
*
extrapolatedDipoleFieldGradientGkPolar
;
CudaArray
*
polarizability
;
CudaArray
*
polarizability
;
CudaArray
*
covalentFlags
;
CudaArray
*
covalentFlags
;
CudaArray
*
polarizationGroupFlags
;
CudaArray
*
polarizationGroupFlags
;
...
@@ -444,6 +459,7 @@ private:
...
@@ -444,6 +459,7 @@ private:
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CUfunction
initExtrapolatedKernel
,
iterateExtrapolatedKernel
,
computeExtrapolatedKernel
;
CUfunction
pmeTransformMultipolesKernel
,
pmeTransformPotentialKernel
;
CUfunction
pmeTransformMultipolesKernel
,
pmeTransformPotentialKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
a4e2d9a6
...
@@ -607,3 +607,95 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi
...
@@ -607,3 +607,95 @@ extern "C" __global__ void updateInducedFieldByDIIS(real* __restrict__ inducedDi
inducedDipolePolar
[
index
]
=
sumPolar
;
inducedDipolePolar
[
index
]
=
sumPolar
;
}
}
}
}
extern
"C"
__global__
void
initExtrapolatedDipoles
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
real
*
__restrict__
extrapolatedDipole
,
real
*
__restrict__
extrapolatedDipolePolar
,
real
*
__restrict__
inducedDipoleFieldGradient
,
real
*
__restrict__
inducedDipoleFieldGradientPolar
#ifdef USE_GK
,
real
*
__restrict__
inducedDipoleGk
,
real
*
__restrict__
inducedDipoleGkPolar
,
real
*
__restrict__
extrapolatedDipoleGk
,
real
*
__restrict__
extrapolatedDipoleGkPolar
,
real
*
__restrict__
inducedDipoleFieldGradientGk
,
real
*
__restrict__
inducedDipoleFieldGradientGkPolar
#endif
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
*
MAX_EXTRAPOLATION_ORDER
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
extrapolatedDipole
[
index
]
=
inducedDipole
[
index
];
extrapolatedDipolePolar
[
index
]
=
inducedDipolePolar
[
index
];
#ifdef USE_GK
extrapolatedDipoleGk
[
index
]
=
inducedDipoleGk
[
index
];
extrapolatedDipoleGkPolar
[
index
]
=
inducedDipoleGkPolar
[
index
];
#endif
}
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
6
*
NUM_ATOMS
*
MAX_EXTRAPOLATION_ORDER
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
inducedDipoleFieldGradient
[
index
]
=
0
;
inducedDipoleFieldGradientPolar
[
index
]
=
0
;
#ifdef USE_GK
inducedDipoleFieldGradientGk
[
index
]
=
0
;
inducedDipoleFieldGradientGkPolar
[
index
]
=
0
;
#endif
}
}
extern
"C"
__global__
void
iterateExtrapolatedDipoles
(
int
order
,
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
real
*
__restrict__
extrapolatedDipole
,
real
*
__restrict__
extrapolatedDipolePolar
,
real
*
__restrict__
inducedDipoleFieldGradient
,
real
*
__restrict__
inducedDipoleFieldGradientPolar
,
long
long
*
__restrict__
inducedDipoleField
,
long
long
*
__restrict__
inducedDipoleFieldPolar
,
real
*
__restrict__
extrapolatedDipoleFieldGradient
,
real
*
__restrict__
extrapolatedDipoleFieldGradientPolar
,
#ifdef USE_GK
real
*
__restrict__
inducedDipoleGk
,
real
*
__restrict__
inducedDipoleGkPolar
,
real
*
__restrict__
extrapolatedDipoleGk
,
real
*
__restrict__
extrapolatedDipoleGkPolar
,
real
*
__restrict__
inducedDipoleFieldGradientGk
,
real
*
__restrict__
inducedDipoleFieldGradientGkPolar
,
long
long
*
__restrict__
inducedDipoleFieldGk
,
long
long
*
__restrict__
inducedDipoleFieldGkPolar
,
real
*
__restrict__
extrapolatedDipoleFieldGradientGk
,
real
*
__restrict__
extrapolatedDipoleFieldGradientGkPolar
,
#endif
const
float
*
__restrict__
polarizability
)
{
const
real
fieldScale
=
1
/
(
real
)
0x100000000
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
index
/
3
;
int
component
=
index
-
3
*
atom
;
int
fieldIndex
=
atom
+
component
*
PADDED_NUM_ATOMS
;
float
polar
=
polarizability
[
atom
];
real
value
=
inducedDipoleField
[
fieldIndex
]
*
fieldScale
*
polar
;
inducedDipole
[
index
]
=
value
;
printf
(
"%d %d %g %g
\n
"
,
order
,
index
,
inducedDipoleField
[
fieldIndex
]
*
fieldScale
,
value
);
extrapolatedDipole
[
order
*
3
*
NUM_ATOMS
+
index
]
=
value
;
value
=
inducedDipoleFieldPolar
[
fieldIndex
]
*
fieldScale
*
polar
;
inducedDipolePolar
[
index
]
=
value
;
extrapolatedDipolePolar
[
order
*
3
*
NUM_ATOMS
+
index
]
=
value
;
#ifdef USE_GK
value
=
inducedDipoleFieldGk
[
fieldIndex
]
*
fieldScale
*
polar
;
inducedDipoleGk
[
index
]
=
value
;
extrapolatedDipoleGk
[
order
*
3
*
NUM_ATOMS
+
index
]
=
value
;
value
=
inducedDipoleFieldGkPolar
[
fieldIndex
]
*
fieldScale
*
polar
;
inducedDipoleGkPolar
[
index
]
=
value
;
extrapolatedDipoleGkPolar
[
order
*
3
*
NUM_ATOMS
+
index
]
=
value
;
#endif
}
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
6
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
extrapolatedDipoleFieldGradient
[
order
*
6
*
NUM_ATOMS
+
index
]
=
inducedDipoleFieldGradient
[
index
];
extrapolatedDipoleFieldGradientPolar
[
order
*
6
*
NUM_ATOMS
+
index
]
=
inducedDipoleFieldGradientPolar
[
index
];
#ifdef USE_GK
extrapolatedDipoleFieldGradientGk
[
order
*
6
*
NUM_ATOMS
+
index
]
=
inducedDipoleFieldGradientGk
[
index
];
extrapolatedDipoleFieldGradientGkPolar
[
order
*
6
*
NUM_ATOMS
+
index
]
=
inducedDipoleFieldGradientGkPolar
[
index
];
#endif
}
}
extern
"C"
__global__
void
computeExtrapolatedDipoles
(
real
*
__restrict__
inducedDipole
,
real
*
__restrict__
inducedDipolePolar
,
real
*
__restrict__
extrapolatedDipole
,
real
*
__restrict__
extrapolatedDipolePolar
#ifdef USE_GK
,
real
*
__restrict__
inducedDipoleGk
,
real
*
__restrict__
inducedDipoleGkPolar
,
real
*
__restrict__
extrapolatedDipoleGk
,
real
*
__restrict__
extrapolatedDipoleGkPolar
#endif
)
{
real
coeff
[]
=
{
EXTRAPOLATION_COEFFICIENTS_SUM
};
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
3
*
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
sum
=
0
,
sumPolar
=
0
,
sumGk
=
0
,
sumGkPolar
=
0
;
for
(
int
order
=
0
;
order
<
MAX_EXTRAPOLATION_ORDER
;
order
++
)
{
sum
+=
extrapolatedDipole
[
order
*
3
*
NUM_ATOMS
+
index
]
*
coeff
[
order
];
sumPolar
+=
extrapolatedDipolePolar
[
order
*
3
*
NUM_ATOMS
+
index
]
*
coeff
[
order
];
#ifdef USE_GK
sumGk
+=
extrapolatedDipoleGk
[
order
*
3
*
NUM_ATOMS
+
index
]
*
coeff
[
order
];
sumGkPolar
+=
extrapolatedDipoleGkPolar
[
order
*
3
*
NUM_ATOMS
+
index
]
*
coeff
[
order
];
#endif
}
inducedDipole
[
index
]
=
sum
;
inducedDipolePolar
[
index
]
=
sumPolar
;
#ifdef USE_GK
inducedDipoleGk
[
index
]
=
sumGk
;
inducedDipoleGkPolar
[
index
]
=
sumGkPolar
;
#endif
}
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment