Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3b91c945
"wrappers/python/simtk/vscode:/vscode.git/clone" did not exist on "2f42ac0a06a47aa7a36accbc1c944a540865e521"
Commit
3b91c945
authored
Feb 05, 2015
by
Peter Eastman
Browse files
Continuing CUDA implementation of triclinic boxes for AmoebaMultipoleForce
parent
c83f2a12
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
176 additions
and
90 deletions
+176
-90
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+18
-7
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+3
-2
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
+155
-81
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
3b91c945
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman, Mark Friedrichs *
* Authors: Peter Eastman, Mark Friedrichs *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -801,7 +801,7 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
...
@@ -801,7 +801,7 @@ CudaCalcAmoebaMultipoleForceKernel::CudaCalcAmoebaMultipoleForceKernel(std::stri
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
pmePhid
(
NULL
),
pmePhip
(
NULL
),
pmePhidp
(
NULL
),
pmeCphi
(
NULL
),
pmeAtomGridIndex
(
NULL
),
lastPositions
(
NULL
),
sort
(
NULL
),
gkKernel
(
NULL
)
{
}
}
CudaCalcAmoebaMultipoleForceKernel
::~
CudaCalcAmoebaMultipoleForceKernel
()
{
CudaCalcAmoebaMultipoleForceKernel
::~
CudaCalcAmoebaMultipoleForceKernel
()
{
...
@@ -876,6 +876,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
...
@@ -876,6 +876,8 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
pmePhip
;
delete
pmePhip
;
if
(
pmePhidp
!=
NULL
)
if
(
pmePhidp
!=
NULL
)
delete
pmePhidp
;
delete
pmePhidp
;
if
(
pmeCphi
!=
NULL
)
delete
pmeCphi
;
if
(
pmeAtomGridIndex
!=
NULL
)
if
(
pmeAtomGridIndex
!=
NULL
)
delete
pmeAtomGridIndex
;
delete
pmeAtomGridIndex
;
if
(
lastPositions
!=
NULL
)
if
(
lastPositions
!=
NULL
)
...
@@ -1192,6 +1194,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1192,6 +1194,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipolePme
,
pmeDefines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipolePme
,
pmeDefines
);
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeTransformMultipolesKernel
=
cu
.
getKernel
(
module
,
"transformMultipolesToFractionalCoordinates"
);
pmeTransformMultipolesKernel
=
cu
.
getKernel
(
module
,
"transformMultipolesToFractionalCoordinates"
);
pmeTransformPotentialKernel
=
cu
.
getKernel
(
module
,
"transformPotentialToCartesianCoordinates"
);
pmeSpreadFixedMultipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadFixedMultipoles"
);
pmeSpreadFixedMultipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadFixedMultipoles"
);
pmeSpreadInducedDipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadInducedDipoles"
);
pmeSpreadInducedDipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadInducedDipoles"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
...
@@ -1219,6 +1222,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
...
@@ -1219,6 +1222,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmePhid
=
new
CudaArray
(
cu
,
10
*
numMultipoles
,
elementSize
,
"pmePhid"
);
pmePhid
=
new
CudaArray
(
cu
,
10
*
numMultipoles
,
elementSize
,
"pmePhid"
);
pmePhip
=
new
CudaArray
(
cu
,
10
*
numMultipoles
,
elementSize
,
"pmePhip"
);
pmePhip
=
new
CudaArray
(
cu
,
10
*
numMultipoles
,
elementSize
,
"pmePhip"
);
pmePhidp
=
new
CudaArray
(
cu
,
20
*
numMultipoles
,
elementSize
,
"pmePhidp"
);
pmePhidp
=
new
CudaArray
(
cu
,
20
*
numMultipoles
,
elementSize
,
"pmePhidp"
);
pmeCphi
=
new
CudaArray
(
cu
,
10
*
numMultipoles
,
elementSize
,
"pmeCphi"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numMultipoles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numMultipoles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
...
@@ -1520,14 +1524,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1520,14 +1524,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
unsigned
int
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
unsigned
int
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
(),
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
PmeOrder
*
PmeOrder
*
elementSize
);
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
(),
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
PmeOrder
*
PmeOrder
*
elementSize
);
sort
->
sort
(
*
pmeAtomGridIndex
);
sort
->
sort
(
*
pmeAtomGridIndex
);
void
*
pmeTransformMultipolesArgs
[]
=
{
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
void
*
pmeTransformMultipolesArgs
[]
=
{
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformMultipolesKernel
,
pmeTransformMultipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeTransformMultipolesKernel
,
pmeTransformMultipolesArgs
,
cu
.
getNumAtoms
());
void
*
pmeSpreadFixedMultipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
void
*
pmeSpreadFixedMultipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeSpreadFixedMultipolesKernel
,
pmeSpreadFixedMultipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeSpreadFixedMultipolesKernel
,
pmeSpreadFixedMultipolesArgs
,
cu
.
getNumAtoms
());
void
*
finishSpreadArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
()};
void
*
finishSpreadArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
()};
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
...
@@ -1547,9 +1553,11 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1547,9 +1553,11 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeFixedPotentialKernel
,
pmeFixedPotentialArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeFixedPotentialKernel
,
pmeFixedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeTransformFixedPotentialArgs
[]
=
{
&
pmePhi
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformPotentialKernel
,
pmeTransformFixedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeFixedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
void
*
pmeFixedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeFixedForceKernel
,
pmeFixedForceArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeFixedForceKernel
,
pmeFixedForceArgs
,
cu
.
getNumAtoms
());
...
@@ -1570,7 +1578,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1570,7 +1578,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu
.
clearBuffer
(
*
pmeGrid
);
cu
.
clearBuffer
(
*
pmeGrid
);
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
if
(
cu
.
getUseDoublePrecision
())
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
pmeGrid
->
getSize
());
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
pmeGrid
->
getSize
());
...
@@ -1634,11 +1643,13 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
...
@@ -1634,11 +1643,13 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
electrostaticsThreads
,
electrostaticsThreads
);
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
electrostaticsThreads
,
electrostaticsThreads
);
void
*
pmeTransformInducedPotentialArgs
[]
=
{
&
pmePhidp
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformPotentialKernel
,
pmeTransformInducedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeInducedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
void
*
pmeInducedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
pmeCphi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeInducedForceKernel
,
pmeInducedForceArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeInducedForceKernel
,
pmeInducedForceArgs
,
cu
.
getNumAtoms
());
}
}
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
3b91c945
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Mark Friedrichs, Peter Eastman *
* Authors: Mark Friedrichs, Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -421,6 +421,7 @@ private:
...
@@ -421,6 +421,7 @@ private:
CudaArray
*
pmePhid
;
CudaArray
*
pmePhid
;
CudaArray
*
pmePhip
;
CudaArray
*
pmePhip
;
CudaArray
*
pmePhidp
;
CudaArray
*
pmePhidp
;
CudaArray
*
pmeCphi
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
lastPositions
;
CudaArray
*
lastPositions
;
...
@@ -430,7 +431,7 @@ private:
...
@@ -430,7 +431,7 @@ private:
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CUfunction
pmeTransformMultipolesKernel
;
CUfunction
pmeTransformMultipolesKernel
,
pmeTransformPotentialKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
static
const
int
PmeOrder
=
5
;
static
const
int
PmeOrder
=
5
;
static
const
int
MaxPrevDIISDipoles
=
20
;
static
const
int
MaxPrevDIISDipoles
=
20
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
View file @
3b91c945
...
@@ -73,12 +73,12 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
...
@@ -73,12 +73,12 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
* Compute the index of the grid point each atom is associated with.
* Compute the index of the grid point each atom is associated with.
*/
*/
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBox
Size
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBox
VecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
real4
pos
=
posq
[
i
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVec
X
.
x
)
*
periodicBoxSize
.
x
;
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVec
Z
.
z
+
0.5
f
)
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
)
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVec
Z
.
z
)
*
periodicBoxSize
.
z
;
pos
-=
periodicBoxVecX
*
floor
(
pos
.
x
*
recipBoxVec
X
.
z
+
0.5
f
)
;
// First axis.
// First axis.
...
@@ -109,6 +109,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
...
@@ -109,6 +109,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
pmeAtomGridIndex
[
i
]
=
make_int2
(
i
,
igrid1
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
igrid2
*
GRID_SIZE_Z
+
igrid3
);
pmeAtomGridIndex
[
i
]
=
make_int2
(
i
,
igrid1
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
igrid2
*
GRID_SIZE_Z
+
igrid3
);
}
}
}
}
/**
/**
* Convert the fixed multipoles from Cartesian to fractional coordinates.
* Convert the fixed multipoles from Cartesian to fractional coordinates.
*/
*/
...
@@ -161,9 +162,55 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
...
@@ -161,9 +162,55 @@ extern "C" __global__ void transformMultipolesToFractionalCoordinates(const real
}
}
}
}
/**
 * Convert the potential from fractional to Cartesian coordinates.
 *
 * For every atom this reads the first 10 values of a 20-value record in fphi
 * (stride 20) and writes a 10-value record to cphi (stride 10):
 *   - entry 0 is copied unchanged;
 *   - entries 1-3 are transformed as a 3-vector by the matrix a;
 *   - entries 4-9 are transformed as the six unique components of a symmetric
 *     rank-2 quantity, stored in the order (0,0), (1,1), (2,2), (0,1), (0,2), (1,2)
 *     as given by the index1/index2 tables.
 *
 * a[r][c] is the grid size along fractional axis c multiplied by component c of
 * the reciprocal box vector for Cartesian axis r.
 *
 * The 6x6 matrix b is filled by the first 36 threads of each block, so the kernel
 * must be launched with at least 36 threads per block.
 *
 * @param fphi          potential and derivatives in fractional coordinates (20 values per atom)
 * @param cphi          output potential and derivatives in Cartesian coordinates (10 values per atom)
 * @param recipBoxVecX  first reciprocal box vector
 * @param recipBoxVecY  second reciprocal box vector
 * @param recipBoxVecZ  third reciprocal box vector
 */
extern "C" __global__ void transformPotentialToCartesianCoordinates(const real* __restrict__ fphi, real* __restrict__ cphi, real3 recipBoxVecX, real3 recipBoxVecY, real3 recipBoxVecZ) {
    // Build matrices for transforming the potential.

    __shared__ real a[3][3];
    if (threadIdx.x == 0) {
        a[0][0] = GRID_SIZE_X*recipBoxVecX.x;
        a[1][0] = GRID_SIZE_X*recipBoxVecY.x;
        a[2][0] = GRID_SIZE_X*recipBoxVecZ.x;
        a[0][1] = GRID_SIZE_Y*recipBoxVecX.y;
        a[1][1] = GRID_SIZE_Y*recipBoxVecY.y;
        a[2][1] = GRID_SIZE_Y*recipBoxVecZ.y;
        a[0][2] = GRID_SIZE_Z*recipBoxVecX.z;
        a[1][2] = GRID_SIZE_Z*recipBoxVecY.z;
        a[2][2] = GRID_SIZE_Z*recipBoxVecZ.z;
    }
    __syncthreads();

    // Axis pairs for the six unique second-derivative components.
    const int index1[] = {0, 1, 2, 0, 0, 1};
    const int index2[] = {0, 1, 2, 1, 2, 2};
    __shared__ real b[6][6];
    if (threadIdx.x < 36) {
        int i = threadIdx.x/6;   // output (Cartesian) component
        int j = threadIdx.x-6*i; // input (fractional) component
        const int r1 = index1[i], r2 = index2[i];
        const int c1 = index1[j], c2 = index2[j];
        real coeff = a[r1][c1]*a[r2][c2];

        // An off-diagonal INPUT component (c1 != c2) stands for both (c1,c2) and
        // (c2,c1) in the full 3x3 sum, so the transposed product must be added too.
        // The test has to be on j, not i: keying it on the output component drops
        // the factor of 2 for diagonal outputs and double counts diagonal inputs
        // whenever the box is not rectangular. When r1 == r2 the added term equals
        // the first one, which gives the required doubling.
        if (c1 != c2)
            coeff += a[r1][c2]*a[r2][c1];
        b[i][j] = coeff;
    }
    __syncthreads();

    // Transform the potential.

    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
        cphi[10*i] = fphi[20*i];
        cphi[10*i+1] = a[0][0]*fphi[20*i+1] + a[0][1]*fphi[20*i+2] + a[0][2]*fphi[20*i+3];
        cphi[10*i+2] = a[1][0]*fphi[20*i+1] + a[1][1]*fphi[20*i+2] + a[1][2]*fphi[20*i+3];
        cphi[10*i+3] = a[2][0]*fphi[20*i+1] + a[2][1]*fphi[20*i+2] + a[2][2]*fphi[20*i+3];
        for (int j = 0; j < 6; j++) {
            // Accumulate locally and store once instead of updating global memory per term.
            real sum = 0;
            for (int k = 0; k < 6; k++)
                sum += b[j][k]*fphi[20*i+4+k];
            cphi[10*i+4+j] = sum;
        }
    }
}
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
fracDipole
,
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
fracQuadrupole
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBox
Size
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBox
VecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
array
[
PME_ORDER
*
PME_ORDER
];
real
array
[
PME_ORDER
*
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
...
@@ -175,28 +222,28 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
...
@@ -175,28 +222,28 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
int
m
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
m
];
real4
pos
=
posq
[
m
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVec
X
.
x
)
*
periodicBoxSize
.
x
;
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVec
Z
.
z
+
0.5
f
)
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
)
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVec
Z
.
z
)
*
periodicBoxSize
.
z
;
pos
-=
periodicBoxVecX
*
floor
(
pos
.
x
*
recipBoxVec
X
.
z
+
0.5
f
)
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
// from global memory.
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
f
r
;
int
ifr
=
(
int
)
f
loor
(
fr
)
;
w
=
fr
-
ifr
;
w
=
fr
-
ifr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta1
,
w
,
array
);
computeBSplinePoint
(
theta1
,
w
,
array
);
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
f
r
;
ifr
=
(
int
)
f
loor
(
fr
)
;
w
=
fr
-
ifr
;
w
=
fr
-
ifr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta2
,
w
,
array
);
computeBSplinePoint
(
theta2
,
w
,
array
);
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
f
r
;
ifr
=
(
int
)
f
loor
(
fr
)
;
w
=
fr
-
ifr
;
w
=
fr
-
ifr
;
int
igrid3
=
ifr
-
PME_ORDER
+
1
;
int
igrid3
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta3
,
w
,
array
);
computeBSplinePoint
(
theta3
,
w
,
array
);
...
@@ -252,14 +299,24 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
...
@@ -252,14 +299,24 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
extern
"C"
__global__
void
gridSpreadInducedDipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
extern
"C"
__global__
void
gridSpreadInducedDipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
const
real
*
__restrict__
inducedDipolePolar
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
real
array
[
PME_ORDER
*
PME_ORDER
];
real
array
[
PME_ORDER
*
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
__shared__
real
cartToFrac
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
cartToFrac
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
cartToFrac
[
0
][
1
]
=
GRID_SIZE_X
*
recipBoxVecY
.
x
;
cartToFrac
[
0
][
2
]
=
GRID_SIZE_X
*
recipBoxVecZ
.
x
;
cartToFrac
[
1
][
0
]
=
GRID_SIZE_Y
*
recipBoxVecX
.
y
;
cartToFrac
[
1
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
cartToFrac
[
1
][
2
]
=
GRID_SIZE_Y
*
recipBoxVecZ
.
y
;
cartToFrac
[
2
][
0
]
=
GRID_SIZE_Z
*
recipBoxVecX
.
z
;
cartToFrac
[
2
][
1
]
=
GRID_SIZE_Z
*
recipBoxVecY
.
z
;
cartToFrac
[
2
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
}
__syncthreads
();
// Process the atoms in spatially sorted order. This improves cache performance when loading
// Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values.
// the grid values.
...
@@ -267,28 +324,28 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
...
@@ -267,28 +324,28 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
int
m
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
m
];
real4
pos
=
posq
[
m
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVec
X
.
x
)
*
periodicBoxSize
.
x
;
pos
-=
periodicBoxVecZ
*
floor
(
pos
.
z
*
recipBoxVec
Z
.
z
+
0.5
f
)
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
-=
periodicBoxVecY
*
floor
(
pos
.
y
*
recipBoxVecY
.
z
+
0.5
f
)
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVec
Z
.
z
)
*
periodicBoxSize
.
z
;
pos
-=
periodicBoxVecX
*
floor
(
pos
.
x
*
recipBoxVec
X
.
z
+
0.5
f
)
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
// from global memory.
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
f
r
;
int
ifr
=
(
int
)
f
loor
(
fr
)
;
w
=
fr
-
ifr
;
w
=
fr
-
ifr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta1
,
w
,
array
);
computeBSplinePoint
(
theta1
,
w
,
array
);
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
f
r
;
ifr
=
(
int
)
f
loor
(
fr
)
;
w
=
fr
-
ifr
;
w
=
fr
-
ifr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta2
,
w
,
array
);
computeBSplinePoint
(
theta2
,
w
,
array
);
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
f
r
;
ifr
=
(
int
)
f
loor
(
fr
)
;
w
=
fr
-
ifr
;
w
=
fr
-
ifr
;
int
igrid3
=
ifr
-
PME_ORDER
+
1
;
int
igrid3
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta3
,
w
,
array
);
computeBSplinePoint
(
theta3
,
w
,
array
);
...
@@ -316,16 +373,18 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
...
@@ -316,16 +373,18 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
int
index
=
ybase
+
zindex
;
int
index
=
ybase
+
zindex
;
real4
v
=
theta3
[
iz
];
real4
v
=
theta3
[
iz
];
real
inducedDipoleX
=
xscale
*
inducedDipole
[
m
*
3
];
real3
cinducedDipole
=
make_real3
(
inducedDipole
[
m
*
3
],
inducedDipole
[
m
*
3
+
1
],
inducedDipole
[
m
*
3
+
2
]);
real
inducedDipoleY
=
yscale
*
inducedDipole
[
m
*
3
+
1
];
real3
cinducedDipolePolar
=
make_real3
(
inducedDipolePolar
[
m
*
3
],
inducedDipolePolar
[
m
*
3
+
1
],
inducedDipolePolar
[
m
*
3
+
2
]);
real
inducedDipoleZ
=
zscale
*
inducedDipole
[
m
*
3
+
2
];
real3
finducedDipole
=
make_real3
(
cinducedDipole
.
x
*
cartToFrac
[
0
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
0
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
0
][
2
],
real
inducedDipolePolarX
=
xscale
*
inducedDipolePolar
[
m
*
3
];
cinducedDipole
.
x
*
cartToFrac
[
1
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
1
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
1
][
2
],
real
inducedDipolePolarY
=
yscale
*
inducedDipolePolar
[
m
*
3
+
1
];
cinducedDipole
.
x
*
cartToFrac
[
2
][
0
]
+
cinducedDipole
.
y
*
cartToFrac
[
2
][
1
]
+
cinducedDipole
.
z
*
cartToFrac
[
2
][
2
]);
real
inducedDipolePolarZ
=
zscale
*
inducedDipolePolar
[
m
*
3
+
2
];
real3
finducedDipolePolar
=
make_real3
(
cinducedDipolePolar
.
x
*
cartToFrac
[
0
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
0
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
0
][
2
],
real
term01
=
inducedDipoleY
*
u
.
y
*
v
.
x
+
inducedDipoleZ
*
u
.
x
*
v
.
y
;
cinducedDipolePolar
.
x
*
cartToFrac
[
1
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
1
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
1
][
2
],
real
term11
=
inducedDipoleX
*
u
.
x
*
v
.
x
;
cinducedDipolePolar
.
x
*
cartToFrac
[
2
][
0
]
+
cinducedDipolePolar
.
y
*
cartToFrac
[
2
][
1
]
+
cinducedDipolePolar
.
z
*
cartToFrac
[
2
][
2
]);
real
term02
=
inducedDipolePolarY
*
u
.
y
*
v
.
x
+
inducedDipolePolarZ
*
u
.
x
*
v
.
y
;
real
term01
=
finducedDipole
.
y
*
u
.
y
*
v
.
x
+
finducedDipole
.
z
*
u
.
x
*
v
.
y
;
real
term12
=
inducedDipolePolarX
*
u
.
x
*
v
.
x
;
real
term11
=
finducedDipole
.
x
*
u
.
x
*
v
.
x
;
real
term02
=
finducedDipolePolar
.
y
*
u
.
y
*
v
.
x
+
finducedDipolePolar
.
z
*
u
.
x
*
v
.
y
;
real
term12
=
finducedDipolePolar
.
x
*
u
.
x
*
v
.
x
;
real
add1
=
term01
*
t
.
x
+
term11
*
t
.
y
;
real
add1
=
term01
*
t
.
x
+
term11
*
t
.
y
;
real
add2
=
term02
*
t
.
x
+
term12
*
t
.
y
;
real
add2
=
term02
*
t
.
x
+
term12
*
t
.
y
;
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
...
@@ -392,6 +451,19 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
...
@@ -392,6 +451,19 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
real4
theta1
[
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
real4
theta3
[
PME_ORDER
];
__shared__
real
fracToCart
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
fracToCart
[
1
][
0
]
=
GRID_SIZE_X
*
recipBoxVecY
.
x
;
fracToCart
[
2
][
0
]
=
GRID_SIZE_X
*
recipBoxVecZ
.
x
;
fracToCart
[
0
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecX
.
y
;
fracToCart
[
1
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
fracToCart
[
2
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecZ
.
y
;
fracToCart
[
0
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecX
.
z
;
fracToCart
[
1
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecY
.
z
;
fracToCart
[
2
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
}
__syncthreads
();
// Process the atoms in spatially sorted order. This improves cache performance when loading
// Process the atoms in spatially sorted order. This improves cache performance when loading
// the grid values.
// the grid values.
...
@@ -530,13 +602,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
...
@@ -530,13 +602,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi
[
20
*
m
+
18
]
=
tuv012
;
phi
[
20
*
m
+
18
]
=
tuv012
;
phi
[
20
*
m
+
19
]
=
tuv111
;
phi
[
20
*
m
+
19
]
=
tuv111
;
real
dipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
real
dipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
long
long
fieldx
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
]
-
GRID_SIZE_X
*
recipBoxVecX
.
x
*
tuv10
0
)
*
0x100000000
);
long
long
fieldx
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
]
-
tuv100
*
fracToCart
[
0
][
0
]
-
tuv
0
10
*
fracToCart
[
0
][
1
]
-
tuv001
*
fracToCart
[
0
][
2
]
)
*
0x100000000
);
fieldBuffers
[
m
]
=
fieldx
;
fieldBuffers
[
m
]
=
fieldx
;
fieldPolarBuffers
[
m
]
=
fieldx
;
fieldPolarBuffers
[
m
]
=
fieldx
;
long
long
fieldy
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
1
]
-
GRID_SIZE_Y
*
recipBoxVecY
.
y
*
tuv010
)
*
0x100000000
);
long
long
fieldy
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
1
]
-
tuv100
*
fracToCart
[
1
][
0
]
-
tuv010
*
fracToCart
[
1
][
1
]
-
tuv001
*
fracToCart
[
1
][
2
]
)
*
0x100000000
);
fieldBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
fieldBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
fieldPolarBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
fieldPolarBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
long
long
fieldz
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
2
]
-
GRID_SIZE_Z
*
recipBoxVecZ
.
z
*
tuv001
)
*
0x100000000
);
long
long
fieldz
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
2
]
-
tuv100
*
fracToCart
[
2
][
0
]
-
tuv010
*
fracToCart
[
2
][
1
]
-
tuv001
*
fracToCart
[
2
][
2
]
)
*
0x100000000
);
fieldBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
fieldBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
fieldPolarBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
fieldPolarBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
}
}
...
@@ -786,14 +858,11 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
...
@@ -786,14 +858,11 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
extern
"C"
__global__
void
computeFixedMultipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
extern
"C"
__global__
void
computeFixedMultipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
phi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
real
*
__restrict__
phi_global
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
multipole
[
10
];
real
multipole
[
10
];
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
real
energy
=
0
;
real
energy
=
0
;
__shared__
real
fracToCart
[
3
][
3
];
__shared__
real
fracToCart
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
...
@@ -822,22 +891,22 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
...
@@ -822,22 +891,22 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
multipole
[
8
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
2
];
multipole
[
8
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
2
];
multipole
[
9
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
4
];
multipole
[
9
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
4
];
const
real
*
phi
=
&
phi_global
[
2
0
*
i
];
const
real
*
c
phi
=
&
c
phi_global
[
1
0
*
i
];
torqueBuffers
[
i
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
3
]
*
yscale
*
phi
[
2
]
-
multipole
[
2
]
*
zscale
*
phi
[
3
]
torqueBuffers
[
i
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
3
]
*
c
phi
[
2
]
-
multipole
[
2
]
*
c
phi
[
3
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
yscale
*
zscale
*
phi
[
9
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
c
phi
[
9
]
+
multipole
[
8
]
*
xscale
*
yscale
*
phi
[
7
]
+
multipole
[
9
]
*
yscale
*
yscale
*
phi
[
5
]
+
multipole
[
8
]
*
c
phi
[
7
]
+
multipole
[
9
]
*
c
phi
[
5
]
-
multipole
[
7
]
*
xscale
*
zscale
*
phi
[
8
]
-
multipole
[
9
]
*
zscale
*
zscale
*
phi
[
6
])
*
0x100000000
);
-
multipole
[
7
]
*
c
phi
[
8
]
-
multipole
[
9
]
*
c
phi
[
6
])
*
0x100000000
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
1
]
*
zscale
*
phi
[
3
]
-
multipole
[
3
]
*
xscale
*
phi
[
1
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
1
]
*
c
phi
[
3
]
-
multipole
[
3
]
*
c
phi
[
1
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
xscale
*
zscale
*
phi
[
8
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
c
phi
[
8
]
+
multipole
[
7
]
*
yscale
*
zscale
*
phi
[
9
]
+
multipole
[
8
]
*
zscale
*
zscale
*
phi
[
6
]
+
multipole
[
7
]
*
c
phi
[
9
]
+
multipole
[
8
]
*
c
phi
[
6
]
-
multipole
[
8
]
*
xscale
*
xscale
*
phi
[
4
]
-
multipole
[
9
]
*
xscale
*
yscale
*
phi
[
7
])
*
0x100000000
);
-
multipole
[
8
]
*
c
phi
[
4
]
-
multipole
[
9
]
*
c
phi
[
7
])
*
0x100000000
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
2
]
*
xscale
*
phi
[
1
]
-
multipole
[
1
]
*
yscale
*
phi
[
2
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
2
]
*
c
phi
[
1
]
-
multipole
[
1
]
*
c
phi
[
2
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
xscale
*
yscale
*
phi
[
7
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
c
phi
[
7
]
+
multipole
[
7
]
*
xscale
*
xscale
*
phi
[
4
]
+
multipole
[
9
]
*
xscale
*
zscale
*
phi
[
8
]
+
multipole
[
7
]
*
c
phi
[
4
]
+
multipole
[
9
]
*
c
phi
[
8
]
-
multipole
[
7
]
*
yscale
*
yscale
*
phi
[
5
]
-
multipole
[
8
]
*
yscale
*
zscale
*
phi
[
9
])
*
0x100000000
);
-
multipole
[
7
]
*
c
phi
[
5
]
-
multipole
[
8
]
*
c
phi
[
9
])
*
0x100000000
);
// Compute the force and energy.
// Compute the force and energy.
...
@@ -851,6 +920,7 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
...
@@ -851,6 +920,7 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
multipole
[
8
]
=
fracQuadrupole
[
i
*
6
+
2
];
multipole
[
8
]
=
fracQuadrupole
[
i
*
6
+
2
];
multipole
[
9
]
=
fracQuadrupole
[
i
*
6
+
4
];
multipole
[
9
]
=
fracQuadrupole
[
i
*
6
+
4
];
const
real
*
phi
=
&
phi_global
[
20
*
i
];
real4
f
=
make_real4
(
0
,
0
,
0
,
0
);
real4
f
=
make_real4
(
0
,
0
,
0
,
0
);
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
energy
+=
multipole
[
k
]
*
phi
[
k
];
energy
+=
multipole
[
k
]
*
phi
[
k
];
...
@@ -873,20 +943,13 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
...
@@ -873,20 +943,13 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
inducedDipole_global
,
const
real
*
__restrict__
inducedDipolePolar_global
,
const
real
*
__restrict__
inducedDipole_global
,
const
real
*
__restrict__
inducedDipolePolar_global
,
const
real
*
__restrict__
phi_global
,
const
real
*
__restrict__
phid_global
,
const
real
*
__restrict__
phip_global
,
const
real
*
__restrict__
phi_global
,
const
real
*
__restrict__
phid_global
,
const
real
*
__restrict__
phip_global
,
const
real
*
__restrict__
phidp_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
real
*
__restrict__
phidp_global
,
const
real
*
__restrict__
cphi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
multipole
[
10
];
real
multipole
[
10
];
real
cinducedDipole
[
3
],
inducedDipole
[
3
];
real
cinducedDipole
[
3
],
inducedDipole
[
3
];
real
cinducedDipolePolar
[
3
],
inducedDipolePolar
[
3
];
real
cinducedDipolePolar
[
3
],
inducedDipolePolar
[
3
];
real
scales
[
3
];
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
scales
[
0
]
=
xscale
;
scales
[
1
]
=
yscale
;
scales
[
2
]
=
zscale
;
real
energy
=
0
;
real
energy
=
0
;
__shared__
real
fracToCart
[
3
][
3
];
__shared__
real
fracToCart
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
...
@@ -914,22 +977,22 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
...
@@ -914,22 +977,22 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
multipole
[
7
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
1
];
multipole
[
7
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
1
];
multipole
[
8
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
2
];
multipole
[
8
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
2
];
multipole
[
9
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
4
];
multipole
[
9
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
4
];
const
real
*
phi
dp
=
&
phi
dp
_global
[
2
0
*
i
];
const
real
*
c
phi
=
&
c
phi_global
[
1
0
*
i
];
torqueBuffers
[
i
]
+=
(
long
long
)
(
0.5
f
*
EPSILON_FACTOR
*
(
multipole
[
3
]
*
yscale
*
phi
dp
[
2
]
-
multipole
[
2
]
*
zscale
*
phi
dp
[
3
]
torqueBuffers
[
i
]
+=
(
long
long
)
(
0.5
f
*
EPSILON_FACTOR
*
(
multipole
[
3
]
*
c
phi
[
2
]
-
multipole
[
2
]
*
c
phi
[
3
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
yscale
*
zscale
*
phi
dp
[
9
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
c
phi
[
9
]
+
multipole
[
8
]
*
xscale
*
yscale
*
phi
dp
[
7
]
+
multipole
[
9
]
*
yscale
*
yscale
*
phi
dp
[
5
]
+
multipole
[
8
]
*
c
phi
[
7
]
+
multipole
[
9
]
*
c
phi
[
5
]
-
multipole
[
7
]
*
xscale
*
zscale
*
phi
dp
[
8
]
-
multipole
[
9
]
*
zscale
*
zscale
*
phi
dp
[
6
])
*
0x100000000
);
-
multipole
[
7
]
*
c
phi
[
8
]
-
multipole
[
9
]
*
c
phi
[
6
])
*
0x100000000
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
+=
(
long
long
)
(
0.5
f
*
EPSILON_FACTOR
*
(
multipole
[
1
]
*
zscale
*
phi
dp
[
3
]
-
multipole
[
3
]
*
xscale
*
phi
dp
[
1
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
+=
(
long
long
)
(
0.5
f
*
EPSILON_FACTOR
*
(
multipole
[
1
]
*
c
phi
[
3
]
-
multipole
[
3
]
*
c
phi
[
1
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
xscale
*
zscale
*
phi
dp
[
8
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
c
phi
[
8
]
+
multipole
[
7
]
*
yscale
*
zscale
*
phi
dp
[
9
]
+
multipole
[
8
]
*
zscale
*
zscale
*
phi
dp
[
6
]
+
multipole
[
7
]
*
c
phi
[
9
]
+
multipole
[
8
]
*
c
phi
[
6
]
-
multipole
[
8
]
*
xscale
*
xscale
*
phi
dp
[
4
]
-
multipole
[
9
]
*
xscale
*
yscale
*
phi
dp
[
7
])
*
0x100000000
);
-
multipole
[
8
]
*
c
phi
[
4
]
-
multipole
[
9
]
*
c
phi
[
7
])
*
0x100000000
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
+=
(
long
long
)
(
0.5
f
*
EPSILON_FACTOR
*
(
multipole
[
2
]
*
xscale
*
phi
dp
[
1
]
-
multipole
[
1
]
*
yscale
*
phi
dp
[
2
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
+=
(
long
long
)
(
0.5
f
*
EPSILON_FACTOR
*
(
multipole
[
2
]
*
c
phi
[
1
]
-
multipole
[
1
]
*
c
phi
[
2
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
xscale
*
yscale
*
phi
dp
[
7
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
c
phi
[
7
]
+
multipole
[
7
]
*
xscale
*
xscale
*
phi
dp
[
4
]
+
multipole
[
9
]
*
xscale
*
zscale
*
phi
dp
[
8
]
+
multipole
[
7
]
*
c
phi
[
4
]
+
multipole
[
9
]
*
c
phi
[
8
]
-
multipole
[
7
]
*
yscale
*
yscale
*
phi
dp
[
5
]
-
multipole
[
8
]
*
yscale
*
zscale
*
phi
dp
[
9
])
*
0x100000000
);
-
multipole
[
7
]
*
c
phi
[
5
]
-
multipole
[
8
]
*
c
phi
[
9
])
*
0x100000000
);
// Compute the force and energy.
// Compute the force and energy.
...
@@ -981,6 +1044,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
...
@@ -981,6 +1044,7 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
#endif
#endif
}
}
const
real
*
phidp
=
&
phidp_global
[
20
*
i
];
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
f
.
x
+=
multipole
[
k
]
*
phidp
[
deriv1
[
k
]];
f
.
x
+=
multipole
[
k
]
*
phidp
[
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
phidp
[
deriv2
[
k
]];
f
.
y
+=
multipole
[
k
]
*
phidp
[
deriv2
[
k
]];
...
@@ -998,15 +1062,25 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
...
@@ -998,15 +1062,25 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
extern
"C"
__global__
void
recordInducedFieldDipoles
(
const
real
*
__restrict__
phid
,
real
*
const
__restrict__
phip
,
extern
"C"
__global__
void
recordInducedFieldDipoles
(
const
real
*
__restrict__
phid
,
real
*
const
__restrict__
phip
,
long
long
*
__restrict__
inducedField
,
long
long
*
__restrict__
inducedFieldPolar
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
long
long
*
__restrict__
inducedField
,
long
long
*
__restrict__
inducedFieldPolar
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
*
0x100000000
;
__shared__
real
fracToCart
[
3
][
3
];
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
*
0x100000000
;
if
(
threadIdx
.
x
==
0
)
{
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
*
0x100000000
;
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
fracToCart
[
1
][
0
]
=
GRID_SIZE_X
*
recipBoxVecY
.
x
;
fracToCart
[
2
][
0
]
=
GRID_SIZE_X
*
recipBoxVecZ
.
x
;
fracToCart
[
0
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecX
.
y
;
fracToCart
[
1
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
fracToCart
[
2
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecZ
.
y
;
fracToCart
[
0
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecX
.
z
;
fracToCart
[
1
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecY
.
z
;
fracToCart
[
2
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
}
__syncthreads
();
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
inducedField
[
i
]
-=
(
long
long
)
(
xscale
*
phid
[
10
*
i
+
1
]
);
inducedField
[
i
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
10
*
i
+
1
]
*
fracToCart
[
0
][
0
]
+
phid
[
10
*
i
+
2
]
*
fracToCart
[
0
][
1
]
+
phid
[
10
*
i
+
3
]
*
fracToCart
[
0
][
2
])
);
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
yscale
*
phid
[
10
*
i
+
2
]);
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
10
*
i
+
1
]
*
fracToCart
[
1
][
0
]
+
phid
[
10
*
i
+
2
]
*
fracToCart
[
1
][
1
]
+
phid
[
10
*
i
+
3
]
*
fracToCart
[
1
][
2
])
)
;
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
zscale
*
phid
[
10
*
i
+
3
]
);
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
0x100000000
*
(
phid
[
10
*
i
+
1
]
*
fracToCart
[
2
][
0
]
+
phid
[
10
*
i
+
2
]
*
fracToCart
[
2
][
1
]
+
phid
[
10
*
i
+
3
]
*
fracToCart
[
2
][
2
])
);
inducedFieldPolar
[
i
]
-=
(
long
long
)
(
xscale
*
phip
[
10
*
i
+
1
]
);
inducedFieldPolar
[
i
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
10
*
i
+
1
]
*
fracToCart
[
0
][
0
]
+
phip
[
10
*
i
+
2
]
*
fracToCart
[
0
][
1
]
+
phip
[
10
*
i
+
3
]
*
fracToCart
[
0
][
2
])
);
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
yscale
*
phip
[
10
*
i
+
2
]);
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
10
*
i
+
1
]
*
fracToCart
[
1
][
0
]
+
phip
[
10
*
i
+
2
]
*
fracToCart
[
1
][
1
]
+
phip
[
10
*
i
+
3
]
*
fracToCart
[
1
][
2
])
)
;
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
zscale
*
phip
[
10
*
i
+
3
]
);
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
long
long
)
(
0x100000000
*
(
phip
[
10
*
i
+
1
]
*
fracToCart
[
2
][
0
]
+
phip
[
10
*
i
+
2
]
*
fracToCart
[
2
][
1
]
+
phip
[
10
*
i
+
3
]
*
fracToCart
[
2
][
2
])
);
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment