Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c83f2a12
Commit
c83f2a12
authored
Feb 04, 2015
by
Peter Eastman
Browse files
Began CUDA implementation of triclinic boxes for AmoebaMultipoleForce
parent
82e118fb
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
286 additions
and
168 deletions
+286
-168
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+59
-17
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+3
-0
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
...eba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
+3
-1
plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
.../amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
+6
-10
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
...moeba/platforms/cuda/src/kernels/multipoleInducedField.cu
+5
-10
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
+200
-117
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
+2
-4
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
.../platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
+8
-9
No files found.
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
c83f2a12
...
...
@@ -796,8 +796,8 @@ private:
CudaCalcAmoebaMultipoleForceKernel
::
CudaCalcAmoebaMultipoleForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcAmoebaMultipoleForceKernel
(
name
,
platform
),
cu
(
cu
),
system
(
system
),
hasInitializedScaleFactors
(
false
),
hasInitializedFFT
(
false
),
multipolesAreValid
(
false
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
multipoleParticles
(
NULL
),
molecularDipoles
(
NULL
),
molecularQuadrupoles
(
NULL
),
labFrameDipoles
(
NULL
),
labFrameQuadrupoles
(
NULL
),
fracDipoles
(
NULL
),
fracQuadrupoles
(
NULL
),
field
(
NULL
),
fieldPolar
(
NULL
),
inducedField
(
NULL
),
inducedFieldPolar
(
NULL
),
torque
(
NULL
),
dampingAndThole
(
NULL
),
inducedDipole
(
NULL
),
diisCoefficients
(
NULL
),
inducedDipolePolar
(
NULL
),
inducedDipoleErrors
(
NULL
),
prevDipoles
(
NULL
),
prevDipolesPolar
(
NULL
),
prevDipolesGk
(
NULL
),
prevDipolesGkPolar
(
NULL
),
prevErrors
(
NULL
),
diisMatrix
(
NULL
),
polarizability
(
NULL
),
covalentFlags
(
NULL
),
polarizationGroupFlags
(
NULL
),
pmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeIgrid
(
NULL
),
pmePhi
(
NULL
),
...
...
@@ -816,6 +816,10 @@ CudaCalcAmoebaMultipoleForceKernel::~CudaCalcAmoebaMultipoleForceKernel() {
delete
labFrameDipoles
;
if
(
labFrameQuadrupoles
!=
NULL
)
delete
labFrameQuadrupoles
;
if
(
fracDipoles
!=
NULL
)
delete
fracDipoles
;
if
(
fracQuadrupoles
!=
NULL
)
delete
fracQuadrupoles
;
if
(
field
!=
NULL
)
delete
field
;
if
(
fieldPolar
!=
NULL
)
...
...
@@ -968,7 +972,9 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
labFrameDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"labFrameDipoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
9
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
labFrameQuadrupoles
=
new
CudaArray
(
cu
,
5
*
paddedNumAtoms
,
elementSize
,
"labFrameQuadrupoles"
);
fracDipoles
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
elementSize
,
"fracDipoles"
);
fracQuadrupoles
=
new
CudaArray
(
cu
,
6
*
paddedNumAtoms
,
elementSize
,
"fracQuadrupoles"
);
field
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"field"
);
fieldPolar
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"fieldPolar"
);
torque
=
new
CudaArray
(
cu
,
3
*
paddedNumAtoms
,
sizeof
(
long
long
),
"torque"
);
...
...
@@ -1185,6 +1191,7 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeDefines
[
"DIRECT_POLARIZATION"
]
=
""
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaAmoebaKernelSources
::
multipolePme
,
pmeDefines
);
pmeGridIndexKernel
=
cu
.
getKernel
(
module
,
"findAtomGridIndex"
);
pmeTransformMultipolesKernel
=
cu
.
getKernel
(
module
,
"transformMultipolesToFractionalCoordinates"
);
pmeSpreadFixedMultipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadFixedMultipoles"
);
pmeSpreadInducedDipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadInducedDipoles"
);
pmeFinishSpreadChargeKernel
=
cu
.
getKernel
(
module
,
"finishSpreadCharge"
);
...
...
@@ -1483,15 +1490,44 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
gkKernel
->
finishComputation
(
*
torque
,
*
labFrameDipoles
,
*
labFrameQuadrupoles
,
*
inducedDipole
,
*
inducedDipolePolar
,
*
dampingAndThole
,
*
covalentFlags
,
*
polarizationGroupFlags
);
}
else
{
// Compute reciprocal box vectors.
Vec3
boxVectors
[
3
];
cu
.
getPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
double
determinant
=
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
];
double
scale
=
1.0
/
determinant
;
double3
recipBoxVectors
[
3
];
recipBoxVectors
[
0
]
=
make_double3
(
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
2
]
*
scale
,
0
,
0
);
recipBoxVectors
[
1
]
=
make_double3
(
-
boxVectors
[
1
][
0
]
*
boxVectors
[
2
][
2
]
*
scale
,
boxVectors
[
0
][
0
]
*
boxVectors
[
2
][
2
]
*
scale
,
0
);
recipBoxVectors
[
2
]
=
make_double3
((
boxVectors
[
1
][
0
]
*
boxVectors
[
2
][
1
]
-
boxVectors
[
1
][
1
]
*
boxVectors
[
2
][
0
])
*
scale
,
-
boxVectors
[
0
][
0
]
*
boxVectors
[
2
][
1
]
*
scale
,
boxVectors
[
0
][
0
]
*
boxVectors
[
1
][
1
]
*
scale
);
float3
recipBoxVectorsFloat
[
3
];
void
*
recipBoxVectorPointer
[
3
];
if
(
cu
.
getUseDoublePrecision
())
{
recipBoxVectorPointer
[
0
]
=
&
recipBoxVectors
[
0
];
recipBoxVectorPointer
[
1
]
=
&
recipBoxVectors
[
1
];
recipBoxVectorPointer
[
2
]
=
&
recipBoxVectors
[
2
];
}
else
{
recipBoxVectorsFloat
[
0
]
=
make_float3
((
float
)
recipBoxVectors
[
0
].
x
,
0
,
0
);
recipBoxVectorsFloat
[
1
]
=
make_float3
((
float
)
recipBoxVectors
[
1
].
x
,
(
float
)
recipBoxVectors
[
1
].
y
,
0
);
recipBoxVectorsFloat
[
2
]
=
make_float3
((
float
)
recipBoxVectors
[
2
].
x
,
(
float
)
recipBoxVectors
[
2
].
y
,
(
float
)
recipBoxVectors
[
2
].
z
);
recipBoxVectorPointer
[
0
]
=
&
recipBoxVectorsFloat
[
0
];
recipBoxVectorPointer
[
1
]
=
&
recipBoxVectorsFloat
[
1
];
recipBoxVectorPointer
[
2
]
=
&
recipBoxVectorsFloat
[
2
];
}
// Reciprocal space calculation.
unsigned
int
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSize
Pointer
()
};
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVector
Pointer
[
2
]
};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
(),
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
PmeOrder
*
PmeOrder
*
elementSize
);
sort
->
sort
(
*
pmeAtomGridIndex
);
void
*
pmeSpreadFixedMultipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
void
*
pmeTransformMultipolesArgs
[]
=
{
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeTransformMultipolesKernel
,
pmeTransformMultipolesArgs
,
cu
.
getNumAtoms
());
void
*
pmeSpreadFixedMultipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeSpreadFixedMultipolesKernel
,
pmeSpreadFixedMultipolesArgs
,
cu
.
getNumAtoms
());
void
*
finishSpreadArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
()};
if
(
cu
.
getUseDoublePrecision
())
...
...
@@ -1501,7 +1537,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
void
*
pmeConvolutionArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSize
Pointer
()
};
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVector
Pointer
[
2
]
};
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
...
...
@@ -1509,11 +1545,12 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
void
*
pmeFixedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSize
Pointer
()
,
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVector
Pointer
[
2
]
,
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeFixedPotentialKernel
,
pmeFixedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeFixedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeFixedForceKernel
,
pmeFixedForceArgs
,
cu
.
getNumAtoms
());
// Direct space calculation.
...
...
@@ -1521,7 +1558,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
computeFixedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeFixedFieldKernel
,
computeFixedFieldArgs
,
numForceThreadBlocks
*
fixedFieldThreads
,
fixedFieldThreads
);
void
*
recordInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
...
...
@@ -1532,7 +1570,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu
.
clearBuffer
(
*
pmeGrid
);
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSize
Pointer
()
};
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVector
Pointer
[
2
]
};
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
pmeGrid
->
getSize
());
...
...
@@ -1546,7 +1584,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
void
*
pmeInducedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSize
Pointer
()
,
&
pmePhidp
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVector
Pointer
[
2
]
,
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
...
...
@@ -1559,7 +1597,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
computeInducedFieldArgs
[]
=
{
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
computeInducedFieldKernel
,
computeInducedFieldArgs
,
numForceThreadBlocks
*
inducedFieldThreads
,
inducedFieldThreads
);
cu
.
clearBuffer
(
*
pmeGrid
);
...
...
@@ -1577,7 +1616,7 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeRecordInducedFieldDipolesArgs
[]
=
{
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSize
Pointer
()
};
&
inducedField
->
getDevicePointer
(),
&
inducedFieldPolar
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVector
Pointer
[
2
]
};
cu
.
executeKernel
(
pmeRecordInducedFieldDipolesKernel
,
pmeRecordInducedFieldDipolesArgs
,
cu
.
getNumAtoms
());
bool
converged
=
iterateDipolesByDIIS
(
i
);
if
(
converged
)
...
...
@@ -1590,14 +1629,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
cu
.
getPosq
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
nb
.
getExclusionTiles
().
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
&
nb
.
getInteractingTiles
().
getDevicePointer
(),
&
nb
.
getInteractionCount
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
&
maxTiles
,
&
nb
.
getBlockCenters
().
getDevicePointer
(),
&
nb
.
getInteractingAtoms
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
electrostaticsThreads
,
electrostaticsThreads
);
void
*
pmeInducedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
fracDipoles
->
getDevicePointer
(),
&
fracQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSize
Pointer
()
};
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVector
Pointer
[
2
]
};
cu
.
executeKernel
(
pmeInducedForceKernel
,
pmeInducedForceArgs
,
cu
.
getNumAtoms
());
}
...
...
@@ -1785,7 +1826,8 @@ void CudaCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextImpl&
void
*
computePotentialArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
points
.
getDevicePointer
(),
&
potential
.
getDevicePointer
(),
&
numPoints
,
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
&
potential
.
getDevicePointer
(),
&
numPoints
,
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
()};
int
blockSize
=
128
;
cu
.
executeKernel
(
computePotentialKernel
,
computePotentialArgs
,
numPoints
,
blockSize
,
blockSize
*
15
*
elementSize
);
outputElectrostaticPotential
.
resize
(
numPoints
);
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
c83f2a12
...
...
@@ -391,6 +391,8 @@ private:
CudaArray
*
molecularQuadrupoles
;
CudaArray
*
labFrameDipoles
;
CudaArray
*
labFrameQuadrupoles
;
CudaArray
*
fracDipoles
;
CudaArray
*
fracQuadrupoles
;
CudaArray
*
field
;
CudaArray
*
fieldPolar
;
CudaArray
*
inducedField
;
...
...
@@ -428,6 +430,7 @@ private:
CUfunction
pmeGridIndexKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeFinishSpreadChargeKernel
,
pmeConvolutionKernel
;
CUfunction
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
,
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
,
computePotentialKernel
;
CUfunction
recordDIISDipolesKernel
,
buildMatrixKernel
;
CUfunction
pmeTransformMultipolesKernel
;
CudaCalcAmoebaGeneralizedKirkwoodForceKernel
*
gkKernel
;
static
const
int
PmeOrder
=
5
;
static
const
int
MaxPrevDIISDipoles
=
20
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleElectrostatics.cu
View file @
c83f2a12
...
...
@@ -65,7 +65,9 @@ extern "C" __global__ void computeElectrostatics(
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#endif
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleFixedField.cu
View file @
c83f2a12
...
...
@@ -440,7 +440,9 @@ extern "C" __global__ void computeFixedField(
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#elif defined USE_GK
const
real
*
__restrict__
bornRadii
,
unsigned
long
long
*
__restrict__
gkFieldBuffers
,
#endif
...
...
@@ -494,9 +496,7 @@ extern "C" __global__ void computeFixedField(
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
trimTo3
(
localData
[
tbx
+
j
].
posq
-
data
.
posq
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
int
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
!=
atom2
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
...
...
@@ -532,9 +532,7 @@ extern "C" __global__ void computeFixedField(
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
trimTo3
(
localData
[
tbx
+
tj
].
posq
-
data
.
posq
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
int
atom2
=
y
*
TILE_SIZE
+
tj
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
...
...
@@ -675,9 +673,7 @@ extern "C" __global__ void computeFixedField(
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
trimTo3
(
localData
[
tbx
+
tj
].
posq
-
data
.
posq
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
int
atom2
=
atomIndices
[
tbx
+
tj
];
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoleInducedField.cu
View file @
c83f2a12
...
...
@@ -207,7 +207,8 @@ extern "C" __global__ void computeInducedField(
unsigned
long
long
*
__restrict__
field
,
unsigned
long
long
*
__restrict__
fieldPolar
,
const
real4
*
__restrict__
posq
,
const
ushort2
*
__restrict__
exclusionTiles
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#elif defined USE_GK
unsigned
long
long
*
__restrict__
fieldS
,
unsigned
long
long
*
__restrict__
fieldPolarS
,
const
real
*
__restrict__
inducedDipoleS
,
const
real
*
__restrict__
inducedDipolePolarS
,
const
real
*
__restrict__
bornRadii
,
...
...
@@ -251,9 +252,7 @@ extern "C" __global__ void computeInducedField(
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
localData
[
tbx
+
j
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
int
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
...
...
@@ -273,9 +272,7 @@ extern "C" __global__ void computeInducedField(
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
localData
[
tbx
+
tj
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
int
atom2
=
y
*
TILE_SIZE
+
j
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
...
...
@@ -404,9 +401,7 @@ extern "C" __global__ void computeInducedField(
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
localData
[
tbx
+
tj
].
pos
-
data
.
pos
;
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
int
atom2
=
atomIndices
[
tbx
+
tj
];
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipolePme.cu
View file @
c83f2a12
...
...
@@ -73,30 +73,30 @@ __device__ void computeBSplinePoint(real4* thetai, real w, real* array) {
* Compute the index of the grid point each atom is associated with.
*/
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
// First axis.
real
w
=
pos
.
x
*
invPeriodicBoxSize
.
x
;
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
fr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
// Second axis.
w
=
pos
.
y
*
invPeriodicBoxSize
.
y
;
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
// Third axis.
w
=
pos
.
z
*
invPeriodicBoxSize
.
z
;
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
int
igrid3
=
ifr
-
PME_ORDER
+
1
;
...
...
@@ -109,13 +109,61 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
pmeAtomGridIndex
[
i
]
=
make_int2
(
i
,
igrid1
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
igrid2
*
GRID_SIZE_Z
+
igrid3
);
}
}
/**
* Convert the fixed multipoles from Cartesian to fractional coordinates.
*/
extern
"C"
__global__
void
transformMultipolesToFractionalCoordinates
(
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
real
*
__restrict__
fracDipole
,
real
*
__restrict__
fracQuadrupole
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// Build matrices for transforming the dipoles and quadrupoles.
__shared__
real
a
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
a
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
a
[
0
][
1
]
=
GRID_SIZE_X
*
recipBoxVecY
.
x
;
a
[
0
][
2
]
=
GRID_SIZE_X
*
recipBoxVecZ
.
x
;
a
[
1
][
0
]
=
GRID_SIZE_Y
*
recipBoxVecX
.
y
;
a
[
1
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
a
[
1
][
2
]
=
GRID_SIZE_Y
*
recipBoxVecZ
.
y
;
a
[
2
][
0
]
=
GRID_SIZE_Z
*
recipBoxVecX
.
z
;
a
[
2
][
1
]
=
GRID_SIZE_Z
*
recipBoxVecY
.
z
;
a
[
2
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
}
__syncthreads
();
int
index1
[]
=
{
0
,
0
,
0
,
1
,
1
,
2
};
int
index2
[]
=
{
0
,
1
,
2
,
1
,
2
,
2
};
__shared__
real
b
[
6
][
6
];
if
(
threadIdx
.
x
<
36
)
{
int
i
=
threadIdx
.
x
/
6
;
int
j
=
threadIdx
.
x
-
6
*
i
;
b
[
i
][
j
]
=
a
[
index1
[
i
]][
index1
[
j
]]
*
a
[
index2
[
i
]][
index2
[
j
]];
if
(
index1
[
i
]
!=
index2
[
i
])
b
[
i
][
j
]
+=
a
[
index1
[
i
]][
index2
[
j
]]
*
a
[
index2
[
i
]][
index1
[
j
]];
}
__syncthreads
();
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
const
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
// Transform the multipoles.
real
quadScale
[]
=
{
1
,
2
,
2
,
1
,
2
,
1
};
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
real
dipole
=
0
;
for
(
int
k
=
0
;
k
<
3
;
k
++
)
dipole
+=
a
[
j
][
k
]
*
labFrameDipole
[
3
*
i
+
k
];
fracDipole
[
3
*
i
+
j
]
=
dipole
;
}
for
(
int
j
=
0
;
j
<
6
;
j
++
)
{
real
quadrupole
=
0
;
for
(
int
k
=
0
;
k
<
5
;
k
++
)
quadrupole
+=
quadScale
[
k
]
*
b
[
j
][
k
]
*
labFrameQuadrupole
[
5
*
i
+
k
];
quadrupole
-=
quadScale
[
5
]
*
b
[
j
][
5
]
*
(
labFrameQuadrupole
[
5
*
i
]
+
labFrameQuadrupole
[
5
*
i
+
3
]);
fracQuadrupole
[
6
*
i
+
j
]
=
quadrupole
;
}
}
}
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
array
[
PME_ORDER
*
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
...
...
@@ -127,26 +175,26 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
m
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real
w
=
pos
.
x
*
invPeriodicBoxSize
.
x
;
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta1
,
w
,
array
);
w
=
pos
.
y
*
invPeriodicBoxSize
.
y
;
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta2
,
w
,
array
);
w
=
pos
.
z
*
invPeriodicBoxSize
.
z
;
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
...
...
@@ -177,15 +225,15 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
real4
v
=
theta3
[
iz
];
real
atomCharge
=
pos
.
w
;
real
atomDipoleX
=
xscale
*
labFrame
Dipole
[
m
*
3
];
real
atomDipoleY
=
yscale
*
labFrame
Dipole
[
m
*
3
+
1
];
real
atomDipoleZ
=
zscale
*
labFrame
Dipole
[
m
*
3
+
2
];
real
atomQuadrupoleXX
=
xscale
*
xscale
*
labFrame
Quadrupole
[
m
*
5
];
real
atomQuadrupoleXY
=
2
*
xscale
*
yscale
*
labFrame
Quadrupole
[
m
*
5
+
1
];
real
atomQuadrupoleXZ
=
2
*
xscale
*
zscale
*
labFrame
Quadrupole
[
m
*
5
+
2
];
real
atomQuadrupoleYY
=
yscale
*
yscale
*
labFrame
Quadrupole
[
m
*
5
+
3
];
real
atomQuadrupoleYZ
=
2
*
yscale
*
zscale
*
labFrame
Quadrupole
[
m
*
5
+
4
];
real
atomQuadrupoleZZ
=
-
zscale
*
zscale
*
(
labFrameQuadrupole
[
m
*
5
]
+
labFrameQuadrupole
[
m
*
5
+
3
])
;
real
atomDipoleX
=
frac
Dipole
[
m
*
3
];
real
atomDipoleY
=
frac
Dipole
[
m
*
3
+
1
];
real
atomDipoleZ
=
frac
Dipole
[
m
*
3
+
2
];
real
atomQuadrupoleXX
=
frac
Quadrupole
[
m
*
6
];
real
atomQuadrupoleXY
=
frac
Quadrupole
[
m
*
6
+
1
];
real
atomQuadrupoleXZ
=
frac
Quadrupole
[
m
*
6
+
2
];
real
atomQuadrupoleYY
=
frac
Quadrupole
[
m
*
6
+
3
];
real
atomQuadrupoleYZ
=
frac
Quadrupole
[
m
*
6
+
4
];
real
atomQuadrupoleZZ
=
fracQuadrupole
[
m
*
6
+
5
]
;
real
term0
=
atomCharge
*
u
.
x
*
v
.
x
+
atomDipoleY
*
u
.
y
*
v
.
x
+
atomDipoleZ
*
u
.
x
*
v
.
y
+
atomQuadrupoleYY
*
u
.
z
*
v
.
x
+
atomQuadrupoleZZ
*
u
.
x
*
v
.
z
+
atomQuadrupoleYZ
*
u
.
y
*
v
.
y
;
real
term1
=
atomDipoleX
*
u
.
x
*
v
.
x
+
atomQuadrupoleXY
*
u
.
y
*
v
.
x
+
atomQuadrupoleXZ
*
u
.
x
*
v
.
y
;
real
term2
=
atomQuadrupoleXX
*
u
.
x
*
v
.
x
;
...
...
@@ -204,10 +252,10 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
extern
"C"
__global__
void
gridSpreadInducedDipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
)
{
const
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
real
array
[
PME_ORDER
*
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
...
...
@@ -219,26 +267,26 @@ extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ po
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
m
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real
w
=
pos
.
x
*
invPeriodicBoxSize
.
x
;
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta1
,
w
,
array
);
w
=
pos
.
y
*
invPeriodicBoxSize
.
y
;
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta2
,
w
,
array
);
w
=
pos
.
z
*
invPeriodicBoxSize
.
z
;
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
...
...
@@ -306,7 +354,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ pmeGrid) {
}
extern
"C"
__global__
void
reciprocalConvolution
(
real2
*
__restrict__
pmeGrid
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
real
expFactor
=
M_PI
*
M_PI
/
(
EWALD_ALPHA
*
EWALD_ALPHA
);
real
scaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
...
...
@@ -322,9 +371,9 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
invPeriodicBoxSize
.
x
;
real
mhy
=
m
y
*
invPeriodicBoxSize
.
y
;
real
mhz
=
m
z
*
invPeriodicBoxSize
.
z
;
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
m
x
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
m
x
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
...
...
@@ -338,7 +387,7 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
extern
"C"
__global__
void
computeFixedPotentialFromGrid
(
const
real2
*
__restrict__
pmeGrid
,
real
*
__restrict__
phi
,
long
long
*
__restrict__
fieldBuffers
,
long
long
*
__restrict__
fieldPolarBuffers
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
const
real
*
__restrict__
labFrameDipole
,
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
real
array
[
PME_ORDER
*
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
...
...
@@ -350,26 +399,26 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
m
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real
w
=
pos
.
x
*
invPeriodicBoxSize
.
x
;
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta1
,
w
,
array
);
w
=
pos
.
y
*
invPeriodicBoxSize
.
y
;
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta2
,
w
,
array
);
w
=
pos
.
z
*
invPeriodicBoxSize
.
z
;
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
...
...
@@ -481,13 +530,13 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi
[
20
*
m
+
18
]
=
tuv012
;
phi
[
20
*
m
+
19
]
=
tuv111
;
real
dipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
long
long
fieldx
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
]
-
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
tuv100
)
*
0x100000000
);
long
long
fieldx
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
]
-
GRID_SIZE_X
*
recipBoxVecX
.
x
*
tuv100
)
*
0x100000000
);
fieldBuffers
[
m
]
=
fieldx
;
fieldPolarBuffers
[
m
]
=
fieldx
;
long
long
fieldy
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
1
]
-
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
tuv010
)
*
0x100000000
);
long
long
fieldy
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
1
]
-
GRID_SIZE_Y
*
recipBoxVecY
.
y
*
tuv010
)
*
0x100000000
);
fieldBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
fieldPolarBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
long
long
fieldz
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
2
]
-
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
tuv001
)
*
0x100000000
);
long
long
fieldz
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
2
]
-
GRID_SIZE_Z
*
recipBoxVecZ
.
z
*
tuv001
)
*
0x100000000
);
fieldBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
fieldPolarBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
}
...
...
@@ -495,7 +544,7 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
extern
"C"
__global__
void
computeInducedPotentialFromGrid
(
const
real2
*
__restrict__
pmeGrid
,
real
*
__restrict__
phid
,
real
*
__restrict__
phip
,
real
*
__restrict__
phidp
,
const
real4
*
__restrict__
posq
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
int2
*
__restrict__
pmeAtomGridIndex
)
{
real
array
[
PME_ORDER
*
PME_ORDER
];
real4
theta1
[
PME_ORDER
];
real4
theta2
[
PME_ORDER
];
...
...
@@ -507,26 +556,26 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
m
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
m
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real
w
=
pos
.
x
*
invPeriodicBoxSize
.
x
;
real
w
=
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
;
real
fr
=
GRID_SIZE_X
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
int
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid1
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta1
,
w
,
array
);
w
=
pos
.
y
*
invPeriodicBoxSize
.
y
;
w
=
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
;
fr
=
GRID_SIZE_Y
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
int
igrid2
=
ifr
-
PME_ORDER
+
1
;
computeBSplinePoint
(
theta2
,
w
,
array
);
w
=
pos
.
z
*
invPeriodicBoxSize
.
z
;
w
=
pos
.
z
*
recipBoxVecZ
.
z
;
fr
=
GRID_SIZE_Z
*
(
w
-
(
int
)(
w
+
0.5
f
)
+
0.5
f
);
ifr
=
(
int
)
fr
;
w
=
fr
-
ifr
;
...
...
@@ -736,15 +785,29 @@ extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restri
extern
"C"
__global__
void
computeFixedMultipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
phi_global
,
real4
invPeriodicBoxSize
)
{
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
phi_global
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
multipole
[
10
];
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
const
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
real
energy
=
0
;
__shared__
real
fracToCart
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
fracToCart
[
1
][
0
]
=
GRID_SIZE_X
*
recipBoxVecY
.
x
;
fracToCart
[
2
][
0
]
=
GRID_SIZE_X
*
recipBoxVecZ
.
x
;
fracToCart
[
0
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecX
.
y
;
fracToCart
[
1
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
fracToCart
[
2
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecZ
.
y
;
fracToCart
[
0
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecX
.
z
;
fracToCart
[
1
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecY
.
z
;
fracToCart
[
2
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
}
__syncthreads
();
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Compute the torque.
...
...
@@ -778,15 +841,15 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
// Compute the force and energy.
multipole
[
1
]
*
=
xscale
;
multipole
[
2
]
*
=
yscale
;
multipole
[
3
]
*
=
zscale
;
multipole
[
4
]
*
=
xscale
*
xscale
;
multipole
[
5
]
*
=
yscale
*
yscale
;
multipole
[
6
]
*
=
zscale
*
zscale
;
multipole
[
7
]
*
=
xscale
*
yscale
;
multipole
[
8
]
*
=
xscale
*
zscale
;
multipole
[
9
]
*
=
yscale
*
zscale
;
multipole
[
1
]
=
fracDipole
[
i
*
3
]
;
multipole
[
2
]
=
fracDipole
[
i
*
3
+
1
]
;
multipole
[
3
]
=
fracDipole
[
i
*
3
+
2
]
;
multipole
[
4
]
=
fracQuadrupole
[
i
*
6
]
;
multipole
[
5
]
=
fracQuadrupole
[
i
*
6
+
3
]
;
multipole
[
6
]
=
fracQuadrupole
[
i
*
6
+
5
]
;
multipole
[
7
]
=
fracQuadrupole
[
i
*
6
+
1
]
;
multipole
[
8
]
=
fracQuadrupole
[
i
*
6
+
2
]
;
multipole
[
9
]
=
fracQuadrupole
[
i
*
6
+
4
]
;
real4
f
=
make_real4
(
0
,
0
,
0
,
0
);
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
...
...
@@ -795,9 +858,9 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
f
.
y
+=
multipole
[
k
]
*
phi
[
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
phi
[
deriv3
[
k
]];
}
f
.
x
*=
EPSILON_FACTOR
*
xscale
;
f
.
y
*=
EPSILON_FACTOR
*
yscale
;
f
.
z
*=
EPSILON_FACTOR
*
zscale
;
f
=
make_real4
(
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
2
][
0
]
+
f
.
y
*
fracToCart
[
2
][
1
]
+
f
.
z
*
fracToCart
[
2
][
2
]),
0
)
;
forceBuffers
[
i
]
-=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
x
*
0x100000000
));
forceBuffers
[
i
+
PADDED_NUM_ATOMS
]
-=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
y
*
0x100000000
));
forceBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
z
*
0x100000000
));
...
...
@@ -807,23 +870,37 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
extern
"C"
__global__
void
computeInducedDipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole_global
,
const
real
*
__restrict__
inducedDipolePolar_global
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
fracDipole
,
const
real
*
__restrict__
fracQuadrupole
,
const
real
*
__restrict__
inducedDipole_global
,
const
real
*
__restrict__
inducedDipolePolar_global
,
const
real
*
__restrict__
phi_global
,
const
real
*
__restrict__
phid_global
,
const
real
*
__restrict__
phip_global
,
const
real
*
__restrict__
phidp_global
,
real
4
invPeriodicBoxSize
)
{
const
real
*
__restrict__
phidp_global
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
multipole
[
10
];
real
inducedDipole
[
3
];
real
inducedDipolePolar
[
3
];
real
cinducedDipole
[
3
],
inducedDipole
[
3
];
real
cinducedDipolePolar
[
3
],
inducedDipolePolar
[
3
];
real
scales
[
3
];
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
const
int
deriv3
[]
=
{
3
,
8
,
9
,
6
,
14
,
16
,
12
,
19
,
17
,
18
};
const
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
const
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
scales
[
0
]
=
xscale
;
scales
[
1
]
=
yscale
;
scales
[
2
]
=
zscale
;
real
energy
=
0
;
__shared__
real
fracToCart
[
3
][
3
];
if
(
threadIdx
.
x
==
0
)
{
fracToCart
[
0
][
0
]
=
GRID_SIZE_X
*
recipBoxVecX
.
x
;
fracToCart
[
1
][
0
]
=
GRID_SIZE_X
*
recipBoxVecY
.
x
;
fracToCart
[
2
][
0
]
=
GRID_SIZE_X
*
recipBoxVecZ
.
x
;
fracToCart
[
0
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecX
.
y
;
fracToCart
[
1
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
;
fracToCart
[
2
][
1
]
=
GRID_SIZE_Y
*
recipBoxVecZ
.
y
;
fracToCart
[
0
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecX
.
z
;
fracToCart
[
1
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecY
.
z
;
fracToCart
[
2
][
2
]
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
;
}
__syncthreads
();
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
// Compute the torque.
...
...
@@ -856,56 +933,62 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
// Compute the force and energy.
multipole
[
1
]
*=
xscale
;
multipole
[
2
]
*=
yscale
;
multipole
[
3
]
*=
zscale
;
multipole
[
4
]
*=
xscale
*
xscale
;
multipole
[
5
]
*=
yscale
*
yscale
;
multipole
[
6
]
*=
zscale
*
zscale
;
multipole
[
7
]
*=
xscale
*
yscale
;
multipole
[
8
]
*=
xscale
*
zscale
;
multipole
[
9
]
*=
yscale
*
zscale
;
inducedDipole
[
0
]
=
inducedDipole_global
[
i
*
3
];
inducedDipole
[
1
]
=
inducedDipole_global
[
i
*
3
+
1
];
inducedDipole
[
2
]
=
inducedDipole_global
[
i
*
3
+
2
];
inducedDipolePolar
[
0
]
=
inducedDipolePolar_global
[
i
*
3
];
inducedDipolePolar
[
1
]
=
inducedDipolePolar_global
[
i
*
3
+
1
];
inducedDipolePolar
[
2
]
=
inducedDipolePolar_global
[
i
*
3
+
2
];
multipole
[
1
]
=
fracDipole
[
i
*
3
];
multipole
[
2
]
=
fracDipole
[
i
*
3
+
1
];
multipole
[
3
]
=
fracDipole
[
i
*
3
+
2
];
multipole
[
4
]
=
fracQuadrupole
[
i
*
6
];
multipole
[
5
]
=
fracQuadrupole
[
i
*
6
+
3
];
multipole
[
6
]
=
fracQuadrupole
[
i
*
6
+
5
];
multipole
[
7
]
=
fracQuadrupole
[
i
*
6
+
1
];
multipole
[
8
]
=
fracQuadrupole
[
i
*
6
+
2
];
multipole
[
9
]
=
fracQuadrupole
[
i
*
6
+
4
];
cinducedDipole
[
0
]
=
inducedDipole_global
[
i
*
3
];
cinducedDipole
[
1
]
=
inducedDipole_global
[
i
*
3
+
1
];
cinducedDipole
[
2
]
=
inducedDipole_global
[
i
*
3
+
2
];
cinducedDipolePolar
[
0
]
=
inducedDipolePolar_global
[
i
*
3
];
cinducedDipolePolar
[
1
]
=
inducedDipolePolar_global
[
i
*
3
+
1
];
cinducedDipolePolar
[
2
]
=
inducedDipolePolar_global
[
i
*
3
+
2
];
// Multiply the dipoles by cartToFrac, which is just the transpose of fracToCart.
inducedDipole
[
0
]
=
cinducedDipole
[
0
]
*
fracToCart
[
0
][
0
]
+
cinducedDipole
[
1
]
*
fracToCart
[
1
][
0
]
+
cinducedDipole
[
2
]
*
fracToCart
[
2
][
0
];
inducedDipole
[
1
]
=
cinducedDipole
[
0
]
*
fracToCart
[
0
][
1
]
+
cinducedDipole
[
1
]
*
fracToCart
[
1
][
1
]
+
cinducedDipole
[
2
]
*
fracToCart
[
2
][
1
];
inducedDipole
[
2
]
=
cinducedDipole
[
0
]
*
fracToCart
[
0
][
2
]
+
cinducedDipole
[
1
]
*
fracToCart
[
1
][
2
]
+
cinducedDipole
[
2
]
*
fracToCart
[
2
][
2
];
inducedDipolePolar
[
0
]
=
cinducedDipolePolar
[
0
]
*
fracToCart
[
0
][
0
]
+
cinducedDipolePolar
[
1
]
*
fracToCart
[
1
][
0
]
+
cinducedDipolePolar
[
2
]
*
fracToCart
[
2
][
0
];
inducedDipolePolar
[
1
]
=
cinducedDipolePolar
[
0
]
*
fracToCart
[
0
][
1
]
+
cinducedDipolePolar
[
1
]
*
fracToCart
[
1
][
1
]
+
cinducedDipolePolar
[
2
]
*
fracToCart
[
2
][
1
];
inducedDipolePolar
[
2
]
=
cinducedDipolePolar
[
0
]
*
fracToCart
[
0
][
2
]
+
cinducedDipolePolar
[
1
]
*
fracToCart
[
1
][
2
]
+
cinducedDipolePolar
[
2
]
*
fracToCart
[
2
][
2
];
const
real
*
phi
=
&
phi_global
[
20
*
i
];
const
real
*
phip
=
&
phip_global
[
10
*
i
];
const
real
*
phid
=
&
phid_global
[
10
*
i
];
real4
f
=
make_real4
(
0
,
0
,
0
,
0
);
energy
+=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
inducedDipole
[
0
]
*
phi
[
1
];
energy
+=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
inducedDipole
[
1
]
*
phi
[
2
];
energy
+=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
inducedDipole
[
2
]
*
phi
[
3
];
energy
+=
inducedDipole
[
0
]
*
phi
[
1
];
energy
+=
inducedDipole
[
1
]
*
phi
[
2
];
energy
+=
inducedDipole
[
2
]
*
phi
[
3
];
for
(
int
k
=
0
;
k
<
3
;
k
++
)
{
int
j1
=
deriv1
[
k
+
1
];
int
j2
=
deriv2
[
k
+
1
];
int
j3
=
deriv3
[
k
+
1
];
f
.
x
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j1
]
*
(
scales
[
k
]
/
xscale
)
;
f
.
y
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j2
]
*
(
scales
[
k
]
/
yscale
)
;
f
.
z
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j3
]
*
(
scales
[
k
]
/
zscale
)
;
f
.
x
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j1
];
f
.
y
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j2
];
f
.
z
+=
(
inducedDipole
[
k
]
+
inducedDipolePolar
[
k
])
*
phi
[
j3
];
#ifndef DIRECT_POLARIZATION
f
.
x
+=
(
inducedDipole
[
k
]
*
phip
[
j1
]
+
inducedDipolePolar
[
k
]
*
phid
[
j1
])
*
(
scales
[
k
]
/
xscale
)
;
f
.
y
+=
(
inducedDipole
[
k
]
*
phip
[
j2
]
+
inducedDipolePolar
[
k
]
*
phid
[
j2
])
*
(
scales
[
k
]
/
yscale
)
;
f
.
z
+=
(
inducedDipole
[
k
]
*
phip
[
j3
]
+
inducedDipolePolar
[
k
]
*
phid
[
j3
])
*
(
scales
[
k
]
/
zscale
)
;
f
.
x
+=
(
inducedDipole
[
k
]
*
phip
[
j1
]
+
inducedDipolePolar
[
k
]
*
phid
[
j1
]);
f
.
y
+=
(
inducedDipole
[
k
]
*
phip
[
j2
]
+
inducedDipolePolar
[
k
]
*
phid
[
j2
]);
f
.
z
+=
(
inducedDipole
[
k
]
*
phip
[
j3
]
+
inducedDipolePolar
[
k
]
*
phid
[
j3
]);
#endif
}
f
.
x
*=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
f
.
y
*=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
f
.
z
*=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
f
.
x
+=
multipole
[
k
]
*
phidp
[
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
phidp
[
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
phidp
[
deriv3
[
k
]];
}
f
.
x
*=
0.5
f
*
EPSILON_FACTOR
*
xscale
;
f
.
y
*=
0.5
f
*
EPSILON_FACTOR
*
yscale
;
f
.
z
*=
0.5
f
*
EPSILON_FACTOR
*
zscale
;
f
=
make_real4
(
0.5
f
*
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
0.5
f
*
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
0.5
f
*
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
2
][
0
]
+
f
.
y
*
fracToCart
[
2
][
1
]
+
f
.
z
*
fracToCart
[
2
][
2
]),
0
)
;
forceBuffers
[
i
]
-=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
x
*
0x100000000
));
forceBuffers
[
i
+
PADDED_NUM_ATOMS
]
-=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
y
*
0x100000000
));
forceBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
z
*
0x100000000
));
...
...
@@ -914,10 +997,10 @@ extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict_
}
extern
"C"
__global__
void
recordInducedFieldDipoles
(
const
real
*
__restrict__
phid
,
real
*
const
__restrict__
phip
,
long
long
*
__restrict__
inducedField
,
long
long
*
__restrict__
inducedFieldPolar
,
real
4
invPeriodicBoxSize
)
{
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0x100000000
;
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0x100000000
;
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0x100000000
;
long
long
*
__restrict__
inducedField
,
long
long
*
__restrict__
inducedFieldPolar
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real
xscale
=
GRID_SIZE_X
*
recipBoxVecX
.
x
*
0x100000000
;
real
yscale
=
GRID_SIZE_Y
*
recipBoxVecY
.
y
*
0x100000000
;
real
zscale
=
GRID_SIZE_Z
*
recipBoxVecZ
.
z
*
0x100000000
;
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
inducedField
[
i
]
-=
(
long
long
)
(
xscale
*
phid
[
10
*
i
+
1
]);
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
(
long
long
)
(
yscale
*
phid
[
10
*
i
+
2
]);
...
...
plugins/amoeba/platforms/cuda/src/kernels/multipoles.cu
View file @
c83f2a12
...
...
@@ -448,7 +448,7 @@ extern "C" __global__ void mapTorqueToForce(unsigned long long* __restrict__ for
*/
extern
"C"
__global__
void
computePotentialAtPoints
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real4
*
__restrict__
points
,
real
*
__restrict__
potential
,
int
numPoints
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
real
*
__restrict__
potential
,
int
numPoints
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
extern
__shared__
real4
localPosq
[];
real3
*
localDipole
=
(
real3
*
)
&
localPosq
[
blockDim
.
x
];
real3
*
localInducedDipole
=
(
real3
*
)
&
localDipole
[
blockDim
.
x
];
...
...
@@ -481,9 +481,7 @@ extern "C" __global__ void computePotentialAtPoints(const real4* __restrict__ po
for
(
int
i
=
0
;
i
<
end
;
i
++
)
{
real3
delta
=
trimTo3
(
localPosq
[
i
]
-
pointPos
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
dot
(
delta
,
delta
);
real
rInv
=
RSQRT
(
r2
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/pmeMultipoleElectrostatics.cu
View file @
c83f2a12
...
...
@@ -65,7 +65,7 @@ __device__ float computePScaleFactor(uint2 covalent, unsigned int polarizationGr
}
__device__
void
computeOneInteraction
(
AtomData
&
atom1
,
AtomData
&
atom2
,
bool
hasExclusions
,
float
dScale
,
float
pScale
,
float
mScale
,
float
forceFactor
,
real
&
energy
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
real
&
energy
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
delta
;
delta
.
x
=
atom2
.
pos
.
x
-
atom1
.
pos
.
x
;
delta
.
y
=
atom2
.
pos
.
y
-
atom1
.
pos
.
y
;
...
...
@@ -73,10 +73,7 @@ __device__ void computeOneInteraction(AtomData& atom1, AtomData& atom2, bool has
// periodic box
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
w
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
delta
.
w
>
CUTOFF_SQUARED
)
return
;
...
...
@@ -203,7 +200,9 @@ extern "C" __global__ void computeElectrostatics(
const
real4
*
__restrict__
posq
,
const
uint2
*
__restrict__
covalentFlags
,
const
unsigned
int
*
__restrict__
polarizationGroupFlags
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#endif
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
const
float2
*
__restrict__
dampingAndThole
)
{
...
...
@@ -255,7 +254,7 @@ extern "C" __global__ void computeElectrostatics(
float
d
=
computeDScaleFactor
(
polarizationGroup
,
j
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
,
j
);
float
m
=
computeMScaleFactor
(
covalent
,
j
);
computeOneInteraction
(
data
,
localData
[
tbx
+
j
],
true
,
d
,
p
,
m
,
0.5
f
,
energy
,
periodicBoxSize
,
invPeriodicBoxSize
);
computeOneInteraction
(
data
,
localData
[
tbx
+
j
],
true
,
d
,
p
,
m
,
0.5
f
,
energy
,
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
);
}
}
if
(
atom1
<
NUM_ATOMS
)
...
...
@@ -283,7 +282,7 @@ extern "C" __global__ void computeElectrostatics(
float
d
=
computeDScaleFactor
(
polarizationGroup
,
tj
);
float
p
=
computePScaleFactor
(
covalent
,
polarizationGroup
,
tj
);
float
m
=
computeMScaleFactor
(
covalent
,
tj
);
computeOneInteraction
(
data
,
localData
[
tbx
+
tj
],
true
,
d
,
p
,
m
,
1
,
energy
,
periodicBoxSize
,
invPeriodicBoxSize
);
computeOneInteraction
(
data
,
localData
[
tbx
+
tj
],
true
,
d
,
p
,
m
,
1
,
energy
,
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
);
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
@@ -386,7 +385,7 @@ extern "C" __global__ void computeElectrostatics(
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
atomIndices
[
tbx
+
tj
];
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
computeOneInteraction
(
data
,
localData
[
tbx
+
tj
],
false
,
1
,
1
,
1
,
1
,
energy
,
periodicBoxSize
,
invPeriodicBoxSize
);
computeOneInteraction
(
data
,
localData
[
tbx
+
tj
],
false
,
1
,
1
,
1
,
1
,
energy
,
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
);
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment