Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f352d116
"wrappers/python/vscode:/vscode.git/clone" did not exist on "9e82c890505badd5023b60d8efa5723ca72b49b9"
Commit
f352d116
authored
Aug 18, 2012
by
Peter Eastman
Browse files
Continuing to convert AmoebaMultipoleForce: PME with direct polarization now works
parent
7a60fd73
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
486 additions
and
501 deletions
+486
-501
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
+47
-56
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
+2
-1
plugins/amoeba/platforms/cuda2/src/kernels/multipolePme.cu
plugins/amoeba/platforms/cuda2/src/kernels/multipolePme.cu
+437
-444
No files found.
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.cpp
View file @
f352d116
...
...
@@ -1166,11 +1166,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeUpdateBsplinesKernel
=
cu
.
getKernel
(
module
,
"updateBsplines"
);
pmeAtomRangeKernel
=
cu
.
getKernel
(
module
,
"findAtomRangeForGrid"
);
pmeSpreadFixedMultipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadFixedMultipoles"
);
pmeSpreadInducedDipolesKernel
=
cu
.
getKernel
(
module
,
"gridSpreadInducedDipoles"
);
pmeConvolutionKernel
=
cu
.
getKernel
(
module
,
"reciprocalConvolution"
);
pmeFixedPotentialKernel
=
cu
.
getKernel
(
module
,
"computeFixedPotentialFromGrid"
);
pmeInducedPotentialKernel
=
cu
.
getKernel
(
module
,
"computeInducedPotentialFromGrid"
);
pmeFixedForceKernel
=
cu
.
getKernel
(
module
,
"computeFixedMultipoleForceAndEnergy"
);
//
pmeIn
terpolate
ForceKernel = cu.getKernel(module, "
gridInter
pol
at
eForce");
//
pme
FinishSpreadCharge
Kernel = cu.getKernel(module, "
finishSpreadCharge
");
pmeIn
duced
ForceKernel
=
cu
.
getKernel
(
module
,
"
computeInducedDi
poleForce
AndEnergy
"
);
pme
RecordInducedFieldDipoles
Kernel
=
cu
.
getKernel
(
module
,
"
recordInducedFieldDipoles
"
);
// cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
// Create required data structures.
...
...
@@ -1415,12 +1417,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
// Map torques to force.
void
*
mapTorqueArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
multipoleParticles
->
getDevicePointer
()};
cu
.
executeKernel
(
mapTorqueKernel
,
mapTorqueArgs
,
cu
.
getNumAtoms
());
}
else
{
//
Compute induced dipoles
.
//
Reciprocal space calculation
.
unsigned
int
maxTiles
=
nb
.
getInteractingTiles
().
getSize
();
void
*
pmeUpdateBsplinesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeIgrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
...
...
@@ -1433,9 +1438,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu
.
executeKernel
(
pmeAtomRangeKernel
,
pmeAtomRangeArgs
,
cu
.
getNumAtoms
(),
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
PmeOrder
*
PmeOrder
*
elementSize
);
void
*
pmeSpreadFixedMultipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
&
pmeAtomRange
->
getDevicePointer
(),
&
pmeTheta1
->
getDevicePointer
(),
&
pmeTheta2
->
getDevicePointer
(),
&
pmeTheta3
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeSpreadFixedMultipolesKernel
,
pmeSpreadFixedMultipolesArgs
,
cu
.
getNumAtoms
(),
cu
.
ThreadBlockSize
,
cu
.
ThreadBlockSize
*
PmeOrder
*
PmeOrder
*
elementSize
);
&
pmeTheta1
->
getDevicePointer
(),
&
pmeTheta2
->
getDevicePointer
(),
&
pmeTheta3
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeSpreadFixedMultipolesKernel
,
pmeSpreadFixedMultipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
else
...
...
@@ -1448,23 +1452,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
void
*
pmeFixedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
field
->
getDevicePointer
(),
&
pmeIgrid
->
getDevicePointer
(),
&
pmeTheta1
->
getDevicePointer
(),
&
pmeTheta2
->
getDevicePointer
(),
&
pmeTheta3
->
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
&
fieldPolar
->
getDevicePointer
(),
&
pmeIgrid
->
getDevicePointer
(),
&
pmeTheta1
->
getDevicePointer
(),
&
pmeTheta2
->
getDevicePointer
(),
&
pmeTheta3
->
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeFixedPotentialKernel
,
pmeFixedPotentialArgs
,
cu
.
getNumAtoms
());
void
*
pmeFixedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
&
pmePhi
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeFixedForceKernel
,
pmeFixedForceArgs
,
cu
.
getNumAtoms
());
printf
(
"reciprocal:
\n
"
);
vector
<
long
long
>
f
;
printf
(
"force
\n
"
);
cu
.
getForce
().
download
(
f
);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
printf
(
"%d: %g %g %g
\n
"
,
i
,
f
[
i
]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
cu
.
getPaddedNumAtoms
()]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
cu
.
getPaddedNumAtoms
()
*
2
]
/
(
double
)
0xFFFFFFFF
);
// printf("torque\n");
// torque->download(f);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF);
// Direct space calculation.
void
*
computeFixedFieldArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
nb
.
getExclusionIndices
().
getDevicePointer
(),
&
nb
.
getExclusionRowIndices
().
getDevicePointer
(),
&
covalentFlags
->
getDevicePointer
(),
&
polarizationGroupFlags
->
getDevicePointer
(),
&
startTileIndex
,
&
numTileIndices
,
...
...
@@ -1475,38 +1472,29 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void
*
recordInducedDipolesArgs
[]
=
{
&
field
->
getDevicePointer
(),
&
fieldPolar
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
polarizability
->
getDevicePointer
()};
cu
.
executeKernel
(
recordInducedDipolesKernel
,
recordInducedDipolesArgs
,
cu
.
getNumAtoms
());
printf
(
"direct:
\n
"
);
printf
(
"force
\n
"
);
cu
.
getForce
().
download
(
f
);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
printf
(
"%d: %g %g %g
\n
"
,
i
,
f
[
i
]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
cu
.
getPaddedNumAtoms
()]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
cu
.
getPaddedNumAtoms
()
*
2
]
/
(
double
)
0xFFFFFFFF
);
// printf("torque\n");
// torque->download(f);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF);
// vector<float> d, dp;
// printf("phi\n");
// pmePhi->download(d);
// for (int i = 0; i < d.size(); i++)
// printf("%d: %g\n", i, d[i]);
// printf("dipoles\n");
// labFrameDipoles->download(d);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2]);
// printf("quadrupoles\n");
// labFrameQuadrupoles->download(d);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g %g %g %g\n", i, d[5*i], d[5*i+1], d[5*i+2], d[5*i+3], d[5*i+4], -(d[5*i]+d[5*i+3]));
// printf("induced dipoles\n");
// inducedDipole->download(d);
// inducedDipolePolar->download(dp);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g, %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2], dp[3*i], dp[3*i+1], dp[3*i+2]);
// printf("positions\n");
// vector<float4> p;
// cu.getPosq().download(p);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g %g\n", i, p[i].x, p[i].y, p[i].z, p[i].w);
// Reciprocal space calculation for the induced dipoles.
void
*
pmeSpreadInducedDipolesArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmeGrid
->
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
&
pmeAtomRange
->
getDevicePointer
(),
&
pmeTheta1
->
getDevicePointer
(),
&
pmeTheta2
->
getDevicePointer
(),
&
pmeTheta3
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeSpreadInducedDipolesKernel
,
pmeSpreadInducedDipolesArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_FORWARD
);
cu
.
executeKernel
(
pmeConvolutionKernel
,
pmeConvolutionArgs
,
cu
.
getNumAtoms
());
if
(
cu
.
getUseDoublePrecision
())
cufftExecZ2Z
(
fft
,
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
(
double2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
else
cufftExecC2C
(
fft
,
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
(
float2
*
)
pmeGrid
->
getDevicePointer
(),
CUFFT_INVERSE
);
void
*
pmeInducedPotentialArgs
[]
=
{
&
pmeGrid
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
&
pmeIgrid
->
getDevicePointer
(),
&
pmeTheta1
->
getDevicePointer
(),
&
pmeTheta2
->
getDevicePointer
(),
&
pmeTheta3
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeInducedPotentialKernel
,
pmeInducedPotentialArgs
,
cu
.
getNumAtoms
());
// void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
// vector<float2> errors;
...
...
@@ -1541,11 +1529,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
dampingAndThole
->
getDevicePointer
()};
cu
.
executeKernel
(
electrostaticsKernel
,
electrostaticsArgs
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
printf
(
"electrostatic:
\n
"
);
printf
(
"force
\n
"
);
cu
.
getForce
().
download
(
f
);
for
(
int
i
=
0
;
i
<
cu
.
getNumAtoms
();
i
++
)
printf
(
"%d: %g %g %g
\n
"
,
i
,
f
[
i
]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
cu
.
getPaddedNumAtoms
()]
/
(
double
)
0xFFFFFFFF
,
f
[
i
+
cu
.
getPaddedNumAtoms
()
*
2
]
/
(
double
)
0xFFFFFFFF
);
void
*
pmeInducedForceArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
labFrameDipoles
->
getDevicePointer
(),
&
labFrameQuadrupoles
->
getDevicePointer
(),
&
inducedDipole
->
getDevicePointer
(),
&
inducedDipolePolar
->
getDevicePointer
(),
&
pmePhi
->
getDevicePointer
(),
&
pmePhid
->
getDevicePointer
(),
&
pmePhip
->
getDevicePointer
(),
&
pmePhidp
->
getDevicePointer
(),
cu
.
getInvPeriodicBoxSizePointer
()};
cu
.
executeKernel
(
pmeInducedForceKernel
,
pmeInducedForceArgs
,
cu
.
getNumAtoms
());
// Map torques to force.
void
*
mapTorqueArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
torque
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
multipoleParticles
->
getDevicePointer
()};
cu
.
executeKernel
(
mapTorqueKernel
,
mapTorqueArgs
,
cu
.
getNumAtoms
());
...
...
plugins/amoeba/platforms/cuda2/src/AmoebaCudaKernels.h
View file @
f352d116
...
...
@@ -425,7 +425,8 @@ private:
CudaSort
*
sort
;
cufftHandle
fft
;
CUfunction
computeMomentsKernel
,
recordInducedDipolesKernel
,
computeFixedFieldKernel
,
computeInducedFieldKernel
,
updateInducedFieldKernel
,
electrostaticsKernel
,
mapTorqueKernel
;
CUfunction
pmeUpdateBsplinesKernel
,
pmeAtomRangeKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeConvolutionKernel
,
pmeFixedPotentialKernel
,
pmeFixedForceKernel
;
CUfunction
pmeUpdateBsplinesKernel
,
pmeAtomRangeKernel
,
pmeSpreadFixedMultipolesKernel
,
pmeSpreadInducedDipolesKernel
,
pmeConvolutionKernel
,
pmeFixedPotentialKernel
,
pmeInducedPotentialKernel
;
CUfunction
pmeFixedForceKernel
,
pmeInducedForceKernel
,
pmeRecordInducedFieldDipolesKernel
;
static
const
int
PmeOrder
=
5
;
};
...
...
plugins/amoeba/platforms/cuda2/src/kernels/multipolePme.cu
View file @
f352d116
...
...
@@ -161,8 +161,7 @@ extern "C" __global__ void findAtomRangeForGrid(int2* __restrict__ pmeAtomGridIn
}
extern
"C"
__global__
void
gridSpreadFixedMultipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
int
*
__restrict__
pmeAtomRange
,
const
real4
*
__restrict__
theta1
,
const
real4
*
__restrict__
theta2
,
const
real4
*
__restrict__
theta3
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
const
real4
*
__restrict__
theta1
,
const
real4
*
__restrict__
theta2
,
const
real4
*
__restrict__
theta3
,
real4
invPeriodicBoxSize
)
{
const
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
...
...
@@ -248,88 +247,90 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
}
}
//extern "C" __global__ void kGridSpreadInducedDipoles_kernel() {
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
// unsigned int numThreads = gridDim.x*blockDim.x;
// for (int gridIndex = blockIdx.x*blockDim.x+threadIdx.x; gridIndex < numGridPoints; gridIndex += numThreads) {
// int3 gridPoint;
// gridPoint.x = gridIndex/(GRID_SIZE_Y*GRID_SIZE_Z);
// int remainder = gridIndex-gridPoint.x*GRID_SIZE_Y*GRID_SIZE_Z;
// gridPoint.y = remainder/GRID_SIZE_Z;
// gridPoint.z = remainder-gridPoint.y*GRID_SIZE_Z;
// real2 result = make_real2(0, 0);
// for (int ix = 0; ix < PME_ORDER; ++ix) {
// int x = gridPoint.x-ix+(gridPoint.x >= ix ? 0 : GRID_SIZE_X);
// for (int iy = 0; iy < PME_ORDER; ++iy) {
// int y = gridPoint.y-iy+(gridPoint.y >= iy ? 0 : GRID_SIZE_Y);
// int z1 = gridPoint.z-PME_ORDER+1;
// z1 += (z1 >= 0 ? 0 : GRID_SIZE_Z);
// int z2 = (z1 < gridPoint.z ? gridPoint.z : GRID_SIZE_Z-1);
// int gridIndex1 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z1;
// int gridIndex2 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z2;
// int firstAtom = pmeAtomRange[gridIndex1];
// int lastAtom = pmeAtomRange[gridIndex2+1];
// for (int i = firstAtom; i < lastAtom; ++i) {
// int2 atomData = pmeAtomGridIndex[i];
// int atomIndex = atomData.x;
// int z = atomData.y;
// int iz = gridPoint.z-z+(gridPoint.z >= z ? 0 : GRID_SIZE_Z);
// if (iz >= GRID_SIZE_Z)
// iz -= GRID_SIZE_Z;
// real inducedDipoleX = xscale*cAmoebaSim.pInducedDipole[atomIndex*3];
// real inducedDipoleY = yscale*cAmoebaSim.pInducedDipole[atomIndex*3+1];
// real inducedDipoleZ = zscale*cAmoebaSim.pInducedDipole[atomIndex*3+2];
// real inducedDipolePolarX = xscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3];
// real inducedDipolePolarY = yscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+1];
// real inducedDipolePolarZ = zscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+2];
// real4 t = theta1[atomIndex*PME_ORDER+ix];
// real4 u = theta2[atomIndex*PME_ORDER+iy];
// real4 v = theta3[atomIndex*PME_ORDER+iz];
// real term01 = inducedDipoleY*u.y*v.x + inducedDipoleZ*u.x*v.y;
// real term11 = inducedDipoleX*u.x*v.x;
// real term02 = inducedDipolePolarY*u.y*v.x + inducedDipolePolarZ*u.x*v.y;
// real term12 = inducedDipolePolarX*u.x*v.x;
// result.x += term01*t.x + term11*t.y;
// result.y += term02*t.x + term12*t.y;
// }
// if (z1 > gridPoint.z) {
// gridIndex1 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z;
// gridIndex2 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+gridPoint.z;
// firstAtom = pmeAtomRange[gridIndex1];
// lastAtom = pmeAtomRange[gridIndex2+1];
// for (int i = firstAtom; i < lastAtom; ++i) {
// int2 atomData = pmeAtomGridIndex[i];
// int atomIndex = atomData.x;
// int z = atomData.y;
// int iz = gridPoint.z-z+(gridPoint.z >= z ? 0 : GRID_SIZE_Z);
// if (iz >= GRID_SIZE_Z)
// iz -= GRID_SIZE_Z;
// real inducedDipoleX = xscale*cAmoebaSim.pInducedDipole[atomIndex*3];
// real inducedDipoleY = yscale*cAmoebaSim.pInducedDipole[atomIndex*3+1];
// real inducedDipoleZ = zscale*cAmoebaSim.pInducedDipole[atomIndex*3+2];
// real inducedDipolePolarX = xscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3];
// real inducedDipolePolarY = yscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+1];
// real inducedDipolePolarZ = zscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+2];
// real4 t = theta1[atomIndex*PME_ORDER+ix];
// real4 u = theta2[atomIndex*PME_ORDER+iy];
// real4 v = theta3[atomIndex*PME_ORDER+iz];
// real term01 = inducedDipoleY*u.y*v.x + inducedDipoleZ*u.x*v.y;
// real term11 = inducedDipoleX*u.x*v.x;
// real term02 = inducedDipolePolarY*u.y*v.x + inducedDipolePolarZ*u.x*v.y;
// real term12 = inducedDipolePolarX*u.x*v.x;
// result.x += term01*t.x + term11*t.y;
// result.y += term02*t.x + term12*t.y;
// }
// }
// }
// }
// pmeGrid[gridIndex] = result;
// }
//}
//
extern
"C"
__global__
void
gridSpreadInducedDipoles
(
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
inducedDipole
,
const
real
*
__restrict__
inducedDipolePolar
,
real2
*
__restrict__
pmeGrid
,
int2
*
__restrict__
pmeAtomGridIndex
,
int
*
__restrict__
pmeAtomRange
,
const
real4
*
__restrict__
theta1
,
const
real4
*
__restrict__
theta2
,
const
real4
*
__restrict__
theta3
,
real4
invPeriodicBoxSize
)
{
const
real
xscale
=
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
;
const
real
yscale
=
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
;
const
real
zscale
=
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
;
unsigned
int
numGridPoints
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
unsigned
int
numThreads
=
gridDim
.
x
*
blockDim
.
x
;
for
(
int
gridIndex
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
gridIndex
<
numGridPoints
;
gridIndex
+=
numThreads
)
{
int3
gridPoint
;
gridPoint
.
x
=
gridIndex
/
(
GRID_SIZE_Y
*
GRID_SIZE_Z
);
int
remainder
=
gridIndex
-
gridPoint
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
gridPoint
.
y
=
remainder
/
GRID_SIZE_Z
;
gridPoint
.
z
=
remainder
-
gridPoint
.
y
*
GRID_SIZE_Z
;
real2
result
=
make_real2
(
0
,
0
);
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
++
ix
)
{
int
x
=
gridPoint
.
x
-
ix
+
(
gridPoint
.
x
>=
ix
?
0
:
GRID_SIZE_X
);
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
++
iy
)
{
int
y
=
gridPoint
.
y
-
iy
+
(
gridPoint
.
y
>=
iy
?
0
:
GRID_SIZE_Y
);
int
z1
=
gridPoint
.
z
-
PME_ORDER
+
1
;
z1
+=
(
z1
>=
0
?
0
:
GRID_SIZE_Z
);
int
z2
=
(
z1
<
gridPoint
.
z
?
gridPoint
.
z
:
GRID_SIZE_Z
-
1
);
int
gridIndex1
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
z1
;
int
gridIndex2
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
z2
;
int
firstAtom
=
pmeAtomRange
[
gridIndex1
];
int
lastAtom
=
pmeAtomRange
[
gridIndex2
+
1
];
for
(
int
i
=
firstAtom
;
i
<
lastAtom
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
GRID_SIZE_Z
);
if
(
iz
>=
GRID_SIZE_Z
)
iz
-=
GRID_SIZE_Z
;
real
inducedDipoleX
=
xscale
*
inducedDipole
[
atomIndex
*
3
];
real
inducedDipoleY
=
yscale
*
inducedDipole
[
atomIndex
*
3
+
1
];
real
inducedDipoleZ
=
zscale
*
inducedDipole
[
atomIndex
*
3
+
2
];
real
inducedDipolePolarX
=
xscale
*
inducedDipolePolar
[
atomIndex
*
3
];
real
inducedDipolePolarY
=
yscale
*
inducedDipolePolar
[
atomIndex
*
3
+
1
];
real
inducedDipolePolarZ
=
zscale
*
inducedDipolePolar
[
atomIndex
*
3
+
2
];
real4
t
=
theta1
[
atomIndex
*
PME_ORDER
+
ix
];
real4
u
=
theta2
[
atomIndex
*
PME_ORDER
+
iy
];
real4
v
=
theta3
[
atomIndex
*
PME_ORDER
+
iz
];
real
term01
=
inducedDipoleY
*
u
.
y
*
v
.
x
+
inducedDipoleZ
*
u
.
x
*
v
.
y
;
real
term11
=
inducedDipoleX
*
u
.
x
*
v
.
x
;
real
term02
=
inducedDipolePolarY
*
u
.
y
*
v
.
x
+
inducedDipolePolarZ
*
u
.
x
*
v
.
y
;
real
term12
=
inducedDipolePolarX
*
u
.
x
*
v
.
x
;
result
.
x
+=
term01
*
t
.
x
+
term11
*
t
.
y
;
result
.
y
+=
term02
*
t
.
x
+
term12
*
t
.
y
;
}
if
(
z1
>
gridPoint
.
z
)
{
gridIndex1
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
;
gridIndex2
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
gridPoint
.
z
;
firstAtom
=
pmeAtomRange
[
gridIndex1
];
lastAtom
=
pmeAtomRange
[
gridIndex2
+
1
];
for
(
int
i
=
firstAtom
;
i
<
lastAtom
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
GRID_SIZE_Z
);
if
(
iz
>=
GRID_SIZE_Z
)
iz
-=
GRID_SIZE_Z
;
real
inducedDipoleX
=
xscale
*
inducedDipole
[
atomIndex
*
3
];
real
inducedDipoleY
=
yscale
*
inducedDipole
[
atomIndex
*
3
+
1
];
real
inducedDipoleZ
=
zscale
*
inducedDipole
[
atomIndex
*
3
+
2
];
real
inducedDipolePolarX
=
xscale
*
inducedDipolePolar
[
atomIndex
*
3
];
real
inducedDipolePolarY
=
yscale
*
inducedDipolePolar
[
atomIndex
*
3
+
1
];
real
inducedDipolePolarZ
=
zscale
*
inducedDipolePolar
[
atomIndex
*
3
+
2
];
real4
t
=
theta1
[
atomIndex
*
PME_ORDER
+
ix
];
real4
u
=
theta2
[
atomIndex
*
PME_ORDER
+
iy
];
real4
v
=
theta3
[
atomIndex
*
PME_ORDER
+
iz
];
real
term01
=
inducedDipoleY
*
u
.
y
*
v
.
x
+
inducedDipoleZ
*
u
.
x
*
v
.
y
;
real
term11
=
inducedDipoleX
*
u
.
x
*
v
.
x
;
real
term02
=
inducedDipolePolarY
*
u
.
y
*
v
.
x
+
inducedDipolePolarZ
*
u
.
x
*
v
.
y
;
real
term12
=
inducedDipolePolarX
*
u
.
x
*
v
.
x
;
result
.
x
+=
term01
*
t
.
x
+
term11
*
t
.
y
;
result
.
y
+=
term02
*
t
.
x
+
term12
*
t
.
y
;
}
}
}
}
pmeGrid
[
gridIndex
]
=
result
;
}
}
extern
"C"
__global__
void
reciprocalConvolution
(
real2
*
__restrict__
pmeGrid
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
...
...
@@ -362,7 +363,7 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
}
extern
"C"
__global__
void
computeFixedPotentialFromGrid
(
const
real2
*
__restrict__
pmeGrid
,
real
*
__restrict__
phi
,
long
long
*
__restrict__
fieldBuffers
,
const
int4
*
__restrict__
igrid
,
const
real4
*
__restrict__
theta1
,
long
long
*
__restrict__
fieldBuffers
,
long
long
*
__restrict__
fieldPolarBuffers
,
const
int4
*
__restrict__
igrid
,
const
real4
*
__restrict__
theta1
,
const
real4
*
__restrict__
theta2
,
const
real4
*
__restrict__
theta3
,
const
real
*
__restrict__
labFrameDipole
,
real4
invPeriodicBoxSize
)
{
// extract the permanent multipole field at each site
...
...
@@ -468,216 +469,224 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi
[
20
*
m
+
18
]
=
tuv012
;
phi
[
20
*
m
+
19
]
=
tuv111
;
real
dipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT
(
M_PI
);
fieldBuffers
[
m
]
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
]
-
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
tuv100
)
*
0xFFFFFFFF
);
fieldBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
1
]
-
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
tuv010
)
*
0xFFFFFFFF
);
fieldBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
2
]
-
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
tuv001
)
*
0xFFFFFFFF
);
long
long
fieldx
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
]
-
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
tuv100
)
*
0xFFFFFFFF
);
fieldBuffers
[
m
]
=
fieldx
;
fieldPolarBuffers
[
m
]
=
fieldx
;
long
long
fieldy
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
1
]
-
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
tuv010
)
*
0xFFFFFFFF
);
fieldBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
fieldPolarBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
long
long
fieldz
=
(
long
long
)
((
dipoleScale
*
labFrameDipole
[
m
*
3
+
2
]
-
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
tuv001
)
*
0xFFFFFFFF
);
fieldBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
fieldPolarBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
}
}
//extern "C" __global__ void kComputeInducedPotentialFromGrid_kernel() {
// // extract the induced dipole field at each site
//
// for (int m = blockIdx.x*blockDim.x+threadIdx.x; m < NUM_ATOMS; m += blockDim.x*gridDim.x) {
// int4 gridPoint = igrid[m];
// real tuv100_1 = 0;
// real tuv010_1 = 0;
// real tuv001_1 = 0;
// real tuv200_1 = 0;
// real tuv020_1 = 0;
// real tuv002_1 = 0;
// real tuv110_1 = 0;
// real tuv101_1 = 0;
// real tuv011_1 = 0;
// real tuv100_2 = 0;
// real tuv010_2 = 0;
// real tuv001_2 = 0;
// real tuv200_2 = 0;
// real tuv020_2 = 0;
// real tuv002_2 = 0;
// real tuv110_2 = 0;
// real tuv101_2 = 0;
// real tuv011_2 = 0;
// real tuv000 = 0;
// real tuv001 = 0;
// real tuv010 = 0;
// real tuv100 = 0;
// real tuv200 = 0;
// real tuv020 = 0;
// real tuv002 = 0;
// real tuv110 = 0;
// real tuv101 = 0;
// real tuv011 = 0;
// real tuv300 = 0;
// real tuv030 = 0;
// real tuv003 = 0;
// real tuv210 = 0;
// real tuv201 = 0;
// real tuv120 = 0;
// real tuv021 = 0;
// real tuv102 = 0;
// real tuv012 = 0;
// real tuv111 = 0;
// for (int iz = 0; iz < PME_ORDER; iz++) {
// int k = gridPoint.z+iz-(gridPoint.z+iz >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
// real4 v = theta3[m*PME_ORDER+iz];
// real tu00_1 = 0;
// real tu01_1 = 0;
// real tu10_1 = 0;
// real tu20_1 = 0;
// real tu11_1 = 0;
// real tu02_1 = 0;
// real tu00_2 = 0;
// real tu01_2 = 0;
// real tu10_2 = 0;
// real tu20_2 = 0;
// real tu11_2 = 0;
// real tu02_2 = 0;
// real tu00 = 0;
// real tu10 = 0;
// real tu01 = 0;
// real tu20 = 0;
// real tu11 = 0;
// real tu02 = 0;
// real tu30 = 0;
// real tu21 = 0;
// real tu12 = 0;
// real tu03 = 0;
// for (int iy = 0; iy < PME_ORDER; iy++) {
// int j = gridPoint.y+iy-(gridPoint.y+iy >= GRID_SIZE_Y ? GRID_SIZE_Y : 0);
// real4 u = theta2[m*PME_ORDER+iy];
// real t0_1 = 0;
// real t1_1 = 0;
// real t2_1 = 0;
// real t0_2 = 0;
// real t1_2 = 0;
// real t2_2 = 0;
// real t3 = 0;
// for (int ix = 0; ix < PME_ORDER; ix++) {
// int i = gridPoint.x+ix-(gridPoint.x+ix >= GRID_SIZE_X ? GRID_SIZE_X : 0);
// int gridIndex = i*GRID_SIZE_Y*GRID_SIZE_Z + j*GRID_SIZE_Z + k;
// real2 tq = pmeGrid[gridIndex];
// real4 tadd = theta1[m*PME_ORDER+ix];
// t0_1 += tq.x*tadd.x;
// t1_1 += tq.x*tadd.y;
// t2_1 += tq.x*tadd.z;
// t0_2 += tq.y*tadd.x;
// t1_2 += tq.y*tadd.y;
// t2_2 += tq.y*tadd.z;
// t3 += (tq.x+tq.y)*tadd.w;
// }
// tu00_1 += t0_1*u.x;
// tu10_1 += t1_1*u.x;
// tu01_1 += t0_1*u.y;
// tu20_1 += t2_1*u.x;
// tu11_1 += t1_1*u.y;
// tu02_1 += t0_1*u.z;
// tu00_2 += t0_2*u.x;
// tu10_2 += t1_2*u.x;
// tu01_2 += t0_2*u.y;
// tu20_2 += t2_2*u.x;
// tu11_2 += t1_2*u.y;
// tu02_2 += t0_2*u.z;
// real t0 = t0_1 + t0_2;
// real t1 = t1_1 + t1_2;
// real t2 = t2_1 + t2_2;
// tu00 += t0*u.x;
// tu10 += t1*u.x;
// tu01 += t0*u.y;
// tu20 += t2*u.x;
// tu11 += t1*u.y;
// tu02 += t0*u.z;
// tu30 += t3*u.x;
// tu21 += t2*u.y;
// tu12 += t1*u.z;
// tu03 += t0*u.w;
// }
// tuv100_1 += tu10_1*v.x;
// tuv010_1 += tu01_1*v.x;
// tuv001_1 += tu00_1*v.y;
// tuv200_1 += tu20_1*v.x;
// tuv020_1 += tu02_1*v.x;
// tuv002_1 += tu00_1*v.z;
// tuv110_1 += tu11_1*v.x;
// tuv101_1 += tu10_1*v.y;
// tuv011_1 += tu01_1*v.y;
// tuv100_2 += tu10_2*v.x;
// tuv010_2 += tu01_2*v.x;
// tuv001_2 += tu00_2*v.y;
// tuv200_2 += tu20_2*v.x;
// tuv020_2 += tu02_2*v.x;
// tuv002_2 += tu00_2*v.z;
// tuv110_2 += tu11_2*v.x;
// tuv101_2 += tu10_2*v.y;
// tuv011_2 += tu01_2*v.y;
// tuv000 += tu00*v.x;
// tuv100 += tu10*v.x;
// tuv010 += tu01*v.x;
// tuv001 += tu00*v.y;
// tuv200 += tu20*v.x;
// tuv020 += tu02*v.x;
// tuv002 += tu00*v.z;
// tuv110 += tu11*v.x;
// tuv101 += tu10*v.y;
// tuv011 += tu01*v.y;
// tuv300 += tu30*v.x;
// tuv030 += tu03*v.x;
// tuv003 += tu00*v.w;
// tuv210 += tu21*v.x;
// tuv201 += tu20*v.y;
// tuv120 += tu12*v.x;
// tuv021 += tu02*v.y;
// tuv102 += tu10*v.z;
// tuv012 += tu01*v.z;
// tuv111 += tu11*v.y;
// }
// phid[10*m] = 0;
// phid[10*m+1] = tuv100_1;
// phid[10*m+2] = tuv010_1;
// phid[10*m+3] = tuv001_1;
// phid[10*m+4] = tuv200_1;
// phid[10*m+5] = tuv020_1;
// phid[10*m+6] = tuv002_1;
// phid[10*m+7] = tuv110_1;
// phid[10*m+8] = tuv101_1;
// phid[10*m+9] = tuv011_1;
//
// phip[10*m] = 0;
// phip[10*m+1] = tuv100_2;
// phip[10*m+2] = tuv010_2;
// phip[10*m+3] = tuv001_2;
// phip[10*m+4] = tuv200_2;
// phip[10*m+5] = tuv020_2;
// phip[10*m+6] = tuv002_2;
// phip[10*m+7] = tuv110_2;
// phip[10*m+8] = tuv101_2;
// phip[10*m+9] = tuv011_2;
//
// phidp[20*m] = tuv000;
// phidp[20*m+1] = tuv100;
// phidp[20*m+2] = tuv010;
// phidp[20*m+3] = tuv001;
// phidp[20*m+4] = tuv200;
// phidp[20*m+5] = tuv020;
// phidp[20*m+6] = tuv002;
// phidp[20*m+7] = tuv110;
// phidp[20*m+8] = tuv101;
// phidp[20*m+9] = tuv011;
// phidp[20*m+10] = tuv300;
// phidp[20*m+11] = tuv030;
// phidp[20*m+12] = tuv003;
// phidp[20*m+13] = tuv210;
// phidp[20*m+14] = tuv201;
// phidp[20*m+15] = tuv120;
// phidp[20*m+16] = tuv021;
// phidp[20*m+17] = tuv102;
// phidp[20*m+18] = tuv012;
// phidp[20*m+19] = tuv111;
// }
//}
/**
 * Gather the induced-dipole potential terms for every atom from the PME grid.
 *
 * Each atom m visits a PME_ORDER x PME_ORDER x PME_ORDER cube of grid points
 * whose lowest corner is igrid[m]; indices that run past the grid edge wrap
 * around by one grid length.  Every sampled grid value is weighted by the
 * per-atom coefficients theta1 (x axis), theta2 (y axis) and theta3 (z axis).
 *
 * @param pmeGrid  grid values; the .x component feeds phid, the .y component
 *                 feeds phip, and their sum feeds phidp
 * @param phid     output, 10 values per atom (element 0 is written as 0)
 * @param phip     output, 10 values per atom (element 0 is written as 0)
 * @param phidp    output, 20 values per atom
 * @param igrid    per-atom starting grid index (x, y, z components are used)
 * @param theta1   PME_ORDER coefficients per atom along x
 * @param theta2   PME_ORDER coefficients per atom along y
 * @param theta3   PME_ORDER coefficients per atom along z
 * @param invPeriodicBoxSize  not referenced by this kernel
 *
 * NOTE(review): the .x/.y/.z/.w components of the theta arrays are presumably
 * the B-spline value and its first, second and third derivatives; the digits in
 * the accumulator names (e.g. tuv210) then give the derivative order along
 * x, y and z.  Confirm against the kernel that fills the theta arrays.
 */
extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restrict__ pmeGrid, real* __restrict__ phid,
        real* __restrict__ phip, real* __restrict__ phidp, const int4* __restrict__ igrid,
        const real4* __restrict__ theta1, const real4* __restrict__ theta2, const real4* __restrict__ theta3,
        real4 invPeriodicBoxSize) {
    // Grid-stride loop: one atom per iteration.
    for (int m = blockIdx.x*blockDim.x+threadIdx.x; m < NUM_ATOMS; m += blockDim.x*gridDim.x) {
        const int4 origin = igrid[m];
        const int thetaBase = m*PME_ORDER;

        // Totals built from the first grid component (written to phid).
        real tuv100_1 = 0, tuv010_1 = 0, tuv001_1 = 0;
        real tuv200_1 = 0, tuv020_1 = 0, tuv002_1 = 0;
        real tuv110_1 = 0, tuv101_1 = 0, tuv011_1 = 0;

        // Totals built from the second grid component (written to phip).
        real tuv100_2 = 0, tuv010_2 = 0, tuv001_2 = 0;
        real tuv200_2 = 0, tuv020_2 = 0, tuv002_2 = 0;
        real tuv110_2 = 0, tuv101_2 = 0, tuv011_2 = 0;

        // Totals built from the sum of both components (written to phidp).
        real tuv000 = 0;
        real tuv100 = 0, tuv010 = 0, tuv001 = 0;
        real tuv200 = 0, tuv020 = 0, tuv002 = 0;
        real tuv110 = 0, tuv101 = 0, tuv011 = 0;
        real tuv300 = 0, tuv030 = 0, tuv003 = 0;
        real tuv210 = 0, tuv201 = 0, tuv120 = 0;
        real tuv021 = 0, tuv102 = 0, tuv012 = 0;
        real tuv111 = 0;

        for (int iz = 0; iz < PME_ORDER; iz++) {
            // Wrap the z index back into the grid.
            int gz = origin.z+iz;
            if (gz >= GRID_SIZE_Z)
                gz -= GRID_SIZE_Z;
            const real4 tz = theta3[thetaBase+iz];

            // Partial sums over the x-y plane at this z.
            real tu00_1 = 0, tu01_1 = 0, tu10_1 = 0, tu20_1 = 0, tu11_1 = 0, tu02_1 = 0;
            real tu00_2 = 0, tu01_2 = 0, tu10_2 = 0, tu20_2 = 0, tu11_2 = 0, tu02_2 = 0;
            real tu00 = 0, tu10 = 0, tu01 = 0, tu20 = 0, tu11 = 0, tu02 = 0;
            real tu30 = 0, tu21 = 0, tu12 = 0, tu03 = 0;

            for (int iy = 0; iy < PME_ORDER; iy++) {
                // Wrap the y index back into the grid.
                int gy = origin.y+iy;
                if (gy >= GRID_SIZE_Y)
                    gy -= GRID_SIZE_Y;
                const real4 ty = theta2[thetaBase+iy];
                const int yzOffset = gy*GRID_SIZE_Z + gz;

                // Partial sums along the x row at this (y, z).
                real t0_1 = 0, t1_1 = 0, t2_1 = 0;
                real t0_2 = 0, t1_2 = 0, t2_2 = 0;
                real t3 = 0;

                for (int ix = 0; ix < PME_ORDER; ix++) {
                    // Wrap the x index back into the grid.
                    int gx = origin.x+ix;
                    if (gx >= GRID_SIZE_X)
                        gx -= GRID_SIZE_X;
                    const real2 gridValue = pmeGrid[gx*GRID_SIZE_Y*GRID_SIZE_Z + yzOffset];
                    const real4 tx = theta1[thetaBase+ix];
                    t0_1 += gridValue.x*tx.x;
                    t1_1 += gridValue.x*tx.y;
                    t2_1 += gridValue.x*tx.z;
                    t0_2 += gridValue.y*tx.x;
                    t1_2 += gridValue.y*tx.y;
                    t2_2 += gridValue.y*tx.z;
                    t3 += (gridValue.x+gridValue.y)*tx.w;
                }

                // Fold the x row into the plane sums: first component.
                tu00_1 += t0_1*ty.x;
                tu10_1 += t1_1*ty.x;
                tu01_1 += t0_1*ty.y;
                tu20_1 += t2_1*ty.x;
                tu11_1 += t1_1*ty.y;
                tu02_1 += t0_1*ty.z;

                // Second component.
                tu00_2 += t0_2*ty.x;
                tu10_2 += t1_2*ty.x;
                tu01_2 += t0_2*ty.y;
                tu20_2 += t2_2*ty.x;
                tu11_2 += t1_2*ty.y;
                tu02_2 += t0_2*ty.z;

                // Combined (first + second) component.
                const real t0 = t0_1 + t0_2;
                const real t1 = t1_1 + t1_2;
                const real t2 = t2_1 + t2_2;
                tu00 += t0*ty.x;
                tu10 += t1*ty.x;
                tu01 += t0*ty.y;
                tu20 += t2*ty.x;
                tu11 += t1*ty.y;
                tu02 += t0*ty.z;
                tu30 += t3*ty.x;
                tu21 += t2*ty.y;
                tu12 += t1*ty.z;
                tu03 += t0*ty.w;
            }

            // Fold the plane sums into the atom totals: first component.
            tuv100_1 += tu10_1*tz.x;
            tuv010_1 += tu01_1*tz.x;
            tuv001_1 += tu00_1*tz.y;
            tuv200_1 += tu20_1*tz.x;
            tuv020_1 += tu02_1*tz.x;
            tuv002_1 += tu00_1*tz.z;
            tuv110_1 += tu11_1*tz.x;
            tuv101_1 += tu10_1*tz.y;
            tuv011_1 += tu01_1*tz.y;

            // Second component.
            tuv100_2 += tu10_2*tz.x;
            tuv010_2 += tu01_2*tz.x;
            tuv001_2 += tu00_2*tz.y;
            tuv200_2 += tu20_2*tz.x;
            tuv020_2 += tu02_2*tz.x;
            tuv002_2 += tu00_2*tz.z;
            tuv110_2 += tu11_2*tz.x;
            tuv101_2 += tu10_2*tz.y;
            tuv011_2 += tu01_2*tz.y;

            // Combined component.
            tuv000 += tu00*tz.x;
            tuv100 += tu10*tz.x;
            tuv010 += tu01*tz.x;
            tuv001 += tu00*tz.y;
            tuv200 += tu20*tz.x;
            tuv020 += tu02*tz.x;
            tuv002 += tu00*tz.z;
            tuv110 += tu11*tz.x;
            tuv101 += tu10*tz.y;
            tuv011 += tu01*tz.y;
            tuv300 += tu30*tz.x;
            tuv030 += tu03*tz.x;
            tuv003 += tu00*tz.w;
            tuv210 += tu21*tz.x;
            tuv201 += tu20*tz.y;
            tuv120 += tu12*tz.x;
            tuv021 += tu02*tz.y;
            tuv102 += tu10*tz.z;
            tuv012 += tu01*tz.z;
            tuv111 += tu11*tz.y;
        }

        // Store the 10 first-component values for this atom.
        real* const dOut = &phid[10*m];
        dOut[0] = 0;
        dOut[1] = tuv100_1;
        dOut[2] = tuv010_1;
        dOut[3] = tuv001_1;
        dOut[4] = tuv200_1;
        dOut[5] = tuv020_1;
        dOut[6] = tuv002_1;
        dOut[7] = tuv110_1;
        dOut[8] = tuv101_1;
        dOut[9] = tuv011_1;

        // Store the 10 second-component values for this atom.
        real* const pOut = &phip[10*m];
        pOut[0] = 0;
        pOut[1] = tuv100_2;
        pOut[2] = tuv010_2;
        pOut[3] = tuv001_2;
        pOut[4] = tuv200_2;
        pOut[5] = tuv020_2;
        pOut[6] = tuv002_2;
        pOut[7] = tuv110_2;
        pOut[8] = tuv101_2;
        pOut[9] = tuv011_2;

        // Store the 20 combined values for this atom.
        real* const dpOut = &phidp[20*m];
        dpOut[0] = tuv000;
        dpOut[1] = tuv100;
        dpOut[2] = tuv010;
        dpOut[3] = tuv001;
        dpOut[4] = tuv200;
        dpOut[5] = tuv020;
        dpOut[6] = tuv002;
        dpOut[7] = tuv110;
        dpOut[8] = tuv101;
        dpOut[9] = tuv011;
        dpOut[10] = tuv300;
        dpOut[11] = tuv030;
        dpOut[12] = tuv003;
        dpOut[13] = tuv210;
        dpOut[14] = tuv201;
        dpOut[15] = tuv120;
        dpOut[16] = tuv021;
        dpOut[17] = tuv102;
        dpOut[18] = tuv012;
        dpOut[19] = tuv111;
    }
}
extern
"C"
__global__
void
computeFixedMultipoleForceAndEnergy
(
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
long
long
*
__restrict__
torqueBuffers
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
labFrameDipole
,
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
phi
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
const
real
*
__restrict__
labFrameQuadrupole
,
const
real
*
__restrict__
phi
_global
,
real4
invPeriodicBoxSize
)
{
real
multipole
[
10
];
const
int
deriv1
[]
=
{
1
,
4
,
7
,
8
,
10
,
15
,
17
,
13
,
14
,
19
};
const
int
deriv2
[]
=
{
2
,
7
,
5
,
9
,
13
,
11
,
18
,
15
,
19
,
16
};
...
...
@@ -700,22 +709,22 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
multipole
[
8
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
2
];
multipole
[
9
]
=
2
*
labFrameQuadrupole
[
i
*
5
+
4
];
const
real
*
atomP
hi
=
&
phi
[
20
*
i
];
const
real
*
p
hi
=
&
phi
_global
[
20
*
i
];
torqueBuffers
[
i
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
3
]
*
yscale
*
atomP
hi
[
2
]
-
multipole
[
2
]
*
zscale
*
atomP
hi
[
3
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
yscale
*
zscale
*
atomP
hi
[
9
]
+
multipole
[
8
]
*
xscale
*
yscale
*
atomP
hi
[
7
]
+
multipole
[
9
]
*
yscale
*
yscale
*
atomP
hi
[
5
]
-
multipole
[
7
]
*
xscale
*
zscale
*
atomP
hi
[
8
]
-
multipole
[
9
]
*
zscale
*
zscale
*
atomP
hi
[
6
])
*
0xFFFFFFFF
);
torqueBuffers
[
i
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
3
]
*
yscale
*
p
hi
[
2
]
-
multipole
[
2
]
*
zscale
*
p
hi
[
3
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
yscale
*
zscale
*
p
hi
[
9
]
+
multipole
[
8
]
*
xscale
*
yscale
*
p
hi
[
7
]
+
multipole
[
9
]
*
yscale
*
yscale
*
p
hi
[
5
]
-
multipole
[
7
]
*
xscale
*
zscale
*
p
hi
[
8
]
-
multipole
[
9
]
*
zscale
*
zscale
*
p
hi
[
6
])
*
0xFFFFFFFF
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
1
]
*
zscale
*
atomP
hi
[
3
]
-
multipole
[
3
]
*
xscale
*
atomP
hi
[
1
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
xscale
*
zscale
*
atomP
hi
[
8
]
+
multipole
[
7
]
*
yscale
*
zscale
*
atomP
hi
[
9
]
+
multipole
[
8
]
*
zscale
*
zscale
*
atomP
hi
[
6
]
-
multipole
[
8
]
*
xscale
*
xscale
*
atomP
hi
[
4
]
-
multipole
[
9
]
*
xscale
*
yscale
*
atomP
hi
[
7
])
*
0xFFFFFFFF
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
1
]
*
zscale
*
p
hi
[
3
]
-
multipole
[
3
]
*
xscale
*
p
hi
[
1
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
xscale
*
zscale
*
p
hi
[
8
]
+
multipole
[
7
]
*
yscale
*
zscale
*
p
hi
[
9
]
+
multipole
[
8
]
*
zscale
*
zscale
*
p
hi
[
6
]
-
multipole
[
8
]
*
xscale
*
xscale
*
p
hi
[
4
]
-
multipole
[
9
]
*
xscale
*
yscale
*
p
hi
[
7
])
*
0xFFFFFFFF
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
2
]
*
xscale
*
atomP
hi
[
1
]
-
multipole
[
1
]
*
yscale
*
atomP
hi
[
2
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
xscale
*
yscale
*
atomP
hi
[
7
]
+
multipole
[
7
]
*
xscale
*
xscale
*
atomP
hi
[
4
]
+
multipole
[
9
]
*
xscale
*
zscale
*
atomP
hi
[
8
]
-
multipole
[
7
]
*
yscale
*
yscale
*
atomP
hi
[
5
]
-
multipole
[
8
]
*
yscale
*
zscale
*
atomP
hi
[
9
])
*
0xFFFFFFFF
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
=
(
long
long
)
(
EPSILON_FACTOR
*
(
multipole
[
2
]
*
xscale
*
p
hi
[
1
]
-
multipole
[
1
]
*
yscale
*
p
hi
[
2
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
xscale
*
yscale
*
p
hi
[
7
]
+
multipole
[
7
]
*
xscale
*
xscale
*
p
hi
[
4
]
+
multipole
[
9
]
*
xscale
*
zscale
*
p
hi
[
8
]
-
multipole
[
7
]
*
yscale
*
yscale
*
p
hi
[
5
]
-
multipole
[
8
]
*
yscale
*
zscale
*
p
hi
[
9
])
*
0xFFFFFFFF
);
// Compute the force and energy.
...
...
@@ -731,10 +740,10 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
real4
f
=
make_real4
(
0
,
0
,
0
,
0
);
for
(
int
k
=
0
;
k
<
10
;
k
++
)
{
energy
+=
multipole
[
k
]
*
atomP
hi
[
k
];
f
.
x
+=
multipole
[
k
]
*
atomP
hi
[
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
atomP
hi
[
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
atomP
hi
[
deriv3
[
k
]];
energy
+=
multipole
[
k
]
*
p
hi
[
k
];
f
.
x
+=
multipole
[
k
]
*
p
hi
[
deriv1
[
k
]];
f
.
y
+=
multipole
[
k
]
*
p
hi
[
deriv2
[
k
]];
f
.
z
+=
multipole
[
k
]
*
p
hi
[
deriv3
[
k
]];
}
f
.
x
*=
EPSILON_FACTOR
*
xscale
;
f
.
y
*=
EPSILON_FACTOR
*
yscale
;
...
...
@@ -746,141 +755,125 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
EPSILON_FACTOR
*
energy
;
}
//extern "C" __global__ void kComputeInducedDipoleForceAndEnergy_kernel() {
// real multipole[10];
// real inducedDipole[3];
// real inducedDipolePolar[3];
// real scales[3];
// const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
// const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
// const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// scales[0] = xscale;
// scales[1] = yscale;
// scales[2] = zscale;
// real energy = 0;
// for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// // Compute the torque.
//
// multipole[0] = posq[i].w;
// multipole[1] = labFrameDipole[i*3];
// multipole[2] = labFrameDipole[i*3+1];
// multipole[3] = labFrameDipole[i*3+2];
// multipole[4] = labFrameQuadrupole[i*5];
// multipole[5] = labFrameQuadrupole[i*5+3];
// multipole[6] = -(multipole[4]+multipole[5]);
// multipole[7] = 2*labFrameQuadrupole[i*5+1];
// multipole[8] = 2*labFrameQuadrupole[i*5+2];
// multipole[9] = 2*labFrameQuadrupole[i*5+4];
// real* phidp = &cAmoebaSim.pPhidp[20*i];
//
// cAmoebaSim.pTorque[3*i] += 0.5f*EPSILON_FACTOR*(multipole[3]*yscale*phidp[2] - multipole[2]*zscale*phidp[3]
// + 2*(multipole[6]-multipole[5])*yscale*zscale*phidp[9]
// + multipole[8]*xscale*yscale*phidp[7] + multipole[9]*yscale*yscale*phidp[5]
// - multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6]);
//
// cAmoebaSim.pTorque[3*i+1] += 0.5f*EPSILON_FACTOR*(multipole[1]*zscale*phidp[3] - multipole[3]*xscale*phidp[1]
// + 2*(multipole[4]-multipole[6])*xscale*zscale*phidp[8]
// + multipole[7]*yscale*zscale*phidp[9] + multipole[8]*zscale*zscale*phidp[6]
// - multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7]);
//
// cAmoebaSim.pTorque[3*i+2] += 0.5f*EPSILON_FACTOR*(multipole[2]*xscale*phidp[1] - multipole[1]*yscale*phidp[2]
// + 2*(multipole[5]-multipole[4])*xscale*yscale*phidp[7]
// + multipole[7]*xscale*xscale*phidp[4] + multipole[9]*xscale*zscale*phidp[8]
// - multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9]);
//
// // Compute the force and energy.
//
// multipole[1] *= xscale;
// multipole[2] *= yscale;
// multipole[3] *= zscale;
// multipole[4] *= xscale*xscale;
// multipole[5] *= yscale*yscale;
// multipole[6] *= zscale*zscale;
// multipole[7] *= xscale*yscale;
// multipole[8] *= xscale*zscale;
// multipole[9] *= yscale*zscale;
//
// inducedDipole[0] = cAmoebaSim.pInducedDipole[i*3];
// inducedDipole[1] = cAmoebaSim.pInducedDipole[i*3+1];
// inducedDipole[2] = cAmoebaSim.pInducedDipole[i*3+2];
// inducedDipolePolar[0] = cAmoebaSim.pInducedDipolePolar[i*3];
// inducedDipolePolar[1] = cAmoebaSim.pInducedDipolePolar[i*3+1];
// inducedDipolePolar[2] = cAmoebaSim.pInducedDipolePolar[i*3+2];
// real* phi = &cAmoebaSim.pPhi[20*i];
// real* phip = &cAmoebaSim.pPhip[10*i];
// real* phid = &cAmoebaSim.pPhid[10*i];
// real4 f = make_real4(0, 0, 0, 0);
//
// energy += GRID_SIZE_X*invPeriodicBoxSize.x*inducedDipole[0]*phi[1];
// energy += GRID_SIZE_Y*invPeriodicBoxSize.y*inducedDipole[1]*phi[2];
// energy += GRID_SIZE_Z*invPeriodicBoxSize.z*inducedDipole[2]*phi[3];
//
// for (int k = 0; k < 3; k++) {
//
// int j1 = deriv1[k+1];
// int j2 = deriv2[k+1];
// int j3 = deriv3[k+1];
//
// f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1]*(scales[k]/xscale);
// f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2]*(scales[k]/yscale);
// f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3]*(scales[k]/zscale);
//
// if( cAmoebaSim.polarizationType == 0 )
// {
// f.x += (inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1])*(scales[k]/xscale);
// f.y += (inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2])*(scales[k]/yscale);
// f.z += (inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3])*(scales[k]/zscale);
// }
//
//
// }
//
// f.x *= GRID_SIZE_X*invPeriodicBoxSize.x;
// f.y *= GRID_SIZE_Y*invPeriodicBoxSize.y;
// f.z *= GRID_SIZE_Z*invPeriodicBoxSize.z;
// for (int k = 0; k < 10; k++) {
// f.x += multipole[k]*phidp[deriv1[k]];
// f.y += multipole[k]*phidp[deriv2[k]];
// f.z += multipole[k]*phidp[deriv3[k]];
// }
//
// f.x *= 0.5f*EPSILON_FACTOR*xscale;
// f.y *= 0.5f*EPSILON_FACTOR*yscale;
// f.z *= 0.5f*EPSILON_FACTOR*zscale;
//
// real4 force = cSim.pForce4[i];
// force.x -= f.x;
// force.y -= f.y;
// force.z -= f.z;
// cSim.pForce4[i] = force;
// }
// cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
//}
//
//extern "C" __global__ void kRecordFixedMultipoleField_kernel(real* output) {
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// output[3*i] = -xscale*cAmoebaSim.pPhi[20*i+1];
// output[3*i+1] = -yscale*cAmoebaSim.pPhi[20*i+2];
// output[3*i+2] = -zscale*cAmoebaSim.pPhi[20*i+3];
// }
//}
//
//extern "C" __global__ void kRecordInducedDipoleField_kernel(real* output, real* outputPolar) {
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// output[3*i] -= xscale*cAmoebaSim.pPhid[10*i+1];
// output[3*i+1] -= yscale*cAmoebaSim.pPhid[10*i+2];
// output[3*i+2] -= zscale*cAmoebaSim.pPhid[10*i+3];
// outputPolar[3*i] -= xscale*cAmoebaSim.pPhip[10*i+1];
// outputPolar[3*i+1] -= yscale*cAmoebaSim.pPhip[10*i+2];
// outputPolar[3*i+2] -= zscale*cAmoebaSim.pPhip[10*i+3];
// }
//}
/**
 * Add the contribution that involves the induced dipoles to the per-atom
 * torques, forces and the energy, using potentials already gathered per atom.
 *
 * @param posq                       per-atom real4; only .w is read (stored as multipole[0])
 * @param forceBuffers               fixed-point force accumulators; component c of atom i
 *                                   lives at i + c*PADDED_NUM_ATOMS
 * @param torqueBuffers              fixed-point torque accumulators, same layout
 * @param energyBuffer               one slot per thread; this thread's energy is added to it
 * @param labFrameDipole             3 values per atom
 * @param labFrameQuadrupole         5 values per atom
 * @param inducedDipole_global       3 values per atom
 * @param inducedDipolePolar_global  3 values per atom
 * @param phi_global                 20 values per atom
 * @param phid_global                10 values per atom (unused when DIRECT_POLARIZATION is defined)
 * @param phip_global                10 values per atom (unused when DIRECT_POLARIZATION is defined)
 * @param phidp_global               20 values per atom
 * @param invPeriodicBoxSize         multiplied by GRID_SIZE_X/Y/Z to form the per-axis scale factors
 *
 * Forces and torques are multiplied by 0xFFFFFFFF and truncated to 64-bit
 * integers before being accumulated.
 */
extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
        long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
        const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole_global,
        const real* __restrict__ inducedDipolePolar_global, const real* __restrict__ phi_global,
        const real* __restrict__ phid_global, const real* __restrict__ phip_global,
        const real* __restrict__ phidp_global, real4 invPeriodicBoxSize) {
    real multipole[10];
    real inducedDipole[3];
    real inducedDipolePolar[3];
    real scales[3];

    // For multipole component k, derivN[k] is the index into a 20-element
    // potential array that is paired with it when forming force component N.
    const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
    const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
    const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};

    const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
    const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
    const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
    scales[0] = xscale;
    scales[1] = yscale;
    scales[2] = zscale;
    real energy = 0;
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
        // Compute the torque.

        // Assemble the 10-component multipole: charge, 3 dipole components, then
        // quadrupole terms.  multipole[6] is derived as -(multipole[4]+multipole[5])
        // and the last three entries are doubled.
        // NOTE(review): presumably a traceless quadrupole stored as xx, xy, xz, yy, yz - confirm.
        multipole[0] = posq[i].w;
        multipole[1] = labFrameDipole[i*3];
        multipole[2] = labFrameDipole[i*3+1];
        multipole[3] = labFrameDipole[i*3+2];
        multipole[4] = labFrameQuadrupole[i*5];
        multipole[5] = labFrameQuadrupole[i*5+3];
        multipole[6] = -(multipole[4]+multipole[5]);
        multipole[7] = 2*labFrameQuadrupole[i*5+1];
        multipole[8] = 2*labFrameQuadrupole[i*5+2];
        multipole[9] = 2*labFrameQuadrupole[i*5+4];
        const real* phidp = &phidp_global[20*i];

        torqueBuffers[i] += (long long) (0.5f*EPSILON_FACTOR*(multipole[3]*yscale*phidp[2] - multipole[2]*zscale*phidp[3]
                      + 2*(multipole[6]-multipole[5])*yscale*zscale*phidp[9]
                      + multipole[8]*xscale*yscale*phidp[7] + multipole[9]*yscale*yscale*phidp[5]
                      - multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6])*0xFFFFFFFF);

        torqueBuffers[i+PADDED_NUM_ATOMS] += (long long) (0.5f*EPSILON_FACTOR*(multipole[1]*zscale*phidp[3] - multipole[3]*xscale*phidp[1]
                      + 2*(multipole[4]-multipole[6])*xscale*zscale*phidp[8]
                      + multipole[7]*yscale*zscale*phidp[9] + multipole[8]*zscale*zscale*phidp[6]
                      - multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7])*0xFFFFFFFF);

        torqueBuffers[i+PADDED_NUM_ATOMS*2] += (long long) (0.5f*EPSILON_FACTOR*(multipole[2]*xscale*phidp[1] - multipole[1]*yscale*phidp[2]
                      + 2*(multipole[5]-multipole[4])*xscale*yscale*phidp[7]
                      + multipole[7]*xscale*xscale*phidp[4] + multipole[9]*xscale*zscale*phidp[8]
                      - multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9])*0xFFFFFFFF);

        // Compute the force and energy.

        // Scale each multipole component by one scale factor per axis index it carries.
        multipole[1] *= xscale;
        multipole[2] *= yscale;
        multipole[3] *= zscale;
        multipole[4] *= xscale*xscale;
        multipole[5] *= yscale*yscale;
        multipole[6] *= zscale*zscale;
        multipole[7] *= xscale*yscale;
        multipole[8] *= xscale*zscale;
        multipole[9] *= yscale*zscale;

        inducedDipole[0] = inducedDipole_global[i*3];
        inducedDipole[1] = inducedDipole_global[i*3+1];
        inducedDipole[2] = inducedDipole_global[i*3+2];
        inducedDipolePolar[0] = inducedDipolePolar_global[i*3];
        inducedDipolePolar[1] = inducedDipolePolar_global[i*3+1];
        inducedDipolePolar[2] = inducedDipolePolar_global[i*3+2];
        const real* phi = &phi_global[20*i];
#ifndef DIRECT_POLARIZATION
        // Only the mutual-polarization force terms below read these.
        const real* phip = &phip_global[10*i];
        const real* phid = &phid_global[10*i];
#endif
        real4 f = make_real4(0, 0, 0, 0);

        // xscale/yscale/zscale are exactly GRID_SIZE_*·invPeriodicBoxSize.*, so reuse them.
        energy += xscale*inducedDipole[0]*phi[1];
        energy += yscale*inducedDipole[1]*phi[2];
        energy += zscale*inducedDipole[2]*phi[3];

        for (int k = 0; k < 3; k++) {
            // Entries 1..3 of the deriv tables correspond to the three dipole components.
            int j1 = deriv1[k+1];
            int j2 = deriv2[k+1];
            int j3 = deriv3[k+1];

            f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1]*(scales[k]/xscale);
            f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2]*(scales[k]/yscale);
            f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3]*(scales[k]/zscale);

#ifndef DIRECT_POLARIZATION
            f.x += (inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1])*(scales[k]/xscale);
            f.y += (inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2])*(scales[k]/yscale);
            f.z += (inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3])*(scales[k]/zscale);
#endif
        }

        f.x *= xscale;
        f.y *= yscale;
        f.z *= zscale;
        for (int k = 0; k < 10; k++) {
            f.x += multipole[k]*phidp[deriv1[k]];
            f.y += multipole[k]*phidp[deriv2[k]];
            f.z += multipole[k]*phidp[deriv3[k]];
        }

        f.x *= 0.5f*EPSILON_FACTOR*xscale;
        f.y *= 0.5f*EPSILON_FACTOR*yscale;
        f.z *= 0.5f*EPSILON_FACTOR*zscale;

        // Convert to fixed point via a signed 64-bit value, then subtract from the accumulators.
        forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0xFFFFFFFF));
        forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0xFFFFFFFF));
        forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0xFFFFFFFF));
    }
    energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
}
/**
 * Subtract the scaled entries 1..3 of each atom's phid / phip record from the
 * matching three components of inducedDipole / inducedDipolePolar.
 *
 * @param phid                10 values per atom; entries 1, 2, 3 are read
 * @param phip                10 values per atom; entries 1, 2, 3 are read
 * @param inducedDipole       3 values per atom, updated in place from phid
 * @param inducedDipolePolar  3 values per atom, updated in place from phip
 * @param invPeriodicBoxSize  multiplied by GRID_SIZE_X/Y/Z to form the per-axis scale factors
 *
 * phip is only read, so it is declared as a pointer to const like phid.  The
 * previous `real* const` made the pointer itself const rather than the data.
 */
extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, const real* __restrict__ phip,
        real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real4 invPeriodicBoxSize) {
    const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
    const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
    const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
    for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
        inducedDipole[3*i] -= xscale*phid[10*i+1];
        inducedDipole[3*i+1] -= yscale*phid[10*i+2];
        inducedDipole[3*i+2] -= zscale*phid[10*i+3];
        inducedDipolePolar[3*i] -= xscale*phip[10*i+1];
        inducedDipolePolar[3*i+1] -= yscale*phip[10*i+2];
        inducedDipolePolar[3*i+2] -= zscale*phip[10*i+3];
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment