"plugins/rpmd/platforms/cuda/src/CudaRpmdKernelFactory.cpp" did not exist on "04ea8ce98a64e048003d0e926cc1251d7c47ef06"
Commit f352d116 authored by Peter Eastman's avatar Peter Eastman
Browse files

Continuing to convert AmoebaMultipoleForce: PME with direct polarization now works

parent 7a60fd73
...@@ -1166,11 +1166,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1166,11 +1166,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines"); pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines");
pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid"); pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid");
pmeSpreadFixedMultipolesKernel = cu.getKernel(module, "gridSpreadFixedMultipoles"); pmeSpreadFixedMultipolesKernel = cu.getKernel(module, "gridSpreadFixedMultipoles");
pmeSpreadInducedDipolesKernel = cu.getKernel(module, "gridSpreadInducedDipoles");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution"); pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeFixedPotentialKernel = cu.getKernel(module, "computeFixedPotentialFromGrid"); pmeFixedPotentialKernel = cu.getKernel(module, "computeFixedPotentialFromGrid");
pmeInducedPotentialKernel = cu.getKernel(module, "computeInducedPotentialFromGrid");
pmeFixedForceKernel = cu.getKernel(module, "computeFixedMultipoleForceAndEnergy"); pmeFixedForceKernel = cu.getKernel(module, "computeFixedMultipoleForceAndEnergy");
// pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce"); pmeInducedForceKernel = cu.getKernel(module, "computeInducedDipoleForceAndEnergy");
// pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge"); pmeRecordInducedFieldDipolesKernel = cu.getKernel(module, "recordInducedFieldDipoles");
// cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1); // cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
// Create required data structures. // Create required data structures.
...@@ -1415,12 +1417,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1415,12 +1417,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
// Map torques to force.
void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()}; &cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()};
cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms()); cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms());
} }
else { else {
// Compute induced dipoles. // Reciprocal space calculation.
unsigned int maxTiles = nb.getInteractingTiles().getSize(); unsigned int maxTiles = nb.getInteractingTiles().getSize();
void* pmeUpdateBsplinesArgs[] = {&cu.getPosq().getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), void* pmeUpdateBsplinesArgs[] = {&cu.getPosq().getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(),
...@@ -1433,9 +1438,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1433,9 +1438,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu.executeKernel(pmeAtomRangeKernel, pmeAtomRangeArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize); cu.executeKernel(pmeAtomRangeKernel, pmeAtomRangeArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize);
void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(), &pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(),
&pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), cu.getPeriodicBoxSizePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.getInvPeriodicBoxSizePointer()}; cu.executeKernel(pmeSpreadFixedMultipolesKernel, pmeSpreadFixedMultipolesArgs, cu.getNumAtoms());
cu.executeKernel(pmeSpreadFixedMultipolesKernel, pmeSpreadFixedMultipolesArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize);
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD); cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else else
...@@ -1448,23 +1452,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1448,23 +1452,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE); cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* pmeFixedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhi->getDevicePointer(), &field->getDevicePointer(), void* pmeFixedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhi->getDevicePointer(), &field->getDevicePointer(),
&pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), &fieldPolar ->getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(),
&labFrameDipoles->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()}; &pmeTheta3->getDevicePointer(), &labFrameDipoles->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeFixedPotentialKernel, pmeFixedPotentialArgs, cu.getNumAtoms()); cu.executeKernel(pmeFixedPotentialKernel, pmeFixedPotentialArgs, cu.getNumAtoms());
void* pmeFixedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(), void* pmeFixedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&pmePhi->getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()}; &pmePhi->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeFixedForceKernel, pmeFixedForceArgs, cu.getNumAtoms()); cu.executeKernel(pmeFixedForceKernel, pmeFixedForceArgs, cu.getNumAtoms());
printf("reciprocal:\n");
vector<long long> f; // Direct space calculation.
printf("force\n");
cu.getForce().download(f);
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF);
// printf("torque\n");
// torque->download(f);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF);
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(), void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices, &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
...@@ -1475,38 +1472,29 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1475,38 +1472,29 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()}; &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms()); cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
printf("direct:\n");
printf("force\n"); // Reciprocal space calculation for the induced dipoles.
cu.getForce().download(f);
for (int i = 0; i < cu.getNumAtoms(); i++) void* pmeSpreadInducedDipolesArgs[] = {&cu.getPosq().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF); &pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(),
// printf("torque\n"); &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// torque->download(f); cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
// for (int i = 0; i < cu.getNumAtoms(); i++) if (cu.getUseDoublePrecision())
// printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF); cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
// vector<float> d, dp; else
// printf("phi\n"); cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
// pmePhi->download(d); cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
// for (int i = 0; i < d.size(); i++) if (cu.getUseDoublePrecision())
// printf("%d: %g\n", i, d[i]); cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
// printf("dipoles\n"); else
// labFrameDipoles->download(d); cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
// for (int i = 0; i < cu.getNumAtoms(); i++) void* pmeInducedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// printf("%d: %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2]); &pmePhidp->getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(),
// printf("quadrupoles\n"); &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// labFrameQuadrupoles->download(d); cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
// for (int i = 0; i < cu.getNumAtoms(); i++) // void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// printf("%d: %g %g %g %g %g %g\n", i, d[5*i], d[5*i+1], d[5*i+2], d[5*i+3], d[5*i+4], -(d[5*i]+d[5*i+3])); // &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// printf("induced dipoles\n"); // cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
// inducedDipole->download(d);
// inducedDipolePolar->download(dp);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g, %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2], dp[3*i], dp[3*i+1], dp[3*i+2]);
// printf("positions\n");
// vector<float4> p;
// cu.getPosq().download(p);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g %g\n", i, p[i].x, p[i].y, p[i].z, p[i].w);
// vector<float2> errors; // vector<float2> errors;
...@@ -1541,11 +1529,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1541,11 +1529,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
printf("electrostatic:\n"); void* pmeInducedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
printf("force\n"); &cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
cu.getForce().download(f); &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &pmePhi->getDevicePointer(), &pmePhid->getDevicePointer(),
for (int i = 0; i < cu.getNumAtoms(); i++) &pmePhip->getDevicePointer(), &pmePhidp->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF); cu.executeKernel(pmeInducedForceKernel, pmeInducedForceArgs, cu.getNumAtoms());
// Map torques to force.
void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()}; &cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()};
cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms()); cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms());
......
...@@ -425,7 +425,8 @@ private: ...@@ -425,7 +425,8 @@ private:
CudaSort* sort; CudaSort* sort;
cufftHandle fft; cufftHandle fft;
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeSpreadFixedMultipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeFixedForceKernel; CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel;
CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
......
...@@ -161,8 +161,7 @@ extern "C" __global__ void findAtomRangeForGrid(int2* __restrict__ pmeAtomGridIn ...@@ -161,8 +161,7 @@ extern "C" __global__ void findAtomRangeForGrid(int2* __restrict__ pmeAtomGridIn
} }
extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole, extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ posq, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex, int* __restrict__ pmeAtomRange, const real* __restrict__ labFrameQuadrupole, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex, int* __restrict__ pmeAtomRange,
const real4* __restrict__ theta1, const real4* __restrict__ theta2, const real4* __restrict__ theta3, const real4* __restrict__ theta1, const real4* __restrict__ theta2, const real4* __restrict__ theta3, real4 invPeriodicBoxSize) {
real4 periodicBoxSize, real4 invPeriodicBoxSize) {
const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x; const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y; const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z; const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
...@@ -248,88 +247,90 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p ...@@ -248,88 +247,90 @@ extern "C" __global__ void gridSpreadFixedMultipoles(const real4* __restrict__ p
} }
} }
//extern "C" __global__ void kGridSpreadInducedDipoles_kernel() { extern "C" __global__ void gridSpreadInducedDipoles(const real4* __restrict__ posq, const real* __restrict__ inducedDipole,
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x; const real* __restrict__ inducedDipolePolar, real2* __restrict__ pmeGrid, int2* __restrict__ pmeAtomGridIndex, int* __restrict__ pmeAtomRange,
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y; const real4* __restrict__ theta1, const real4* __restrict__ theta2, const real4* __restrict__ theta3, real4 invPeriodicBoxSize) {
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z; const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
// unsigned int numThreads = gridDim.x*blockDim.x; const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// for (int gridIndex = blockIdx.x*blockDim.x+threadIdx.x; gridIndex < numGridPoints; gridIndex += numThreads) { unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
// int3 gridPoint; unsigned int numThreads = gridDim.x*blockDim.x;
// gridPoint.x = gridIndex/(GRID_SIZE_Y*GRID_SIZE_Z); for (int gridIndex = blockIdx.x*blockDim.x+threadIdx.x; gridIndex < numGridPoints; gridIndex += numThreads) {
// int remainder = gridIndex-gridPoint.x*GRID_SIZE_Y*GRID_SIZE_Z; int3 gridPoint;
// gridPoint.y = remainder/GRID_SIZE_Z; gridPoint.x = gridIndex/(GRID_SIZE_Y*GRID_SIZE_Z);
// gridPoint.z = remainder-gridPoint.y*GRID_SIZE_Z; int remainder = gridIndex-gridPoint.x*GRID_SIZE_Y*GRID_SIZE_Z;
// real2 result = make_real2(0, 0); gridPoint.y = remainder/GRID_SIZE_Z;
// for (int ix = 0; ix < PME_ORDER; ++ix) { gridPoint.z = remainder-gridPoint.y*GRID_SIZE_Z;
// int x = gridPoint.x-ix+(gridPoint.x >= ix ? 0 : GRID_SIZE_X); real2 result = make_real2(0, 0);
// for (int iy = 0; iy < PME_ORDER; ++iy) { for (int ix = 0; ix < PME_ORDER; ++ix) {
// int y = gridPoint.y-iy+(gridPoint.y >= iy ? 0 : GRID_SIZE_Y); int x = gridPoint.x-ix+(gridPoint.x >= ix ? 0 : GRID_SIZE_X);
// int z1 = gridPoint.z-PME_ORDER+1; for (int iy = 0; iy < PME_ORDER; ++iy) {
// z1 += (z1 >= 0 ? 0 : GRID_SIZE_Z); int y = gridPoint.y-iy+(gridPoint.y >= iy ? 0 : GRID_SIZE_Y);
// int z2 = (z1 < gridPoint.z ? gridPoint.z : GRID_SIZE_Z-1); int z1 = gridPoint.z-PME_ORDER+1;
// int gridIndex1 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z1; z1 += (z1 >= 0 ? 0 : GRID_SIZE_Z);
// int gridIndex2 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z2; int z2 = (z1 < gridPoint.z ? gridPoint.z : GRID_SIZE_Z-1);
// int firstAtom = pmeAtomRange[gridIndex1]; int gridIndex1 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z1;
// int lastAtom = pmeAtomRange[gridIndex2+1]; int gridIndex2 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+z2;
// for (int i = firstAtom; i < lastAtom; ++i) { int firstAtom = pmeAtomRange[gridIndex1];
// int2 atomData = pmeAtomGridIndex[i]; int lastAtom = pmeAtomRange[gridIndex2+1];
// int atomIndex = atomData.x; for (int i = firstAtom; i < lastAtom; ++i) {
// int z = atomData.y; int2 atomData = pmeAtomGridIndex[i];
// int iz = gridPoint.z-z+(gridPoint.z >= z ? 0 : GRID_SIZE_Z); int atomIndex = atomData.x;
// if (iz >= GRID_SIZE_Z) int z = atomData.y;
// iz -= GRID_SIZE_Z; int iz = gridPoint.z-z+(gridPoint.z >= z ? 0 : GRID_SIZE_Z);
// real inducedDipoleX = xscale*cAmoebaSim.pInducedDipole[atomIndex*3]; if (iz >= GRID_SIZE_Z)
// real inducedDipoleY = yscale*cAmoebaSim.pInducedDipole[atomIndex*3+1]; iz -= GRID_SIZE_Z;
// real inducedDipoleZ = zscale*cAmoebaSim.pInducedDipole[atomIndex*3+2]; real inducedDipoleX = xscale*inducedDipole[atomIndex*3];
// real inducedDipolePolarX = xscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3]; real inducedDipoleY = yscale*inducedDipole[atomIndex*3+1];
// real inducedDipolePolarY = yscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+1]; real inducedDipoleZ = zscale*inducedDipole[atomIndex*3+2];
// real inducedDipolePolarZ = zscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+2]; real inducedDipolePolarX = xscale*inducedDipolePolar[atomIndex*3];
// real4 t = theta1[atomIndex*PME_ORDER+ix]; real inducedDipolePolarY = yscale*inducedDipolePolar[atomIndex*3+1];
// real4 u = theta2[atomIndex*PME_ORDER+iy]; real inducedDipolePolarZ = zscale*inducedDipolePolar[atomIndex*3+2];
// real4 v = theta3[atomIndex*PME_ORDER+iz]; real4 t = theta1[atomIndex*PME_ORDER+ix];
// real term01 = inducedDipoleY*u.y*v.x + inducedDipoleZ*u.x*v.y; real4 u = theta2[atomIndex*PME_ORDER+iy];
// real term11 = inducedDipoleX*u.x*v.x; real4 v = theta3[atomIndex*PME_ORDER+iz];
// real term02 = inducedDipolePolarY*u.y*v.x + inducedDipolePolarZ*u.x*v.y; real term01 = inducedDipoleY*u.y*v.x + inducedDipoleZ*u.x*v.y;
// real term12 = inducedDipolePolarX*u.x*v.x; real term11 = inducedDipoleX*u.x*v.x;
// result.x += term01*t.x + term11*t.y; real term02 = inducedDipolePolarY*u.y*v.x + inducedDipolePolarZ*u.x*v.y;
// result.y += term02*t.x + term12*t.y; real term12 = inducedDipolePolarX*u.x*v.x;
// } result.x += term01*t.x + term11*t.y;
// if (z1 > gridPoint.z) { result.y += term02*t.x + term12*t.y;
// gridIndex1 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z; }
// gridIndex2 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+gridPoint.z; if (z1 > gridPoint.z) {
// firstAtom = pmeAtomRange[gridIndex1]; gridIndex1 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z;
// lastAtom = pmeAtomRange[gridIndex2+1]; gridIndex2 = x*GRID_SIZE_Y*GRID_SIZE_Z+y*GRID_SIZE_Z+gridPoint.z;
// for (int i = firstAtom; i < lastAtom; ++i) { firstAtom = pmeAtomRange[gridIndex1];
// int2 atomData = pmeAtomGridIndex[i]; lastAtom = pmeAtomRange[gridIndex2+1];
// int atomIndex = atomData.x; for (int i = firstAtom; i < lastAtom; ++i) {
// int z = atomData.y; int2 atomData = pmeAtomGridIndex[i];
// int iz = gridPoint.z-z+(gridPoint.z >= z ? 0 : GRID_SIZE_Z); int atomIndex = atomData.x;
// if (iz >= GRID_SIZE_Z) int z = atomData.y;
// iz -= GRID_SIZE_Z; int iz = gridPoint.z-z+(gridPoint.z >= z ? 0 : GRID_SIZE_Z);
// real inducedDipoleX = xscale*cAmoebaSim.pInducedDipole[atomIndex*3]; if (iz >= GRID_SIZE_Z)
// real inducedDipoleY = yscale*cAmoebaSim.pInducedDipole[atomIndex*3+1]; iz -= GRID_SIZE_Z;
// real inducedDipoleZ = zscale*cAmoebaSim.pInducedDipole[atomIndex*3+2]; real inducedDipoleX = xscale*inducedDipole[atomIndex*3];
// real inducedDipolePolarX = xscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3]; real inducedDipoleY = yscale*inducedDipole[atomIndex*3+1];
// real inducedDipolePolarY = yscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+1]; real inducedDipoleZ = zscale*inducedDipole[atomIndex*3+2];
// real inducedDipolePolarZ = zscale*cAmoebaSim.pInducedDipolePolar[atomIndex*3+2]; real inducedDipolePolarX = xscale*inducedDipolePolar[atomIndex*3];
// real4 t = theta1[atomIndex*PME_ORDER+ix]; real inducedDipolePolarY = yscale*inducedDipolePolar[atomIndex*3+1];
// real4 u = theta2[atomIndex*PME_ORDER+iy]; real inducedDipolePolarZ = zscale*inducedDipolePolar[atomIndex*3+2];
// real4 v = theta3[atomIndex*PME_ORDER+iz]; real4 t = theta1[atomIndex*PME_ORDER+ix];
// real term01 = inducedDipoleY*u.y*v.x + inducedDipoleZ*u.x*v.y; real4 u = theta2[atomIndex*PME_ORDER+iy];
// real term11 = inducedDipoleX*u.x*v.x; real4 v = theta3[atomIndex*PME_ORDER+iz];
// real term02 = inducedDipolePolarY*u.y*v.x + inducedDipolePolarZ*u.x*v.y; real term01 = inducedDipoleY*u.y*v.x + inducedDipoleZ*u.x*v.y;
// real term12 = inducedDipolePolarX*u.x*v.x; real term11 = inducedDipoleX*u.x*v.x;
// result.x += term01*t.x + term11*t.y; real term02 = inducedDipolePolarY*u.y*v.x + inducedDipolePolarZ*u.x*v.y;
// result.y += term02*t.x + term12*t.y; real term12 = inducedDipolePolarX*u.x*v.x;
// } result.x += term01*t.x + term11*t.y;
// } result.y += term02*t.x + term12*t.y;
// } }
// } }
// pmeGrid[gridIndex] = result; }
// } }
//} pmeGrid[gridIndex] = result;
// }
}
extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, const real* __restrict__ pmeBsplineModuliX, extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, const real* __restrict__ pmeBsplineModuliX,
const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ, real4 periodicBoxSize, real4 invPeriodicBoxSize) { const real* __restrict__ pmeBsplineModuliY, const real* __restrict__ pmeBsplineModuliZ, real4 periodicBoxSize, real4 invPeriodicBoxSize) {
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z; const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
...@@ -362,7 +363,7 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co ...@@ -362,7 +363,7 @@ extern "C" __global__ void reciprocalConvolution(real2* __restrict__ pmeGrid, co
} }
extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict__ pmeGrid, real* __restrict__ phi, extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict__ pmeGrid, real* __restrict__ phi,
long long* __restrict__ fieldBuffers, const int4* __restrict__ igrid, const real4* __restrict__ theta1, long long* __restrict__ fieldBuffers, long long* __restrict__ fieldPolarBuffers, const int4* __restrict__ igrid, const real4* __restrict__ theta1,
const real4* __restrict__ theta2, const real4* __restrict__ theta3, const real* __restrict__ labFrameDipole, real4 invPeriodicBoxSize) { const real4* __restrict__ theta2, const real4* __restrict__ theta3, const real* __restrict__ labFrameDipole, real4 invPeriodicBoxSize) {
// extract the permanent multipole field at each site // extract the permanent multipole field at each site
...@@ -468,216 +469,224 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict ...@@ -468,216 +469,224 @@ extern "C" __global__ void computeFixedPotentialFromGrid(const real2* __restrict
phi[20*m+18] = tuv012; phi[20*m+18] = tuv012;
phi[20*m+19] = tuv111; phi[20*m+19] = tuv111;
real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT(M_PI); real dipoleScale = (4/(real) 3)*(EWALD_ALPHA*EWALD_ALPHA*EWALD_ALPHA)/SQRT(M_PI);
fieldBuffers[m] = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0xFFFFFFFF); long long fieldx = (long long) ((dipoleScale*labFrameDipole[m*3]-GRID_SIZE_X*invPeriodicBoxSize.x*tuv100)*0xFFFFFFFF);
fieldBuffers[m+PADDED_NUM_ATOMS] = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*invPeriodicBoxSize.y*tuv010)*0xFFFFFFFF); fieldBuffers[m] = fieldx;
fieldBuffers[m+2*PADDED_NUM_ATOMS] = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*invPeriodicBoxSize.z*tuv001)*0xFFFFFFFF); fieldPolarBuffers[m] = fieldx;
long long fieldy = (long long) ((dipoleScale*labFrameDipole[m*3+1]-GRID_SIZE_Y*invPeriodicBoxSize.y*tuv010)*0xFFFFFFFF);
fieldBuffers[m+PADDED_NUM_ATOMS] = fieldy;
fieldPolarBuffers[m+PADDED_NUM_ATOMS] = fieldy;
long long fieldz = (long long) ((dipoleScale*labFrameDipole[m*3+2]-GRID_SIZE_Z*invPeriodicBoxSize.z*tuv001)*0xFFFFFFFF);
fieldBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
fieldPolarBuffers[m+2*PADDED_NUM_ATOMS] = fieldz;
} }
} }
//extern "C" __global__ void kComputeInducedPotentialFromGrid_kernel() { extern "C" __global__ void computeInducedPotentialFromGrid(const real2* __restrict__ pmeGrid, real* __restrict__ phid,
// // extract the induced dipole field at each site real* __restrict__ phip, real* __restrict__ phidp, const int4* __restrict__ igrid, const real4* __restrict__ theta1,
// const real4* __restrict__ theta2, const real4* __restrict__ theta3, real4 invPeriodicBoxSize) {
// for (int m = blockIdx.x*blockDim.x+threadIdx.x; m < NUM_ATOMS; m += blockDim.x*gridDim.x) { // extract the induced dipole field at each site
// int4 gridPoint = igrid[m];
// real tuv100_1 = 0; for (int m = blockIdx.x*blockDim.x+threadIdx.x; m < NUM_ATOMS; m += blockDim.x*gridDim.x) {
// real tuv010_1 = 0; int4 gridPoint = igrid[m];
// real tuv001_1 = 0; real tuv100_1 = 0;
// real tuv200_1 = 0; real tuv010_1 = 0;
// real tuv020_1 = 0; real tuv001_1 = 0;
// real tuv002_1 = 0; real tuv200_1 = 0;
// real tuv110_1 = 0; real tuv020_1 = 0;
// real tuv101_1 = 0; real tuv002_1 = 0;
// real tuv011_1 = 0; real tuv110_1 = 0;
// real tuv100_2 = 0; real tuv101_1 = 0;
// real tuv010_2 = 0; real tuv011_1 = 0;
// real tuv001_2 = 0; real tuv100_2 = 0;
// real tuv200_2 = 0; real tuv010_2 = 0;
// real tuv020_2 = 0; real tuv001_2 = 0;
// real tuv002_2 = 0; real tuv200_2 = 0;
// real tuv110_2 = 0; real tuv020_2 = 0;
// real tuv101_2 = 0; real tuv002_2 = 0;
// real tuv011_2 = 0; real tuv110_2 = 0;
// real tuv000 = 0; real tuv101_2 = 0;
// real tuv001 = 0; real tuv011_2 = 0;
// real tuv010 = 0; real tuv000 = 0;
// real tuv100 = 0; real tuv001 = 0;
// real tuv200 = 0; real tuv010 = 0;
// real tuv020 = 0; real tuv100 = 0;
// real tuv002 = 0; real tuv200 = 0;
// real tuv110 = 0; real tuv020 = 0;
// real tuv101 = 0; real tuv002 = 0;
// real tuv011 = 0; real tuv110 = 0;
// real tuv300 = 0; real tuv101 = 0;
// real tuv030 = 0; real tuv011 = 0;
// real tuv003 = 0; real tuv300 = 0;
// real tuv210 = 0; real tuv030 = 0;
// real tuv201 = 0; real tuv003 = 0;
// real tuv120 = 0; real tuv210 = 0;
// real tuv021 = 0; real tuv201 = 0;
// real tuv102 = 0; real tuv120 = 0;
// real tuv012 = 0; real tuv021 = 0;
// real tuv111 = 0; real tuv102 = 0;
// for (int iz = 0; iz < PME_ORDER; iz++) { real tuv012 = 0;
// int k = gridPoint.z+iz-(gridPoint.z+iz >= GRID_SIZE_Z ? GRID_SIZE_Z : 0); real tuv111 = 0;
// real4 v = theta3[m*PME_ORDER+iz]; for (int iz = 0; iz < PME_ORDER; iz++) {
// real tu00_1 = 0; int k = gridPoint.z+iz-(gridPoint.z+iz >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
// real tu01_1 = 0; real4 v = theta3[m*PME_ORDER+iz];
// real tu10_1 = 0; real tu00_1 = 0;
// real tu20_1 = 0; real tu01_1 = 0;
// real tu11_1 = 0; real tu10_1 = 0;
// real tu02_1 = 0; real tu20_1 = 0;
// real tu00_2 = 0; real tu11_1 = 0;
// real tu01_2 = 0; real tu02_1 = 0;
// real tu10_2 = 0; real tu00_2 = 0;
// real tu20_2 = 0; real tu01_2 = 0;
// real tu11_2 = 0; real tu10_2 = 0;
// real tu02_2 = 0; real tu20_2 = 0;
// real tu00 = 0; real tu11_2 = 0;
// real tu10 = 0; real tu02_2 = 0;
// real tu01 = 0; real tu00 = 0;
// real tu20 = 0; real tu10 = 0;
// real tu11 = 0; real tu01 = 0;
// real tu02 = 0; real tu20 = 0;
// real tu30 = 0; real tu11 = 0;
// real tu21 = 0; real tu02 = 0;
// real tu12 = 0; real tu30 = 0;
// real tu03 = 0; real tu21 = 0;
// for (int iy = 0; iy < PME_ORDER; iy++) { real tu12 = 0;
// int j = gridPoint.y+iy-(gridPoint.y+iy >= GRID_SIZE_Y ? GRID_SIZE_Y : 0); real tu03 = 0;
// real4 u = theta2[m*PME_ORDER+iy]; for (int iy = 0; iy < PME_ORDER; iy++) {
// real t0_1 = 0; int j = gridPoint.y+iy-(gridPoint.y+iy >= GRID_SIZE_Y ? GRID_SIZE_Y : 0);
// real t1_1 = 0; real4 u = theta2[m*PME_ORDER+iy];
// real t2_1 = 0; real t0_1 = 0;
// real t0_2 = 0; real t1_1 = 0;
// real t1_2 = 0; real t2_1 = 0;
// real t2_2 = 0; real t0_2 = 0;
// real t3 = 0; real t1_2 = 0;
// for (int ix = 0; ix < PME_ORDER; ix++) { real t2_2 = 0;
// int i = gridPoint.x+ix-(gridPoint.x+ix >= GRID_SIZE_X ? GRID_SIZE_X : 0); real t3 = 0;
// int gridIndex = i*GRID_SIZE_Y*GRID_SIZE_Z + j*GRID_SIZE_Z + k; for (int ix = 0; ix < PME_ORDER; ix++) {
// real2 tq = pmeGrid[gridIndex]; int i = gridPoint.x+ix-(gridPoint.x+ix >= GRID_SIZE_X ? GRID_SIZE_X : 0);
// real4 tadd = theta1[m*PME_ORDER+ix]; int gridIndex = i*GRID_SIZE_Y*GRID_SIZE_Z + j*GRID_SIZE_Z + k;
// t0_1 += tq.x*tadd.x; real2 tq = pmeGrid[gridIndex];
// t1_1 += tq.x*tadd.y; real4 tadd = theta1[m*PME_ORDER+ix];
// t2_1 += tq.x*tadd.z; t0_1 += tq.x*tadd.x;
// t0_2 += tq.y*tadd.x; t1_1 += tq.x*tadd.y;
// t1_2 += tq.y*tadd.y; t2_1 += tq.x*tadd.z;
// t2_2 += tq.y*tadd.z; t0_2 += tq.y*tadd.x;
// t3 += (tq.x+tq.y)*tadd.w; t1_2 += tq.y*tadd.y;
// } t2_2 += tq.y*tadd.z;
// tu00_1 += t0_1*u.x; t3 += (tq.x+tq.y)*tadd.w;
// tu10_1 += t1_1*u.x; }
// tu01_1 += t0_1*u.y; tu00_1 += t0_1*u.x;
// tu20_1 += t2_1*u.x; tu10_1 += t1_1*u.x;
// tu11_1 += t1_1*u.y; tu01_1 += t0_1*u.y;
// tu02_1 += t0_1*u.z; tu20_1 += t2_1*u.x;
// tu00_2 += t0_2*u.x; tu11_1 += t1_1*u.y;
// tu10_2 += t1_2*u.x; tu02_1 += t0_1*u.z;
// tu01_2 += t0_2*u.y; tu00_2 += t0_2*u.x;
// tu20_2 += t2_2*u.x; tu10_2 += t1_2*u.x;
// tu11_2 += t1_2*u.y; tu01_2 += t0_2*u.y;
// tu02_2 += t0_2*u.z; tu20_2 += t2_2*u.x;
// real t0 = t0_1 + t0_2; tu11_2 += t1_2*u.y;
// real t1 = t1_1 + t1_2; tu02_2 += t0_2*u.z;
// real t2 = t2_1 + t2_2; real t0 = t0_1 + t0_2;
// tu00 += t0*u.x; real t1 = t1_1 + t1_2;
// tu10 += t1*u.x; real t2 = t2_1 + t2_2;
// tu01 += t0*u.y; tu00 += t0*u.x;
// tu20 += t2*u.x; tu10 += t1*u.x;
// tu11 += t1*u.y; tu01 += t0*u.y;
// tu02 += t0*u.z; tu20 += t2*u.x;
// tu30 += t3*u.x; tu11 += t1*u.y;
// tu21 += t2*u.y; tu02 += t0*u.z;
// tu12 += t1*u.z; tu30 += t3*u.x;
// tu03 += t0*u.w; tu21 += t2*u.y;
// } tu12 += t1*u.z;
// tuv100_1 += tu10_1*v.x; tu03 += t0*u.w;
// tuv010_1 += tu01_1*v.x; }
// tuv001_1 += tu00_1*v.y; tuv100_1 += tu10_1*v.x;
// tuv200_1 += tu20_1*v.x; tuv010_1 += tu01_1*v.x;
// tuv020_1 += tu02_1*v.x; tuv001_1 += tu00_1*v.y;
// tuv002_1 += tu00_1*v.z; tuv200_1 += tu20_1*v.x;
// tuv110_1 += tu11_1*v.x; tuv020_1 += tu02_1*v.x;
// tuv101_1 += tu10_1*v.y; tuv002_1 += tu00_1*v.z;
// tuv011_1 += tu01_1*v.y; tuv110_1 += tu11_1*v.x;
// tuv100_2 += tu10_2*v.x; tuv101_1 += tu10_1*v.y;
// tuv010_2 += tu01_2*v.x; tuv011_1 += tu01_1*v.y;
// tuv001_2 += tu00_2*v.y; tuv100_2 += tu10_2*v.x;
// tuv200_2 += tu20_2*v.x; tuv010_2 += tu01_2*v.x;
// tuv020_2 += tu02_2*v.x; tuv001_2 += tu00_2*v.y;
// tuv002_2 += tu00_2*v.z; tuv200_2 += tu20_2*v.x;
// tuv110_2 += tu11_2*v.x; tuv020_2 += tu02_2*v.x;
// tuv101_2 += tu10_2*v.y; tuv002_2 += tu00_2*v.z;
// tuv011_2 += tu01_2*v.y; tuv110_2 += tu11_2*v.x;
// tuv000 += tu00*v.x; tuv101_2 += tu10_2*v.y;
// tuv100 += tu10*v.x; tuv011_2 += tu01_2*v.y;
// tuv010 += tu01*v.x; tuv000 += tu00*v.x;
// tuv001 += tu00*v.y; tuv100 += tu10*v.x;
// tuv200 += tu20*v.x; tuv010 += tu01*v.x;
// tuv020 += tu02*v.x; tuv001 += tu00*v.y;
// tuv002 += tu00*v.z; tuv200 += tu20*v.x;
// tuv110 += tu11*v.x; tuv020 += tu02*v.x;
// tuv101 += tu10*v.y; tuv002 += tu00*v.z;
// tuv011 += tu01*v.y; tuv110 += tu11*v.x;
// tuv300 += tu30*v.x; tuv101 += tu10*v.y;
// tuv030 += tu03*v.x; tuv011 += tu01*v.y;
// tuv003 += tu00*v.w; tuv300 += tu30*v.x;
// tuv210 += tu21*v.x; tuv030 += tu03*v.x;
// tuv201 += tu20*v.y; tuv003 += tu00*v.w;
// tuv120 += tu12*v.x; tuv210 += tu21*v.x;
// tuv021 += tu02*v.y; tuv201 += tu20*v.y;
// tuv102 += tu10*v.z; tuv120 += tu12*v.x;
// tuv012 += tu01*v.z; tuv021 += tu02*v.y;
// tuv111 += tu11*v.y; tuv102 += tu10*v.z;
// } tuv012 += tu01*v.z;
// phid[10*m] = 0; tuv111 += tu11*v.y;
// phid[10*m+1] = tuv100_1; }
// phid[10*m+2] = tuv010_1; phid[10*m] = 0;
// phid[10*m+3] = tuv001_1; phid[10*m+1] = tuv100_1;
// phid[10*m+4] = tuv200_1; phid[10*m+2] = tuv010_1;
// phid[10*m+5] = tuv020_1; phid[10*m+3] = tuv001_1;
// phid[10*m+6] = tuv002_1; phid[10*m+4] = tuv200_1;
// phid[10*m+7] = tuv110_1; phid[10*m+5] = tuv020_1;
// phid[10*m+8] = tuv101_1; phid[10*m+6] = tuv002_1;
// phid[10*m+9] = tuv011_1; phid[10*m+7] = tuv110_1;
// phid[10*m+8] = tuv101_1;
// phip[10*m] = 0; phid[10*m+9] = tuv011_1;
// phip[10*m+1] = tuv100_2;
// phip[10*m+2] = tuv010_2; phip[10*m] = 0;
// phip[10*m+3] = tuv001_2; phip[10*m+1] = tuv100_2;
// phip[10*m+4] = tuv200_2; phip[10*m+2] = tuv010_2;
// phip[10*m+5] = tuv020_2; phip[10*m+3] = tuv001_2;
// phip[10*m+6] = tuv002_2; phip[10*m+4] = tuv200_2;
// phip[10*m+7] = tuv110_2; phip[10*m+5] = tuv020_2;
// phip[10*m+8] = tuv101_2; phip[10*m+6] = tuv002_2;
// phip[10*m+9] = tuv011_2; phip[10*m+7] = tuv110_2;
// phip[10*m+8] = tuv101_2;
// phidp[20*m] = tuv000; phip[10*m+9] = tuv011_2;
// phidp[20*m+1] = tuv100;
// phidp[20*m+2] = tuv010; phidp[20*m] = tuv000;
// phidp[20*m+3] = tuv001; phidp[20*m+1] = tuv100;
// phidp[20*m+4] = tuv200; phidp[20*m+2] = tuv010;
// phidp[20*m+5] = tuv020; phidp[20*m+3] = tuv001;
// phidp[20*m+6] = tuv002; phidp[20*m+4] = tuv200;
// phidp[20*m+7] = tuv110; phidp[20*m+5] = tuv020;
// phidp[20*m+8] = tuv101; phidp[20*m+6] = tuv002;
// phidp[20*m+9] = tuv011; phidp[20*m+7] = tuv110;
// phidp[20*m+10] = tuv300; phidp[20*m+8] = tuv101;
// phidp[20*m+11] = tuv030; phidp[20*m+9] = tuv011;
// phidp[20*m+12] = tuv003; phidp[20*m+10] = tuv300;
// phidp[20*m+13] = tuv210; phidp[20*m+11] = tuv030;
// phidp[20*m+14] = tuv201; phidp[20*m+12] = tuv003;
// phidp[20*m+15] = tuv120; phidp[20*m+13] = tuv210;
// phidp[20*m+16] = tuv021; phidp[20*m+14] = tuv201;
// phidp[20*m+17] = tuv102; phidp[20*m+15] = tuv120;
// phidp[20*m+18] = tuv012; phidp[20*m+16] = tuv021;
// phidp[20*m+19] = tuv111; phidp[20*m+17] = tuv102;
// } phidp[20*m+18] = tuv012;
//} phidp[20*m+19] = tuv111;
}
}
extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers, extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole, long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
const real* __restrict__ labFrameQuadrupole, const real* __restrict__ phi, real4 periodicBoxSize, real4 invPeriodicBoxSize) { const real* __restrict__ labFrameQuadrupole, const real* __restrict__ phi_global, real4 invPeriodicBoxSize) {
real multipole[10]; real multipole[10];
const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19}; const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16}; const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
...@@ -700,22 +709,22 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -700,22 +709,22 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
multipole[8] = 2*labFrameQuadrupole[i*5+2]; multipole[8] = 2*labFrameQuadrupole[i*5+2];
multipole[9] = 2*labFrameQuadrupole[i*5+4]; multipole[9] = 2*labFrameQuadrupole[i*5+4];
const real* atomPhi = &phi[20*i]; const real* phi = &phi_global[20*i];
torqueBuffers[i] = (long long) (EPSILON_FACTOR*(multipole[3]*yscale*atomPhi[2] - multipole[2]*zscale*atomPhi[3] torqueBuffers[i] = (long long) (EPSILON_FACTOR*(multipole[3]*yscale*phi[2] - multipole[2]*zscale*phi[3]
+ 2*(multipole[6]-multipole[5])*yscale*zscale*atomPhi[9] + 2*(multipole[6]-multipole[5])*yscale*zscale*phi[9]
+ multipole[8]*xscale*yscale*atomPhi[7] + multipole[9]*yscale*yscale*atomPhi[5] + multipole[8]*xscale*yscale*phi[7] + multipole[9]*yscale*yscale*phi[5]
- multipole[7]*xscale*zscale*atomPhi[8] - multipole[9]*zscale*zscale*atomPhi[6])*0xFFFFFFFF); - multipole[7]*xscale*zscale*phi[8] - multipole[9]*zscale*zscale*phi[6])*0xFFFFFFFF);
torqueBuffers[i+PADDED_NUM_ATOMS] = (long long) (EPSILON_FACTOR*(multipole[1]*zscale*atomPhi[3] - multipole[3]*xscale*atomPhi[1] torqueBuffers[i+PADDED_NUM_ATOMS] = (long long) (EPSILON_FACTOR*(multipole[1]*zscale*phi[3] - multipole[3]*xscale*phi[1]
+ 2*(multipole[4]-multipole[6])*xscale*zscale*atomPhi[8] + 2*(multipole[4]-multipole[6])*xscale*zscale*phi[8]
+ multipole[7]*yscale*zscale*atomPhi[9] + multipole[8]*zscale*zscale*atomPhi[6] + multipole[7]*yscale*zscale*phi[9] + multipole[8]*zscale*zscale*phi[6]
- multipole[8]*xscale*xscale*atomPhi[4] - multipole[9]*xscale*yscale*atomPhi[7])*0xFFFFFFFF); - multipole[8]*xscale*xscale*phi[4] - multipole[9]*xscale*yscale*phi[7])*0xFFFFFFFF);
torqueBuffers[i+PADDED_NUM_ATOMS*2] = (long long) (EPSILON_FACTOR*(multipole[2]*xscale*atomPhi[1] - multipole[1]*yscale*atomPhi[2] torqueBuffers[i+PADDED_NUM_ATOMS*2] = (long long) (EPSILON_FACTOR*(multipole[2]*xscale*phi[1] - multipole[1]*yscale*phi[2]
+ 2*(multipole[5]-multipole[4])*xscale*yscale*atomPhi[7] + 2*(multipole[5]-multipole[4])*xscale*yscale*phi[7]
+ multipole[7]*xscale*xscale*atomPhi[4] + multipole[9]*xscale*zscale*atomPhi[8] + multipole[7]*xscale*xscale*phi[4] + multipole[9]*xscale*zscale*phi[8]
- multipole[7]*yscale*yscale*atomPhi[5] - multipole[8]*yscale*zscale*atomPhi[9])*0xFFFFFFFF); - multipole[7]*yscale*yscale*phi[5] - multipole[8]*yscale*zscale*phi[9])*0xFFFFFFFF);
// Compute the force and energy. // Compute the force and energy.
...@@ -731,10 +740,10 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -731,10 +740,10 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
real4 f = make_real4(0, 0, 0, 0); real4 f = make_real4(0, 0, 0, 0);
for (int k = 0; k < 10; k++) { for (int k = 0; k < 10; k++) {
energy += multipole[k]*atomPhi[k]; energy += multipole[k]*phi[k];
f.x += multipole[k]*atomPhi[deriv1[k]]; f.x += multipole[k]*phi[deriv1[k]];
f.y += multipole[k]*atomPhi[deriv2[k]]; f.y += multipole[k]*phi[deriv2[k]];
f.z += multipole[k]*atomPhi[deriv3[k]]; f.z += multipole[k]*phi[deriv3[k]];
} }
f.x *= EPSILON_FACTOR*xscale; f.x *= EPSILON_FACTOR*xscale;
f.y *= EPSILON_FACTOR*yscale; f.y *= EPSILON_FACTOR*yscale;
...@@ -746,141 +755,125 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict ...@@ -746,141 +755,125 @@ extern "C" __global__ void computeFixedMultipoleForceAndEnergy(real4* __restrict
energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
} }
//extern "C" __global__ void kComputeInducedDipoleForceAndEnergy_kernel() { extern "C" __global__ void computeInducedDipoleForceAndEnergy(real4* __restrict__ posq, unsigned long long* __restrict__ forceBuffers,
// real multipole[10]; long long* __restrict__ torqueBuffers, real* __restrict__ energyBuffer, const real* __restrict__ labFrameDipole,
// real inducedDipole[3]; const real* __restrict__ labFrameQuadrupole, const real* __restrict__ inducedDipole_global, const real* __restrict__ inducedDipolePolar_global,
// real inducedDipolePolar[3]; const real* __restrict__ phi_global, const real* __restrict__ phid_global, const real* __restrict__ phip_global,
// real scales[3]; const real* __restrict__ phidp_global, real4 invPeriodicBoxSize) {
// const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19}; real multipole[10];
// const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16}; real inducedDipole[3];
// const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18}; real inducedDipolePolar[3];
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x; real scales[3];
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y; const int deriv1[] = {1, 4, 7, 8, 10, 15, 17, 13, 14, 19};
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z; const int deriv2[] = {2, 7, 5, 9, 13, 11, 18, 15, 19, 16};
// scales[0] = xscale; const int deriv3[] = {3, 8, 9, 6, 14, 16, 12, 19, 17, 18};
// scales[1] = yscale; const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// scales[2] = zscale; const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
// real energy = 0; const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { scales[0] = xscale;
// // Compute the torque. scales[1] = yscale;
// scales[2] = zscale;
// multipole[0] = posq[i].w; real energy = 0;
// multipole[1] = labFrameDipole[i*3]; for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// multipole[2] = labFrameDipole[i*3+1]; // Compute the torque.
// multipole[3] = labFrameDipole[i*3+2];
// multipole[4] = labFrameQuadrupole[i*5]; multipole[0] = posq[i].w;
// multipole[5] = labFrameQuadrupole[i*5+3]; multipole[1] = labFrameDipole[i*3];
// multipole[6] = -(multipole[4]+multipole[5]); multipole[2] = labFrameDipole[i*3+1];
// multipole[7] = 2*labFrameQuadrupole[i*5+1]; multipole[3] = labFrameDipole[i*3+2];
// multipole[8] = 2*labFrameQuadrupole[i*5+2]; multipole[4] = labFrameQuadrupole[i*5];
// multipole[9] = 2*labFrameQuadrupole[i*5+4]; multipole[5] = labFrameQuadrupole[i*5+3];
// real* phidp = &cAmoebaSim.pPhidp[20*i]; multipole[6] = -(multipole[4]+multipole[5]);
// multipole[7] = 2*labFrameQuadrupole[i*5+1];
// cAmoebaSim.pTorque[3*i] += 0.5f*EPSILON_FACTOR*(multipole[3]*yscale*phidp[2] - multipole[2]*zscale*phidp[3] multipole[8] = 2*labFrameQuadrupole[i*5+2];
// + 2*(multipole[6]-multipole[5])*yscale*zscale*phidp[9] multipole[9] = 2*labFrameQuadrupole[i*5+4];
// + multipole[8]*xscale*yscale*phidp[7] + multipole[9]*yscale*yscale*phidp[5] const real* phidp = &phidp_global[20*i];
// - multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6]);
// torqueBuffers[i] += (long long) (0.5f*EPSILON_FACTOR*(multipole[3]*yscale*phidp[2] - multipole[2]*zscale*phidp[3]
// cAmoebaSim.pTorque[3*i+1] += 0.5f*EPSILON_FACTOR*(multipole[1]*zscale*phidp[3] - multipole[3]*xscale*phidp[1] + 2*(multipole[6]-multipole[5])*yscale*zscale*phidp[9]
// + 2*(multipole[4]-multipole[6])*xscale*zscale*phidp[8] + multipole[8]*xscale*yscale*phidp[7] + multipole[9]*yscale*yscale*phidp[5]
// + multipole[7]*yscale*zscale*phidp[9] + multipole[8]*zscale*zscale*phidp[6] - multipole[7]*xscale*zscale*phidp[8] - multipole[9]*zscale*zscale*phidp[6])*0xFFFFFFFF);
// - multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7]);
// torqueBuffers[i+PADDED_NUM_ATOMS] += (long long) (0.5f*EPSILON_FACTOR*(multipole[1]*zscale*phidp[3] - multipole[3]*xscale*phidp[1]
// cAmoebaSim.pTorque[3*i+2] += 0.5f*EPSILON_FACTOR*(multipole[2]*xscale*phidp[1] - multipole[1]*yscale*phidp[2] + 2*(multipole[4]-multipole[6])*xscale*zscale*phidp[8]
// + 2*(multipole[5]-multipole[4])*xscale*yscale*phidp[7] + multipole[7]*yscale*zscale*phidp[9] + multipole[8]*zscale*zscale*phidp[6]
// + multipole[7]*xscale*xscale*phidp[4] + multipole[9]*xscale*zscale*phidp[8] - multipole[8]*xscale*xscale*phidp[4] - multipole[9]*xscale*yscale*phidp[7])*0xFFFFFFFF);
// - multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9]);
// torqueBuffers[i+PADDED_NUM_ATOMS*2] += (long long) (0.5f*EPSILON_FACTOR*(multipole[2]*xscale*phidp[1] - multipole[1]*yscale*phidp[2]
// // Compute the force and energy. + 2*(multipole[5]-multipole[4])*xscale*yscale*phidp[7]
// + multipole[7]*xscale*xscale*phidp[4] + multipole[9]*xscale*zscale*phidp[8]
// multipole[1] *= xscale; - multipole[7]*yscale*yscale*phidp[5] - multipole[8]*yscale*zscale*phidp[9])*0xFFFFFFFF);
// multipole[2] *= yscale;
// multipole[3] *= zscale; // Compute the force and energy.
// multipole[4] *= xscale*xscale;
// multipole[5] *= yscale*yscale; multipole[1] *= xscale;
// multipole[6] *= zscale*zscale; multipole[2] *= yscale;
// multipole[7] *= xscale*yscale; multipole[3] *= zscale;
// multipole[8] *= xscale*zscale; multipole[4] *= xscale*xscale;
// multipole[9] *= yscale*zscale; multipole[5] *= yscale*yscale;
// multipole[6] *= zscale*zscale;
// inducedDipole[0] = cAmoebaSim.pInducedDipole[i*3]; multipole[7] *= xscale*yscale;
// inducedDipole[1] = cAmoebaSim.pInducedDipole[i*3+1]; multipole[8] *= xscale*zscale;
// inducedDipole[2] = cAmoebaSim.pInducedDipole[i*3+2]; multipole[9] *= yscale*zscale;
// inducedDipolePolar[0] = cAmoebaSim.pInducedDipolePolar[i*3];
// inducedDipolePolar[1] = cAmoebaSim.pInducedDipolePolar[i*3+1]; inducedDipole[0] = inducedDipole_global[i*3];
// inducedDipolePolar[2] = cAmoebaSim.pInducedDipolePolar[i*3+2]; inducedDipole[1] = inducedDipole_global[i*3+1];
// real* phi = &cAmoebaSim.pPhi[20*i]; inducedDipole[2] = inducedDipole_global[i*3+2];
// real* phip = &cAmoebaSim.pPhip[10*i]; inducedDipolePolar[0] = inducedDipolePolar_global[i*3];
// real* phid = &cAmoebaSim.pPhid[10*i]; inducedDipolePolar[1] = inducedDipolePolar_global[i*3+1];
// real4 f = make_real4(0, 0, 0, 0); inducedDipolePolar[2] = inducedDipolePolar_global[i*3+2];
// const real* phi = &phi_global[20*i];
// energy += GRID_SIZE_X*invPeriodicBoxSize.x*inducedDipole[0]*phi[1]; const real* phip = &phip_global[10*i];
// energy += GRID_SIZE_Y*invPeriodicBoxSize.y*inducedDipole[1]*phi[2]; const real* phid = &phid_global[10*i];
// energy += GRID_SIZE_Z*invPeriodicBoxSize.z*inducedDipole[2]*phi[3]; real4 f = make_real4(0, 0, 0, 0);
//
// for (int k = 0; k < 3; k++) { energy += GRID_SIZE_X*invPeriodicBoxSize.x*inducedDipole[0]*phi[1];
// energy += GRID_SIZE_Y*invPeriodicBoxSize.y*inducedDipole[1]*phi[2];
// int j1 = deriv1[k+1]; energy += GRID_SIZE_Z*invPeriodicBoxSize.z*inducedDipole[2]*phi[3];
// int j2 = deriv2[k+1];
// int j3 = deriv3[k+1]; for (int k = 0; k < 3; k++) {
// int j1 = deriv1[k+1];
// f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1]*(scales[k]/xscale); int j2 = deriv2[k+1];
// f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2]*(scales[k]/yscale); int j3 = deriv3[k+1];
// f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3]*(scales[k]/zscale); f.x += (inducedDipole[k]+inducedDipolePolar[k])*phi[j1]*(scales[k]/xscale);
// f.y += (inducedDipole[k]+inducedDipolePolar[k])*phi[j2]*(scales[k]/yscale);
// if( cAmoebaSim.polarizationType == 0 ) f.z += (inducedDipole[k]+inducedDipolePolar[k])*phi[j3]*(scales[k]/zscale);
// { #ifndef DIRECT_POLARIZATION
// f.x += (inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1])*(scales[k]/xscale); f.x += (inducedDipole[k]*phip[j1] + inducedDipolePolar[k]*phid[j1])*(scales[k]/xscale);
// f.y += (inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2])*(scales[k]/yscale); f.y += (inducedDipole[k]*phip[j2] + inducedDipolePolar[k]*phid[j2])*(scales[k]/yscale);
// f.z += (inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3])*(scales[k]/zscale); f.z += (inducedDipole[k]*phip[j3] + inducedDipolePolar[k]*phid[j3])*(scales[k]/zscale);
// } #endif
// }
//
// } f.x *= GRID_SIZE_X*invPeriodicBoxSize.x;
// f.y *= GRID_SIZE_Y*invPeriodicBoxSize.y;
// f.x *= GRID_SIZE_X*invPeriodicBoxSize.x; f.z *= GRID_SIZE_Z*invPeriodicBoxSize.z;
// f.y *= GRID_SIZE_Y*invPeriodicBoxSize.y; for (int k = 0; k < 10; k++) {
// f.z *= GRID_SIZE_Z*invPeriodicBoxSize.z; f.x += multipole[k]*phidp[deriv1[k]];
// for (int k = 0; k < 10; k++) { f.y += multipole[k]*phidp[deriv2[k]];
// f.x += multipole[k]*phidp[deriv1[k]]; f.z += multipole[k]*phidp[deriv3[k]];
// f.y += multipole[k]*phidp[deriv2[k]]; }
// f.z += multipole[k]*phidp[deriv3[k]]; f.x *= 0.5f*EPSILON_FACTOR*xscale;
// } f.y *= 0.5f*EPSILON_FACTOR*yscale;
// f.z *= 0.5f*EPSILON_FACTOR*zscale;
// f.x *= 0.5f*EPSILON_FACTOR*xscale; forceBuffers[i] -= static_cast<unsigned long long>((long long) (f.x*0xFFFFFFFF));
// f.y *= 0.5f*EPSILON_FACTOR*yscale; forceBuffers[i+PADDED_NUM_ATOMS] -= static_cast<unsigned long long>((long long) (f.y*0xFFFFFFFF));
// f.z *= 0.5f*EPSILON_FACTOR*zscale; forceBuffers[i+PADDED_NUM_ATOMS*2] -= static_cast<unsigned long long>((long long) (f.z*0xFFFFFFFF));
// }
// real4 force = cSim.pForce4[i]; energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy;
// force.x -= f.x; }
// force.y -= f.y;
// force.z -= f.z; extern "C" __global__ void recordInducedFieldDipoles(const real* __restrict__ phid, real* const __restrict__ phip,
// cSim.pForce4[i] = force; real* __restrict__ inducedDipole, real* __restrict__ inducedDipolePolar, real4 invPeriodicBoxSize) {
// } real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// cSim.pEnergy[blockIdx.x*blockDim.x+threadIdx.x] += 0.5f*EPSILON_FACTOR*energy; real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
//} real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
//extern "C" __global__ void kRecordFixedMultipoleField_kernel(real* output) { inducedDipole[3*i] -= xscale*phid[10*i+1];
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x; inducedDipole[3*i+1] -= yscale*phid[10*i+2];
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y; inducedDipole[3*i+2] -= zscale*phid[10*i+3];
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z; inducedDipolePolar[3*i] -= xscale*phip[10*i+1];
// for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) { inducedDipolePolar[3*i+1] -= yscale*phip[10*i+2];
// output[3*i] = -xscale*cAmoebaSim.pPhi[20*i+1]; inducedDipolePolar[3*i+2] -= zscale*phip[10*i+3];
// output[3*i+1] = -yscale*cAmoebaSim.pPhi[20*i+2]; }
// output[3*i+2] = -zscale*cAmoebaSim.pPhi[20*i+3]; }
// }
//}
//
//extern "C" __global__ void kRecordInducedDipoleField_kernel(real* output, real* outputPolar) {
// const real xscale = GRID_SIZE_X*invPeriodicBoxSize.x;
// const real yscale = GRID_SIZE_Y*invPeriodicBoxSize.y;
// const real zscale = GRID_SIZE_Z*invPeriodicBoxSize.z;
// for (int i = blockIdx.x*blockDim.x+threadIdx.x; i < NUM_ATOMS; i += blockDim.x*gridDim.x) {
// output[3*i] -= xscale*cAmoebaSim.pPhid[10*i+1];
// output[3*i+1] -= yscale*cAmoebaSim.pPhid[10*i+2];
// output[3*i+2] -= zscale*cAmoebaSim.pPhid[10*i+3];
// outputPolar[3*i] -= xscale*cAmoebaSim.pPhip[10*i+1];
// outputPolar[3*i+1] -= yscale*cAmoebaSim.pPhip[10*i+2];
// outputPolar[3*i+2] -= zscale*cAmoebaSim.pPhip[10*i+3];
// }
//}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment