Commit f352d116 authored by Peter Eastman
Browse files

Continuing to convert AmoebaMultipoleForce: PME with direct polarization now works

parent 7a60fd73
...@@ -1166,11 +1166,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -1166,11 +1166,13 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines"); pmeUpdateBsplinesKernel = cu.getKernel(module, "updateBsplines");
pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid"); pmeAtomRangeKernel = cu.getKernel(module, "findAtomRangeForGrid");
pmeSpreadFixedMultipolesKernel = cu.getKernel(module, "gridSpreadFixedMultipoles"); pmeSpreadFixedMultipolesKernel = cu.getKernel(module, "gridSpreadFixedMultipoles");
pmeSpreadInducedDipolesKernel = cu.getKernel(module, "gridSpreadInducedDipoles");
pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution"); pmeConvolutionKernel = cu.getKernel(module, "reciprocalConvolution");
pmeFixedPotentialKernel = cu.getKernel(module, "computeFixedPotentialFromGrid"); pmeFixedPotentialKernel = cu.getKernel(module, "computeFixedPotentialFromGrid");
pmeInducedPotentialKernel = cu.getKernel(module, "computeInducedPotentialFromGrid");
pmeFixedForceKernel = cu.getKernel(module, "computeFixedMultipoleForceAndEnergy"); pmeFixedForceKernel = cu.getKernel(module, "computeFixedMultipoleForceAndEnergy");
// pmeInterpolateForceKernel = cu.getKernel(module, "gridInterpolateForce"); pmeInducedForceKernel = cu.getKernel(module, "computeInducedDipoleForceAndEnergy");
// pmeFinishSpreadChargeKernel = cu.getKernel(module, "finishSpreadCharge"); pmeRecordInducedFieldDipolesKernel = cu.getKernel(module, "recordInducedFieldDipoles");
// cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1); // cuFuncSetCacheConfig(pmeInterpolateForceKernel, CU_FUNC_CACHE_PREFER_L1);
// Create required data structures. // Create required data structures.
...@@ -1415,12 +1417,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1415,12 +1417,15 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
// Map torques to force.
void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()}; &cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()};
cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms()); cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms());
} }
else { else {
// Compute induced dipoles. // Reciprocal space calculation.
unsigned int maxTiles = nb.getInteractingTiles().getSize(); unsigned int maxTiles = nb.getInteractingTiles().getSize();
void* pmeUpdateBsplinesArgs[] = {&cu.getPosq().getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), void* pmeUpdateBsplinesArgs[] = {&cu.getPosq().getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(),
...@@ -1433,9 +1438,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1433,9 +1438,8 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
cu.executeKernel(pmeAtomRangeKernel, pmeAtomRangeArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize); cu.executeKernel(pmeAtomRangeKernel, pmeAtomRangeArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize);
void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), void* pmeSpreadFixedMultipolesArgs[] = {&cu.getPosq().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(), &pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(),
&pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), cu.getPeriodicBoxSizePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.getInvPeriodicBoxSizePointer()}; cu.executeKernel(pmeSpreadFixedMultipolesKernel, pmeSpreadFixedMultipolesArgs, cu.getNumAtoms());
cu.executeKernel(pmeSpreadFixedMultipolesKernel, pmeSpreadFixedMultipolesArgs, cu.getNumAtoms(), cu.ThreadBlockSize, cu.ThreadBlockSize*PmeOrder*PmeOrder*elementSize);
if (cu.getUseDoublePrecision()) if (cu.getUseDoublePrecision())
cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD); cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
else else
...@@ -1448,23 +1452,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1448,23 +1452,16 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
else else
cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE); cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
void* pmeFixedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhi->getDevicePointer(), &field->getDevicePointer(), void* pmeFixedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhi->getDevicePointer(), &field->getDevicePointer(),
&pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), &fieldPolar ->getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(),
&labFrameDipoles->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()}; &pmeTheta3->getDevicePointer(), &labFrameDipoles->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeFixedPotentialKernel, pmeFixedPotentialArgs, cu.getNumAtoms()); cu.executeKernel(pmeFixedPotentialKernel, pmeFixedPotentialArgs, cu.getNumAtoms());
void* pmeFixedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(), void* pmeFixedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
&pmePhi->getDevicePointer(), cu.getPeriodicBoxSizePointer(), cu.getInvPeriodicBoxSizePointer()}; &pmePhi->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
cu.executeKernel(pmeFixedForceKernel, pmeFixedForceArgs, cu.getNumAtoms()); cu.executeKernel(pmeFixedForceKernel, pmeFixedForceArgs, cu.getNumAtoms());
printf("reciprocal:\n");
vector<long long> f; // Direct space calculation.
printf("force\n");
cu.getForce().download(f);
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF);
// printf("torque\n");
// torque->download(f);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF);
void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(), void* computeFixedFieldArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), &cu.getPosq().getDevicePointer(),
&nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(), &nb.getExclusionIndices().getDevicePointer(), &nb.getExclusionRowIndices().getDevicePointer(),
&covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices, &covalentFlags->getDevicePointer(), &polarizationGroupFlags->getDevicePointer(), &startTileIndex, &numTileIndices,
...@@ -1475,38 +1472,29 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1475,38 +1472,29 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()}; &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms()); cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
printf("direct:\n");
printf("force\n"); // Reciprocal space calculation for the induced dipoles.
cu.getForce().download(f);
for (int i = 0; i < cu.getNumAtoms(); i++) void* pmeSpreadInducedDipolesArgs[] = {&cu.getPosq().getDevicePointer(), &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(),
printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF); &pmeGrid->getDevicePointer(), &pmeAtomGridIndex->getDevicePointer(), &pmeAtomRange->getDevicePointer(),
// printf("torque\n"); &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(), &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// torque->download(f); cu.executeKernel(pmeSpreadInducedDipolesKernel, pmeSpreadInducedDipolesArgs, cu.getNumAtoms());
// for (int i = 0; i < cu.getNumAtoms(); i++) if (cu.getUseDoublePrecision())
// printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF); cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
// vector<float> d, dp; else
// printf("phi\n"); cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_FORWARD);
// pmePhi->download(d); cu.executeKernel(pmeConvolutionKernel, pmeConvolutionArgs, cu.getNumAtoms());
// for (int i = 0; i < d.size(); i++) if (cu.getUseDoublePrecision())
// printf("%d: %g\n", i, d[i]); cufftExecZ2Z(fft, (double2*) pmeGrid->getDevicePointer(), (double2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
// printf("dipoles\n"); else
// labFrameDipoles->download(d); cufftExecC2C(fft, (float2*) pmeGrid->getDevicePointer(), (float2*) pmeGrid->getDevicePointer(), CUFFT_INVERSE);
// for (int i = 0; i < cu.getNumAtoms(); i++) void* pmeInducedPotentialArgs[] = {&pmeGrid->getDevicePointer(), &pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// printf("%d: %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2]); &pmePhidp->getDevicePointer(), &pmeIgrid->getDevicePointer(), &pmeTheta1->getDevicePointer(), &pmeTheta2->getDevicePointer(),
// printf("quadrupoles\n"); &pmeTheta3->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// labFrameQuadrupoles->download(d); cu.executeKernel(pmeInducedPotentialKernel, pmeInducedPotentialArgs, cu.getNumAtoms());
// for (int i = 0; i < cu.getNumAtoms(); i++) // void* pmeRecordInducedFieldDipolesArgs[] = {&pmePhid->getDevicePointer(), &pmePhip->getDevicePointer(),
// printf("%d: %g %g %g %g %g %g\n", i, d[5*i], d[5*i+1], d[5*i+2], d[5*i+3], d[5*i+4], -(d[5*i]+d[5*i+3])); // &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
// printf("induced dipoles\n"); // cu.executeKernel(pmeRecordInducedFieldDipolesKernel, pmeRecordInducedFieldDipolesArgs, cu.getNumAtoms());
// inducedDipole->download(d);
// inducedDipolePolar->download(dp);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g, %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2], dp[3*i], dp[3*i+1], dp[3*i+2]);
// printf("positions\n");
// vector<float4> p;
// cu.getPosq().download(p);
// for (int i = 0; i < cu.getNumAtoms(); i++)
// printf("%d: %g %g %g %g\n", i, p[i].x, p[i].y, p[i].z, p[i].w);
// vector<float2> errors; // vector<float2> errors;
...@@ -1541,11 +1529,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1541,11 +1529,14 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &inducedDipolePolar->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(electrostaticsKernel, electrostaticsArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
printf("electrostatic:\n"); void* pmeInducedForceArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
printf("force\n"); &cu.getEnergyBuffer().getDevicePointer(), &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(),
cu.getForce().download(f); &inducedDipole->getDevicePointer(), &inducedDipolePolar->getDevicePointer(), &pmePhi->getDevicePointer(), &pmePhid->getDevicePointer(),
for (int i = 0; i < cu.getNumAtoms(); i++) &pmePhip->getDevicePointer(), &pmePhidp->getDevicePointer(), cu.getInvPeriodicBoxSizePointer()};
printf("%d: %g %g %g\n", i, f[i]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()]/(double) 0xFFFFFFFF, f[i+cu.getPaddedNumAtoms()*2]/(double) 0xFFFFFFFF); cu.executeKernel(pmeInducedForceKernel, pmeInducedForceArgs, cu.getNumAtoms());
// Map torques to force.
void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(), void* mapTorqueArgs[] = {&cu.getForce().getDevicePointer(), &torque->getDevicePointer(),
&cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()}; &cu.getPosq().getDevicePointer(), &multipoleParticles->getDevicePointer()};
cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms()); cu.executeKernel(mapTorqueKernel, mapTorqueArgs, cu.getNumAtoms());
......
...@@ -425,7 +425,8 @@ private: ...@@ -425,7 +425,8 @@ private:
CudaSort* sort; CudaSort* sort;
cufftHandle fft; cufftHandle fft;
CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel; CUfunction computeMomentsKernel, recordInducedDipolesKernel, computeFixedFieldKernel, computeInducedFieldKernel, updateInducedFieldKernel, electrostaticsKernel, mapTorqueKernel;
CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeSpreadFixedMultipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeFixedForceKernel; CUfunction pmeUpdateBsplinesKernel, pmeAtomRangeKernel, pmeSpreadFixedMultipolesKernel, pmeSpreadInducedDipolesKernel, pmeConvolutionKernel, pmeFixedPotentialKernel, pmeInducedPotentialKernel;
CUfunction pmeFixedForceKernel, pmeInducedForceKernel, pmeRecordInducedFieldDipolesKernel;
static const int PmeOrder = 5; static const int PmeOrder = 5;
}; };
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment