Commit b587b396 authored by Peter Eastman
Browse files

Continuing to convert AmoebaGeneralizedKirkwoodForce to new CUDA platform

parent 222378c6
...@@ -1342,22 +1342,11 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in ...@@ -1342,22 +1342,11 @@ double CudaCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bool in
&gkKernel->getBornRadii()->getDevicePointer(), &gkKernel->getField()->getDevicePointer(), &gkKernel->getBornRadii()->getDevicePointer(), &gkKernel->getField()->getDevicePointer(),
&labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()}; &labFrameDipoles->getDevicePointer(), &labFrameQuadrupoles->getDevicePointer(), &dampingAndThole->getDevicePointer()};
cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(computeFixedFieldKernel, computeFixedFieldArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
vector<long long> f;
gkKernel->getField()->download(f);
printf("field\n");
for (int i = 0; i < 3*cu.getNumAtoms(); i++)
printf("%d %g\n", i, f[i]/(double) 0xFFFFFFFF);
void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(), void* recordInducedDipolesArgs[] = {&field->getDevicePointer(), &fieldPolar->getDevicePointer(),
&gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(), &gkKernel->getField()->getDevicePointer(), &gkKernel->getInducedDipoles()->getDevicePointer(),
&gkKernel->getInducedDipolesPolar()->getDevicePointer(), &inducedDipole->getDevicePointer(), &gkKernel->getInducedDipolesPolar()->getDevicePointer(), &inducedDipole->getDevicePointer(),
&inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()}; &inducedDipolePolar->getDevicePointer(), &polarizability->getDevicePointer()};
cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms()); cu.executeKernel(recordInducedDipolesKernel, recordInducedDipolesArgs, cu.getNumAtoms());
vector<float> d, dp;
gkKernel->getInducedDipoles()->download(d);
gkKernel->getInducedDipolesPolar()->download(dp);
printf("dipoles\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g %g %g, %g %g %g\n", i, d[3*i], d[3*i+1], d[3*i+2], dp[3*i], dp[3*i+1], dp[3*i+2]);
} }
// Iterate until the dipoles converge. // Iterate until the dipoles converge.
...@@ -1837,11 +1826,6 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::computeBornRadii() { ...@@ -1837,11 +1826,6 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::computeBornRadii() {
cu.executeKernel(computeBornSumKernel, computeBornSumArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(computeBornSumKernel, computeBornSumArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
void* reduceBornSumArgs[] = {&bornSum->getDevicePointer(), &params->getDevicePointer(), &bornRadii->getDevicePointer()}; void* reduceBornSumArgs[] = {&bornSum->getDevicePointer(), &params->getDevicePointer(), &bornRadii->getDevicePointer()};
cu.executeKernel(reduceBornSumKernel, reduceBornSumArgs, cu.getNumAtoms()); cu.executeKernel(reduceBornSumKernel, reduceBornSumArgs, cu.getNumAtoms());
vector<float> r;
bornRadii->download(r);
printf("radii\n");
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g\n", i, r[i]);
} }
void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles, void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& torque, CudaArray& labFrameDipoles, CudaArray& labFrameQuadrupoles,
...@@ -1856,6 +1840,11 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray& ...@@ -1856,6 +1840,11 @@ void CudaCalcAmoebaGeneralizedKirkwoodForceKernel::finishComputation(CudaArray&
&labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(), &labFrameQuadrupoles.getDevicePointer(), &inducedDipole.getDevicePointer(), &inducedDipolePolar.getDevicePointer(),
&bornRadii->getDevicePointer(), &bornForce->getDevicePointer()}; &bornRadii->getDevicePointer(), &bornForce->getDevicePointer()};
cu.executeKernel(gkForceKernel, gkForceArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize); cu.executeKernel(gkForceKernel, gkForceArgs, numForceThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
printf("bornForce\n");
vector<long long> f;
bornForce->download(f);
for (int i = 0; i < cu.getNumAtoms(); i++)
printf("%d %g\n", i, f[i]/(double) 0xFFFFFFFF);
// Compute cavity term... // Compute cavity term...
......
...@@ -275,12 +275,12 @@ extern "C" __global__ void computeGKForces( ...@@ -275,12 +275,12 @@ extern "C" __global__ void computeGKForces(
atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.y*0xFFFFFFFF)));
atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[atom1+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (data.force.z*0xFFFFFFFF)));
// Chain rule terms? // Compute chain rule terms.
zeroAtomData(data); zeroAtomData(data);
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
int atom2 = y*TILE_SIZE+j; int atom2 = y*TILE_SIZE+j;
if (atom1 != atom2 && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS)
computeOneInteractionB1B2(data, localData[tbx+j]); computeOneInteractionB1B2(data, localData[tbx+j]);
} }
atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF))); atomicAdd(&bornForce[atom1], static_cast<unsigned long long>((long long) (data.bornForce*0xFFFFFFFF)));
...@@ -346,7 +346,7 @@ extern "C" __global__ void computeGKForces( ...@@ -346,7 +346,7 @@ extern "C" __global__ void computeGKForces(
atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF))); atomicAdd(&torqueBuffers[offset+2*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].force.z*0xFFFFFFFF)));
} }
// Chain rule terms? // Compute chain rule terms.
zeroAtomData(data); zeroAtomData(data);
zeroAtomData(localData[threadIdx.x]); zeroAtomData(localData[threadIdx.x]);
......
...@@ -432,6 +432,9 @@ extern "C" __global__ void computeFixedField( ...@@ -432,6 +432,9 @@ extern "C" __global__ void computeFixedField(
AtomData data; AtomData data;
data.field = make_real3(0); data.field = make_real3(0);
data.fieldPolar = make_real3(0); data.fieldPolar = make_real3(0);
#ifdef USE_GK
data.gkField = make_real3(0);
#endif
if (pos < end) { if (pos < end) {
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
if (numTiles <= maxTiles) { if (numTiles <= maxTiles) {
...@@ -501,11 +504,14 @@ extern "C" __global__ void computeFixedField( ...@@ -501,11 +504,14 @@ extern "C" __global__ void computeFixedField(
computeOneInteraction(data, localData[tbx+j], delta, d, p, fields); computeOneInteraction(data, localData[tbx+j], delta, d, p, fields);
data.field += fields[0]; data.field += fields[0];
data.fieldPolar += fields[1]; data.fieldPolar += fields[1];
}
#ifdef USE_GK #ifdef USE_GK
if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
real3 fields[2];
computeOneGkInteraction(data, localData[tbx+j], delta, fields); computeOneGkInteraction(data, localData[tbx+j], delta, fields);
data.gkField += fields[0]; data.gkField += fields[0];
#endif
} }
#endif
covalent.x >>= 1; covalent.x >>= 1;
covalent.y >>= 1; covalent.y >>= 1;
polarizationGroup >>= 1; polarizationGroup >>= 1;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment