Commit 3e4ffaff authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Added documentation associated w/ mapping torques

parent f0612350
...@@ -276,14 +276,14 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)( ...@@ -276,14 +276,14 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
offset = (x + tgx + warp*cSim.paddedNumberOfAtoms); offset = (x + tgx + warp*cSim.paddedNumberOfAtoms);
add3dArray( 3*offset, localParticle.force, outputTorque ); add3dArray( 3*offset, localParticle.force, outputTorque );
offset = (y + tgx + warp*cSim.paddedNumberOfAtoms); offset = (y + tgx + warp*cSim.paddedNumberOfAtoms);
add3dArray( 3*offset, sA[threadIdx.x].force, outputTorque ); add3dArray( 3*offset, sA[threadIdx.x].force, outputTorque );
#else #else
offset = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms); offset = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
load3dArray( 3*offset, localParticle.force, outputTorque ); load3dArray( 3*offset, localParticle.force, outputTorque );
offset = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms); offset = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
load3dArray( 3*offset, sA[threadIdx.x].force, outputTorque ); load3dArray( 3*offset, sA[threadIdx.x].force, outputTorque );
#endif #endif
......
...@@ -95,9 +95,9 @@ __device__ void loadKirkwoodShared( struct KirkwoodParticle* sA, unsigned int at ...@@ -95,9 +95,9 @@ __device__ void loadKirkwoodShared( struct KirkwoodParticle* sA, unsigned int at
} }
__device__ void calculateKirkwoodPairIxnOrig_kernel( KirkwoodParticle& atomI, KirkwoodParticle& atomJ, __device__ void calculateKirkwoodPairIxnOrig_kernel( KirkwoodParticle& atomI, KirkwoodParticle& atomJ,
float* outputForce, float outputTorque[2][3], float* outputForce, float outputTorque[2][3],
float* outputBorn, float* outputBornPolar, float* outputBorn, float* outputBornPolar,
float* outputEnergy float* outputEnergy
){ ){
...@@ -2004,7 +2004,6 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu ) ...@@ -2004,7 +2004,6 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
// Tinker's Born1 && E-diff // Tinker's Born1 && E-diff
//kCalculateObcGbsaForces2( amoebaGpu->gpuContext );
kCalculateGrycukGbsaForces2( amoebaGpu ); kCalculateGrycukGbsaForces2( amoebaGpu );
kCalculateAmoebaKirkwoodEDiff( amoebaGpu ); kCalculateAmoebaKirkwoodEDiff( amoebaGpu );
......
...@@ -939,8 +939,6 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu ) ...@@ -939,8 +939,6 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
// apparently debug array can take up nontrivial no. registers
static unsigned int threadsPerBlock = 0; static unsigned int threadsPerBlock = 0;
if( threadsPerBlock == 0 ){ if( threadsPerBlock == 0 ){
unsigned int maxThreads; unsigned int maxThreads;
......
...@@ -456,6 +456,16 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt ...@@ -456,6 +456,16 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
// The default is to use the Cuda force4 output buffers to collect the forces on each particle arising from the torques: a torque on
// particle-i will be mapped into a force on particle-j, where particle-j is one of the particles defining the molecular frame axes
// associated w/ particle-i.
// If amoebaGpu->amoebaSim.maxTorqueBufferIndex > amoebaGpu->gpuContext->sim.outputBuffers, then
// the number of force4 output buffers is too small to accommodate the number of particles whose torques contribute to the force
// on at least one particle. In this case, the CUDAStream amoebaGpu->psTorqueMapForce4 is used instead to collect the forces.
// amoebaClearMapTorqueForce_kernel() clears these buffers; the call to amoebaAddMapTorqueForceToForce_kernel() below
// adds the torque-derived forces from the amoebaGpu->psTorqueMapForce4 buffers to the force4 CUDAStream.
if( amoebaGpu->amoebaSim.maxTorqueBufferIndex > amoebaGpu->gpuContext->sim.outputBuffers && amoebaGpu->psTorqueMapForce4 != amoebaGpu->gpuContext->psForce4 && amoebaGpu->psTorqueMapForce4 ){ if( amoebaGpu->amoebaSim.maxTorqueBufferIndex > amoebaGpu->gpuContext->sim.outputBuffers && amoebaGpu->psTorqueMapForce4 != amoebaGpu->gpuContext->psForce4 && amoebaGpu->psTorqueMapForce4 ){
amoebaClearMapTorqueForce_kernel<<< gpu->sim.blocks, gpu->sim.threads_per_block>>> ( ); amoebaClearMapTorqueForce_kernel<<< gpu->sim.blocks, gpu->sim.threads_per_block>>> ( );
LAUNCHERROR("amoebaClearMapTorqueForce"); LAUNCHERROR("amoebaClearMapTorqueForce");
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment