Added documentation associated w/ mapping torques

3e4ffaff · Mark Friedrichs · f0612350 · 3e4ffaff · 3e4ffaff · 3e4ffaff
Commit 3e4ffaff authored Jul 23, 2012 by Mark Friedrichs
4 changed files
--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
@@ -276,14 +276,14 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
            offset                 = (x + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArray( 3*offset, localParticle.force,  outputTorque );
-            offset                              = (y + tgx + warp*cSim.paddedNumberOfAtoms);
+            offset                 = (y + tgx + warp*cSim.paddedNumberOfAtoms);
            add3dArray( 3*offset, sA[threadIdx.x].force,  outputTorque );
 #else
            offset                 = (x + tgx + (y >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray(         3*offset, localParticle.force, outputTorque );
-            offset                              = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
+            offset                 = (y + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
            load3dArray(       3*offset, sA[threadIdx.x].force, outputTorque );
 #endif

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
@@ -95,9 +95,9 @@ __device__ void loadKirkwoodShared( struct KirkwoodParticle* sA, unsigned int at
 }
 __device__ void calculateKirkwoodPairIxnOrig_kernel( KirkwoodParticle& atomI,       KirkwoodParticle& atomJ,
-                                                 float*  outputForce,           float outputTorque[2][3],
+                                                     float*  outputForce,           float outputTorque[2][3],
-                                                 float*  outputBorn,            float*  outputBornPolar,
+                                                     float*  outputBorn,            float*  outputBornPolar,
-                                                 float* outputEnergy
+                                                     float* outputEnergy
 ){
@@ -2004,7 +2004,6 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
    // Tinker's Born1 && E-diff
-    //kCalculateObcGbsaForces2( amoebaGpu->gpuContext );
    kCalculateGrycukGbsaForces2( amoebaGpu );
    kCalculateAmoebaKirkwoodEDiff( amoebaGpu );

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
@@ -939,8 +939,6 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
    gpuContext gpu = amoebaGpu->gpuContext;
-    // apparently debug array can take up nontrivial no. registers
    static unsigned int threadsPerBlock = 0;
    if( threadsPerBlock == 0 ){
        unsigned int maxThreads;

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMapTorques.cu
@@ -456,6 +456,16 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
    gpuContext gpu    = amoebaGpu->gpuContext;
+    // The default is to use the Cuda force4 output buffers to collect the forces on each particle arising from the torques: a torque on
+	// particle-i will be mapped into a force on particle-j, where particle-j is one of the particles defining the molecular frame axes
+	// associated w/ particle-i. 
+	// if amoebaGpu->amoebaSim.maxTorqueBufferIndex > amoebaGpu->gpuContext->sim.outputBuffers, then 
+	// the number of force4 output buffers is too small to accommodate the number of particles whose torques contribute to the force
+	// on at least one particle. In this case, the CUDAStream amoebaGpu->psTorqueMapForce4 is used instead to collect the forces.
+	// amoebaClearMapTorqueForce_kernel() clears these buffers; the call to amoebaAddMapTorqueForceToForce_kernel() below
+	// adds the torques from the amoebaGpu->psTorqueMapForce4 buffers to the force4 CUDAStream.
    if( amoebaGpu->amoebaSim.maxTorqueBufferIndex > amoebaGpu->gpuContext->sim.outputBuffers && amoebaGpu->psTorqueMapForce4 != amoebaGpu->gpuContext->psForce4 && amoebaGpu->psTorqueMapForce4 ){
        amoebaClearMapTorqueForce_kernel<<< gpu->sim.blocks, gpu->sim.threads_per_block>>> ( );
        LAUNCHERROR("amoebaClearMapTorqueForce");