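Update: rework AMOEBA CUDA diagnostic logging (propagate the log handle to amoebaGpu and enable it on stderr, gate verbose per-call diagnostics off, print kernel launch parameters once) and comment out __launch_bounds__ on the Electrostatic and Kirkwood kernels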

2c6cff12 · Mark Friedrichs · f7f79b04 · 2c6cff12 · 2c6cff12 · 2c6cff12
Commit 2c6cff12 authored Jul 23, 2010 by Mark Friedrichs
12 changed files
--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaData.cpp
@@ -94,7 +94,8 @@ KernelImpl* AmoebaCudaData::getAmoebaLocalForcesKernel( void ) const {
 }
 void AmoebaCudaData::setLog( FILE* inputLog ) {
-    log = inputLog;
+    log            = inputLog;
+    amoebaGpu->log = inputLog;
 }
 FILE* AmoebaCudaData::getLog( void ) const {

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernelFactory.cpp
@@ -103,7 +103,7 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
    if( mapIterator == contextToAmoebaDataMap.end() ){
        amoebaCudaData                         = new AmoebaCudaData( cudaPlatformData );
        contextToAmoebaDataMap[&context]       = amoebaCudaData;
-        //amoebaCudaData->setLog( stderr );
+        amoebaCudaData->setLog( stderr );
        amoebaCudaData->setContextImpl( static_cast<void*>(&context) );
        //(void) fprintf( stderr, "AmoebaCudaKernelFactory::createKernelImpl amoebaCudaDataV=%p\n", static_cast<void*>(amoebaCudaData) );
    } else {

--- a/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+++ b/plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
@@ -46,8 +46,8 @@ using namespace std;
 static void computeAmoebaLocalForces( AmoebaCudaData& data ) {
    amoebaGpuContext gpu = data.getAmoebaGpu();
-    if( gpu->log ){
+    if( 0 && data.getLog() ){
-        (void) fprintf( gpu->log, "computeAmoebaLocalForces\n" ); (void) fflush( gpu->log );
+        (void) fprintf( data.getLog(), "computeAmoebaLocalForces\n" ); (void) fflush( data.getLog() );
    }
    data.initializeGpu();
@@ -485,7 +485,7 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
    amoebaGpuContext gpu = data.getAmoebaGpu();
    data.initializeGpu();
-    if( data.getLog() ){
+    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), "computeAmoebaMultipoleForce\n" );
        (void) fflush( data.getLog());
    }
@@ -510,7 +510,7 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
        kCalculateAmoebaKirkwood(gpu);
    }
-    if( data.getLog() ){
+    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), "completed computeAmoebaMultipoleForce\n" );
        (void) fflush( data.getLog());
    }
@@ -821,13 +821,13 @@ double CudaCalcAmoebaVdwForceKernel::executeEnergy(ContextImpl& context) {
 static void computeAmoebaWcaDispersionForce( AmoebaCudaData& data ) {
    data.initializeGpu();
-    if( data.getLog() ){
+    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), "Calling computeAmoebaWcaDispersionForce  " ); (void) fflush( data.getLog() );
    }
    kCalculateAmoebaWcaDispersionForces( data.getAmoebaGpu() );
-    if( data.getLog() ){
+    if( 0 && data.getLog() ){
        (void) fprintf( data.getLog(), " -- completed\n" ); (void) fflush( data.getLog() );
    }
 }

--- a/plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
+++ b/plugins/amoeba/platforms/cuda/src/kernels/AmoebaGpu.cpp
@@ -2115,6 +2115,7 @@ void amoebaGpuBuildVdwExclusionList( amoebaGpuContext amoebaGpu,  const std::vec
    // ---------------------------------------------------------------------------------------
    static const std::string methodName = "amoebaGpuBuildVdwExclusionList";
+    static const int debugOn            = 0;
    // ---------------------------------------------------------------------------------------
@@ -2155,7 +2156,7 @@ void amoebaGpuBuildVdwExclusionList( amoebaGpuContext amoebaGpu,  const std::vec
    // diagnostics
-    if( amoebaGpu->log ){
+    if( debugOn && amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "%s min/max cell indices:\n", methodName.c_str() );
        for (int ii = 0; ii < dim; ii++)
        {
@@ -2202,7 +2203,7 @@ void amoebaGpuBuildVdwExclusionList( amoebaGpuContext amoebaGpu,  const std::vec
    // diagnostics
-    if( amoebaGpu->log ){
+    if( debugOn && amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "%s %d cells w/ exclusions\n", methodName.c_str(), numWithExclusionIndices );
        for (int ii = 0; ii < cells; ii++)
        {
@@ -2270,7 +2271,7 @@ void amoebaGpuBuildVdwExclusionList( amoebaGpuContext amoebaGpu,  const std::vec
    // diagnostics
-    if( amoebaGpu->log ){
+    if( debugOn && amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "%s Echo exclusions\n", methodName.c_str() );
        (void) fflush( amoebaGpu->log );
@@ -2890,6 +2891,7 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
    // ---------------------------------------------------------------------------------------
    static const std::string methodName = "amoebaGpuBuildScalingList";
+    static const int debugOn            = 0;
    // ---------------------------------------------------------------------------------------
@@ -2944,7 +2946,7 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
    // diagnostics
-    if( amoebaGpu->log ){
+    if( debugOn && amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "%s min/max cell indices:\n", methodName.c_str() );
        for (int ii = 0; ii < dim; ii++)
        {
@@ -2980,7 +2982,7 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
    // diagnostics
-#if 1
+#if 0
    if( 0 && amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "%s %d cells\n",
                                        methodName.c_str(), numWithScalingIndices );
@@ -3000,7 +3002,7 @@ void amoebaGpuBuildScalingList( amoebaGpuContext amoebaGpu )
        (void) fflush( amoebaGpu->log );
    }
 #else
-    if( amoebaGpu->log ){
+    if( debugOn && amoebaGpu->log ){
        (void) fprintf( amoebaGpu->log, "%s %d cells w/ exclusions\n",
                                        methodName.c_str(), numWithScalingIndices );
        for (int ii = 0; ii < cells; ii++)
@@ -3158,7 +3160,7 @@ static unsigned int targetAtoms[2] = { 0, 1};
    // diagnostics
-    if( amoebaGpu->log && 0 ){
+    if( debugOn && amoebaGpu->log ){
        float* pScaleCheckSum = (float*) malloc( sizeof( float )*paddedAtoms );
        float* dScaleCheckSum = (float*) malloc( sizeof( float )*paddedAtoms );

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
@@ -27,6 +27,7 @@
 #include "amoebaScaleFactors.h"
 __global__ 
+/*
 #if (__CUDA_ARCH__ >= 200)
 __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
 #elif (__CUDA_ARCH__ >= 130)
@@ -34,6 +35,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
 #else
 __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
 #endif
+*/
 void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
                            unsigned int* workUnit,
                            float4* atomCoord,

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.cu
@@ -2342,7 +2342,6 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
    for( int ii = 0; ii < amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; ii++ ){
        (void) fprintf( amoebaGpu->log,"Born %6d %16.9e\n", ii,
                        gpu->psBornRadii->_pSysStream[0][ii] );
    }
 #endif
@@ -2350,8 +2349,26 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
    if( threadsPerBlock == 0 ){
        threadsPerBlock                             = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodParticle));
+threadsPerBlock = 32;
        //unsigned int eDiffhreadsPerBlock            = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle));
        //unsigned int maxThreadsPerBlock             = threadsPerBlock> eDiffhreadsPerBlock ? threadsPerBlock : eDiffhreadsPerBlock;
+        if( amoebaGpu->log ){
+#if (__CUDA_ARCH__ >= 200)
+            unsigned int maxThreads = GF1XX_NONBOND_THREADS_PER_BLOCK;
+#elif (__CUDA_ARCH__ >= 130)
+            unsigned int maxThreads = GT2XX_NONBOND_THREADS_PER_BLOCK;
+#else
+            unsigned int maxThreads = G8X_NONBOND_THREADS_PER_BLOCK;
+#endif
+            (void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaKirkwood: blcks=%u tds=%u %u bPrWrp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
+                            amoebaGpu->nonbondBlocks, threadsPerBlock, maxThreads, amoebaGpu->bOutputBufferPerWarp,
+                            sizeof(KirkwoodParticle), sizeof(KirkwoodParticle)*threadsPerBlock,
+                            amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
+            (void) fflush( amoebaGpu->log );
+        }
    }   
    kClearFields_1( amoebaGpu );
@@ -2531,7 +2548,6 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
    }   
    delete debugArray;
 #endif
    // map torques to forces

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwood.h
@@ -27,6 +27,7 @@
 #include "amoebaScaleFactors.h"
 __global__
+/*
 #if (__CUDA_ARCH__ >= 200)
 __launch_bounds__(GF1XX_NONBOND_THREADS_PER_BLOCK, 1)
 #elif (__CUDA_ARCH__ >= 130)
@@ -34,6 +35,7 @@ __launch_bounds__(GT2XX_NONBOND_THREADS_PER_BLOCK, 1)
 #else
 __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
 #endif
+*/
 void METHOD_NAME(kCalculateAmoebaCudaKirkwood, Forces_kernel)(
                            unsigned int* workUnit,
                            float4* atomCoord,

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaKirkwoodEDiff.cu
@@ -1156,11 +1156,10 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
    static const char* methodName       = "kCalculateAmoebaKirkwoodEDiff";
    static unsigned int threadsPerBlock = 0;
+    static int timestep                 = 0;
+    timestep++;
 #ifdef AMOEBA_DEBUG
-    static int timestep = 0;
    std::vector<int> fileId;
-    timestep++;
    fileId.resize( 2 );
    fileId[0] = timestep;
    fileId[1] = 1;
@@ -1188,21 +1187,21 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
    unsigned int targetAtom                   = 0;
 #endif
-    if( amoebaGpu->log ){
-        (void) fprintf( amoebaGpu->log, "%s %d maxCovalentDegreeSz=%d"
-                        " gamma=%.3e scalingDistanceCutoff=%.3f ZZZ\n",
-                        methodName, gpu->natoms,
-                        amoebaGpu->maxCovalentDegreeSz, amoebaGpu->pGamma,
-                        amoebaGpu->scalingDistanceCutoff );
-        gpuPrintCudaAmoebaGmxSimulation(amoebaGpu, amoebaGpu->log );
-        (void) fflush( amoebaGpu->log );
-    }   
    kClearFields_3( amoebaGpu, 6 );
    if( threadsPerBlock == 0 ){
        threadsPerBlock = getThreadsPerBlock( amoebaGpu, sizeof(KirkwoodEDiffParticle));
    }   
+    if( amoebaGpu->log && timestep == 1 ){
+        (void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaKirkwoodEDiffN2Forces:  numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
+                        amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
+                        sizeof(KirkwoodEDiffParticle), sizeof(KirkwoodEDiffParticle)*threadsPerBlock,
+                        amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
+        //gpuPrintCudaAmoebaGmxSimulation(amoebaGpu, amoebaGpu->log );
+        (void) fflush( amoebaGpu->log );
+    }   
    if (gpu->bOutputBufferPerWarp){
 #if 0
        (void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaKirkwoodEDiffN2Forces warp:  numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
@@ -1414,7 +1413,6 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
         }
    }   
 #endif
   // ---------------------------------------------------------------------------------------

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaMutualInducedAndGkFields.cu
@@ -756,12 +756,11 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
   // ---------------------------------------------------------------------------------------
-#ifdef AMOEBA_DEBUG
-    static const char* methodName = "cudaComputeAmoebaMutualInducedAndGkFieldBySOR";
    static int timestep = 0;
-    std::vector<int> fileId;
    timestep++;
+    static const char* methodName = "cudaComputeAmoebaMutualInducedAndGkFieldBySOR";
+#ifdef AMOEBA_DEBUG
+    std::vector<int> fileId;
    fileId.resize( 2 );
    fileId[0] = timestep;
    fileId[1] = 1;
@@ -780,7 +779,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
    if( (numOfElems % numThreads) != 0 )numBlocks++;
 #ifdef AMOEBA_DEBUG
-    if( amoebaGpu->log ){
+    if( amoebaGpu->log && timestep == 1 ){
        (void) fprintf( amoebaGpu->log, "%s %d numOfElems=%d numThreads=%d numBlocks=%d "
                        "maxIterations=%d targetEpsilon=%.3e\n", 
                        methodName, gpu->natoms, numOfElems, numThreads, numBlocks,
@@ -981,6 +980,12 @@ time_t start = clock();
    amoebaGpu->mutualInducedDone             = done;
    amoebaGpu->mutualInducedConverged        = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;
+    if( amoebaGpu->log ){
+        (void) fprintf( amoebaGpu->log, "%s done=%d converged=%d iteration=%d eps=%14.7e\n",
+                        methodName, done, amoebaGpu->mutualInducedConverged, iteration, amoebaGpu->mutualInducedCurrentEpsilon );
+        (void) fflush( amoebaGpu->log );
+    }
 #ifdef AMOEBA_DEBUG
    if( 1 ){
        std::vector<int> fileId;

--- a/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaLocalForces.cu
+++ b/plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaLocalForces.cu
@@ -1613,8 +1613,12 @@ void kCalculateAmoebaLocalForces(amoebaGpuContext gpu)
 {
    if( gpu->log ){
-        (void) fprintf( gpu->log,"kCalculateAmoebaLocalForces: blks=%u thrds/blk=%u\n",
+        static int call = 0;
-                        gpu->gpuContext->sim.blocks, gpu->gpuContext->sim.localForces_threads_per_block); fflush( gpu->log );
+        if( call == 0 ){
+            (void) fprintf( gpu->log,"kCalculateAmoebaLocalForces: blks=%u thrds/blk=%u\n",
+                            gpu->gpuContext->sim.blocks, gpu->gpuContext->sim.localForces_threads_per_block); fflush( gpu->log );
+            call++;
+        }
    }
    kCalculateAmoebaLocalForces_kernel<<<gpu->gpuContext->sim.blocks, gpu->gpuContext->sim.localForces_threads_per_block>>>();

--- a/plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
+++ b/plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.cpp
--- a/plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.h
+++ b/plugins/amoeba/platforms/cuda/tests/AmoebaTinkerParameterFile.h
@@ -84,7 +84,9 @@ static std::string AMOEBA_FIXED_E_GK                                  = "AmoebaF
 static std::string AMOEBA_INDUCDED_DIPOLES                            = "AmoebaInducedDipoles";
 static std::string AMOEBA_INDUCDED_DIPOLES_GK                         = "AmoebaInducedDipoles_GK";
-static std::string INCLUDE_OBC_CAVITY_TERM                            = "INCLUDE_OBC_CAVITY_TERM";
+static std::string INCLUDE_OBC_CAVITY_TERM                            = "includeObcCavityTerm";
+static std::string MUTUAL_INDUCED_MAX_ITERATIONS                      = "mutualInducedMaxIterations";
+static std::string MUTUAL_INDUCED_TARGET_EPSILON                      = "mutualInducedTargetEpsilon";
 #define AmoebaHarmonicBondIndex                            0
 #define AmoebaHarmonicAngleIndex                           1
@@ -147,6 +149,10 @@ typedef std::map< std::string, double > MapStringDouble;
 typedef MapStringDouble::iterator MapStringDoubleI;
 typedef MapStringDouble::const_iterator MapStringDoubleCI;
+typedef std::map< std::string, Force*> MapStringForce;
+typedef MapStringForce::iterator MapStringForceI;
+typedef MapStringForce::const_iterator MapStringForceCI;
 // default return value from methods
 static const int DefaultReturnValue               = 0;