Commit 41abd9fb authored by Mark Friedrichs
Browse files

Fix for isolated ions

parent 761d7e17
...@@ -93,7 +93,7 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl ...@@ -93,7 +93,7 @@ KernelImpl* AmoebaCudaKernelFactory::createKernelImpl(std::string name, const Pl
if( mapIterator == contextToAmoebaDataMap.end() ){ if( mapIterator == contextToAmoebaDataMap.end() ){
amoebaCudaData = new AmoebaCudaData( cudaPlatformData ); amoebaCudaData = new AmoebaCudaData( cudaPlatformData );
contextToAmoebaDataMap[&context] = amoebaCudaData; contextToAmoebaDataMap[&context] = amoebaCudaData;
amoebaCudaData->setLog( stderr ); //amoebaCudaData->setLog( stderr );
amoebaCudaData->setContextImpl( static_cast<void*>(&context) ); amoebaCudaData->setContextImpl( static_cast<void*>(&context) );
} else { } else {
amoebaCudaData = mapIterator->second; amoebaCudaData = mapIterator->second;
......
...@@ -86,13 +86,14 @@ amoebaGpuContext amoebaGpuInit( _gpuContext* gpu ) ...@@ -86,13 +86,14 @@ amoebaGpuContext amoebaGpuInit( _gpuContext* gpu )
extern "C" extern "C"
void gpuPrintCudaStream( std::string name, void gpuPrintCudaStream( std::string name,
unsigned int length, unsigned int subStreams, unsigned int stride, unsigned int length, unsigned int subStreams, unsigned int stride,
unsigned int memoryFootprint,
void* pSysStream, void* pDevStream, void* pSysStream, void* pDevStream,
void* pSysData, void* pDevData, FILE* log) void* pSysData, void* pDevData, FILE* log)
{ {
(void) fprintf( log, " %-35s [%8u %5u %8u] Stream[%p %p] Data[%16p %16p]\n", (void) fprintf( log, " %-35s [%8u %5u %8u %8u] Stream[%p %p] Data[%16p %16p]\n",
name.c_str(), length, subStreams, name.c_str(), length, subStreams,
stride, pSysStream, pDevStream, pSysData, pDevData ); stride, memoryFootprint, pSysStream, pDevStream, pSysData, pDevData );
} }
extern "C" extern "C"
...@@ -102,6 +103,7 @@ void gpuPrintCudaStreamFloat( CUDAStream<float>* cUDAStream, FILE* log ) ...@@ -102,6 +103,7 @@ void gpuPrintCudaStreamFloat( CUDAStream<float>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return; if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(), gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride, cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream), static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log ); static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
} }
...@@ -113,6 +115,7 @@ void gpuPrintCudaStreamFloat2( CUDAStream<float2>* cUDAStream, FILE* log ) ...@@ -113,6 +115,7 @@ void gpuPrintCudaStreamFloat2( CUDAStream<float2>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return; if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(), gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride, cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float2 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream), static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log ); static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
} }
...@@ -124,6 +127,7 @@ void gpuPrintCudaStreamFloat4( CUDAStream<float4>* cUDAStream, FILE* log ) ...@@ -124,6 +127,7 @@ void gpuPrintCudaStreamFloat4( CUDAStream<float4>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return; if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(), gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride, cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( float4 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream), static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log ); static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
} }
...@@ -135,6 +139,7 @@ void gpuPrintCudaStreamUnsignedInt( CUDAStream<unsigned int>* cUDAStream, FILE* ...@@ -135,6 +139,7 @@ void gpuPrintCudaStreamUnsignedInt( CUDAStream<unsigned int>* cUDAStream, FILE*
if( cUDAStream == NULL )return; if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(), gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride, cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( unsigned int ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream), static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log ); static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
} }
...@@ -146,6 +151,7 @@ void gpuPrintCudaStreamInt( CUDAStream<int>* cUDAStream, FILE* log ) ...@@ -146,6 +151,7 @@ void gpuPrintCudaStreamInt( CUDAStream<int>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return; if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(), gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride, cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream), static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log ); static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
} }
...@@ -157,6 +163,7 @@ void gpuPrintCudaStreamInt2( CUDAStream<int2>* cUDAStream, FILE* log ) ...@@ -157,6 +163,7 @@ void gpuPrintCudaStreamInt2( CUDAStream<int2>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return; if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(), gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride, cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int2 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream), static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log ); static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
} }
...@@ -168,6 +175,7 @@ void gpuPrintCudaStreamInt4( CUDAStream<int4>* cUDAStream, FILE* log ) ...@@ -168,6 +175,7 @@ void gpuPrintCudaStreamInt4( CUDAStream<int4>* cUDAStream, FILE* log )
if( cUDAStream == NULL )return; if( cUDAStream == NULL )return;
gpuPrintCudaStream( cUDAStream->_name.c_str(), gpuPrintCudaStream( cUDAStream->_name.c_str(),
cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride, cUDAStream->_length, cUDAStream->_subStreams, cUDAStream->_stride,
cUDAStream->_length*cUDAStream->_subStreams*sizeof( int4 ),
static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream), static_cast<void*>(cUDAStream->_pSysStream), static_cast<void*>(cUDAStream->_pDevStream),
static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log ); static_cast<void*>(cUDAStream->_pSysData), static_cast<void*>(cUDAStream->_pDevData), log );
} }
...@@ -1277,15 +1285,6 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu ) ...@@ -1277,15 +1285,6 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log,"%s: paddedNumberOfAtoms=%d\n",
methodName.c_str(), paddedNumberOfAtoms ); (void) fflush( amoebaGpu->log );
}
#endif
// work space
// parameters // parameters
amoebaGpu->psMultipoleParticlesIdsAndAxisType = new CUDAStream<int4>(paddedNumberOfAtoms, 1, "MultipoleParticlesIdsAndAxisType"); amoebaGpu->psMultipoleParticlesIdsAndAxisType = new CUDAStream<int4>(paddedNumberOfAtoms, 1, "MultipoleParticlesIdsAndAxisType");
...@@ -1296,9 +1295,11 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu ) ...@@ -1296,9 +1295,11 @@ static void gpuRotationToLabFrameAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu->psMolecularDipole = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "MolecularDipole"); amoebaGpu->psMolecularDipole = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "MolecularDipole");
amoebaGpu->amoebaSim.pMolecularDipole = amoebaGpu->psMolecularDipole->_pDevData; amoebaGpu->amoebaSim.pMolecularDipole = amoebaGpu->psMolecularDipole->_pDevData;
memset( amoebaGpu->psMolecularDipole->_pSysData, 0, sizeof(float)*3*paddedNumberOfAtoms );
amoebaGpu->psMolecularQuadrupole = new CUDAStream<float>(9*paddedNumberOfAtoms, 1, "MolecularQuadrupole"); amoebaGpu->psMolecularQuadrupole = new CUDAStream<float>(9*paddedNumberOfAtoms, 1, "MolecularQuadrupole");
amoebaGpu->amoebaSim.pMolecularQuadrupole = amoebaGpu->psMolecularQuadrupole->_pDevData; amoebaGpu->amoebaSim.pMolecularQuadrupole = amoebaGpu->psMolecularQuadrupole->_pDevData;
memset( amoebaGpu->psMolecularQuadrupole->_pSysData, 0, sizeof(float)*9*paddedNumberOfAtoms );
// output // output
......
...@@ -47,38 +47,10 @@ typedef MapIntFloat::const_iterator MapIntFloatCI; ...@@ -47,38 +47,10 @@ typedef MapIntFloat::const_iterator MapIntFloatCI;
struct _amoebaGpuContext { struct _amoebaGpuContext {
_gpuContext* gpuContext; _gpuContext* gpuContext;
cudaAmoebaGmxSimulation amoebaSim;
FILE* log; FILE* log;
//bool bOutputBufferPerWarp;
//unsigned int paddedNumberOfAtoms;
//unsigned int nonbondBlocks;
//unsigned int nonbondThreadsPerBlock;
//unsigned int nonbondOutputBuffers;
//unsigned int threadsPerBlock;
//unsigned int fieldReduceThreadsPerBlock;
//unsigned int outputBuffers;
unsigned int workUnits;
// workspace arrays
CUDAStream<float>* psWorkArray_3_1;
CUDAStream<float>* psWorkArray_3_2;
CUDAStream<float>* psWorkArray_3_3;
CUDAStream<float>* psWorkArray_3_4;
CUDAStream<float>* psWorkArray_1_1;
CUDAStream<float>* psWorkArray_1_2;
CUDAStream<unsigned int>* psWorkUnit;
CUDAStream<int>* psScalingIndicesIndex;
CUDAStream<int>* ps_D_ScaleIndices;
CUDAStream<int2>* ps_P_ScaleIndices;
CUDAStream<int2>* ps_M_ScaleIndices;
cudaAmoebaGmxSimulation amoebaSim;
int maxCovalentDegreeSz;
CUDAStream<int4>* psAmoebaBondID; CUDAStream<int4>* psAmoebaBondID;
CUDAStream<float2>* psAmoebaBondParameter; CUDAStream<float2>* psAmoebaBondParameter;
...@@ -116,6 +88,25 @@ struct _amoebaGpuContext { ...@@ -116,6 +88,25 @@ struct _amoebaGpuContext {
CUDAStream<int4>* psAmoebaTorsionTorsionID3; CUDAStream<int4>* psAmoebaTorsionTorsionID3;
CUDAStream<float4>* psAmoebaTorsionTorsionGrids; CUDAStream<float4>* psAmoebaTorsionTorsionGrids;
unsigned int workUnits;
// workspace arrays
CUDAStream<float>* psWorkArray_3_1;
CUDAStream<float>* psWorkArray_3_2;
CUDAStream<float>* psWorkArray_3_3;
CUDAStream<float>* psWorkArray_3_4;
CUDAStream<float>* psWorkArray_1_1;
CUDAStream<float>* psWorkArray_1_2;
CUDAStream<unsigned int>* psWorkUnit;
CUDAStream<int>* psScalingIndicesIndex;
CUDAStream<int>* ps_D_ScaleIndices;
CUDAStream<int2>* ps_P_ScaleIndices;
CUDAStream<int2>* ps_M_ScaleIndices;
int maxCovalentDegreeSz;
float solventDielectric; float solventDielectric;
// multipole parameters // multipole parameters
...@@ -126,7 +117,6 @@ struct _amoebaGpuContext { ...@@ -126,7 +117,6 @@ struct _amoebaGpuContext {
// buffer indices used for mapping torques onto forces // buffer indices used for mapping torques onto forces
int maxTorqueBufferIndex; int maxTorqueBufferIndex;
int useNewTorqueMapScheme;
int torqueMapForce4Delete; int torqueMapForce4Delete;
CUDAStream<int4>* psMultipoleParticlesTorqueBufferIndices; CUDAStream<int4>* psMultipoleParticlesTorqueBufferIndices;
CUDAStream<float4>* psTorqueMapForce4; CUDAStream<float4>* psTorqueMapForce4;
......
...@@ -6,7 +6,8 @@ ...@@ -6,7 +6,8 @@
#include "amoebaCudaKernels.h" #include "amoebaCudaKernels.h"
#include "kCalculateAmoebaCudaUtilities.h" #include "kCalculateAmoebaCudaUtilities.h"
#define AMOEBA_DEBUG //#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
static __constant__ cudaGmxSimulation cSim; static __constant__ cudaGmxSimulation cSim;
static __constant__ cudaAmoebaGmxSimulation cAmoebaSim; static __constant__ cudaAmoebaGmxSimulation cAmoebaSim;
...@@ -480,7 +481,7 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu ) ...@@ -480,7 +481,7 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
// write results to file // write results to file
if( 1 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
//fileId.push_back( 0 ); //fileId.push_back( 0 );
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
......
...@@ -1730,7 +1730,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu ) ...@@ -1730,7 +1730,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
} }
LAUNCHERROR("kReduceToBornForcePrefactor"); LAUNCHERROR("kReduceToBornForcePrefactor");
#define AMOEBA_DEBUG //#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
...@@ -1748,7 +1748,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu ) ...@@ -1748,7 +1748,7 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
} }
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
*/ */
if( 1 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
//fileId.push_back( 0 ); //fileId.push_back( 0 );
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
...@@ -1943,7 +1943,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu ) ...@@ -1943,7 +1943,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpu, amoebaGpu->psTorque ); cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpu, amoebaGpu->psTorque );
if( 1 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f ); //cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
...@@ -1956,7 +1956,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu ) ...@@ -1956,7 +1956,7 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
kCalculateObcGbsaForces2( amoebaGpu->gpuContext ); kCalculateObcGbsaForces2( amoebaGpu->gpuContext );
if( 1 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f ); //cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
......
...@@ -33,8 +33,8 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp ...@@ -33,8 +33,8 @@ void GetCalculateAmoebaCudaMutualInducedAndGkFieldsSim(amoebaGpuContext amoebaGp
RTERROR(status, "GetCalculateAmoebaCudaMutualInducedAndGkFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"); RTERROR(status, "GetCalculateAmoebaCudaMutualInducedAndGkFieldSim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
} }
#define AMOEBA_DEBUG //#define AMOEBA_DEBUG
//#undef AMOEBA_DEBUG #undef AMOEBA_DEBUG
#define GK #define GK
#include "kCalculateAmoebaCudaMutualInducedParticle.h" #include "kCalculateAmoebaCudaMutualInducedParticle.h"
...@@ -923,7 +923,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe ...@@ -923,7 +923,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
} }
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( 1 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
//fileId.push_back( 0 ); //fileId.push_back( 0 );
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment