Commit 6b0ad778 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

PME mods

parent f21e5169
...@@ -98,7 +98,7 @@ __device__ static void debugSetup( unsigned int atomI, unsigned int atomJ, ...@@ -98,7 +98,7 @@ __device__ static void debugSetup( unsigned int atomI, unsigned int atomJ,
*/ */
// self-energy for PME // self-energy for PME
/*
__device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, float* energy) __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, float* energy)
{ {
float term = 2.0f*cSim.alphaEwald*cSim.alphaEwald; float term = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
...@@ -122,7 +122,7 @@ __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDir ...@@ -122,7 +122,7 @@ __device__ static void calculatePmeSelfEnergyElectrostaticPairIxn_kernel( PmeDir
*energy = (cii + term*(dii/3.0f + 2.0f*term*qii/5.0f)); *energy = (cii + term*(dii/3.0f + 2.0f*term*qii/5.0f));
*energy += term*uii/3.0f; *energy += term*uii/3.0f;
*energy *= fterm; *energy *= fterm;
} */ }
// self-torque for PME // self-torque for PME
...@@ -134,14 +134,13 @@ __device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDir ...@@ -134,14 +134,13 @@ __device__ static void calculatePmeSelfTorqueElectrostaticPairIxn_kernel( PmeDir
float uiy = 0.5f*(atomI.inducedDipole[1] + atomI.inducedDipoleP[1]); float uiy = 0.5f*(atomI.inducedDipole[1] + atomI.inducedDipoleP[1]);
float uiz = 0.5f*(atomI.inducedDipole[2] + atomI.inducedDipoleP[2]); float uiz = 0.5f*(atomI.inducedDipole[2] + atomI.inducedDipoleP[2]);
atomI.torque[0] += term*(atomI.labFrameDipole[1]*uiz - atomI.labFrameDipole[2]*uiy); atomI.torque[0] -= term*(atomI.labFrameDipole[1]*uiz - atomI.labFrameDipole[2]*uiy);
atomI.torque[1] += term*(atomI.labFrameDipole[2]*uix - atomI.labFrameDipole[0]*uiz); atomI.torque[1] -= term*(atomI.labFrameDipole[2]*uix - atomI.labFrameDipole[0]*uiz);
atomI.torque[2] += term*(atomI.labFrameDipole[0]*uiy - atomI.labFrameDipole[1]*uix); atomI.torque[2] -= term*(atomI.labFrameDipole[0]*uiy - atomI.labFrameDipole[1]*uix);
} }
__device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ, __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectrostaticParticle& atomI, PmeDirectElectrostaticParticle& atomJ,
float* scalingFactors, float* outputForce, float outputTorque[2][3], float* scalingFactors, float* outputForce, float outputTorque[2][3], float* energy
float* energy
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
,float4* debugArray ,float4* debugArray
#endif #endif
...@@ -870,18 +869,18 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros ...@@ -870,18 +869,18 @@ __device__ void calculatePmeDirectElectrostaticPairIxn_kernel( PmeDirectElectros
// increment gradient due to force and torque on first site; // increment gradient due to force and torque on first site;
outputForce[0] = -conversionFactor*(ftm2[1] + ftm2i[1]); outputForce[0] = conversionFactor*(ftm2[1] + ftm2i[1]);
outputForce[1] = -conversionFactor*(ftm2[2] + ftm2i[2]); outputForce[1] = conversionFactor*(ftm2[2] + ftm2i[2]);
outputForce[2] = -conversionFactor*(ftm2[3] + ftm2i[3]); outputForce[2] = conversionFactor*(ftm2[3] + ftm2i[3]);
outputTorque[0][0] = conversionFactor*(ttm2[1] + ttm2i[1]); conversionFactor *= -1.0;
outputTorque[0][1] = conversionFactor*(ttm2[2] + ttm2i[2]); outputTorque[0][0] = conversionFactor*(ttm2[1] + ttm2i[1]);
outputTorque[0][2] = conversionFactor*(ttm2[3] + ttm2i[3]); outputTorque[0][1] = conversionFactor*(ttm2[2] + ttm2i[2]);
outputTorque[0][2] = conversionFactor*(ttm2[3] + ttm2i[3]);
outputTorque[1][0] = conversionFactor*(ttm3[1] + ttm3i[1]); outputTorque[1][0] = conversionFactor*(ttm3[1] + ttm3i[1]);
outputTorque[1][1] = conversionFactor*(ttm3[2] + ttm3i[2]); outputTorque[1][1] = conversionFactor*(ttm3[2] + ttm3i[2]);
outputTorque[1][2] = conversionFactor*(ttm3[3] + ttm3i[3]); outputTorque[1][2] = conversionFactor*(ttm3[3] + ttm3i[3]);
//outputTorque[1][2] = conversionFactor*(ttm3_2 + ttm3i_2);
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
int debugIndex = 0; int debugIndex = 0;
...@@ -1299,10 +1298,10 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1299,10 +1298,10 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
} }
delete debugArray; delete debugArray;
#endif #endif
// --------------------------------------------------------------------------------------- cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpu, amoebaGpu->psTorque, amoebaGpu->psForce, gpu->psForce4 );
} }
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -1315,7 +1314,6 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1315,7 +1314,6 @@ void cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpuContext amoebaGpu )
void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu ) void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{ {
cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu ); cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu );
kCalculateAmoebaPME( amoebaGpu ); kCalculateAmoebaPME( amoebaGpu );
} }
......
...@@ -79,13 +79,13 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)( ...@@ -79,13 +79,13 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectElectrostatic, Forces_kernel)(
PmeDirectElectrostaticParticle localParticle; PmeDirectElectrostaticParticle localParticle;
loadPmeDirectElectrostaticShared(&localParticle, atomI ); loadPmeDirectElectrostaticShared(&localParticle, atomI );
localParticle.force[0] = 0.0f; localParticle.force[0] = 0.0f;
localParticle.force[1] = 0.0f; localParticle.force[1] = 0.0f;
localParticle.force[2] = 0.0f; localParticle.force[2] = 0.0f;
localParticle.torque[0] = 0.0f; localParticle.torque[0] = 0.0f;
localParticle.torque[1] = 0.0f; localParticle.torque[1] = 0.0f;
localParticle.torque[2] = 0.0f; localParticle.torque[2] = 0.0f;
scalingFactors[PScaleIndex] = 1.0f; scalingFactors[PScaleIndex] = 1.0f;
scalingFactors[DScaleIndex] = 1.0f; scalingFactors[DScaleIndex] = 1.0f;
...@@ -236,9 +236,9 @@ if( atomI == targetAtom ){ ...@@ -236,9 +236,9 @@ if( atomI == targetAtom ){
if( atomI < cAmoebaSim.numberOfAtoms ){ if( atomI < cAmoebaSim.numberOfAtoms ){
calculatePmeSelfTorqueElectrostaticPairIxn_kernel( localParticle ); calculatePmeSelfTorqueElectrostaticPairIxn_kernel( localParticle );
//float energy; float energy;
//calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy ); calculatePmeSelfEnergyElectrostaticPairIxn_kernel( localParticle, &energy );
//totalEnergy += energy; totalEnergy += energy;
} }
// Write results // Write results
...@@ -282,10 +282,8 @@ if( atomI == targetAtom ){ ...@@ -282,10 +282,8 @@ if( atomI == targetAtom ){
#endif #endif
} }
else // 100% utilization else
{ {
// Read fixed atom data into registers and GRF
if (lasty != y) if (lasty != y)
{ {
// load shared data // load shared data
......
...@@ -382,10 +382,11 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& ...@@ -382,10 +382,11 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
--------------------------------------------------------------------------------------- */ --------------------------------------------------------------------------------------- */
#ifdef AMOEBA_DEBUG
static int isNanOrInfinity( double number ){ static int isNanOrInfinity( double number ){
return (number != number || number == std::numeric_limits<double>::infinity() || number == -std::numeric_limits<double>::infinity()) ? 1 : 0; return (number != number || number == std::numeric_limits<double>::infinity() || number == -std::numeric_limits<double>::infinity()) ? 1 : 0;
} }
#endif
/**--------------------------------------------------------------------------------------- /**---------------------------------------------------------------------------------------
...@@ -398,13 +399,9 @@ static int isNanOrInfinity( double number ){ ...@@ -398,13 +399,9 @@ static int isNanOrInfinity( double number ){
static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
{ {
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
static const char* methodName = "computeCudaAmoebaPmeFixedEField"; static const char* methodName = "computeCudaAmoebaPmeFixedEField";
if( amoebaGpu->log ){ if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "\n%s\n", methodName ); (void) fflush( amoebaGpu->log ); (void) fprintf( amoebaGpu->log, "\n%s\n", methodName ); (void) fflush( amoebaGpu->log );
...@@ -414,15 +411,13 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -414,15 +411,13 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
// N2 debug array // N2 debug array
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray"); CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms); memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload(); debugArray->Upload();
// print intermediate results for the targetAtom // print intermediate results for the targetAtom
unsigned int targetAtom = 0; unsigned int targetAtom = 0;
#endif
int maxPrint = 3002; int maxPrint = 3002;
amoebaGpu->psE_Field->Download(); amoebaGpu->psE_Field->Download();
(void) fprintf( amoebaGpu->log, "Recip EFields In\n" ); (void) fprintf( amoebaGpu->log, "Recip EFields In\n" );
...@@ -448,6 +443,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -448,6 +443,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
} }
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
(void) fprintf( amoebaGpu->log, "Recip EFields End\n" ); (void) fprintf( amoebaGpu->log, "Recip EFields End\n" );
#endif
kClearFields_3( amoebaGpu, 2 ); kClearFields_3( amoebaGpu, 2 );
......
...@@ -725,8 +725,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -725,8 +725,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu->mutualInducedDone = done; amoebaGpu->mutualInducedDone = done;
amoebaGpu->mutualInducedConverged = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1; amoebaGpu->mutualInducedConverged = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;
#ifdef AMOEBA_DEBUG
/*
if( 0 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
//fileId.push_back( 0 ); //fileId.push_back( 0 );
...@@ -736,8 +734,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -736,8 +734,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector ); cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector ); cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
} }
*/
#endif
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
} }
......
...@@ -389,18 +389,11 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG ...@@ -389,18 +389,11 @@ void kCalculateAmoebaMultipoleForces(amoebaGpuContext amoebaGpu, bool hasAmoebaG
if( amoebaGpu->multipoleNonbondedMethod == AMOEBA_NO_CUTOFF ){ if( amoebaGpu->multipoleNonbondedMethod == AMOEBA_NO_CUTOFF ){
cudaComputeAmoebaElectrostatic( amoebaGpu ); cudaComputeAmoebaElectrostatic( amoebaGpu );
// map torques to forces
cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpu, amoebaGpu->psTorque, amoebaGpu->psForce, amoebaGpu->gpuContext->psForce4 );
} else { } else {
cudaComputeAmoebaPmeElectrostatic( amoebaGpu ); cudaComputeAmoebaPmeElectrostatic( amoebaGpu );
} }
// map torques to forces
cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpu, amoebaGpu->psTorque, amoebaGpu->psForce, amoebaGpu->gpuContext->psForce4 );
if( 0 && amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "Done mapping torques -> forces%s\n", methodName.c_str() ); fflush( NULL );
(void) fflush( NULL );
}
} }
#undef AMOEBA_DEBUG #undef AMOEBA_DEBUG
...@@ -3608,12 +3608,14 @@ Integrator* readAmoebaParameterFile( const std::string& inputParameterFile, MapS ...@@ -3608,12 +3608,14 @@ Integrator* readAmoebaParameterFile( const std::string& inputParameterFile, MapS
} }
} else if( field == "AmoebaRealPmeForce" || } else if( field == "AmoebaRealPmeForce" ||
field == "AmoebaKSpacePmeForce" || field == "AmoebaKSpacePmeForce" ||
field == "AmoebaDirAndSForce" ||
field == "AmoebaSelfPmeForce" ){ field == "AmoebaSelfPmeForce" ){
std::vector< std::vector<double> > vectorOfDoubleVectors; std::vector< std::vector<double> > vectorOfDoubleVectors;
readVectorOfDoubleVectors( filePtr, tokens, vectorOfDoubleVectors, &lineCount, field, log ); readVectorOfDoubleVectors( filePtr, tokens, vectorOfDoubleVectors, &lineCount, field, log );
supplementary[field] = vectorOfDoubleVectors; supplementary[field] = vectorOfDoubleVectors;
} else if( field == "AmoebaRealPmeEnergy" || } else if( field == "AmoebaRealPmeEnergy" ||
field == "AmoebaKSpacePmeEnergy" || field == "AmoebaKSpacePmeEnergy" ||
field == "AmoebaDirAndSEnergy" ||
field == "AmoebaSelfPmeEnergy" ){ field == "AmoebaSelfPmeEnergy" ){
double value = atof( tokens[1].c_str() ); double value = atof( tokens[1].c_str() );
std::vector< std::vector<double> > vectorOfDoubleVectors; std::vector< std::vector<double> > vectorOfDoubleVectors;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment