Commit ce0cc4e0 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Debye constant was set for kcal/A units; updated to kJ/nm units

Bug in Urey-Bradley force calculation fixed
parent 1b5ee8f9
...@@ -976,14 +976,26 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const ...@@ -976,14 +976,26 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
force.getPmeGridDimensions( pmeGridDimension ); force.getPmeGridDimensions( pmeGridDimension );
if( 1 || pmeGridDimension[0] == 0 ){ if( 1 || pmeGridDimension[0] == 0 ){
NonbondedForceImpl::calcPMEParameters(system, nb, alpha, xsize, ysize, zsize); NonbondedForceImpl::calcPMEParameters(system, nb, alpha, xsize, ysize, zsize);
/*
alpha = 5.446;
xsize = 60;
ysize = 48;
zsize = 48;
*/
} else { } else {
alpha = force.getAEwald(); alpha = force.getAEwald();
xsize = pmeGridDimension[0]; xsize = pmeGridDimension[0];
ysize = pmeGridDimension[1]; ysize = pmeGridDimension[1];
zsize = pmeGridDimension[2]; zsize = pmeGridDimension[2];
} }
if( data.getLog() ){
(void) fprintf( data.getLog(), "AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d]\n",
force.getEwaldErrorTolerance(), force.getCutoffDistance(), alpha, xsize, ysize, zsize );
(void) fflush( data.getLog() );
}
gpuSetAmoebaPMEParameters(data.getAmoebaGpu(), (float) alpha, xsize, ysize, zsize); gpuSetAmoebaPMEParameters(data.getAmoebaGpu(), (float) alpha, xsize, ysize, zsize);
data.setApplyMultipoleCutoff( 1 ); data.setApplyMultipoleCutoff( 1 );
data.cudaPlatformData.nonbondedMethod = PARTICLE_MESH_EWALD; data.cudaPlatformData.nonbondedMethod = PARTICLE_MESH_EWALD;
amoebaGpuContext amoebaGpu = data.getAmoebaGpu(); amoebaGpuContext amoebaGpu = data.getAmoebaGpu();
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
......
...@@ -1155,10 +1155,9 @@ void gpuSetAmoebaBondOffsets(amoebaGpuContext amoebaGpu ) ...@@ -1155,10 +1155,9 @@ void gpuSetAmoebaBondOffsets(amoebaGpuContext amoebaGpu )
(amoebaGpu->psAmoebaUreyBradleyParameter ? amoebaGpu->psAmoebaUreyBradleyParameter->_stride : 0); (amoebaGpu->psAmoebaUreyBradleyParameter ? amoebaGpu->psAmoebaUreyBradleyParameter->_stride : 0);
//gpu->sim.localForces_threads_per_block = (std::max(amoebaGpu->amoebaSim.amoebaTorsionTorsion_offset, gpu->sim.customBonds) / gpu->sim.blocks + 15) & 0xfffffff0; //gpu->sim.localForces_threads_per_block = (std::max(amoebaGpu->amoebaSim.amoebaTorsionTorsion_offset, gpu->sim.customBonds) / gpu->sim.blocks + 15) & 0xfffffff0;
//fprintf( amoebaGpu->log, "Final (UreyBradley) offset : %5d \n", amoebaGpu->amoebaSim.amoebaUreyBradley_offset );
unsigned int maxI = (amoebaGpu->amoebaSim.amoebaTorsionTorsion_offset > gpu->sim.customBonds) ? amoebaGpu->amoebaSim.amoebaTorsionTorsion_offset : gpu->sim.customBonds; unsigned int maxI = (amoebaGpu->amoebaSim.amoebaUreyBradley_offset > gpu->sim.customBonds) ? amoebaGpu->amoebaSim.amoebaUreyBradley_offset : gpu->sim.customBonds;
gpu->sim.localForces_threads_per_block = (maxI/gpu->sim.blocks + 15) & 0xfffffff0; gpu->sim.localForces_threads_per_block = (maxI/gpu->sim.blocks + 15) & 0xfffffff0;
if (gpu->sim.localForces_threads_per_block > gpu->sim.max_localForces_threads_per_block) if (gpu->sim.localForces_threads_per_block > gpu->sim.max_localForces_threads_per_block)
gpu->sim.localForces_threads_per_block = gpu->sim.max_localForces_threads_per_block; gpu->sim.localForces_threads_per_block = gpu->sim.max_localForces_threads_per_block;
if (gpu->sim.localForces_threads_per_block < 1) if (gpu->sim.localForces_threads_per_block < 1)
...@@ -1264,8 +1263,8 @@ void gpuSetAmoebaBondOffsets(amoebaGpuContext amoebaGpu ) ...@@ -1264,8 +1263,8 @@ void gpuSetAmoebaBondOffsets(amoebaGpuContext amoebaGpu )
if( amoebaGpu->psAmoebaUreyBradleyID ){ if( amoebaGpu->psAmoebaUreyBradleyID ){
amoebaGpu->psAmoebaUreyBradleyID->Upload(); amoebaGpu->psAmoebaUreyBradleyID->Upload();
} }
}
}
} }
...@@ -1511,7 +1510,7 @@ void gpuKirkwoodAllocate( amoebaGpuContext amoebaGpu ) ...@@ -1511,7 +1510,7 @@ void gpuKirkwoodAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu->psKirkwoodForce = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "KirkwoodForce"); amoebaGpu->psKirkwoodForce = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "KirkwoodForce");
amoebaGpu->psKirkwoodEDiffForce = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "KirkwoodEDiffForce"); amoebaGpu->psKirkwoodEDiffForce = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "KirkwoodEDiffForce");
unsigned int offset = paddedNumberOfAtoms*sizeof( float ); unsigned int offset = paddedNumberOfAtoms*sizeof( float );
memset( amoebaGpu->psBorn->_pSysStream[0], 0, offset ); memset( amoebaGpu->psBorn->_pSysStream[0], 0, offset );
memset( amoebaGpu->psBornPolar->_pSysStream[0], 0, offset ); memset( amoebaGpu->psBornPolar->_pSysStream[0], 0, offset );
......
...@@ -441,6 +441,7 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -441,6 +441,7 @@ void kCalculateAmoebaLocalForces_kernel()
force.y += c23.y; force.y += c23.y;
force.z += c23.z; force.z += c23.z;
cSim.pForce4[offset] = force; cSim.pForce4[offset] = force;
} }
pos += blockDim.x * gridDim.x; pos += blockDim.x * gridDim.x;
} }
...@@ -1580,10 +1581,12 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -1580,10 +1581,12 @@ void kCalculateAmoebaLocalForces_kernel()
int4 atom = cAmoebaSim.pAmoebaUreyBradleyID[pos1]; int4 atom = cAmoebaSim.pAmoebaUreyBradleyID[pos1];
float4 atomA = cSim.pPosq[atom.x]; float4 atomA = cSim.pPosq[atom.x];
float4 atomB = cSim.pPosq[atom.y]; float4 atomB = cSim.pPosq[atom.y];
float2 bond = cAmoebaSim.pAmoebaUreyBradleyParameter[pos]; float2 bond = cAmoebaSim.pAmoebaUreyBradleyParameter[pos1];
float dx = atomB.x - atomA.x; float dx = atomB.x - atomA.x;
float dy = atomB.y - atomA.y; float dy = atomB.y - atomA.y;
float dz = atomB.z - atomA.z; float dz = atomB.z - atomA.z;
float r2 = dx * dx + dy * dy + dz * dz; float r2 = dx * dx + dy * dy + dz * dz;
float r = sqrt(r2); float r = sqrt(r2);
float deltaIdeal = r - bond.x; float deltaIdeal = r - bond.x;
...@@ -1599,16 +1602,20 @@ void kCalculateAmoebaLocalForces_kernel() ...@@ -1599,16 +1602,20 @@ void kCalculateAmoebaLocalForces_kernel()
dx *= dEdR; dx *= dEdR;
dy *= dEdR; dy *= dEdR;
dz *= dEdR; dz *= dEdR;
unsigned int offsetA = atom.x + atom.z * cSim.stride; unsigned int offsetA = atom.x + atom.z * cSim.stride;
unsigned int offsetB = atom.y + atom.w * cSim.stride; unsigned int offsetB = atom.y + atom.w * cSim.stride;
float4 forceA = cSim.pForce4[offsetA]; float4 forceA = cSim.pForce4[offsetA];
float4 forceB = cSim.pForce4[offsetB]; float4 forceB = cSim.pForce4[offsetB];
forceA.x += dx; forceA.x += dx;
forceA.y += dy; forceA.y += dy;
forceA.z += dz; forceA.z += dz;
forceB.x -= dx; forceB.x -= dx;
forceB.y -= dy; forceB.y -= dy;
forceB.z -= dz; forceB.z -= dz;
cSim.pForce4[offsetA] = forceA; cSim.pForce4[offsetA] = forceA;
cSim.pForce4[offsetB] = forceB; cSim.pForce4[offsetB] = forceB;
} }
...@@ -1629,7 +1636,6 @@ void kCalculateAmoebaLocalForces(amoebaGpuContext gpu) ...@@ -1629,7 +1636,6 @@ void kCalculateAmoebaLocalForces(amoebaGpuContext gpu)
call++; call++;
} }
} }
kCalculateAmoebaLocalForces_kernel<<<gpu->gpuContext->sim.blocks, gpu->gpuContext->sim.localForces_threads_per_block>>>(); kCalculateAmoebaLocalForces_kernel<<<gpu->gpuContext->sim.blocks, gpu->gpuContext->sim.localForces_threads_per_block>>>();
LAUNCHERROR("kCalculateAmoebaLocalForces"); LAUNCHERROR("kCalculateAmoebaLocalForces");
......
...@@ -324,12 +324,12 @@ void kReduceMutualInducedAndGkFieldDelta_kernel( float* arrayOfDeltas1, float* a ...@@ -324,12 +324,12 @@ void kReduceMutualInducedAndGkFieldDelta_kernel( float* arrayOfDeltas1, float* a
epsilon[0] = epsilon[0] < delta[0].y ? delta[0].y : epsilon[0]; epsilon[0] = epsilon[0] < delta[0].y ? delta[0].y : epsilon[0];
epsilon[0] = epsilon[0] < delta[0].z ? delta[0].z : epsilon[0]; epsilon[0] = epsilon[0] < delta[0].z ? delta[0].z : epsilon[0];
epsilon[0] = epsilon[0] < delta[0].w ? delta[0].w : epsilon[0]; epsilon[0] = epsilon[0] < delta[0].w ? delta[0].w : epsilon[0];
epsilon[0] = 4.8033324f*sqrtf( epsilon[0]/( (float) cAmoebaSim.numberOfAtoms ) ); epsilon[0] = 48.033324f*sqrtf( epsilon[0]/( (float) cAmoebaSim.numberOfAtoms ) );
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
epsilon[1] = 4.8033324f*sqrtf( delta[0].x/( (float) cAmoebaSim.numberOfAtoms ) ); epsilon[1] = 48.033324f*sqrtf( delta[0].x/( (float) cAmoebaSim.numberOfAtoms ) );
epsilon[2] = 4.8033324f*sqrtf( delta[0].y/( (float) cAmoebaSim.numberOfAtoms ) ); epsilon[2] = 48.033324f*sqrtf( delta[0].y/( (float) cAmoebaSim.numberOfAtoms ) );
epsilon[3] = 4.8033324f*sqrtf( delta[0].z/( (float) cAmoebaSim.numberOfAtoms ) ); epsilon[3] = 48.033324f*sqrtf( delta[0].z/( (float) cAmoebaSim.numberOfAtoms ) );
epsilon[4] = 4.8033324f*sqrtf( delta[0].w/( (float) cAmoebaSim.numberOfAtoms ) ); epsilon[4] = 48.033324f*sqrtf( delta[0].w/( (float) cAmoebaSim.numberOfAtoms ) );
#endif #endif
} }
} }
...@@ -831,7 +831,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe ...@@ -831,7 +831,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
float currentEpsilon = -1.0e30; float currentEpsilon = -1.0e30;
for( int ii = 0; ii < 4; ii++ ){ for( int ii = 0; ii < 4; ii++ ){
sum[ii] = cudaGetSum( 3*gpu->natoms, amoebaGpu->psWorkVector[ii]); sum[ii] = cudaGetSum( 3*gpu->natoms, amoebaGpu->psWorkVector[ii]);
sum[ii] = 4.8033324f*sqrtf( sum[ii]/( (float) gpu->natoms) ); sum[ii] = 48.033324f*sqrtf( sum[ii]/( (float) gpu->natoms) );
if( sum[ii] > currentEpsilon ){ if( sum[ii] > currentEpsilon ){
currentEpsilon = sum[ii]; currentEpsilon = sum[ii];
} }
...@@ -845,7 +845,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe ...@@ -845,7 +845,7 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
} }
#endif #endif
// Debye=4.8033324f // Debye=48.033324f
amoebaGpu->psCurrentEpsilon->Download(); amoebaGpu->psCurrentEpsilon->Download();
float currentEpsilon = amoebaGpu->psCurrentEpsilon->_pSysStream[0][0]; float currentEpsilon = amoebaGpu->psCurrentEpsilon->_pSysStream[0][0];
......
...@@ -175,7 +175,7 @@ void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDe ...@@ -175,7 +175,7 @@ void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* arrayOfDe
if (threadIdx.x == 0) if (threadIdx.x == 0)
{ {
epsilon[0] = delta[0].x > delta[0].y ? delta[0].x : delta[0].y; epsilon[0] = delta[0].x > delta[0].y ? delta[0].x : delta[0].y;
epsilon[0] = 4.8033324f*sqrtf( epsilon[0]/( (float) (numberOfEntries/3)) ); epsilon[0] = 48.033324f*sqrtf( epsilon[0]/( (float) (numberOfEntries/3)) );
} }
} }
...@@ -550,7 +550,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu ...@@ -550,7 +550,7 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
trackMutualInducedIterations( amoebaGpu, iteration); trackMutualInducedIterations( amoebaGpu, iteration);
} }
// Debye=4.8033324f // Debye=48.033324f
amoebaGpu->psCurrentEpsilon->Download(); amoebaGpu->psCurrentEpsilon->Download();
float currentEpsilon = amoebaGpu->psCurrentEpsilon->_pSysStream[0][0]; float currentEpsilon = amoebaGpu->psCurrentEpsilon->_pSysStream[0][0];
amoebaGpu->mutualInducedCurrentEpsilon = currentEpsilon; amoebaGpu->mutualInducedCurrentEpsilon = currentEpsilon;
......
...@@ -297,10 +297,10 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar ...@@ -297,10 +297,10 @@ static void kReduceMutualInducedFieldDelta_kernel(int numberOfEntries, float* ar
if (threadIdx.x == 0) if (threadIdx.x == 0)
{ {
epsilon[0] = delta[0].x > delta[0].y ? delta[0].x : delta[0].y; epsilon[0] = delta[0].x > delta[0].y ? delta[0].x : delta[0].y;
epsilon[0] = 4.8033324f*sqrtf( epsilon[0]/( (float) (numberOfEntries/3)) ); epsilon[0] = 48.033324f*sqrtf( epsilon[0]/( (float) (numberOfEntries/3)) );
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
epsilon[1] = 4.8033324f*sqrtf( delta[0].x/( (float) (numberOfEntries/3)) ); epsilon[1] = 48.033324f*sqrtf( delta[0].x/( (float) (numberOfEntries/3)) );
epsilon[2] = 4.8033324f*sqrtf( delta[0].y/( (float) (numberOfEntries/3)) ); epsilon[2] = 48.033324f*sqrtf( delta[0].y/( (float) (numberOfEntries/3)) );
#endif #endif
} }
} }
...@@ -522,6 +522,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -522,6 +522,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
//#define AMOEBA_DEBUG
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
static const char* methodName = "cudaComputeAmoebaPmeMutualInducedFieldBySOR"; static const char* methodName = "cudaComputeAmoebaPmeMutualInducedFieldBySOR";
static int timestep = 0; static int timestep = 0;
...@@ -645,7 +646,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -645,7 +646,6 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
kCalculateAmoebaPMEInducedDipoleField( amoebaGpu ); kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n"); LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n");
#ifdef GET_EFIELD_FROM_FILE #ifdef GET_EFIELD_FROM_FILE
{ {
std::string fileName = "waterInduceRecip.txt"; std::string fileName = "waterInduceRecip.txt";
...@@ -695,7 +695,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -695,7 +695,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
trackMutualInducedIterations( amoebaGpu, iteration); trackMutualInducedIterations( amoebaGpu, iteration);
} }
// Debye=4.8033324f // Debye=48.033324f
amoebaGpu->psCurrentEpsilon->Download(); amoebaGpu->psCurrentEpsilon->Download();
float currentEpsilon = amoebaGpu->psCurrentEpsilon->_pSysStream[0][0]; float currentEpsilon = amoebaGpu->psCurrentEpsilon->_pSysStream[0][0];
amoebaGpu->mutualInducedCurrentEpsilon = currentEpsilon; amoebaGpu->mutualInducedCurrentEpsilon = currentEpsilon;
...@@ -728,9 +728,13 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -728,9 +728,13 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
(void) fprintf( amoebaGpu->log, "%4d ", ii ); (void) fprintf( amoebaGpu->log, "%4d ", ii );
(void) fprintf( amoebaGpu->log," Mi[%14.6e %14.6e %14.6e] ", (void) fprintf( amoebaGpu->log," Mi[%14.6e %14.6e %14.6e] ",
amoebaGpu->psInducedDipole->_pSysStream[0][offset], amoebaGpu->psInducedDipole->_pSysStream[0][offset+1], amoebaGpu->psInducedDipole->_pSysStream[0][offset+2] ); amoebaGpu->psInducedDipole->_pSysStream[0][offset],
amoebaGpu->psInducedDipole->_pSysStream[0][offset+1],
amoebaGpu->psInducedDipole->_pSysStream[0][offset+2] );
(void) fprintf( amoebaGpu->log,"Mip[%14.6e %14.6e %14.6e]\n", (void) fprintf( amoebaGpu->log,"Mip[%14.6e %14.6e %14.6e]\n",
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset], amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+1], amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+2] ); amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset],
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+1],
amoebaGpu->psInducedDipolePolar->_pSysStream[0][offset+2] );
if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){ if( ii == maxPrint && (ii < (gpu->natoms - maxPrint) ) ){
ii = (gpu->natoms - maxPrint); ii = (gpu->natoms - maxPrint);
offset = 3*(ii+1); offset = 3*(ii+1);
...@@ -758,7 +762,12 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -758,7 +762,12 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu->psCurrentEpsilon->_pSysStream[0][1], amoebaGpu->psCurrentEpsilon->_pSysStream[0][1],
amoebaGpu->psCurrentEpsilon->_pSysStream[0][2], done ); amoebaGpu->psCurrentEpsilon->_pSysStream[0][2], done );
fflush( amoebaGpu->log ); fflush( amoebaGpu->log );
if( amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon )exit(0);
// exit if nan
if( amoebaGpu->mutualInducedCurrentEpsilon != amoebaGpu->mutualInducedCurrentEpsilon ){
(void) fprintf( amoebaGpu->log, "PME MI iteration=%3d eps is nan -- exiting.\n", iteration );
exit(0);
}
iteration++; iteration++;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment