Commit f01dc8d9 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Bug fix for MI calculation

parent 35a4b156
......@@ -445,9 +445,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
gpuContext gpu = amoebaGpu->gpuContext;
#ifdef AMOEBA_DEBUG
static int iteration = 1;
int targetAtom = 546;
static const char* methodName = "cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply";
static int iteration = 1;
if( 1 && amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s\n", methodName );
(void) fflush( amoebaGpu->log );
......@@ -485,30 +485,17 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#endif
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaPmeMutualInducedFieldCutoffByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevData );
#endif
} else {
kCalculateAmoebaPmeMutualInducedFieldCutoff_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit,
amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevData );
#endif
}
LAUNCHERROR("kCalculateAmoebaPmeMutualInducedField");
......@@ -520,8 +507,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
iteration ); (void) fflush( amoebaGpu->log );
outputArray->Download();
outputPolarArray->Download();
debugArray->Download();
int maxPrint = 5;
// debugArray->Download();
int maxPrint = 5000000;
for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
......@@ -564,7 +551,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
iteration++;
}
delete debugArray;
// delete debugArray;
#endif
}
......@@ -652,6 +639,18 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// post matrix multiply
if( 0 ){
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
fileId.push_back( iteration );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psWorkVector[0], outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psWorkVector[1], outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeDirectMIPre", fileId, outputVector );
}
kSorUpdateMutualInducedField_kernel<<< gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block >>>(
gpu->natoms, amoebaGpu->psPolarizability->_pDevData,
amoebaGpu->psInducedDipole->_pDevData, amoebaGpu->psInducedDipolePolar->_pDevData,
......@@ -664,11 +663,13 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
std::vector<int> fileId;
fileId.push_back( iteration );
VectorOfDoubleVectors outputVector;
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
// cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psPolarizability, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeDirectMI", fileId, outputVector );
//exit(0);
}
// get total epsilon -- performing sums on gpu
......@@ -762,6 +763,14 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
(void) fflush( amoebaGpu->log );
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "MI iteration=%3d eps %14.6e [%14.6e %14.6e] done=%d\n",
iteration, amoebaGpu->mutualInducedCurrentEpsilon,
amoebaGpu->psCurrentEpsilon->_pSysData[1],
amoebaGpu->psCurrentEpsilon->_pSysData[2], done );
(void) fflush( amoebaGpu->log );
}
#endif
// exit if nan
......@@ -781,7 +790,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, 1.0f );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData, 1.0f );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
......
......@@ -104,13 +104,12 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
float4 delta;
float prefactor2;
if( ( (atomI == (y + j)) || (atomI >= cSim.atoms) || ((y+j) >= cSim.atoms) ) ){
delta.w = prefactor2 = 0.0f;
} else {
if( ( (atomI != (y + j)) && (atomI < cSim.atoms) && ((y+j) < cSim.atoms) ) ){
setupMutualInducedFieldPairIxn_kernel( localParticle, psA[j], uscale, &delta, &prefactor2 );
}
//delta.w = prefactor2 = 0.0f;
calculateMutualInducedFieldPairIxn_kernel( psA[j].inducedDipole, delta, prefactor2, fieldSum );
calculateMutualInducedFieldPairIxn_kernel( psA[j].inducedDipolePolar, delta, prefactor2, fieldPolarSum );
}
}
......@@ -118,24 +117,18 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned int offset = 3*(x + tgx + warp*cSim.paddedNumberOfAtoms);
load3dArrayBufferPerWarp( offset, fieldSum, outputField );
load3dArrayBufferPerWarp( offset, fieldPolarSum, outputFieldPolar);
#else
unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cSim.paddedNumberOfAtoms);
#endif
load3dArray( offset, fieldSum, outputField );
load3dArray( offset, fieldPolarSum, outputFieldPolar);
#endif
} else {
if (lasty != y)
{
unsigned int atomJ = y + tgx;
// load coordinates, charge, ...
loadMutualInducedShared( &(sA[threadIdx.x]), atomJ );
}
......@@ -152,18 +145,13 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
zeroMutualInducedParticleSharedField( &(sA[threadIdx.x]) );
for (unsigned int j = 0; j < GRID; j++)
{
if ((flags&(1<<j)) != 0)
{
for (unsigned int j = 0; j < GRID; j++){
if ((flags&(1<<j)) != 0) {
unsigned int jIdx = (flags == 0xFFFFFFFF) ? tj : j;
float4 delta;
float prefactor2;
if( (atomI >= cSim.atoms) || ((y+jIdx) >= cSim.atoms) ){
delta.w = prefactor2 = 0.0f;
} else {
if( (atomI < cSim.atoms) && ((y+jIdx) < cSim.atoms) ){
setupMutualInducedFieldPairIxn_kernel( localParticle, psA[jIdx], uscale, &delta, &prefactor2 );
}
calculateMutualInducedFieldPairIxn_kernel( psA[jIdx].inducedDipole, delta, prefactor2, fieldSum );
calculateMutualInducedFieldPairIxn_kernel( psA[jIdx].inducedDipolePolar, delta, prefactor2, fieldPolarSum );
#ifndef INCLUDE_MI_FIELD_BUFFERS
......@@ -203,7 +191,7 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
}
#endif
}
}
tj = (tj + 1) & (GRID - 1);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment