Commit 42d5b80a authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Continue PME

parent 770f7abc
...@@ -1271,6 +1271,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu ) ...@@ -1271,6 +1271,6 @@ void cudaComputeAmoebaPmeElectrostatic( amoebaGpuContext amoebaGpu )
{ {
cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu ); cudaComputeAmoebaPmeDirectElectrostatic( amoebaGpu );
kCalculateAmoebaPME( amoebaGpu ); //kCalculateAmoebaPME( amoebaGpu );
} }
...@@ -46,6 +46,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int ...@@ -46,6 +46,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
// Reduce field // Reduce field
const float term = (4.0f/3.0f)*(cSim.alphaEwald*cSim.alphaEwald*cSim.alphaEwald)/cAmoebaSim.sqrtPi; const float term = (4.0f/3.0f)*(cSim.alphaEwald*cSim.alphaEwald*cSim.alphaEwald)/cAmoebaSim.sqrtPi;
//const float term = 0.0f;
while (pos < fieldComponents) while (pos < fieldComponents)
{ {
...@@ -73,6 +74,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int ...@@ -73,6 +74,7 @@ void kReduceDirectSelfFields_kernel( unsigned int fieldComponents, unsigned int
{ {
totalField += pFt[0]; totalField += pFt[0];
} }
fieldOut[pos] = totalField; fieldOut[pos] = totalField;
pos += gridDim.x * blockDim.x; pos += gridDim.x * blockDim.x;
} }
...@@ -256,18 +258,19 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& ...@@ -256,18 +258,19 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
if( r2 <= cAmoebaSim.cutoffDistance2 ){ if( r2 <= cAmoebaSim.cutoffDistance2 ){
fields[0][0] = fim[0] - fid[0]; fields[0][0] = fim[0] - fid[0];
fields[1][0] = fkm[0] - fkd[0];
fields[2][0] = fim[0] - fip[0];
fields[3][0] = fkm[0] - fkp[0];
fields[0][1] = fim[1] - fid[1]; fields[0][1] = fim[1] - fid[1];
fields[1][1] = fkm[1] - fkd[1];
fields[2][1] = fim[1] - fip[1];
fields[3][1] = fkm[1] - fkp[1];
fields[0][2] = fim[2] - fid[2]; fields[0][2] = fim[2] - fid[2];
fields[1][0] = fkm[0] - fkd[0];
fields[1][1] = fkm[1] - fkd[1];
fields[1][2] = fkm[2] - fkd[2]; fields[1][2] = fkm[2] - fkd[2];
fields[2][0] = fim[0] - fip[0];
fields[2][1] = fim[1] - fip[1];
fields[2][2] = fim[2] - fip[2]; fields[2][2] = fim[2] - fip[2];
fields[3][0] = fkm[0] - fkp[0];
fields[3][1] = fkm[1] - fkp[1];
fields[3][2] = fkm[2] - fkp[2]; fields[3][2] = fkm[2] - fkp[2];
} else { } else {
...@@ -297,11 +300,12 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle& ...@@ -297,11 +300,12 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
pullBack[1].y = atomJ.y - atomI.y; pullBack[1].y = atomJ.y - atomI.y;
pullBack[1].z = atomJ.z - atomI.z; pullBack[1].z = atomJ.z - atomI.z;
pullBack[1].w = (atomJ.x - atomI.x)*(atomJ.x - atomI.x) + (atomJ.y - atomI.y)*(atomJ.y - atomI.y)+ (atomJ.z - atomI.z)*(atomJ.z - atomI.z); pullBack[1].w = (atomJ.x - atomI.x)*(atomJ.x - atomI.x) + (atomJ.y - atomI.y)*(atomJ.y - atomI.y)+ (atomJ.z - atomI.z)*(atomJ.z - atomI.z);
/*
pullBack[1].x = scale3; pullBack[2].x = scale3;
pullBack[1].y = scale5; pullBack[2].y = scale5;
pullBack[1].z = scale7; pullBack[2].z = scale7;
*/ pullBack[2].w = -1.0;
#endif #endif
} }
...@@ -351,10 +355,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -351,10 +355,17 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
unsigned int targetAtom = 0; unsigned int targetAtom = 0;
#endif #endif
//#define SET_ALPHA_EWALD
//#ifdef SET_ALPHA_EWALD
// amoebaGpu->gpuContext->sim.alphaEwald = 5.4459052e+00f;
// (void) fprintf( amoebaGpu->log, "computeCudaAmoebaPmeFixedEField: forceing alphaEwald=%15.7e\n", amoebaGpu->gpuContext->sim.alphaEwald );
// SetCalculateAmoebaCudaPmeFixedEFieldSim(amoebaGpu);
//#endif
kClearFields_3( amoebaGpu, 2 ); kClearFields_3( amoebaGpu, 2 );
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
(void) fprintf( amoebaGpu->log, "N2 warp\n" );
kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>( kCalculateAmoebaPmeDirectFixedE_FieldN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0], amoebaGpu->psWorkUnit->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
...@@ -366,14 +377,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -366,14 +377,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
#endif #endif
} else { } else {
#ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "N2 no warp\n" );
(void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
#endif
kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>( kCalculateAmoebaPmeDirectFixedE_FieldN2_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0], amoebaGpu->psWorkUnit->_pDevStream[0],
amoebaGpu->psWorkArray_3_1->_pDevStream[0], amoebaGpu->psWorkArray_3_1->_pDevStream[0],
...@@ -384,7 +387,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -384,7 +387,6 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
amoebaGpu->psWorkArray_3_2->_pDevStream[0] ); amoebaGpu->psWorkArray_3_2->_pDevStream[0] );
#endif #endif
} }
LAUNCHERROR("kCalculateAmoebaPmeDirectFixedE_Field_kernel"); LAUNCHERROR("kCalculateAmoebaPmeDirectFixedE_Field_kernel");
#if 0 #if 0
...@@ -406,16 +408,44 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -406,16 +408,44 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){ if( amoebaGpu->log ){
gpu->psInteractionCount->Download(); gpu->psInteractionCount->Download();
(void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n", (void) fprintf( amoebaGpu->log, "AmoebaN2Forces_kernel numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u warp=%d\n",
amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->bOutputBufferPerWarp, amoebaGpu->nonbondBlocks, amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits ); sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*amoebaGpu->nonbondThreadsPerBlock, amoebaGpu->energyOutputBuffers,
(*gpu->psInteractionCount)[0], gpu->sim.workUnits, gpu->bOutputBufferPerWarp );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
amoebaGpu->psWorkArray_3_1->Download(); amoebaGpu->psWorkArray_3_1->Download();
amoebaGpu->psWorkArray_3_2->Download(); amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psE_Field->Download(); amoebaGpu->psE_Field->Download();
amoebaGpu->psE_FieldPolar->Download(); amoebaGpu->psE_FieldPolar->Download();
(void) fprintf( amoebaGpu->log, "OutEFields\n" ); (void) fprintf( amoebaGpu->log, "Out WorkArray_3_[1,2] paddedNumberOfAtoms=%d\n", amoebaGpu->paddedNumberOfAtoms, amoebaGpu->outputBuffers );
int maxPrint = 32; int maxPrint = 32;
for( int ii = 0; ii < amoebaGpu->paddedNumberOfAtoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii);
int indexOffset = ii*3;
// buffer 1
(void) fprintf( amoebaGpu->log,"WArry1[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_1->_pSysStream[0][indexOffset],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][indexOffset+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[0][indexOffset+2] );
// buffer 2
(void) fprintf( amoebaGpu->log,"WArry2[%16.9e %16.9e %16.9e] ",
amoebaGpu->psWorkArray_3_2->_pSysStream[0][indexOffset],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][indexOffset+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[0][indexOffset+2] );
(void) fprintf( amoebaGpu->log,"\n" );
if( ii == maxPrint && (gpu->natoms - maxPrint) > ii ){
ii = gpu->natoms - maxPrint;
}
}
(void) fflush( amoebaGpu->log );
(void) fprintf( amoebaGpu->log, "EFields End\n" );
for( int ii = 0; ii < gpu->natoms; ii++ ){ for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%5d ", ii); (void) fprintf( amoebaGpu->log, "%5d ", ii);
...@@ -450,7 +480,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -450,7 +480,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
for( int jj = 0; jj < gpu->natoms; jj++ ){ for( int jj = 0; jj < gpu->natoms; jj++ ){
int debugIndex = jj; int debugIndex = jj;
(void) fprintf( amoebaGpu->log,"%5d PmeFixedEField\n", jj ); (void) fprintf( amoebaGpu->log,"%5d PmeFixedEField\n", jj );
for( int kk = 0; kk < 7; kk++ ){ for( int kk = 0; kk < 10; kk++ ){
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n", (void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e %16.9e]\n",
debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y, debugArray->_pSysStream[0][debugIndex].x, debugArray->_pSysStream[0][debugIndex].y,
debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w ); debugArray->_pSysStream[0][debugIndex].z, debugArray->_pSysStream[0][debugIndex].w );
...@@ -476,7 +506,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -476,7 +506,7 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
} }
#endif #endif
if( 1 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
fileId.push_back( 0 ); fileId.push_back( 0 );
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
...@@ -491,5 +521,5 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -491,5 +521,5 @@ static void cudaComputeAmoebaPmeDirectFixedEField( amoebaGpuContext amoebaGpu )
void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu ) void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{ {
cudaComputeAmoebaPmeDirectFixedEField( amoebaGpu ); cudaComputeAmoebaPmeDirectFixedEField( amoebaGpu );
kCalculateAmoebaPMEFixedMultipoleField( amoebaGpu ); //kCalculateAmoebaPMEFixedMultipoleField( amoebaGpu );
} }
...@@ -44,6 +44,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)( ...@@ -44,6 +44,7 @@ void METHOD_NAME(kCalculateAmoebaPmeDirectFixedE_Field, _kernel)(
){ ){
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
int pullIndexMax = 12;
float4 pullBack[12]; float4 pullBack[12];
float dScaleVal; float dScaleVal;
float pScaleVal; float pScaleVal;
...@@ -120,6 +121,7 @@ pScaleVal = 1.0f; ...@@ -120,6 +121,7 @@ pScaleVal = 1.0f;
); );
unsigned int match = (atomI == (y + j)) ? 1 : 0; unsigned int match = (atomI == (y + j)) ? 1 : 0;
match = 1;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole // add to field at atomI the field due atomJ's charge/dipole/quadrupole
...@@ -130,6 +132,7 @@ pScaleVal = 1.0f; ...@@ -130,6 +132,7 @@ pScaleVal = 1.0f;
fieldPolarSum[0] += match ? 0.0f : ijField[2][0]; fieldPolarSum[0] += match ? 0.0f : ijField[2][0];
fieldPolarSum[1] += match ? 0.0f : ijField[2][1]; fieldPolarSum[1] += match ? 0.0f : ijField[2][1];
fieldPolarSum[2] += match ? 0.0f : ijField[2][2]; fieldPolarSum[2] += match ? 0.0f : ijField[2][2];
} }
} }
...@@ -166,7 +169,7 @@ pScaleVal = pScaleValue; ...@@ -166,7 +169,7 @@ pScaleVal = pScaleValue;
// nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution // nan*0.0 = nan not 0.0, so explicitly exclude (atomI == atomJ) contribution
// by setting match flag // by setting match flag
unsigned int match = (atomI == (y + j)) ? 1 : 0; unsigned int match = ( (atomI == (y + j)) || (atomI >= cAmoebaSim.numberOfAtoms) || ((y+j) >= cAmoebaSim.numberOfAtoms) ) ? 1 : 0;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole // add to field at atomI the field due atomJ's charge/dipole/quadrupole
...@@ -180,54 +183,40 @@ pScaleVal = pScaleValue; ...@@ -180,54 +183,40 @@ pScaleVal = pScaleValue;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( atomI == targetAtom ){ if( atomI == targetAtom ){
unsigned int index = atomI == targetAtom ? (y + j) : atomI; unsigned int index = atomI == targetAtom ? (y + j) : atomI;
unsigned int pullBackIndex = 0; unsigned int indexI = 0;
unsigned int indexI = 0; unsigned int indexJ = indexI ? 0 : 2;
unsigned int indexJ = indexI ? 0 : 2; unsigned int indices[4] = { indexI, indexJ, indexI+1, indexJ+1 };
debugArray[index].x = (float) atomI; debugArray[index].x = (float) atomI;
debugArray[index].y = (float) (y + j); debugArray[index].y = (float) (y + j);
debugArray[index].z = dScaleValue; debugArray[index].z = dScaleValue;
debugArray[index].w = pScaleValue; debugArray[index].w = pScaleValue;
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms; unsigned int off = 3*(x + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
debugArray[index].x = pullBack[pullBackIndex].x; debugArray[index].x = (float) x;
debugArray[index].y = pullBack[pullBackIndex].y; debugArray[index].y = (float) tgx;
debugArray[index].z = pullBack[pullBackIndex].z; debugArray[index].z = -2;
debugArray[index].w = pullBack[pullBackIndex].w; debugArray[index].w = (float) off;
pullBackIndex++; float flag = 7.0f;
index += cAmoebaSim.paddedNumberOfAtoms; for( int ii = 0; ii < 4; ii++ ){
debugArray[index].x = pullBack[pullBackIndex].x;
debugArray[index].y = pullBack[pullBackIndex].y;
debugArray[index].z = pullBack[pullBackIndex].z;
debugArray[index].w = pullBack[pullBackIndex].w;
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
float flag = 7.0f; debugArray[index].x = match ? 0.0f : ijField[indices[ii]][0];
debugArray[index].x = ijField[indexI][0]; debugArray[index].y = match ? 0.0f : ijField[indices[ii]][1];
debugArray[index].y = ijField[indexI][1]; debugArray[index].z = match ? 0.0f : ijField[indices[ii]][2];
debugArray[index].z = ijField[indexI][2];
debugArray[index].w = flag; debugArray[index].w = flag;
}
for( int pullIndex = 0; pullIndex < pullIndexMax; pullIndex++ ){
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ][0]; debugArray[index].x = pullBack[pullIndex].x;
debugArray[index].y = ijField[indexJ][1]; debugArray[index].y = pullBack[pullIndex].y;
debugArray[index].z = ijField[indexJ][2]; debugArray[index].z = pullBack[pullIndex].z;
debugArray[index].w = flag; debugArray[index].w = pullBack[pullIndex].w;
}
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexI+1][0];
debugArray[index].y = ijField[indexI+1][1];
debugArray[index].z = ijField[indexI+1][2];
debugArray[index].w = flag;
index += cAmoebaSim.paddedNumberOfAtoms;
debugArray[index].x = ijField[indexJ+1][0];
debugArray[index].y = ijField[indexJ+1][1];
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
/* /*
index += cAmoebaSim.paddedNumberOfAtoms; index += cAmoebaSim.paddedNumberOfAtoms;
...@@ -300,25 +289,27 @@ pScaleVal = 1.0f; ...@@ -300,25 +289,27 @@ pScaleVal = 1.0f;
#endif #endif
); );
unsigned int outOfBounds = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+tj) >= cAmoebaSim.numberOfAtoms) ) ? 1 : 0;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole // add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum[0] += ijField[0][0]; fieldSum[0] += outOfBounds ? 0.0f : ijField[0][0];
fieldSum[1] += ijField[0][1]; fieldSum[1] += outOfBounds ? 0.0f : ijField[0][1];
fieldSum[2] += ijField[0][2]; fieldSum[2] += outOfBounds ? 0.0f : ijField[0][2];
fieldPolarSum[0] += ijField[2][0]; fieldPolarSum[0] += outOfBounds ? 0.0f : ijField[2][0];
fieldPolarSum[1] += ijField[2][1]; fieldPolarSum[1] += outOfBounds ? 0.0f : ijField[2][1];
fieldPolarSum[2] += ijField[2][2]; fieldPolarSum[2] += outOfBounds ? 0.0f : ijField[2][2];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole // add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA[tj].eField[0] += ijField[1][0]; psA[tj].eField[0] += outOfBounds ? 0.0f : ijField[1][0];
psA[tj].eField[1] += ijField[1][1]; psA[tj].eField[1] += outOfBounds ? 0.0f : ijField[1][1];
psA[tj].eField[2] += ijField[1][2]; psA[tj].eField[2] += outOfBounds ? 0.0f : ijField[1][2];
psA[tj].eFieldP[0] += ijField[3][0]; psA[tj].eFieldP[0] += outOfBounds ? 0.0f : ijField[3][0];
psA[tj].eFieldP[1] += ijField[3][1]; psA[tj].eFieldP[1] += outOfBounds ? 0.0f : ijField[3][1];
psA[tj].eFieldP[2] += ijField[3][2]; psA[tj].eFieldP[2] += outOfBounds ? 0.0f : ijField[3][2];
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -422,25 +413,27 @@ pScaleVal = pScaleValue; ...@@ -422,25 +413,27 @@ pScaleVal = pScaleValue;
#endif #endif
); );
unsigned int outOfBounds = ( (atomI >= cAmoebaSim.numberOfAtoms) || ((y+tj) >= cAmoebaSim.numberOfAtoms) ) ? 1 : 0;
// add to field at atomI the field due atomJ's charge/dipole/quadrupole // add to field at atomI the field due atomJ's charge/dipole/quadrupole
fieldSum[0] += ijField[0][0]; fieldSum[0] += outOfBounds ? 0.0f : ijField[0][0];
fieldSum[1] += ijField[0][1]; fieldSum[1] += outOfBounds ? 0.0f : ijField[0][1];
fieldSum[2] += ijField[0][2]; fieldSum[2] += outOfBounds ? 0.0f : ijField[0][2];
fieldPolarSum[0] += ijField[2][0]; fieldPolarSum[0] += outOfBounds ? 0.0f : ijField[2][0];
fieldPolarSum[1] += ijField[2][1]; fieldPolarSum[1] += outOfBounds ? 0.0f : ijField[2][1];
fieldPolarSum[2] += ijField[2][2]; fieldPolarSum[2] += outOfBounds ? 0.0f : ijField[2][2];
// add to field at atomJ the field due atomI's charge/dipole/quadrupole // add to field at atomJ the field due atomI's charge/dipole/quadrupole
psA[tj].eField[0] += ijField[1][0]; psA[tj].eField[0] += outOfBounds ? 0.0f : ijField[1][0];
psA[tj].eField[1] += ijField[1][1]; psA[tj].eField[1] += outOfBounds ? 0.0f : ijField[1][1];
psA[tj].eField[2] += ijField[1][2]; psA[tj].eField[2] += outOfBounds ? 0.0f : ijField[1][2];
psA[tj].eFieldP[0] += ijField[3][0]; psA[tj].eFieldP[0] += outOfBounds ? 0.0f : ijField[3][0];
psA[tj].eFieldP[1] += ijField[3][1]; psA[tj].eFieldP[1] += outOfBounds ? 0.0f : ijField[3][1];
psA[tj].eFieldP[2] += ijField[3][2]; psA[tj].eFieldP[2] += outOfBounds ? 0.0f : ijField[3][2];
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
......
...@@ -429,7 +429,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte ...@@ -429,7 +429,8 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( amoebaGpu->log && iteration == 1 ){ if( amoebaGpu->log && iteration == 1 ){
(void) fprintf( amoebaGpu->log, "Finished maxtrixMultiply kernel execution %d\n", iteration ); (void) fflush( amoebaGpu->log ); (void) fprintf( amoebaGpu->log, "Finished maxtrixMultiply kernel execution %d -- Direct only -- self added in kSorUpdateMutualInducedField_kernel\n",
iteration ); (void) fflush( amoebaGpu->log );
outputArray->Download(); outputArray->Download();
outputPolarArray->Download(); outputPolarArray->Download();
debugArray->Download(); debugArray->Download();
...@@ -556,6 +557,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -556,6 +557,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu->psE_Field->_pSysStream[0][ii] *= conversion; amoebaGpu->psE_Field->_pSysStream[0][ii] *= conversion;
amoebaGpu->psE_FieldPolar->_pSysStream[0][ii] *= conversion; amoebaGpu->psE_FieldPolar->_pSysStream[0][ii] *= conversion;
} }
amoebaGpu->gpuContext->sim.alphaEwald = 5.4459052e+00f;
(void) fprintf( amoebaGpu->log, "cudaComputeAmoebaPmeMutualInducedFieldBySOR: forcing alphaEwald=%15.7e\n", amoebaGpu->gpuContext->sim.alphaEwald );
SetCalculateAmoebaCudaPmeMutualInducedFieldSim(amoebaGpu);
amoebaGpu->psE_Field->Upload(); amoebaGpu->psE_Field->Upload();
amoebaGpu->psE_FieldPolar->Upload(); amoebaGpu->psE_FieldPolar->Upload();
#endif #endif
...@@ -584,7 +588,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -584,7 +588,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
amoebaGpu->psPolarizability->Download(); amoebaGpu->psPolarizability->Download();
(void) fprintf( amoebaGpu->log, "%s Initial setup for matrix multiply\n", methodName ); (void) fprintf( amoebaGpu->log, "%s Initial setup for matrix multiply\n", methodName );
int offset = 0; int offset = 0;
int maxPrint = 20000; int maxPrint = 10;
for( int ii = 0; ii < gpu->natoms; ii++ ){ for( int ii = 0; ii < gpu->natoms; ii++ ){
(void) fprintf( amoebaGpu->log, "%4d pol=%12.4e ", ii, (void) fprintf( amoebaGpu->log, "%4d pol=%12.4e ", ii,
amoebaGpu->psPolarizability->_pSysStream[0][offset] ); amoebaGpu->psPolarizability->_pSysStream[0][offset] );
...@@ -616,7 +620,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba ...@@ -616,7 +620,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
// matrix multiply // matrix multiply
cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpu, amoebaGpu->psWorkVector[0], amoebaGpu->psWorkVector[1] ); cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpu, amoebaGpu->psWorkVector[0], amoebaGpu->psWorkVector[1] );
kCalculateAmoebaPMEInducedDipoleField( amoebaGpu ); //kCalculateAmoebaPMEInducedDipoleField( amoebaGpu );
LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n"); LAUNCHERROR("cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply Loop\n");
......
...@@ -63,7 +63,9 @@ void testPMEWater() { ...@@ -63,7 +63,9 @@ void testPMEWater() {
quadrupole[0] = 3.540307211e-2; quadrupole[0] = 3.540307211e-2;
quadrupole[4] = -3.902570771e-2; quadrupole[4] = -3.902570771e-2;
quadrupole[8] = 3.622635596e-3; quadrupole[8] = 3.622635596e-3;
mp->addParticle(-0.51966, dipole, quadrupole, 1, 1, 2, 0.39, 9.707801995e-1, 0.837); double damp = 9.707801995e-01*sqrt(0.1);
double polarity = 0.837*0.001;
mp->addParticle(-0.51966, dipole, quadrupole, 1, 1, 2, 0.39, damp, polarity);
dipole[0] = -2.042094848e-2; dipole[0] = -2.042094848e-2;
dipole[2] = -3.078753000e-2; dipole[2] = -3.078753000e-2;
quadrupole[0] = -3.428482490e-3; quadrupole[0] = -3.428482490e-3;
...@@ -71,10 +73,30 @@ void testPMEWater() { ...@@ -71,10 +73,30 @@ void testPMEWater() {
quadrupole[4] = -1.002408752e-2; quadrupole[4] = -1.002408752e-2;
quadrupole[6] = -1.894859639e-4; quadrupole[6] = -1.894859639e-4;
quadrupole[8] = 1.345257001e-2; quadrupole[8] = 1.345257001e-2;
mp->addParticle(0.25983, dipole, quadrupole, 0, 0, 2, 0.39, 8.897068742e-1, 0.496); damp = 8.897068742e-01*sqrt(0.1);
mp->addParticle(0.25983, dipole, quadrupole, 0, 0, 1, 0.39, 8.897068742e-1, 0.496); polarity = 0.496*0.001;
mp->addParticle(0.25983, dipole, quadrupole, 0, 0, 2, 0.39, damp, polarity);
mp->addParticle(0.25983, dipole, quadrupole, 0, 0, 1, 0.39, damp, polarity);
mp->setCutoffDistance(1.0); mp->setCutoffDistance(1.0);
// nonbonded->setEwaldErrorTolerance(TOL);
std::vector<int> intVector;
intVector.push_back( 0 );
intVector.push_back( 1 );
intVector.push_back( 2 );
mp->setCovalentMap( 0, AmoebaMultipoleForce::PolarizationCovalent11, intVector );
mp->setCovalentMap( 1, AmoebaMultipoleForce::PolarizationCovalent11, intVector );
mp->setCovalentMap( 2, AmoebaMultipoleForce::PolarizationCovalent11, intVector );
intVector.resize(0); intVector.push_back( 1 ); intVector.push_back( 2 );
mp->setCovalentMap( 0, AmoebaMultipoleForce::Covalent12, intVector );
intVector.resize(0); intVector.push_back( 0 ); intVector.push_back( 2 );
mp->setCovalentMap( 1, AmoebaMultipoleForce::Covalent12, intVector );
intVector.resize(0); intVector.push_back( 0 ); intVector.push_back( 1 );
mp->setCovalentMap( 2, AmoebaMultipoleForce::Covalent12, intVector );
mp->setEwaldErrorTolerance(TOL);
system.setDefaultPeriodicBoxVectors(Vec3(2, 0, 0), Vec3(0, 2, 0), Vec3(0, 0, 2)); system.setDefaultPeriodicBoxVectors(Vec3(2, 0, 0), Vec3(0, 2, 0), Vec3(0, 0, 2));
system.addForce(mp); system.addForce(mp);
Context context(system, integrator, platform); Context context(system, integrator, platform);
...@@ -87,6 +109,13 @@ void testPMEWater() { ...@@ -87,6 +109,13 @@ void testPMEWater() {
context.setPositions(positions); context.setPositions(positions);
State state = context.getState(State::Forces | State::Energy); State state = context.getState(State::Forces | State::Energy);
const vector<Vec3>& forces = state.getForces(); const vector<Vec3>& forces = state.getForces();
(void) fprintf( stderr, "PME forces\n" );
for( unsigned int ii = 0; ii < forces.size(); ii++ ){
(void) fprintf( stderr, "%6u [%14.7e %14.7e %14.7e]\n", ii,
forces[ii][0], forces[ii][1], forces[ii][2] );
}
(void) fflush( stderr );
} }
int main() { int main() {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment