Commit 4d763c4d authored by Mark Friedrichs
Browse files

Fixed bug in direct PME

Added diagnostics for handling the reordering of atoms
parent 8b8defe8
......@@ -38,7 +38,7 @@ using namespace OpenMM;
using std::string;
using std::vector;
AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), pmeBSplineOrder(5), cutoffDistance(1.0), ewaldErrorTol(5e-4), mutualInducedIterationMethod(SOR), mutualInducedMaxIterations(60),
AmoebaMultipoleForce::AmoebaMultipoleForce() : nonbondedMethod(NoCutoff), pmeBSplineOrder(5), cutoffDistance(1.0), ewaldErrorTol(1e-4), mutualInducedIterationMethod(SOR), mutualInducedMaxIterations(60),
mutualInducedTargetEpsilon(1.0e-05), scalingDistanceCutoff(100.0), electricConstant(138.9354558456) {
}
......
......@@ -779,17 +779,15 @@ static void computeAmoebaMultipoleForce( AmoebaCudaData& data ) {
if( data.getMultipoleForceCount() == 0 ){
gpuCopyWorkUnit( gpu );
}
//if( data.getApplyCutoff() && (data.getMultipoleForceCount() % 100) == 0 ){
//gpuReorderAtoms(gpu->gpuContext);
//}
data.incrementMultipoleForceCount();
data.initializeGpu();
if( 0 && data.getLog() ){
(void) fprintf( data.getLog(), "computeAmoebaMultipoleForce\n" );
(void) fprintf( data.getLog(), "In computeAmoebaMultipoleForce\n" );
(void) fflush( data.getLog());
}
data.initializeGpu();
// calculate Born radii
if( data.getHasAmoebaGeneralizedKirkwood() ){
......@@ -974,23 +972,27 @@ void CudaCalcAmoebaMultipoleForceKernel::initialize(const System& system, const
nb.setCutoffDistance(force.getCutoffDistance());
std::vector<int> pmeGridDimension;
force.getPmeGridDimensions( pmeGridDimension );
if( 1 || pmeGridDimension[0] == 0 ){
int pmeParametersSetBasedOnEwaldErrorTolerance;
if( pmeGridDimension[0] == 0 ){
NonbondedForceImpl::calcPMEParameters(system, nb, alpha, xsize, ysize, zsize);
/*
alpha = 5.446;
xsize = 60;
ysize = 48;
zsize = 48;
*/
pmeParametersSetBasedOnEwaldErrorTolerance = 1;
} else {
alpha = force.getAEwald();
xsize = pmeGridDimension[0];
ysize = pmeGridDimension[1];
zsize = pmeGridDimension[2];
pmeParametersSetBasedOnEwaldErrorTolerance = 0;
}
if( data.getLog() ){
(void) fprintf( data.getLog(), "AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d]\n",
(void) fprintf( data.getLog(), "AmoebaMultipoleForce: PME parameters tol=%12.3e cutoff=%12.3f alpha=%12.3f [%d %d %d] -",
force.getEwaldErrorTolerance(), force.getCutoffDistance(), alpha, xsize, ysize, zsize );
if( pmeParametersSetBasedOnEwaldErrorTolerance ){
(void) fprintf( data.getLog(), " parameters set based on error tolerance and OpenMM algorithm.\n" );
} else {
double impliedTolerance = alpha*force.getCutoffDistance();
impliedTolerance = 0.5*exp( -(impliedTolerance*impliedTolerance) );
(void) fprintf( data.getLog(), " using input parameters implied tolerance=%12.3e\n", impliedTolerance );
}
(void) fflush( data.getLog() );
}
gpuSetAmoebaPMEParameters(data.getAmoebaGpu(), (float) alpha, xsize, ysize, zsize);
......
......@@ -3909,6 +3909,33 @@ void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId
(void) fclose( filePtr );
}
/**---------------------------------------------------------------------------------------

   Build a permuted copy of a float stream (diagnostic helper)

   Entry ii of the input is written to slot psAtomIndex[ii] of a freshly
   allocated stream; only the first natoms entries are copied.

   @param amoebaGpu        context supplying natoms, paddedNumberOfAtoms and the atom-index table
   @param arrayToReorder   stream whose _pSysStream[0] buffer is read

   @return newly allocated stream (paddedNumberOfAtoms x 1, named "TempReorder");
           allocated with new and not released here, so the caller must delete it

   Only the _pSysStream / _pSysData buffers are touched; this routine calls neither
   Download() nor Upload().
   NOTE(review): presumably the caller has already downloaded arrayToReorder and the
   atom-index table -- confirm at the call sites.

   --------------------------------------------------------------------------------------- */

CUDAStream<float>* reorderFloat( amoebaGpuContext amoebaGpu, CUDAStream<float>* arrayToReorder ){

    gpuContext context = amoebaGpu->gpuContext;

    // destination is sized to the padded atom count; slots that no index maps to are never written here
    CUDAStream<float>* permuted = new CUDAStream<float>( context->sim.paddedNumberOfAtoms, 1, "TempReorder" );

    // destinationSlot[source] is the position that input entry 'source' is moved to
    const int* destinationSlot = context->psAtomIndex->_pSysData;

    int source = 0;
    while( source < context->natoms ){
        permuted->_pSysStream[0][destinationSlot[source]] = arrayToReorder->_pSysStream[0][source];
        ++source;
    }

    return permuted;
}
/**---------------------------------------------------------------------------------------

   Build a permuted copy of a float4 stream (diagnostic helper)

   Entry ii of the input (all four components x, y, z, w) is written to slot
   psAtomIndex[ii] of a freshly allocated stream; only the first natoms entries
   are copied.

   @param amoebaGpu        context supplying natoms, paddedNumberOfAtoms and the atom-index table
   @param arrayToReorder   stream whose _pSysStream[0] buffer is read

   @return newly allocated stream (paddedNumberOfAtoms x 1, named "TempReorder4");
           allocated with new and not released here, so the caller must delete it

   Only the _pSysStream / _pSysData buffers are touched; this routine calls neither
   Download() nor Upload().
   NOTE(review): presumably the caller has already downloaded arrayToReorder and the
   atom-index table -- confirm at the call sites.

   --------------------------------------------------------------------------------------- */

CUDAStream<float4>* reorderFloat4( amoebaGpuContext amoebaGpu, CUDAStream<float4>* arrayToReorder ){

    gpuContext context = amoebaGpu->gpuContext;

    // destination is sized to the padded atom count; slots that no index maps to are never written here
    CUDAStream<float4>* permuted = new CUDAStream<float4>( context->sim.paddedNumberOfAtoms, 1, "TempReorder4" );

    // destinationSlot[source] is the position that input entry 'source' is moved to
    const int* destinationSlot = context->psAtomIndex->_pSysData;

    int source = 0;
    while( source < context->natoms ){
        // whole-struct assignment copies x, y, z and w together
        permuted->_pSysStream[0][destinationSlot[source]] = arrayToReorder->_pSysStream[0][source];
        ++source;
    }

    return permuted;
}
/**---------------------------------------------------------------------------------------
Load contents of arrays into vector
......@@ -3920,7 +3947,9 @@ void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId
--------------------------------------------------------------------------------------- */
void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, VectorOfDoubleVectors& outputVector )
void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle,
CUDAStream<float>* array, VectorOfDoubleVectors& outputVector,
int* order )
{
// ---------------------------------------------------------------------------------------
......@@ -3929,14 +3958,19 @@ void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDA
// ---------------------------------------------------------------------------------------
array->Download();
int runningIndex = 0;
int orderIndex = 0;
outputVector.resize( numberOfParticles );
for( int ii = 0; ii < numberOfParticles; ii++ ){
for( int jj = 0; jj < entriesPerParticle; jj++ ) {
outputVector[ii].push_back( array->_pSysStream[0][runningIndex++] );
}
if( order ){
orderIndex = order[ii];
} else {
orderIndex = ii;
}
for( int jj = 0; jj < entriesPerParticle; jj++ ) {
outputVector[orderIndex].push_back( array->_pSysStream[0][entriesPerParticle*ii+jj] );
}
}
}
......@@ -3975,6 +4009,7 @@ void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerParticle, CUD
}
}
/**---------------------------------------------------------------------------------------
Load contents of arrays into vector
......@@ -3983,10 +4018,12 @@ void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerParticle, CUD
@param entriesPerParticle entries/particles array
@param array cuda array
@param outputVector output vector
@param order if set, reorder entries
--------------------------------------------------------------------------------------- */
void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector )
void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array,
VectorOfDoubleVectors& outputVector, int* order )
{
// ---------------------------------------------------------------------------------------
......@@ -3996,21 +4033,27 @@ void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUD
array->Download();
int runningIndex = 0;
int orderIndex;
outputVector.resize( numberOfParticles );
for( int ii = 0; ii < numberOfParticles; ii++ ){
if( order ){
orderIndex = order[runningIndex];
} else {
orderIndex = runningIndex;
}
if( entriesPerParticle > 0 ){
outputVector[ii].push_back( array->_pSysStream[0][runningIndex].x );
outputVector[orderIndex].push_back( array->_pSysStream[0][ii].x );
}
if( entriesPerParticle > 1 ){
outputVector[ii].push_back( array->_pSysStream[0][runningIndex].y );
outputVector[orderIndex].push_back( array->_pSysStream[0][ii].y );
}
if( entriesPerParticle > 2 ){
outputVector[ii].push_back( array->_pSysStream[0][runningIndex].z );
outputVector[orderIndex].push_back( array->_pSysStream[0][ii].z );
}
if( entriesPerParticle > 3 ){
outputVector[ii].push_back( array->_pSysStream[0][runningIndex].w );
outputVector[orderIndex].push_back( array->_pSysStream[0][ii].w );
}
runningIndex++;
}
......@@ -4353,8 +4396,8 @@ void trackMutualInducedIterations( amoebaGpuContext amoebaGpu, int iteration){
}
(void) fflush( amoebaGpu->log );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psVelm4, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psVelm4, outputVector, NULL );
/*
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
......
......@@ -147,9 +147,9 @@ extern void cudaWriteFloat1AndFloat1ArraysToFile( int numberOfAtoms, char* fname
int entriesPerAtom2, CUDAStream<float>* array2 );
extern void readFile( std::string fileName, StringVectorVector& fileContents );
extern void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, VectorOfDoubleVectors& outputVector );
extern void cudaLoadCudaFloatArray( int numberOfParticles, int entriesPerParticle, CUDAStream<float>* array, VectorOfDoubleVectors& outputVector, int* order );
extern void cudaLoadCudaFloat2Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float2>* array, VectorOfDoubleVectors& outputVector );
extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector );
extern void cudaLoadCudaFloat4Array( int numberOfParticles, int entriesPerParticle, CUDAStream<float4>* array, VectorOfDoubleVectors& outputVector, int* order );
extern void cudaWriteVectorOfDoubleVectorsToFile( char* fname, std::vector<int>& fileId, VectorOfDoubleVectors& outputVector );
extern void kClearFloat( amoebaGpuContext amoebaGpu, unsigned int entries, CUDAStream<float>* fieldToClear );
......
......@@ -947,9 +947,9 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu )
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector);
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaForceTorque", fileId, outputVector );
}
......
......@@ -562,9 +562,9 @@ void cudaComputeAmoebaFixedEAndGkFields( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector);
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psGk_Field, outputVector);
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, NULL);
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psGk_Field, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaEAndGkField", fileId, outputVector );
}
......
......@@ -307,8 +307,8 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector);
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
}
......
......@@ -1748,10 +1748,10 @@ static void kReduceToBornForcePrefactor( amoebaGpuContext amoebaGpu )
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( amoebaGpu->gpuContext->natoms, 3, amoebaGpu->gpuContext->psPosq4, outputVector );
cudaLoadCudaFloatArray( amoebaGpu->gpuContext->natoms, 1, amoebaGpu->gpuContext->psBornRadii, outputVector );
cudaLoadCudaFloat2Array( amoebaGpu->gpuContext->natoms, 2, amoebaGpu->gpuContext->psObcData, outputVector );
cudaLoadCudaFloatArray( amoebaGpu->gpuContext->natoms, 1, amoebaGpu->gpuContext->psBornForce, outputVector );
cudaLoadCudaFloat4Array( amoebaGpu->gpuContext->natoms, 3, amoebaGpu->gpuContext->psPosq4, outputVector, NULL );
cudaLoadCudaFloatArray( amoebaGpu->gpuContext->natoms, 1, amoebaGpu->gpuContext->psBornRadii, outputVector, NULL );
cudaLoadCudaFloat2Array( amoebaGpu->gpuContext->natoms, 2, amoebaGpu->gpuContext->psObcData, outputVector, NULL );
cudaLoadCudaFloatArray( amoebaGpu->gpuContext->natoms, 1, amoebaGpu->gpuContext->psBornForce, outputVector, NULL );
cudaWriteVectorOfDoubleVectorsToFile( "CudaBornForce", fileId, outputVector );
(void) fprintf( amoebaGpu->log, "kReduceToBornForcePrefactor: exiting.\n" );
(void) fprintf( stderr, "kReduceToBornForcePrefactor: exiting.\n" ); (void) fflush( stderr );
......@@ -2069,8 +2069,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector);
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodForce, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaForceTorque", fileId, outputVector );
}
......@@ -2113,8 +2113,8 @@ void kCalculateAmoebaKirkwood( amoebaGpuContext amoebaGpu )
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodForce, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodForce, outputVector, NULL );
cudaWriteVectorOfDoubleVectorsToFile( "CudaKirkwoodForce", fileId, outputVector );
}
......
......@@ -1215,9 +1215,9 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodEDiffForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector);
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodEDiffForce, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaForceTorque", fileId, outputVector );
}
......@@ -1260,8 +1260,8 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodEDiffForce, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psKirkwoodEDiffForce, outputVector, NULL );
cudaWriteVectorOfDoubleVectorsToFile( "CudaKirkwoodEDiffForce", fileId, outputVector );
}
......
......@@ -829,8 +829,8 @@ void cudaComputeAmoebaMapTorques( amoebaGpuContext amoebaGpu, CUDAStream<float>*
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector);
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaVacuumElecForce", fileId, outputVector );
}
#endif
......@@ -1031,9 +1031,9 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce( amoebaGpuContext amoebaGpu,
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 4, gpu->psForce4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, psForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, psTorque, outputVector);
cudaLoadCudaFloat4Array( gpu->natoms, 4, gpu->psForce4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, psForce, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, psTorque, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaVacuumElecForce", fileId, outputVector );
}
#endif
......@@ -1123,9 +1123,9 @@ void cudaComputeAmoebaMapTorquesAndAddTotalForce2( amoebaGpuContext amoebaGpu,
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 4, gpu->psForce4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector);
cudaLoadCudaFloat4Array( gpu->natoms, 4, gpu->psForce4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psForce, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psTorque, outputVector, NULL);
cudaWriteVectorOfDoubleVectorsToFile( "CudaVacuumElecForce", fileId, outputVector );
}
#endif
......
......@@ -925,9 +925,9 @@ static void cudaComputeAmoebaMutualInducedAndGkFieldBySOR( amoebaGpuContext amoe
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, NULL );
cudaWriteVectorOfDoubleVectorsToFile( "CudaMI_GK", fileId, outputVector );
}
#endif
......
......@@ -594,8 +594,8 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
// cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, NULL );
cudaWriteVectorOfDoubleVectorsToFile( "CudaMI", fileId, outputVector );
}
......
......@@ -194,18 +194,18 @@ __device__ void calculateFixedFieldRealSpacePairIxn_kernel( FixedFieldParticle&
float ralpha = cSim.alphaEwald*r;
float bn0 = erfc(ralpha)/r;
float bn0 = erfc(ralpha)/r;
float alsq2 = 2.0f*cSim.alphaEwald*cSim.alphaEwald;
float alsq2n = 1.0f/(cAmoebaSim.sqrtPi*cSim.alphaEwald);
float exp2a = exp(-(ralpha*ralpha));
alsq2n *= alsq2;
float bn1 = (bn0+alsq2n*exp2a)/r2;
float bn1 = (bn0+alsq2n*exp2a)/r2;
alsq2n *= alsq2;
float bn2 = (3.0f*bn1+alsq2n*exp2a)/r2;
float bn2 = (3.0f*bn1+alsq2n*exp2a)/r2;
alsq2n *= alsq2;
float bn3 = (5.0f*bn2+alsq2n*exp2a)/r2;
float bn3 = (5.0f*bn2+alsq2n*exp2a)/r2;
// compute the error function scaled and unscaled terms
......@@ -565,9 +565,9 @@ if( fabs(debugArray->_pSysStream[0][jj+3*paddedNumberOfAtoms].x) > 0.0 ){
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector);
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
}
delete debugArray;
......@@ -578,9 +578,9 @@ if( fabs(debugArray->_pSysStream[0][jj+3*paddedNumberOfAtoms].x) > 0.0 ){
std::vector<int> fileId;
fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector);
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData);
cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
}
......@@ -590,4 +590,15 @@ void cudaComputeAmoebaPmeFixedEField( amoebaGpuContext amoebaGpu )
{
// Compute the PME fixed E-field in two steps, both operating on amoebaGpu:
// first kCalculateAmoebaPMEFixedMultipoles(), then cudaComputeAmoebaPmeDirectFixedEField().
// NOTE(review): presumably these are the reciprocal-space and direct-space contributions -- confirm.
kCalculateAmoebaPMEFixedMultipoles( amoebaGpu );
cudaComputeAmoebaPmeDirectFixedEField( amoebaGpu );
// Diagnostic dump, compiled in but disabled by the constant-false condition;
// change the 0 to 1 to write positions and both fields to the "CudaEField" file.
if( 0 ){
gpuContext gpu = amoebaGpu->gpuContext;
std::vector<int> fileId;
fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
// Each loader is given the atom-index table as its 'order' argument, so the values
// read for entry ii are appended to outputVector[order[ii]] rather than outputVector[ii].
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
}
}
......@@ -193,6 +193,7 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
fields[2].z = 0.0f;
fields[2].w = 0.0f;
}
/*
#ifdef AMOEBA_DEBUG
pullBack[0].x = xr;
pullBack[0].y = yr;
......@@ -204,7 +205,6 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
pullBack[1].z = bn2;
pullBack[1].w = exp2a;
/*
pullBack[1].x = atomJ.x - atomI.x;
pullBack[1].y = atomJ.y - atomI.y;
pullBack[1].z = atomJ.z - atomI.z;
......@@ -212,8 +212,8 @@ __device__ void calculatePmeDirectMutualInducedFieldPairIxn_kernel( MutualInduce
pullBack[1].x = scale3;
pullBack[1].y = scale5;
pullBack[1].z = scale7;
*/
#endif
*/
}
// Include versions of the kernels for N^2 calculations.
......@@ -385,8 +385,7 @@ static void cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply( amoebaGpuConte
static const char* methodName = "cudaComputeAmoebaPmeMutualInducedFieldMatrixMultiply";
static int iteration = 1;
if( 1 && amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: scalingDistanceCutoff=%.5f\n",
methodName, amoebaGpu->scalingDistanceCutoff );
(void) fprintf( amoebaGpu->log, "%s\n", methodName );
(void) fflush( amoebaGpu->log );
}
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
......@@ -748,9 +747,9 @@ static void cudaComputeAmoebaPmeMutualInducedFieldBySOR( amoebaGpuContext amoeba
std::vector<int> fileId;
fileId.push_back( iteration );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
}
......@@ -780,9 +779,9 @@ fflush( amoebaGpu->log );
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector, gpu->psAtomIndex->_pSysData );
cudaWriteVectorOfDoubleVectorsToFile( "CudaPmeMI", fileId, outputVector );
}
......
......@@ -122,6 +122,7 @@ void METHOD_NAME(kCalculateAmoebaPmeMutualInducedField, _kernel)(
fieldPolarSum[1] += mask ? ijField[1].z : 0.0f;
fieldPolarSum[2] += mask ? ijField[2].z : 0.0f;
/*
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+j) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+j) : atomI;
......@@ -173,7 +174,6 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
debugArray[index].z = ijField[indexJ+1][2];
debugArray[index].w = flag;
/*
index += cAmoebaSim.paddedNumberOfAtoms;
index += cAmoebaSim.paddedNumberOfAtoms;
......@@ -189,10 +189,10 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
debugArray[index].y = scaleMask.x & mask ? 1.0f : -1.0f;
debugArray[index].z = scaleMask.y & mask ? 1.0f : -1.0f;
debugArray[index].w = + 10.0f;
*/
}
#endif
*/
}
// Write results
......@@ -308,6 +308,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
}
/*
#ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+jIdx) == targetAtom ){
unsigned int index = atomI == targetAtom ? (y+jIdx) : atomI;
......@@ -360,6 +361,7 @@ if( atomI == targetAtom || (y+jIdx) == targetAtom ){
debugArray[index].w = flag;
}
#endif
*/
}
tj = (tj + 1) & (GRID - 1);
......
......@@ -528,8 +528,8 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( particles, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( particles, 9, amoebaGpu->psRotationMatrix, outputVector );
cudaLoadCudaFloat4Array( particles, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( particles, 9, amoebaGpu->psRotationMatrix, outputVector, gpu->psAtomIndex->_pSysData );
cudaWriteVectorOfDoubleVectorsToFile( "CudaRotationMatrices", fileId, outputVector );
}
if( 0 ){
......@@ -539,9 +539,9 @@ void cudaComputeAmoebaLabFrameMoments( amoebaGpuContext amoebaGpu )
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( particles, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( particles, 3, amoebaGpu->psLabFrameDipole, outputVector );
cudaLoadCudaFloatArray( particles, 9, amoebaGpu->psLabFrameQuadrupole, outputVector );
cudaLoadCudaFloat4Array( particles, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( particles, 3, amoebaGpu->psLabFrameDipole, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloatArray( particles, 9, amoebaGpu->psLabFrameQuadrupole, outputVector, gpu->psAtomIndex->_pSysData );
cudaWriteVectorOfDoubleVectorsToFile( "CudaRotatedMoments", fileId, outputVector );
}
......
......@@ -715,8 +715,8 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, psTempForce, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, gpu->psAtomIndex->_pSysData );
cudaLoadCudaFloat4Array( gpu->natoms, 3, psTempForce, outputVector, gpu->psAtomIndex->_pSysData );
cudaWriteVectorOfDoubleVectorsToFile( "CudaVdw", fileId, outputVector );
delete psTempForce;
//exit(0);
......
......@@ -631,8 +631,8 @@ void kCalculateAmoebaWcaDispersionForces( amoebaGpuContext amoebaGpu )
std::vector<int> fileId;
//fileId.push_back( 0 );
VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, psTempForce, outputVector );
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL );
cudaLoadCudaFloatArray( gpu->natoms, 3, psTempForce, outputVector, NULL );
cudaWriteVectorOfDoubleVectorsToFile( "CudaWca", fileId, outputVector );
delete psTempForce;
//exit(0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment