Commit 06089c9a authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

No substantive changes -- cleanup

parent 4d6895b0
......@@ -1642,9 +1642,10 @@ void gpuSetAmoebaMultipoleParameters(amoebaGpuContext amoebaGpu, const std::vect
}
}
if( 0 && amoebaGpu->log )
fprintf( amoebaGpu->log, "Z1 %4d %d [%4d %4d %4d]\n", ii, axisType[ii],
multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii] );
if( 0 && amoebaGpu->log ){
fprintf( amoebaGpu->log, "Z1 %4d %d [%4d %4d %4d] dmp/thole %15.7e %15.7e\n", ii, axisType[ii],
multipoleParticleX[ii], multipoleParticleY[ii], multipoleParticleZ[ii], dampingFactors[ii], tholes[ii] );
}
// charges
......
......@@ -513,17 +513,9 @@ static __device__ void loadElectrostaticParticle( struct ElectrostaticParticle*
}
static __device__ void zeroElectrostaticParticle( struct ElectrostaticParticle* sA ){
// coordinates & charge
sA->force[0] = 0.0f;
sA->force[1] = 0.0f;
sA->force[2] = 0.0f;
/*
sA->torque[0] = 0.0f;
sA->torque[1] = 0.0f;
sA->torque[2] = 0.0f;
*/
}
#undef SUB_METHOD_NAME
......@@ -649,7 +641,8 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
CUDAStream<float4>* debugArray = new CUDAStream<float4>(maxSlots*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysData, 0, sizeof( float )*4*maxSlots*paddedNumberOfAtoms);
debugArray->Upload();
unsigned int targetAtom = 237;
//unsigned int targetAtom = 1137;
unsigned int targetAtom = 1;
#endif
// on first pass, set threads/block
......@@ -675,8 +668,14 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
sizeof(ElectrostaticParticle), sizeof(ElectrostaticParticle)*threadsPerBlock, (*gpu->psInteractionCount)[0], gpu->sim.workUnits ); (void) fflush( amoebaGpu->log );
}
#endif
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData, debugArray->_pDevData, targetAtom );
} else {
kCalculateAmoebaCudaElectrostaticN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData, debugArray->_pDevData, targetAtom );
}
#else
if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaCudaElectrostaticN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData );
......@@ -684,8 +683,44 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
kCalculateAmoebaCudaElectrostaticN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(ElectrostaticParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData );
}
#endif
LAUNCHERROR("kCalculateAmoebaCudaElectrostaticN2Forces");
#ifdef AMOEBA_DEBUG
if( 0 ){
debugArray->Download();
std::vector<double> conversions;
conversions.push_back( 0.1f/4.184f );
conversions.push_back( 0.1f/4.184f );
unsigned int kkBlocks = 4;
(void) fprintf( stderr, "\nTarget atom output %5u\n", targetAtom );
for( unsigned int ii = 0; ii < amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; ii++ ){
double sum = 0.0;
for( unsigned int kk = 0; kk < kkBlocks && sum == 0.0; kk++ ){
unsigned int index = ii + kk*amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
sum += debugArray->_pSysData[index].x + debugArray->_pSysData[index].y + debugArray->_pSysData[index].z + debugArray->_pSysData[index].w;
}
if( sum > 0.0 ){
(void) fprintf( stderr, "%5u", ii );
for( unsigned int kk = 0; kk < kkBlocks; kk++ ){
unsigned int index = ii + kk*amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
(void) fprintf( stderr, " %15.7e %15.7e %15.7e %5.1f",
conversions[kk]*debugArray->_pSysData[index].x, conversions[kk]*debugArray->_pSysData[index].y, conversions[kk]*debugArray->_pSysData[index].z,
debugArray->_pSysData[index].w );
if( ((kk+1) % 2) == 0 && (kk != (kkBlocks-1) ) ){
(void) fprintf( stderr, "\n%5u", ii );
}
}
(void) fprintf( stderr, "\n" );
if( kkBlocks > 2 ){
(void) fprintf( stderr, "\n" );
}
}
}
}
#endif
#ifdef AMOEBA_DEBUG
if( 0 ){
VectorOfDoubleVectors outputVector;
......@@ -694,7 +729,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
fileId.push_back( call++ );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp1");
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "ElectrostaticTemp");
//cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector, NULL, 1.0f );
reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 );
......@@ -706,6 +741,7 @@ void cudaComputeAmoebaElectrostatic( amoebaGpuContext amoebaGpu, int addTorqueTo
cudaWriteVectorOfDoubleVectorsToFile( "CudaElectrostaticTorque", fileId, outputVector );
delete temp;
}
#endif
if( addTorqueToForce ){
kReduceTorque( amoebaGpu );
......
......@@ -42,10 +42,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif
){
#ifdef AMOEBA_DEBUG
float4 pullBack[20];
#endif
extern __shared__ ElectrostaticParticle sA[];
unsigned int totalWarps = gridDim.x*blockDim.x/GRID;
......@@ -210,6 +206,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
psA[tj].force[0] -= force[0];
psA[tj].force[1] -= force[1];
psA[tj].force[2] -= force[2];
}
tj = (tj + 1) & (GRID - 1);
......@@ -243,6 +240,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif
zeroElectrostaticParticle( &(sA[threadIdx.x]) );
zeroElectrostaticParticle( &localParticle );
tj = tgx;
for (unsigned int j = 0; j < GRID; j++){
unsigned int atomJ = y + tj;
......@@ -264,6 +262,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
psA[tj].force[0] += force[0];
psA[tj].force[1] += force[1];
psA[tj].force[2] += force[2];
}
tj = (tj + 1) & (GRID - 1);
......@@ -298,5 +297,6 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
pos++;
}
cSim.pEnergy[blockIdx.x * blockDim.x + threadIdx.x] += (conversionFactor*totalEnergy);
}
......@@ -110,7 +110,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%lu ixnCt=%lu workUnits=%lu\n", methodName,
(void) fprintf( amoebaGpu->log, "cudaComputeAmoebaFixedEField numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%lu ixnCt=%lu workUnits=%lu\n",
gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
sizeof(FixedFieldParticle), sizeof(FixedFieldParticle)*threadsPerBlock, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
......@@ -121,23 +121,13 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
kCalculateAmoebaFixedE_FieldN2ByWarpForces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevData );
#endif
} else {
kCalculateAmoebaFixedE_FieldN2Forces_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(FixedFieldParticle)*threadsPerBlock>>>(
gpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG
amoebaGpu->psWorkArray_3_2->_pDevData,
debugArray->_pDevData, targetAtom );
#else
amoebaGpu->psWorkArray_3_2->_pDevData );
#endif
}
LAUNCHERROR("kCalculateAmoebaFixedE_FieldN2Forces_kernel");
......@@ -198,91 +188,6 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
(void) fflush( amoebaGpu->log );
(void) fprintf( amoebaGpu->log, "EFields End\n" );
/*
(void) fprintf( amoebaGpu->log, "DebugQ\n" );
debugArray->Download();
if( 0 ){
int ii = targetAtom;
float sum[2][3] = { { 0.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 0.0f } };
(void) fprintf( amoebaGpu->log,"\n" );
for( int jj = 0; jj < 1248; jj++ ){
int debugIndex = jj;
if( jj == ii )continue;
(void) fprintf( amoebaGpu->log,"\n\n%4d %4d rrs\n[%16.9e %16.9e %16.9e %16.9e]\n",
ii, jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z, debugArray->_pSysData[debugIndex].w );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"[%16.9e %16.9e %16.9e]\n",
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"Y1 %5d %16.9e %16.9e %16.9e\n", jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
sum[0][0] += debugArray->_pSysData[debugIndex].x;
sum[0][1] += debugArray->_pSysData[debugIndex].y;
sum[0][2] += debugArray->_pSysData[debugIndex].z;
debugIndex += amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log,"Y2 %5d %16.9e %16.9e %16.9e\n", jj,
debugArray->_pSysData[debugIndex].x, debugArray->_pSysData[debugIndex].y,
debugArray->_pSysData[debugIndex].z );
sum[1][0] += debugArray->_pSysData[debugIndex].x;
sum[1][1] += debugArray->_pSysData[debugIndex].y;
sum[1][2] += debugArray->_pSysData[debugIndex].z;
}
(void) fprintf( amoebaGpu->log,"SumQ [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n",
sum[0][0], sum[0][1], sum[0][2],
sum[1][0], sum[1][1], sum[1][2] );
}
for( unsigned int ii = 0; ii < debugArray->_stride; ii++ ){
int print;
if( debugArray->_pSysData[ii].x != 0.0f || debugArray->_pSysData[ii].y != 0.0f ||
debugArray->_pSysData[ii].y != 0.0f || debugArray->_pSysData[ii].w != 0.0f ||
debugArray->_pSysData[ii].x != debugArray->_pSysData[ii].x ||
debugArray->_pSysData[ii].y != debugArray->_pSysData[ii].y ||
debugArray->_pSysData[ii].z != debugArray->_pSysData[ii].z ||
debugArray->_pSysData[ii].w != debugArray->_pSysData[ii].w ){
print = 0;
} else {
print = 0;
}
if( print ){
unsigned int atomI = ii/amoebaGpu->paddedNumberOfAtoms;
unsigned int atomJ = ii - atomI*amoebaGpu->paddedNumberOfAtoms;
(void) fprintf( amoebaGpu->log, "%5u [%5u %5u] ", ii, atomI, atomJ);
(void) fprintf( amoebaGpu->log, "%14.6e %14.6e %14.6e %14.6e\n",
debugArray->_pSysData[ii].x,
debugArray->_pSysData[ii].y,
debugArray->_pSysData[ii].z,
debugArray->_pSysData[ii].w );
}
}
*/
// write results to file
......
......@@ -447,7 +447,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
fileId.push_back( call++ );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp1");
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp_MapTorqueAndAddToForce");
reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 );
cudaLoadCudaFloatArray( gpu->natoms, 3, temp, outputVector, NULL, 1.0f/4.184f );
......@@ -479,7 +479,7 @@ void cudaComputeAmoebaMapTorqueAndAddToForce( amoebaGpuContext amoebaGpu, CUDASt
fileId.push_back( call++ );
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "Temp1");
CUDAStream<float>* temp = new CUDAStream<float>(3*paddedNumberOfAtoms, 1, "TempMapTorqueAndAddToForce2");
reduceAndCopyCUDAStreamFloat4( gpu->psForce4, temp, 1.0 );
cudaLoadCudaFloatArray( gpu->natoms, 3, temp, outputVector, NULL, 1.0f/4.184f );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment