Commit 1b5ee8f9 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Fixed bug in Vdw w/o cutoffs if bOutputBufferPerWarp is set

parent db8a55b3
...@@ -1129,6 +1129,10 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba ...@@ -1129,6 +1129,10 @@ void CudaCalcAmoebaVdwForceKernel::initialize(const System& system, const Amoeba
force.getSigmaCombiningRule(), force.getEpsilonCombiningRule(), force.getSigmaCombiningRule(), force.getEpsilonCombiningRule(),
allExclusions, force.getPBC(), static_cast<float>(force.getCutoff()) ); allExclusions, force.getPBC(), static_cast<float>(force.getCutoff()) );
data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force)); data.getAmoebaGpu()->gpuContext->forces.push_back(new ForceInfo(force));
if( data.getLog() ){
(void) fprintf( data.getLog(), "CudaCalcAmoebaVdwForceKernel PBC=%d getUseNeighborList=%d\n",
force.getPBC(), force.getUseNeighborList() );
}
data.setUseVdwNeighborList( force.getUseNeighborList() ); data.setUseVdwNeighborList( force.getUseNeighborList() );
} }
......
...@@ -306,7 +306,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu ) ...@@ -306,7 +306,7 @@ void cudaComputeAmoebaFixedEField( amoebaGpuContext amoebaGpu )
std::vector<int> fileId; std::vector<int> fileId;
//fileId.push_back( 0 ); //fileId.push_back( 0 );
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector ); //cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector ); cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_Field, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector); cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psE_FieldPolar, outputVector);
cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector ); cudaWriteVectorOfDoubleVectorsToFile( "CudaEField", fileId, outputVector );
......
...@@ -247,8 +247,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext ...@@ -247,8 +247,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
static const char* methodName = "cudaComputeAmoebaMutualInducedFieldMatrixMultiply"; static const char* methodName = "cudaComputeAmoebaMutualInducedFieldMatrixMultiply";
static int iteration = 1; static int iteration = 1;
if( 1 && amoebaGpu->log ){ if( 1 && amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: scalingDistanceCutoff=%.5f\n", (void) fprintf( amoebaGpu->log, "%s\n", methodName );
methodName, amoebaGpu->scalingDistanceCutoff );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
} }
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
...@@ -594,17 +593,16 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu ...@@ -594,17 +593,16 @@ static void cudaComputeAmoebaMutualInducedFieldBySOR( amoebaGpuContext amoebaGpu
amoebaGpu->mutualInducedConverged = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1; amoebaGpu->mutualInducedConverged = ( !done || iteration > amoebaGpu->mutualInducedMaxIterations ) ? 0 : 1;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
/*
if( 0 ){ if( 0 ){
std::vector<int> fileId; std::vector<int> fileId;
//fileId.push_back( 0 ); //fileId.push_back( 0 );
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector ); // cudaLoadCudaFloat4Array( gpu->natoms, 3, gpu->psPosq4, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector ); cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipole, outputVector );
cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector ); cudaLoadCudaFloatArray( gpu->natoms, 3, amoebaGpu->psInducedDipolePolar, outputVector );
cudaWriteVectorOfDoubleVectorsToFile( "CudaMI", fileId, outputVector ); cudaWriteVectorOfDoubleVectorsToFile( "CudaMI", fileId, outputVector );
} }
*/
#endif #endif
// --------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------
......
...@@ -599,7 +599,6 @@ if( 0 ){ ...@@ -599,7 +599,6 @@ if( 0 ){
x = (x >> 17) << GRIDBITS; x = (x >> 17) << GRIDBITS;
(void) fprintf( amoebaGpu->log, " AmGpu %8u [%5u %5u %1u]\n", amoebaGpu->psWorkUnit->_pSysStream[0][ii], x,y,exclusions ); (void) fprintf( amoebaGpu->log, " AmGpu %8u [%5u %5u %1u]\n", amoebaGpu->psWorkUnit->_pSysStream[0][ii], x,y,exclusions );
} }
} else {
} }
cudaComputeAmoebaPmeFixedEField( amoebaGpu ); cudaComputeAmoebaPmeFixedEField( amoebaGpu );
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
#include "amoebaScaleFactors.h" #include "amoebaScaleFactors.h"
#include <stdio.h> #include <stdio.h>
extern int isNanOrInfinity( double number );
using namespace std; using namespace std;
...@@ -36,8 +37,11 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu) ...@@ -36,8 +37,11 @@ void GetCalculateAmoebaCudaVdw14_7Sim(amoebaGpuContext amoebaGpu)
RTERROR(status, "GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed"); RTERROR(status, "GetCalculateAmoebaCudaVdw14_7Sim: cudaMemcpyFromSymbol: SetSim copy from cAmoebaSim failed");
} }
#define AMOEBA_DEBUG //#define AMOEBA_DEBUG_PRINT
//#undef AMOEBA_DEBUG #undef AMOEBA_DEBUG_PRINT
//#define AMOEBA_DEBUG
#undef AMOEBA_DEBUG
__device__ void zeroVdw14_7SharedForce( struct Vdw14_7Particle* sA ) __device__ void zeroVdw14_7SharedForce( struct Vdw14_7Particle* sA )
{ {
...@@ -434,8 +438,8 @@ static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDA ...@@ -434,8 +438,8 @@ static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDA
static void kReduceVdw14_7(amoebaGpuContext amoebaGpu, CUDAStream<float>* outputArray ) static void kReduceVdw14_7(amoebaGpuContext amoebaGpu, CUDAStream<float>* outputArray )
{ {
kReduceFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>( kReduceFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>(
amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers, amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevStream[0], outputArray->_pDevStream[0] ); amoebaGpu->psWorkArray_3_1->_pDevStream[0], outputArray->_pDevStream[0] );
LAUNCHERROR("kReduceVdw14_7"); LAUNCHERROR("kReduceVdw14_7");
} }
...@@ -486,17 +490,19 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -486,17 +490,19 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
gpuContext gpu = amoebaGpu->gpuContext; gpuContext gpu = amoebaGpu->gpuContext;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG_PRINT
static const char* methodName = "kCalculateAmoebaVdw14_7Forces"; static const char* methodName = "kCalculateAmoebaVdw14_7Forces";
if( 1 && amoebaGpu->log ){ if( 1 && amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "%s: \n", methodName ); (void) fprintf( amoebaGpu->log, "%s: \n", methodName );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
} }
#ifdef AMOEBA_DEBUG
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray"); CUDAStream<float4>* debugArray = new CUDAStream<float4>(paddedNumberOfAtoms*paddedNumberOfAtoms, 1, "DebugArray");
memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms); memset( debugArray->_pSysStream[0], 0, sizeof( float )*4*paddedNumberOfAtoms*paddedNumberOfAtoms);
debugArray->Upload(); debugArray->Upload();
int targetAtom = 342; int targetAtom = 342;
#endif
#endif #endif
// set threads/block first time through // set threads/block first time through
...@@ -517,12 +523,30 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -517,12 +523,30 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates ); kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpu, gpu->psPosq4, amoebaGpu->psAmoebaVdwCoordinates );
kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates ); kCalculateAmoebaVdw14_7CoordinateReduction( amoebaGpu, amoebaGpu->psAmoebaVdwCoordinates, amoebaGpu->psAmoebaVdwCoordinates );
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG_PRINT
(void) fprintf( amoebaGpu->log, "Apply cutoff=%d warp=%d\n", applyCutoff, gpu->bOutputBufferPerWarp ); (void) fprintf( amoebaGpu->log, "Apply cutoff=%d warp=%d\n", applyCutoff, gpu->bOutputBufferPerWarp );
(void) fprintf( amoebaGpu->log, "numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n", (void) fprintf( amoebaGpu->log, "numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp, amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(Vdw14_7Particle), sizeof(Vdw14_7Particle)*threadsPerBlock, sizeof(Vdw14_7Particle), sizeof(Vdw14_7Particle)*threadsPerBlock,
amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits ); amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
if( 0 ){
gpu->psInteractionCount->Download();
amoebaGpu->psVdwWorkUnit->Download();
unsigned int totalWarps = (amoebaGpu->nonbondBlocks*threadsPerBlock)/GRID;
float ratiof = (float)totalWarps/(float)amoebaGpu->psVdwWorkUnit->_length;
(void) fprintf( amoebaGpu->log, "Ixn warps=%u count=%u\n", totalWarps, gpu->psInteractionCount->_pSysStream[0][0] );
for( unsigned int ii = 0; ii < amoebaGpu->psVdwWorkUnit->_length; ii++ ){
unsigned int x = amoebaGpu->psVdwWorkUnit->_pSysStream[0][ii];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
unsigned int exclusions = (x & 0x1);
x = (x >> 17) << GRIDBITS;
float warp = (float)(ii)*ratiof;
(void) fprintf( amoebaGpu->log, "GpuCell %8u [%5u %5u %1u] %10u warp=%15.6f\n", ii, x,y,exclusions, warp );
}
}
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
#endif #endif
...@@ -541,28 +565,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff ...@@ -541,28 +565,28 @@ void kCalculateAmoebaVdw14_7Forces( amoebaGpuContext amoebaGpu, int applyCutoff
sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit); sizeof(unsigned int)*gpu->sim.nonbond_threads_per_block>>>(gpu->sim.pInteractingWorkUnit);
LAUNCHERROR("kFindInteractionsWithinBlocksVdwPeriodic"); LAUNCHERROR("kFindInteractionsWithinBlocksVdwPeriodic");
if( 1 ){ if( 0 ){
gpu->psInteractionCount->Download(); gpu->psInteractionCount->Download();
gpu->psInteractingWorkUnit->Download(); gpu->psInteractingWorkUnit->Download();
gpu->psInteractionFlag->Download(); gpu->psInteractionFlag->Download();
amoebaGpu->psVdwWorkUnit->Download(); amoebaGpu->psVdwWorkUnit->Download();
(void) fprintf( amoebaGpu->log, "Vdw Ixn count=%u\n", gpu->psInteractionCount->_pSysStream[0][0] ); (void) fprintf( amoebaGpu->log, "Vdw Ixn count=%u\n", gpu->psInteractionCount->_pSysStream[0][0] );
for( unsigned int ii = 0; ii < gpu->psInteractingWorkUnit->_length; ii++ ){ for( unsigned int ii = 0; ii < gpu->psInteractingWorkUnit->_length; ii++ ){
unsigned int x = gpu->psInteractingWorkUnit->_pSysStream[0][ii]; unsigned int x = gpu->psInteractingWorkUnit->_pSysStream[0][ii];
unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS; unsigned int y = ((x >> 2) & 0x7fff) << GRIDBITS;
unsigned int exclusions = (x & 0x1); unsigned int exclusions = (x & 0x1);
x = (x >> 17) << GRIDBITS; x = (x >> 17) << GRIDBITS;
(void) fprintf( amoebaGpu->log, "GpuCell %8u %8u [%5u %5u %1u] %10u ", ii, gpu->psInteractingWorkUnit->_pSysStream[0][ii], x,y,exclusions, gpu->psInteractionFlag->_pSysStream[0][ii] ); (void) fprintf( amoebaGpu->log, "GpuCell %8u %8u [%5u %5u %1u] %10u ", ii, gpu->psInteractingWorkUnit->_pSysStream[0][ii], x,y,exclusions, gpu->psInteractionFlag->_pSysStream[0][ii] );
x = amoebaGpu->psVdwWorkUnit->_pSysStream[0][ii]; x = amoebaGpu->psVdwWorkUnit->_pSysStream[0][ii];
y = ((x >> 2) & 0x7fff) << GRIDBITS; y = ((x >> 2) & 0x7fff) << GRIDBITS;
exclusions = (x & 0x1); exclusions = (x & 0x1);
x = (x >> 17) << GRIDBITS; x = (x >> 17) << GRIDBITS;
(void) fprintf( amoebaGpu->log, " AmGpu %8u [%5u %5u %1u]\n", amoebaGpu->psWorkUnit->_pSysStream[0][ii], x,y,exclusions ); (void) fprintf( amoebaGpu->log, " AmGpu %8u [%5u %5u %1u]\n", amoebaGpu->psWorkUnit->_pSysStream[0][ii], x,y,exclusions );
} }
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
} }
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaVdw14_7CutoffByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>( kCalculateAmoebaVdw14_7CutoffByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
...@@ -578,6 +602,7 @@ if( 1 ){ ...@@ -578,6 +602,7 @@ if( 1 ){
amoebaGpu->psWorkArray_3_1->_pDevStream[0] ); amoebaGpu->psWorkArray_3_1->_pDevStream[0] );
#endif #endif
} else { } else {
kCalculateAmoebaVdw14_7Cutoff_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>( kCalculateAmoebaVdw14_7Cutoff_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, gpu->sim.pInteractingWorkUnit,
amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0], amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0],
...@@ -598,9 +623,8 @@ if( 1 ){ ...@@ -598,9 +623,8 @@ if( 1 ){
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
//amoebaGpu->psVdwWorkUnit->_pDevStream[0],
kCalculateAmoebaVdw14_7N2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>( kCalculateAmoebaVdw14_7N2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
gpu->sim.pInteractingWorkUnit, amoebaGpu->psVdwWorkUnit->_pDevStream[0],
amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0], amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0],
amoebaGpu->psVdwSigmaEpsilon->_pDevStream[0], amoebaGpu->psVdwSigmaEpsilon->_pDevStream[0],
amoebaGpu->vdwSigmaCombiningRule, amoebaGpu->vdwSigmaCombiningRule,
...@@ -612,6 +636,7 @@ if( 1 ){ ...@@ -612,6 +636,7 @@ if( 1 ){
amoebaGpu->psWorkArray_3_1->_pDevStream[0] ); amoebaGpu->psWorkArray_3_1->_pDevStream[0] );
#endif #endif
} else { } else {
kCalculateAmoebaVdw14_7N2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>( kCalculateAmoebaVdw14_7N2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(Vdw14_7Particle)*threadsPerBlock>>>(
amoebaGpu->psVdwWorkUnit->_pDevStream[0], amoebaGpu->psVdwWorkUnit->_pDevStream[0],
amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0], amoebaGpu->psAmoebaVdwCoordinates->_pDevStream[0],
...@@ -629,14 +654,15 @@ if( 1 ){ ...@@ -629,14 +654,15 @@ if( 1 ){
LAUNCHERROR("kCalculateAmoebaVdw14_7N2"); LAUNCHERROR("kCalculateAmoebaVdw14_7N2");
} }
#ifdef AMOEBA_DEBUG
if( amoebaGpu->log ){
debugArray->Download(); #ifdef AMOEBA_DEBUG_PRINT
if( amoebaGpu->log ){
(void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution\n" ); (void) fprintf( amoebaGpu->log, "Finished 14-7 kernel execution\n" );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
#ifdef AMOEBA_DEBUG
debugArray->Download();
int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms; int paddedNumberOfAtoms = amoebaGpu->gpuContext->sim.paddedNumberOfAtoms;
double cutOff = 1.0e+03; double cutOff = 1.0e+03;
for( int jj = 0; jj < gpu->natoms; jj++ ){ for( int jj = 0; jj < gpu->natoms; jj++ ){
...@@ -654,7 +680,26 @@ if( 1 ){ ...@@ -654,7 +680,26 @@ if( 1 ){
debugIndex += paddedNumberOfAtoms; debugIndex += paddedNumberOfAtoms;
} }
(void) fprintf( amoebaGpu->log,"\n" ); (void) fprintf( amoebaGpu->log,"\n" );
} }
#endif
amoebaGpu->psWorkArray_3_2->Download();
amoebaGpu->psWorkArray_3_1->Download();
//for( int jj = 0; jj < 3*gpu->natoms; jj += 3 )
for( int jj = 0; jj < 3*gpu->natoms; jj += 3 ){
for( int kk = 0; kk < amoebaGpu->outputBuffers; kk++ ){
float delta = fabs(amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] + 1.0f);
if( delta < 5.0e-06 || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj] ) || isNanOrInfinity( (double) amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2] ) )
(void) fprintf( amoebaGpu->log,"%6d %6d [%16.9e %16.9e %16.9e] [%16.9e %16.9e %16.9e]\n", jj, kk,
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_1->_pSysStream[kk][jj+2],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+1],
amoebaGpu->psWorkArray_3_2->_pSysStream[kk][jj+2] );
}
}
} }
#endif #endif
...@@ -667,7 +712,7 @@ if( 1 ){ ...@@ -667,7 +712,7 @@ if( 1 ){
CUDAStream<float4>* psTempForce = new CUDAStream<float4>(paddedNumberOfAtoms, 1, "psTempForce"); CUDAStream<float4>* psTempForce = new CUDAStream<float4>(paddedNumberOfAtoms, 1, "psTempForce");
kClearFloat4( amoebaGpu, paddedNumberOfAtoms, psTempForce ); kClearFloat4( amoebaGpu, paddedNumberOfAtoms, psTempForce );
kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce ); kCalculateAmoebaVdw14_7Reduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce );
kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce ); //kCalculateAmoebaVdw14_7NonReduction( amoebaGpu, amoebaGpu->psWorkArray_3_2, psTempForce );
std::vector<int> fileId; std::vector<int> fileId;
//fileId.push_back( 0 ); //fileId.push_back( 0 );
VectorOfDoubleVectors outputVector; VectorOfDoubleVectors outputVector;
......
...@@ -144,10 +144,10 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)( ...@@ -144,10 +144,10 @@ void METHOD_NAME(kCalculateAmoebaVdw14_7, _kernel)(
// add to field at atomI the field due atomJ's dipole // add to field at atomI the field due atomJ's dipole
forceSum[0] += mask ? ijForce[0] : 0.0f; forceSum[0] += mask ? ijForce[0] : 0.0f;
forceSum[1] += mask ? ijForce[1] : 0.0f; forceSum[1] += mask ? ijForce[1] : 0.0f;
forceSum[2] += mask ? ijForce[2] : 0.0f; forceSum[2] += mask ? ijForce[2] : 0.0f;
totalEnergy += mask ? 0.5*energy : 0.0f; totalEnergy += mask ? 0.5f*energy : 0.0f;
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
if( atomI == targetAtom || (y+j) == targetAtom ){ if( atomI == targetAtom || (y+j) == targetAtom ){
...@@ -193,7 +193,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){ ...@@ -193,7 +193,7 @@ if( atomI == targetAtom || (y+j) == targetAtom ){
load3dArrayBufferPerWarp( offset, forceSum, outputForce ); load3dArrayBufferPerWarp( offset, forceSum, outputForce );
#else #else
unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms); unsigned int offset = 3*(x + tgx + (x >> GRIDBITS) * cAmoebaSim.paddedNumberOfAtoms);
load3dArray( offset, forceSum, outputForce ); load3dArray( offset, forceSum, outputForce );
#endif #endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment