Commit e69c8c66 authored by Mark Friedrichs
Browse files

sharedMemoryPerBlock was not set correctly for 480

parent d7aa33fe
......@@ -62,7 +62,15 @@ amoebaGpuContext amoebaGpuInit( _gpuContext* gpu )
amoebaGpu->log = stderr;
#endif
amoebaGpu->numberOfSorWorkVectors = 4;
if( gpu->sm_version >= SM_20 ){
amoebaGpu->sharedMemoryPerBlock = 49152;
} else if( gpu->sm_version >= SM_12 ){
amoebaGpu->sharedMemoryPerBlock = 16384;
} else {
amoebaGpu->sharedMemoryPerBlock = 8192;
}
amoebaGpu->paddedNumberOfAtoms = gpu->sim.paddedNumberOfAtoms;
amoebaGpu->amoebaSim.numberOfAtoms = gpu->natoms;
amoebaGpu->amoebaSim.paddedNumberOfAtoms = gpu->sim.paddedNumberOfAtoms;
......
......@@ -1203,10 +1203,11 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
}
if( amoebaGpu->log && timestep == 1 ){
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaKirkwoodEDiffN2Forces: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%lu shrd=%lu Ebuf=%u ixnCt=%lu workUnits=%u\n",
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaKirkwoodEDiffN2Forces: blocks=%u threads=%u bffr/Warp=%u atm=%lu shrd=%lu"
" Ebuf=%u ixnCt=%lu workUnits=%u sm=%d device=%d sharedMemoryPerBlock=%u\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(KirkwoodEDiffParticle), sizeof(KirkwoodEDiffParticle)*threadsPerBlock,
amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits, gpu->sm_version, gpu->device, amoebaGpu->sharedMemoryPerBlock );
//gpuPrintCudaAmoebaGmxSimulation(amoebaGpu, amoebaGpu->log );
(void) fflush( amoebaGpu->log );
}
......@@ -1238,14 +1239,6 @@ void kCalculateAmoebaKirkwoodEDiff( amoebaGpuContext amoebaGpu )
} else {
#ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "kCalculateAmoebaCudaKirkwoodEDiffN2Forces no warp: numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u Ebuf=%u ixnCt=%u workUnits=%u\n",
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp,
sizeof(KirkwoodEDiffParticle), sizeof(KirkwoodEDiffParticle)*threadsPerBlock,
amoebaGpu->energyOutputBuffers, (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log );
#endif
kCalculateAmoebaCudaKirkwoodEDiffN2Forces_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(KirkwoodEDiffParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevStream[0],
gpu->psPosq4->_pDevStream[0],
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment