Commit 761d7e17 authored by Mark Friedrichs
Browse files

Removal of limitation for 'long-range in sequence' covalent bonds

Reduced memory footprint
parent 80c4976e
...@@ -217,13 +217,14 @@ void kSorUpdateMutualInducedField_kernel( ...@@ -217,13 +217,14 @@ void kSorUpdateMutualInducedField_kernel(
static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<float>* outputArray, CUDAStream<float>* outputPolarArray ) static void kReduceMutualInducedFields(amoebaGpuContext amoebaGpu, CUDAStream<float>* outputArray, CUDAStream<float>* outputPolarArray )
{ {
kReduceFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>( gpuContext gpu = amoebaGpu->gpuContext;
amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers, kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_1->_pDevData, outputArray->_pDevData ); amoebaGpu->psWorkArray_3_1->_pDevData, outputArray->_pDevData );
LAUNCHERROR("kReduceMI_Fields1"); LAUNCHERROR("kReduceMI_Fields1");
kReduceFields_kernel<<<amoebaGpu->nonbondBlocks, amoebaGpu->fieldReduceThreadsPerBlock>>>( kReduceFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.bsf_reduce_threads_per_block>>>(
amoebaGpu->paddedNumberOfAtoms*3, amoebaGpu->outputBuffers, gpu->sim.paddedNumberOfAtoms*3, gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_2->_pDevData, outputPolarArray->_pDevData ); amoebaGpu->psWorkArray_3_2->_pDevData, outputPolarArray->_pDevData );
LAUNCHERROR("kReduceMI_Fields2"); LAUNCHERROR("kReduceMI_Fields2");
} }
...@@ -277,14 +278,14 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext ...@@ -277,14 +278,14 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
(void) fprintf( amoebaGpu->log, "%s numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u\n", methodName, (void) fprintf( amoebaGpu->log, "%s numBlocks=%u numThreads=%u bufferPerWarp=%u atm=%u shrd=%u ixnCt=%u workUnits=%u\n", methodName,
amoebaGpu->nonbondBlocks, threadsPerBlock, amoebaGpu->bOutputBufferPerWarp, gpu->sim.nonbond_blocks, threadsPerBlock, gpu->bOutputBufferPerWarp,
sizeof(MutualInducedParticle), sizeof(MutualInducedParticle)*threadsPerBlock, sizeof(MutualInducedParticle), sizeof(MutualInducedParticle)*threadsPerBlock,
(*gpu->psInteractionCount)[0], gpu->sim.workUnits ); (*gpu->psInteractionCount)[0], gpu->sim.workUnits );
(void) fflush( amoebaGpu->log ); (void) fflush( amoebaGpu->log );
#endif #endif
if (gpu->bOutputBufferPerWarp){ if (gpu->bOutputBufferPerWarp){
kCalculateAmoebaMutualInducedFieldN2ByWarp_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>( kCalculateAmoebaMutualInducedFieldN2ByWarp_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevData, amoebaGpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
...@@ -296,7 +297,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext ...@@ -296,7 +297,7 @@ static void cudaComputeAmoebaMutualInducedFieldMatrixMultiply( amoebaGpuContext
} else { } else {
kCalculateAmoebaMutualInducedFieldN2_kernel<<<amoebaGpu->nonbondBlocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>( kCalculateAmoebaMutualInducedFieldN2_kernel<<<gpu->sim.nonbond_blocks, threadsPerBlock, sizeof(MutualInducedParticle)*threadsPerBlock>>>(
amoebaGpu->psWorkUnit->_pDevData, amoebaGpu->psWorkUnit->_pDevData,
amoebaGpu->psWorkArray_3_1->_pDevData, amoebaGpu->psWorkArray_3_1->_pDevData,
#ifdef AMOEBA_DEBUG #ifdef AMOEBA_DEBUG
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment