Commit 8ab02ff2 authored by Mark Friedrichs
Browse files

Modified thread counts that were causing failures on lower end boards

parent 82d0ec38
...@@ -1417,7 +1417,7 @@ void gpuMutualInducedFieldAllocate( amoebaGpuContext amoebaGpu ) ...@@ -1417,7 +1417,7 @@ void gpuMutualInducedFieldAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu->amoebaSim.pInducedDipolePolar = amoebaGpu->psInducedDipolePolar->_pDevData; amoebaGpu->amoebaSim.pInducedDipolePolar = amoebaGpu->psInducedDipolePolar->_pDevData;
amoebaGpu->psCurrentEpsilon = new CUDAStream<float>(5, 1, "CurrentEpsilon"); amoebaGpu->psCurrentEpsilon = new CUDAStream<float>(5, 1, "CurrentEpsilon");
amoebaGpu->epsilonThreadsPerBlock = 384; amoebaGpu->epsilonThreadsPerBlock = amoebaGpu->gpuContext->sim.threads_per_block;
amoebaGpu->psPolarizability = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "Polarizability"); amoebaGpu->psPolarizability = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "Polarizability");
unsigned int offset = paddedNumberOfAtoms*3*sizeof( float ); unsigned int offset = paddedNumberOfAtoms*3*sizeof( float );
......
...@@ -142,14 +142,14 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear ) ...@@ -142,14 +142,14 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear )
LAUNCHERROR("kClearFields_3_1"); LAUNCHERROR("kClearFields_3_1");
if( numberToClear > 1 ){ if( numberToClear > 1 ){
kClearFields_kernel<<<gpu->sim.nonbond_blocks, 384>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers, amoebaGpu->psWorkArray_3_2->_pDevData ); kClearFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.threads_per_block>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers, amoebaGpu->psWorkArray_3_2->_pDevData );
LAUNCHERROR("kClearFields_3_2"); LAUNCHERROR("kClearFields_3_2");
} else { } else {
return; return;
} }
if( numberToClear > 2 ){ if( numberToClear > 2 ){
kClearFields_kernel<<<gpu->sim.nonbond_blocks, 384>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers, kClearFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.threads_per_block>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_3->_pDevData ); amoebaGpu->psWorkArray_3_3->_pDevData );
LAUNCHERROR("kClearFields_3_3"); LAUNCHERROR("kClearFields_3_3");
} else { } else {
...@@ -157,7 +157,7 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear ) ...@@ -157,7 +157,7 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear )
} }
if( numberToClear > 3 ){ if( numberToClear > 3 ){
kClearFields_kernel<<<gpu->sim.nonbond_blocks, 384>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers, kClearFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.threads_per_block>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_4->_pDevData ); amoebaGpu->psWorkArray_3_4->_pDevData );
LAUNCHERROR("kClearFields_3_4"); LAUNCHERROR("kClearFields_3_4");
} }
......
...@@ -416,7 +416,7 @@ void kCalculateAmoebaVdw14_7NonReduction_kernel( float* inputForce, float4* outp ...@@ -416,7 +416,7 @@ void kCalculateAmoebaVdw14_7NonReduction_kernel( float* inputForce, float4* outp
static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDAStream<float>* vdwOutputArray, CUDAStream<float4>* forceOutputArray ) static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDAStream<float>* vdwOutputArray, CUDAStream<float4>* forceOutputArray )
{ {
kCalculateAmoebaVdw14_7NonReduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, 384>>>( kCalculateAmoebaVdw14_7NonReduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, amoebaGpu->gpuContext->sim.threads_per_block>>>(
vdwOutputArray->_pDevData, forceOutputArray->_pDevData ); vdwOutputArray->_pDevData, forceOutputArray->_pDevData );
LAUNCHERROR("kCalculateAmoebaVdw14_7MonReduction"); LAUNCHERROR("kCalculateAmoebaVdw14_7MonReduction");
} }
...@@ -480,7 +480,7 @@ void kCalculateAmoebaVdw14_7CopyCoordinates_kernel( unsigned int bufferLength, f ...@@ -480,7 +480,7 @@ void kCalculateAmoebaVdw14_7CopyCoordinates_kernel( unsigned int bufferLength, f
void kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpuContext amoebaGpu, CUDAStream<float4>* toCopy, CUDAStream<float4>* copy ) void kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpuContext amoebaGpu, CUDAStream<float4>* toCopy, CUDAStream<float4>* copy )
{ {
kCalculateAmoebaVdw14_7CopyCoordinates_kernel<<<amoebaGpu->gpuContext->blocksPerSM, 384>>>( amoebaGpu->gpuContext->sim.paddedNumberOfAtoms, kCalculateAmoebaVdw14_7CopyCoordinates_kernel<<<amoebaGpu->gpuContext->blocksPerSM, amoebaGpu->gpuContext->sim.threads_per_block>>>( amoebaGpu->gpuContext->sim.paddedNumberOfAtoms,
toCopy->_pDevData, copy->_pDevData ); toCopy->_pDevData, copy->_pDevData );
LAUNCHERROR("kCalculateAmoebaVdw14_7CopyCoordinates"); LAUNCHERROR("kCalculateAmoebaVdw14_7CopyCoordinates");
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment