Commit 8ab02ff2 authored by Mark Friedrichs
Browse files

Modified thread counts that were causing failures on lower end boards

parent 82d0ec38
......@@ -1417,7 +1417,7 @@ void gpuMutualInducedFieldAllocate( amoebaGpuContext amoebaGpu )
amoebaGpu->amoebaSim.pInducedDipolePolar = amoebaGpu->psInducedDipolePolar->_pDevData;
amoebaGpu->psCurrentEpsilon = new CUDAStream<float>(5, 1, "CurrentEpsilon");
amoebaGpu->epsilonThreadsPerBlock = 384;
amoebaGpu->epsilonThreadsPerBlock = amoebaGpu->gpuContext->sim.threads_per_block;
amoebaGpu->psPolarizability = new CUDAStream<float>(paddedNumberOfAtoms*3, 1, "Polarizability");
unsigned int offset = paddedNumberOfAtoms*3*sizeof( float );
......
......@@ -142,14 +142,14 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear )
LAUNCHERROR("kClearFields_3_1");
if( numberToClear > 1 ){
kClearFields_kernel<<<gpu->sim.nonbond_blocks, 384>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers, amoebaGpu->psWorkArray_3_2->_pDevData );
kClearFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.threads_per_block>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers, amoebaGpu->psWorkArray_3_2->_pDevData );
LAUNCHERROR("kClearFields_3_2");
} else {
return;
}
if( numberToClear > 2 ){
kClearFields_kernel<<<gpu->sim.nonbond_blocks, 384>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers,
kClearFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.threads_per_block>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_3->_pDevData );
LAUNCHERROR("kClearFields_3_3");
} else {
......@@ -157,7 +157,7 @@ void kClearFields_3( amoebaGpuContext amoebaGpu, unsigned int numberToClear )
}
if( numberToClear > 3 ){
kClearFields_kernel<<<gpu->sim.nonbond_blocks, 384>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers,
kClearFields_kernel<<<gpu->sim.nonbond_blocks, gpu->sim.threads_per_block>>>( gpu->sim.paddedNumberOfAtoms*3*gpu->sim.outputBuffers,
amoebaGpu->psWorkArray_3_4->_pDevData );
LAUNCHERROR("kClearFields_3_4");
}
......
......@@ -416,7 +416,7 @@ void kCalculateAmoebaVdw14_7NonReduction_kernel( float* inputForce, float4* outp
// Host-side launcher for kCalculateAmoebaVdw14_7NonReduction_kernel.
//
// amoebaGpu         context; the launch grid size is read from
//                   amoebaGpu->gpuContext->sim.blocks
// vdwOutputArray    stream whose device pointer (_pDevData) is passed as the
//                   kernel's first argument
// forceOutputArray  stream whose device pointer (_pDevData) is passed as the
//                   kernel's second argument
//
// LAUNCHERROR is invoked immediately after the launch.
// NOTE(review): the label passed to LAUNCHERROR reads "...MonReduction" while the
// function is named "...NonReduction" -- looks like a typo; confirm before changing.
static void kCalculateAmoebaVdw14_7NonReduction(amoebaGpuContext amoebaGpu, CUDAStream<float>* vdwOutputArray, CUDAStream<float4>* forceOutputArray )
{
// NOTE(review): the next two lines appear to be the before/after pair of a diff
// whose +/- markers were lost: the first uses a fixed block size of 384 threads,
// the second takes the block size from sim.threads_per_block. Presumably only the
// second line belongs in the committed file -- confirm against the repository.
kCalculateAmoebaVdw14_7NonReduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, 384>>>(
kCalculateAmoebaVdw14_7NonReduction_kernel<<<amoebaGpu->gpuContext->sim.blocks, amoebaGpu->gpuContext->sim.threads_per_block>>>(
vdwOutputArray->_pDevData, forceOutputArray->_pDevData );
LAUNCHERROR("kCalculateAmoebaVdw14_7MonReduction");
}
......@@ -480,7 +480,7 @@ void kCalculateAmoebaVdw14_7CopyCoordinates_kernel( unsigned int bufferLength, f
// Host-side launcher for kCalculateAmoebaVdw14_7CopyCoordinates_kernel.
//
// amoebaGpu  context; the grid size is amoebaGpu->gpuContext->blocksPerSM and the
//            first kernel argument is gpuContext->sim.paddedNumberOfAtoms
// toCopy     stream whose device pointer (_pDevData) is the second kernel argument
// copy       stream whose device pointer (_pDevData) is the third kernel argument
//
// Presumably the kernel copies from toCopy into copy (inferred from the parameter
// names only; the kernel body is not visible here -- TODO confirm).
// LAUNCHERROR is invoked immediately after the launch.
void kCalculateAmoebaVdw14_7CopyCoordinates( amoebaGpuContext amoebaGpu, CUDAStream<float4>* toCopy, CUDAStream<float4>* copy )
{
// NOTE(review): the next two lines appear to be the before/after pair of a diff
// whose +/- markers were lost: the first uses a fixed block size of 384 threads,
// the second takes the block size from sim.threads_per_block. Presumably only the
// second line belongs in the committed file -- confirm against the repository.
kCalculateAmoebaVdw14_7CopyCoordinates_kernel<<<amoebaGpu->gpuContext->blocksPerSM, 384>>>( amoebaGpu->gpuContext->sim.paddedNumberOfAtoms,
kCalculateAmoebaVdw14_7CopyCoordinates_kernel<<<amoebaGpu->gpuContext->blocksPerSM, amoebaGpu->gpuContext->sim.threads_per_block>>>( amoebaGpu->gpuContext->sim.paddedNumberOfAtoms,
toCopy->_pDevData, copy->_pDevData );
LAUNCHERROR("kCalculateAmoebaVdw14_7CopyCoordinates");
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment