"platforms/common/vscode:/vscode.git/clone" did not exist on "c1d643e2fbfbcbe4c84ebabb05738c34c7647d9e"
Commit d927ff49 authored by Peter Eastman's avatar Peter Eastman
Browse files

Minor optimizations

parent 27550631
......@@ -205,6 +205,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
int major, minor;
CHECK_RESULT(cuDeviceComputeCapability(&major, &minor, device));
int numThreadBlocksPerComputeUnit = (major >= 6 ? 4 : 6);
#if __CUDA_API_VERSION < 7000
// This is a workaround to support GTX 980 with CUDA 6.5. It reports
// its compute capability as 5.2, but the compiler doesn't support
......@@ -241,7 +242,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
numAtomBlocks = (paddedNumAtoms+(TileSize-1))/TileSize;
int multiprocessors;
CHECK_RESULT(cuDeviceGetAttribute(&multiprocessors, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
int numThreadBlocksPerComputeUnit = 6;
numThreadBlocks = numThreadBlocksPerComputeUnit*multiprocessors;
if (useDoublePrecision) {
posq = CudaArray::create<double4>(*this, paddedNumAtoms, "posq");
......
......@@ -27,8 +27,18 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
maxPos = make_real4(max(maxPos.x,pos.x), max(maxPos.y,pos.y), max(maxPos.z,pos.z), 0);
}
real4 blockSize = 0.5f*(maxPos-minPos);
real4 center = 0.5f*(maxPos+minPos);
blockSize.w = 0;
for (int i = base+1; i < last; i++) {
pos = posq[i];
real4 delta = posq[i]-center;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
blockSize.w = max(blockSize.w, delta.x*delta.x+delta.y*delta.y+delta.z*delta.z);
}
blockBoundingBox[index] = blockSize;
blockCenter[index] = 0.5f*(maxPos+minPos);
blockCenter[index] = center;
sortedBlocks[index] = make_real2(blockSize.x+blockSize.y+blockSize.z, index);
index += blockDim.x*gridDim.x;
base = index*TILE_SIZE;
......@@ -182,6 +192,7 @@ extern "C" __global__ void findBlocksWithInteractions(real4 periodicBoxSize, rea
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < (PADDED_CUTOFF+blockSizeX.w+blockSizeY.w)*(PADDED_CUTOFF+blockSizeX.w+blockSizeY.w));
blockDelta.x = max(0.0f, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x);
blockDelta.y = max(0.0f, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
blockDelta.z = max(0.0f, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z);
......
......@@ -27,8 +27,18 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
maxPos = max(maxPos, pos);
}
real4 blockSize = 0.5f*(maxPos-minPos);
real4 center = 0.5f*(maxPos+minPos);
blockSize.w = 0;
for (int i = base+1; i < last; i++) {
pos = posq[i];
real4 delta = posq[i]-center;
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
blockSize.w = max(blockSize.w, delta.x*delta.x+delta.y*delta.y+delta.z*delta.z);
}
blockBoundingBox[index] = blockSize;
blockCenter[index] = 0.5f*(maxPos+minPos);
blockCenter[index] = center;
sortedBlocks[index] = (real2) (blockSize.x+blockSize.y+blockSize.z, index);
index += get_global_size(0);
base = index*TILE_SIZE;
......@@ -142,6 +152,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
#ifdef USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(blockDelta)
#endif
includeBlock2 &= (blockDelta.x*blockDelta.x+blockDelta.y*blockDelta.y+blockDelta.z*blockDelta.z < (PADDED_CUTOFF+blockSizeX.w+blockSizeY.w)*(PADDED_CUTOFF+blockSizeX.w+blockSizeY.w));
blockDelta.x = max((real) 0, fabs(blockDelta.x)-blockSizeX.x-blockSizeY.x);
blockDelta.y = max((real) 0, fabs(blockDelta.y)-blockSizeX.y-blockSizeY.y);
blockDelta.z = max((real) 0, fabs(blockDelta.z)-blockSizeX.z-blockSizeY.z);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment