"wrappers/python/src/vscode:/vscode.git/clone" did not exist on "ca28c38a7eef89ee6538480e7bfed7004cea8eb5"
Commit dffbb5e4 authored by peastman's avatar peastman
Browse files

Merge pull request #864 from peastman/cpu

Fixed errors using OpenCL with CPU device
parents 24a960a2 8fc71320
...@@ -1882,7 +1882,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ ...@@ -1882,7 +1882,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeUpdateBsplinesKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]); pmeUpdateBsplinesKernel.setArg<mm_float4>(7, recipBoxVectorsFloat[2]);
} }
cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms()); cl.executeKernel(pmeUpdateBsplinesKernel, cl.getNumAtoms());
if (deviceIsCpu) { if (deviceIsCpu && !cl.getSupports64BitGlobalAtomics()) {
setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5); setPeriodicBoxSizeArg(cl, pmeSpreadChargeKernel, 5);
if (cl.getUseDoublePrecision()) { if (cl.getUseDoublePrecision()) {
pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]); pmeSpreadChargeKernel.setArg<mm_double4>(6, recipBoxVectors[0]);
......
...@@ -5,16 +5,15 @@ ...@@ -5,16 +5,15 @@
/** /**
* Find a bounding box for the atoms in each block. * Find a bounding box for the atoms in each block.
*/ */
__kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global const real4* restrict posq, __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict rebuildNeighborList, __global const real4* restrict posq, __global real4* restrict blockCenter, __global real4* restrict blockBoundingBox, __global int* restrict rebuildNeighborList,
__global real2* restrict sortedBlocks) { __global real2* restrict sortedBlocks) {
int index = get_global_id(0); int index = get_global_id(0);
int base = index*TILE_SIZE; int base = index*TILE_SIZE;
while (base < numAtoms) { while (base < numAtoms) {
real4 pos = posq[base]; real4 pos = posq[base];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
pos.xyz -= floor(pos.xyz*invPeriodicBoxSize.xyz)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS(pos)
real4 firstPoint = pos;
#endif #endif
real4 minPos = pos; real4 minPos = pos;
real4 maxPos = pos; real4 maxPos = pos;
...@@ -22,7 +21,8 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri ...@@ -22,7 +21,8 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
for (int i = base+1; i < last; i++) { for (int i = base+1; i < last; i++) {
pos = posq[i]; pos = posq[i];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
pos.xyz -= floor((pos.xyz-firstPoint.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; real4 center = 0.5f*(maxPos+minPos);
APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)
#endif #endif
minPos = min(minPos, pos); minPos = min(minPos, pos);
maxPos = max(maxPos, pos); maxPos = max(maxPos, pos);
...@@ -44,7 +44,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri ...@@ -44,7 +44,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
__kernel void sortBoxData(__global const real2* restrict sortedBlock, __global const real4* restrict blockCenter, __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global const real4* restrict blockCenter,
__global const real4* restrict blockBoundingBox, __global real4* restrict sortedBlockCenter, __global const real4* restrict blockBoundingBox, __global real4* restrict sortedBlockCenter,
__global real4* restrict sortedBlockBoundingBox, __global const real4* restrict posq, __global const real4* restrict oldPositions, __global real4* restrict sortedBlockBoundingBox, __global const real4* restrict posq, __global const real4* restrict oldPositions,
__global unsigned int* restrict interactionCount, __global int* restrict rebuildNeighborList) { __global unsigned int* restrict interactionCount, __global int* restrict rebuildNeighborList, int forceRebuild) {
for (int i = get_global_id(0); i < NUM_BLOCKS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_BLOCKS; i += get_global_size(0)) {
int index = (int) sortedBlock[i].y; int index = (int) sortedBlock[i].y;
sortedBlockCenter[i] = blockCenter[index]; sortedBlockCenter[i] = blockCenter[index];
...@@ -53,7 +53,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c ...@@ -53,7 +53,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
// Also check whether any atom has moved enough so that we really need to rebuild the neighbor list. // Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.
bool rebuild = false; bool rebuild = forceRebuild;
for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) { for (int i = get_global_id(0); i < NUM_ATOMS; i += get_global_size(0)) {
real4 delta = oldPositions[i]-posq[i]; real4 delta = oldPositions[i]-posq[i];
if (delta.x*delta.x + delta.y*delta.y + delta.z*delta.z > 0.25f*PADDING*PADDING) if (delta.x*delta.x + delta.y*delta.y + delta.z*delta.z > 0.25f*PADDING*PADDING)
...@@ -70,8 +70,8 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c ...@@ -70,8 +70,8 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
* to global memory. * to global memory.
*/ */
void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms, int* numAtoms, int numValid, __global unsigned int* interactionCount, void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms, int* numAtoms, int numValid, __global unsigned int* interactionCount,
__global int* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, __global int* interactingTiles, __global unsigned int* interactingAtoms, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX,
__global real4* posq, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) { real4 periodicBoxVecY, real4 periodicBoxVecZ, __global const real4* posq, real4 blockCenterX, real4 blockSizeX, unsigned int maxTiles, bool finish) {
real4 posBuffer[TILE_SIZE]; real4 posBuffer[TILE_SIZE];
const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF && const bool singlePeriodicCopy = (0.5f*periodicBoxSize.x-blockSizeX.x >= PADDED_CUTOFF &&
0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF && 0.5f*periodicBoxSize.y-blockSizeX.y >= PADDED_CUTOFF &&
...@@ -83,7 +83,7 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms, ...@@ -83,7 +83,7 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms,
// The box is small enough that we can just translate all the atoms into a single periodic // The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later. // box, then skip having to apply periodic boundary conditions later.
pos.xyz -= floor((pos.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, blockCenterX)
} }
#endif #endif
posBuffer[i] = pos; posBuffer[i] = pos;
...@@ -99,14 +99,14 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms, ...@@ -99,14 +99,14 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms,
real4 pos = posq[atom]; real4 pos = posq[atom];
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
if (singlePeriodicCopy) if (singlePeriodicCopy)
pos.xyz -= floor((pos.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, blockCenterX)
#endif #endif
bool interacts = false; bool interacts = false;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
if (!singlePeriodicCopy) { if (!singlePeriodicCopy) {
for (int j = 0; j < TILE_SIZE && !interacts; j++) { for (int j = 0; j < TILE_SIZE && !interacts; j++) {
real4 delta = pos-posBuffer[j]; real4 delta = pos-posBuffer[j];
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
interacts = (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED); interacts = (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
} }
} }
...@@ -159,9 +159,10 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms, ...@@ -159,9 +159,10 @@ void storeInteractionData(unsigned short x, unsigned short* buffer, int* atoms,
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart, * Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting. * mark them as non-interacting.
*/ */
__kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, __global unsigned int* restrict interactionCount, __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ,
__global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, __global unsigned int* restrict interactionCount, __global int* restrict interactingTiles, __global unsigned int* restrict interactingAtoms,
unsigned int numBlocks, __global real2* restrict sortedBlocks, __global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox, __global const real4* restrict posq, unsigned int maxTiles, unsigned int startBlockIndex, unsigned int numBlocks, __global real2* restrict sortedBlocks,
__global const real4* restrict sortedBlockCenter, __global const real4* restrict sortedBlockBoundingBox,
__global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions, __global const unsigned int* restrict exclusionIndices, __global const unsigned int* restrict exclusionRowIndices, __global real4* restrict oldPositions,
__global const int* restrict rebuildNeighborList) { __global const int* restrict rebuildNeighborList) {
if (rebuildNeighborList[0] == 0) if (rebuildNeighborList[0] == 0)
...@@ -204,9 +205,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -204,9 +205,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
real4 blockSizeY = sortedBlockBoundingBox[j]; real4 blockSizeY = sortedBlockBoundingBox[j];
real4 delta = blockCenterX-blockCenterY; real4 delta = blockCenterX-blockCenterY;
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.x -= floor(delta.x*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_DELTA(delta)
delta.y -= floor(delta.y*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
delta.z -= floor(delta.z*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
#endif #endif
delta.x = max((real) 0, fabs(delta.x)-blockSizeX.x-blockSizeY.x); delta.x = max((real) 0, fabs(delta.x)-blockSizeX.x-blockSizeY.x);
delta.y = max((real) 0, fabs(delta.y)-blockSizeX.y-blockSizeY.y); delta.y = max((real) 0, fabs(delta.y)-blockSizeX.y-blockSizeY.y);
...@@ -216,12 +215,12 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi ...@@ -216,12 +215,12 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
buffer[valuesInBuffer++] = y; buffer[valuesInBuffer++] = y;
if (valuesInBuffer == BUFFER_SIZE) { if (valuesInBuffer == BUFFER_SIZE) {
storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, posq, blockCenterX, blockSizeX, maxTiles, false); storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, posq, blockCenterX, blockSizeX, maxTiles, false);
valuesInBuffer = 0; valuesInBuffer = 0;
} }
} }
} }
storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, posq, blockCenterX, blockSizeX, maxTiles, true); storeInteractionData(x, buffer, atoms, &numAtoms, valuesInBuffer, interactionCount, interactingTiles, interactingAtoms, periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ, posq, blockCenterX, blockSizeX, maxTiles, true);
} }
// Record the positions the neighbor list is based on. // Record the positions the neighbor list is based on.
......
...@@ -23,7 +23,8 @@ __kernel void computeNonbonded( ...@@ -23,7 +23,8 @@ __kernel void computeNonbonded(
__global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices __global const ushort2* restrict exclusionTiles, unsigned int startTileIndex, unsigned int numTileIndices
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
, __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize, , __global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms
#endif #endif
PARAMETER_ARGUMENTS) { PARAMETER_ARGUMENTS) {
real energy = 0; real energy = 0;
...@@ -65,7 +66,7 @@ __kernel void computeNonbonded( ...@@ -65,7 +66,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -133,7 +134,7 @@ __kernel void computeNonbonded( ...@@ -133,7 +134,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
...@@ -291,15 +292,13 @@ __kernel void computeNonbonded( ...@@ -291,15 +292,13 @@ __kernel void computeNonbonded(
real4 blockCenterX = blockCenter[x]; real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
localData[tgx].x -= floor((localData[tgx].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x; APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
localData[tgx].y -= floor((localData[tgx].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[tgx].z -= floor((localData[tgx].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
} }
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) { for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx; unsigned int atom1 = x*TILE_SIZE+tgx;
real4 force = 0; real4 force = 0;
real4 posq1 = posq[atom1]; real4 posq1 = posq[atom1];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
LOAD_ATOM1_PARAMETERS LOAD_ATOM1_PARAMETERS
for (unsigned int j = 0; j < TILE_SIZE; j++) { for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
...@@ -364,7 +363,7 @@ __kernel void computeNonbonded( ...@@ -364,7 +363,7 @@ __kernel void computeNonbonded(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q); real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0); real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC #ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz; APPLY_PERIODIC_TO_DELTA(delta)
#endif #endif
real r2 = dot(delta.xyz, delta.xyz); real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF #ifdef USE_CUTOFF
......
...@@ -410,9 +410,9 @@ void testTriclinic() { ...@@ -410,9 +410,9 @@ void testTriclinic() {
} }
else { else {
const Vec3 force = delta*ONE_4PI_EPS0*(-1.0/(distance*distance*distance)+2.0*krf); const Vec3 force = delta*ONE_4PI_EPS0*(-1.0/(distance*distance*distance)+2.0*krf);
ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(1.0/distance+krf*distance*distance-crf), state.getPotentialEnergy(), TOL); ASSERT_EQUAL_TOL(ONE_4PI_EPS0*(1.0/distance+krf*distance*distance-crf), state.getPotentialEnergy(), 1e-4);
ASSERT_EQUAL_VEC(force, state.getForces()[0], TOL); ASSERT_EQUAL_VEC(force, state.getForces()[0], 1e-4);
ASSERT_EQUAL_VEC(-force, state.getForces()[1], TOL); ASSERT_EQUAL_VEC(-force, state.getForces()[1], 1e-4);
} }
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment