Commit bcf9f9b4 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed thread synchronization problem in RPMD kernels

parent 95696f7a
......@@ -38,8 +38,6 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed invMass = particleVelm.w;
if (invMass == 0)
continue;
mixed c3_0 = c2_0*SQRT(nkT*invMass);
// Forward FFT.
......@@ -76,6 +74,7 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
// Inverse FFT.
FFT_V_BACKWARD
if (invMass != 0)
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
randomIndex += blockDim.x*gridDim.x;
}
......@@ -102,11 +101,10 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
if (particleVelm.w != 0)
velm[index] = particleVelm;
}
......@@ -122,8 +120,6 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
if (particleVelm.w == 0)
continue;
// Forward FFT.
......@@ -159,9 +155,11 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
FFT_Q_BACKWARD
FFT_V_BACKWARD
if (particleVelm.w != 0) {
posq[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*qreal[indexInBlock].x, SCALE*qreal[indexInBlock].y, SCALE*qreal[indexInBlock].z, particlePosq.w);
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
}
}
}
/**
......@@ -179,11 +177,10 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
if (particleVelm.w != 0)
velm[index] = particleVelm;
}
}
......
......@@ -38,8 +38,6 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed invMass = particleVelm.w;
if (invMass == 0)
continue;
mixed c3_0 = c2_0*sqrt(nkT*invMass);
// Forward FFT.
......@@ -75,6 +73,7 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
// Inverse FFT.
FFT_V_BACKWARD
if (invMass != 0)
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
randomIndex += get_global_size(0);
}
......@@ -99,9 +98,8 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w);
if (particleVelm.w != 0)
velm[index] = particleVelm;
}
......@@ -117,8 +115,6 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
if (particleVelm.w == 0)
continue;
// Forward FFT.
......@@ -154,9 +150,11 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
FFT_Q_BACKWARD
FFT_V_BACKWARD
if (particleVelm.w != 0) {
posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*qreal[indexInBlock].xyz;
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
}
}
}
/**
......@@ -172,9 +170,8 @@ __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mi
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS;
mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w);
if (particleVelm.w != 0)
velm[index] = particleVelm;
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment