Commit bcf9f9b4 authored by Peter Eastman's avatar Peter Eastman
Browse files

Fixed thread synchronization problem in RPMD kernels

parent 95696f7a
...@@ -38,8 +38,6 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns ...@@ -38,8 +38,6 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed invMass = particleVelm.w; mixed invMass = particleVelm.w;
if (invMass == 0)
continue;
mixed c3_0 = c2_0*SQRT(nkT*invMass); mixed c3_0 = c2_0*SQRT(nkT*invMass);
// Forward FFT. // Forward FFT.
...@@ -76,7 +74,8 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns ...@@ -76,7 +74,8 @@ extern "C" __global__ void applyPileThermostat(mixed4* velm, float4* random, uns
// Inverse FFT. // Inverse FFT.
FFT_V_BACKWARD FFT_V_BACKWARD
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w); if (invMass != 0)
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
randomIndex += blockDim.x*gridDim.x; randomIndex += blockDim.x*gridDim.x;
} }
} }
...@@ -102,12 +101,11 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long* ...@@ -102,12 +101,11 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3; int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
mixed4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w); particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w); particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w); particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm; if (particleVelm.w != 0)
velm[index] = particleVelm;
} }
// Evolve the free ring polymer by transforming to the frequency domain. // Evolve the free ring polymer by transforming to the frequency domain.
...@@ -122,8 +120,6 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long* ...@@ -122,8 +120,6 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = (blockIdx.x*blockDim.x+threadIdx.x)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
if (particleVelm.w == 0)
continue;
// Forward FFT. // Forward FFT.
...@@ -159,8 +155,10 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long* ...@@ -159,8 +155,10 @@ extern "C" __global__ void integrateStep(mixed4* posq, mixed4* velm, long long*
FFT_Q_BACKWARD FFT_Q_BACKWARD
FFT_V_BACKWARD FFT_V_BACKWARD
posq[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*qreal[indexInBlock].x, SCALE*qreal[indexInBlock].y, SCALE*qreal[indexInBlock].z, particlePosq.w); if (particleVelm.w != 0) {
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w); posq[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*qreal[indexInBlock].x, SCALE*qreal[indexInBlock].y, SCALE*qreal[indexInBlock].z, particlePosq.w);
velm[particle+indexInBlock*PADDED_NUM_ATOMS] = make_mixed4(SCALE*vreal[indexInBlock].x, SCALE*vreal[indexInBlock].y, SCALE*vreal[indexInBlock].z, particleVelm.w);
}
} }
} }
...@@ -179,12 +177,11 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix ...@@ -179,12 +177,11 @@ extern "C" __global__ void advanceVelocities(mixed4* velm, long long* force, mix
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3; int forceIndex = particle+indexInBlock*PADDED_NUM_ATOMS*3;
mixed4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w); particleVelm.x += forceScale*force[forceIndex]*(0.5f*dt*particleVelm.w);
particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w); particleVelm.y += forceScale*force[forceIndex+PADDED_NUM_ATOMS]*(0.5f*dt*particleVelm.w);
particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w); particleVelm.z += forceScale*force[forceIndex+PADDED_NUM_ATOMS*2]*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm; if (particleVelm.w != 0)
velm[index] = particleVelm;
} }
} }
......
...@@ -38,8 +38,6 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random ...@@ -38,8 +38,6 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed invMass = particleVelm.w; mixed invMass = particleVelm.w;
if (invMass == 0)
continue;
mixed c3_0 = c2_0*sqrt(nkT*invMass); mixed c3_0 = c2_0*sqrt(nkT*invMass);
// Forward FFT. // Forward FFT.
...@@ -75,7 +73,8 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random ...@@ -75,7 +73,8 @@ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random
// Inverse FFT. // Inverse FFT.
FFT_V_BACKWARD FFT_V_BACKWARD
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz; if (invMass != 0)
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
randomIndex += get_global_size(0); randomIndex += get_global_size(0);
} }
} }
...@@ -99,10 +98,9 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob ...@@ -99,10 +98,9 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
mixed4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w); particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm; if (particleVelm.w != 0)
velm[index] = particleVelm;
} }
// Evolve the free ring polymer by transforming to the frequency domain. // Evolve the free ring polymer by transforming to the frequency domain.
...@@ -117,8 +115,6 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob ...@@ -117,8 +115,6 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS];
mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS];
if (particleVelm.w == 0)
continue;
// Forward FFT. // Forward FFT.
...@@ -154,8 +150,10 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob ...@@ -154,8 +150,10 @@ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __glob
FFT_Q_BACKWARD FFT_Q_BACKWARD
FFT_V_BACKWARD FFT_V_BACKWARD
posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*qreal[indexInBlock].xyz; if (particleVelm.w != 0) {
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz; posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*qreal[indexInBlock].xyz;
velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz;
}
} }
} }
...@@ -172,10 +170,9 @@ __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mi ...@@ -172,10 +170,9 @@ __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mi
for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) {
int index = particle+indexInBlock*PADDED_NUM_ATOMS; int index = particle+indexInBlock*PADDED_NUM_ATOMS;
mixed4 particleVelm = velm[index]; mixed4 particleVelm = velm[index];
if (particleVelm.w == 0)
continue;
particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w); particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w);
velm[index] = particleVelm; if (particleVelm.w != 0)
velm[index] = particleVelm;
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment