Unverified Commit c91894e8 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Bug fix to QTB (#5057)

* Big fix to QTB

* Fixed race condition
parent b4507392
...@@ -92,7 +92,7 @@ void CommonIntegrateQTBStepKernel::initialize(const System& system, const QTBInt ...@@ -92,7 +92,7 @@ void CommonIntegrateQTBStepKernel::initialize(const System& system, const QTBInt
replacements["FFT_FORWARD"] = createFFT(3*segmentLength, 0, outputIndex, true); replacements["FFT_FORWARD"] = createFFT(3*segmentLength, 0, outputIndex, true);
replacements["RECIP_DATA"] = (outputIndex == 0 ? "data0" : "data1"); replacements["RECIP_DATA"] = (outputIndex == 0 ? "data0" : "data1");
replacements["FFT_BACKWARD"] = createFFT(3*segmentLength, outputIndex, outputIndex, false); replacements["FFT_BACKWARD"] = createFFT(3*segmentLength, outputIndex, outputIndex, false);
replacements["ADAPTATION_FFT"] = createFFT(segmentLength, 0, outputIndex, true); replacements["ADAPTATION_FFT"] = createFFT(3*segmentLength, 0, outputIndex, true);
replacements["ADAPTATION_RECIP"] = (outputIndex == 0 ? "data0" : "data1"); replacements["ADAPTATION_RECIP"] = (outputIndex == 0 ? "data0" : "data1");
ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonKernelSources::qtb, replacements), defines); ComputeProgram program = cc.compileProgram(cc.replaceStrings(CommonKernelSources::qtb, replacements), defines);
kernel1 = program->createKernel("integrateQTBPart1"); kernel1 = program->createKernel("integrateQTBPart1");
......
...@@ -169,12 +169,13 @@ KERNEL void generateRandomForce(int numAtoms, int segmentLength, mixed dt, mixed ...@@ -169,12 +169,13 @@ KERNEL void generateRandomForce(int numAtoms, int segmentLength, mixed dt, mixed
KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mixed4* RESTRICT velm, GLOBAL const int* RESTRICT particleType, KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mixed4* RESTRICT velm, GLOBAL const int* RESTRICT particleType,
GLOBAL const mixed* RESTRICT randomForce, GLOBAL const mixed* RESTRICT segmentVelocity, GLOBAL const mixed* RESTRICT adaptedFriction, GLOBAL const mixed* RESTRICT randomForce, GLOBAL const mixed* RESTRICT segmentVelocity, GLOBAL const mixed* RESTRICT adaptedFriction,
GLOBAL mm_ulong* RESTRICT dfdt, GLOBAL mixed2* RESTRICT workspace) { GLOBAL mm_ulong* RESTRICT dfdt, GLOBAL mixed2* RESTRICT workspace) {
const int numFreq = (segmentLength+1)/2; const int fftLength = 3*segmentLength;
GLOBAL mixed2* data0 = &workspace[GROUP_ID*3*segmentLength]; const int numFreq = (fftLength+1)/2;
GLOBAL mixed2* data1 = &data0[segmentLength]; GLOBAL mixed2* data0 = &workspace[GROUP_ID*3*fftLength];
GLOBAL mixed2* w = &data1[segmentLength]; GLOBAL mixed2* data1 = &data0[fftLength];
for (int i = LOCAL_ID; i < segmentLength; i += LOCAL_SIZE) GLOBAL mixed2* w = &data1[fftLength];
w[i] = make_mixed2(cos(-i*2*M_PI/segmentLength), sin(-i*2*M_PI/segmentLength)); for (int i = LOCAL_ID; i < fftLength; i += LOCAL_SIZE)
w[i] = make_mixed2(cos(-i*2*M_PI/fftLength), sin(-i*2*M_PI/fftLength));
for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) { for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) {
int atom = i/3; int atom = i/3;
int axis = i%3; int axis = i%3;
...@@ -184,11 +185,13 @@ KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mix ...@@ -184,11 +185,13 @@ KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mix
// Pack the velocities and forces together so we can transform both at once. // Pack the velocities and forces together so we can transform both at once.
for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE) for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE)
data0[j] = make_mixed2(segmentVelocity[numAtoms*(3*j+axis)+atom], randomForce[numAtoms*(3*j+axis)+atom]); data0[j] = make_mixed2(segmentVelocity[numAtoms*(3*j+axis)+atom], randomForce[numAtoms*(3*j+axis)+atom]);
for (int j = segmentLength+LOCAL_ID; j < fftLength; j += LOCAL_SIZE)
data0[j] = make_mixed2(0);
SYNC_THREADS SYNC_THREADS
ADAPTATION_FFT ADAPTATION_FFT
for (int j = LOCAL_ID; j < numFreq; j += LOCAL_SIZE) { for (int j = LOCAL_ID; j < numFreq; j += LOCAL_SIZE) {
mixed2 d1 = ADAPTATION_RECIP[j]; mixed2 d1 = ADAPTATION_RECIP[j];
mixed2 d2 = (j == 0 ? ADAPTATION_RECIP[0] : ADAPTATION_RECIP[segmentLength-j]); mixed2 d2 = (j == 0 ? ADAPTATION_RECIP[0] : ADAPTATION_RECIP[fftLength-j]);
mixed2 v = 0.5f*make_mixed2(d1.x+d2.x, d1.y-d2.y); mixed2 v = 0.5f*make_mixed2(d1.x+d2.x, d1.y-d2.y);
mixed2 f = 0.5f*make_mixed2(d1.y+d2.y, -d1.x+d2.x); mixed2 f = 0.5f*make_mixed2(d1.y+d2.y, -d1.x+d2.x);
mixed cvv = v.x*v.x + v.y*v.y; mixed cvv = v.x*v.x + v.y*v.y;
...@@ -196,6 +199,7 @@ KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mix ...@@ -196,6 +199,7 @@ KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mix
mixed dfdtinc = adaptedFriction[type*numFreq+j]*cvv/invMass - cvf.x; mixed dfdtinc = adaptedFriction[type*numFreq+j]*cvv/invMass - cvf.x;
ATOMIC_ADD(&dfdt[type*numFreq+j], (mm_ulong) realToFixedPoint(dfdtinc)); ATOMIC_ADD(&dfdt[type*numFreq+j], (mm_ulong) realToFixedPoint(dfdtinc));
} }
SYNC_THREADS
} }
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment