/** * Perform the first part of integration: velocity step. */ KERNEL void integrateQTBPart1(int numAtoms, int paddedNumAtoms, mixed dt, int stepIndex, GLOBAL mixed4* RESTRICT velm, GLOBAL const mm_long* RESTRICT force, GLOBAL mixed* RESTRICT segmentVelocity, GLOBAL const int* RESTRICT atomOrder) { mixed fscale = dt/(mixed) 0x100000000; for (int atom = GLOBAL_ID; atom < numAtoms; atom += GLOBAL_SIZE) { int atomIndex = atomOrder[atom]; mixed4 velocity = velm[atom]; segmentVelocity[3*numAtoms*stepIndex + atomIndex] = velocity.x; segmentVelocity[3*numAtoms*stepIndex + numAtoms + atomIndex] = velocity.y; segmentVelocity[3*numAtoms*stepIndex + 2*numAtoms + atomIndex] = velocity.z; if (velocity.w != 0.0) { velocity.x += fscale*velocity.w*force[atom]; velocity.y += fscale*velocity.w*force[atom+paddedNumAtoms]; velocity.z += fscale*velocity.w*force[atom+paddedNumAtoms*2]; velm[atom] = velocity; } } } /** * Perform the second part of integration: position half step, then interact with heat bath, * then another position half step. */ KERNEL void integrateQTBPart2(int numAtoms, mixed dt, mixed friction, int stepIndex, GLOBAL mixed4* RESTRICT velm, GLOBAL mixed4* RESTRICT posDelta, GLOBAL mixed4* RESTRICT oldDelta, GLOBAL const mixed* RESTRICT randomForce, GLOBAL const int* RESTRICT atomOrder) { mixed halfdt = 0.5f*dt; mixed vscale = EXP(-dt*friction); for (int atom = GLOBAL_ID; atom < numAtoms; atom += GLOBAL_SIZE) { int atomIndex = atomOrder[atom]; mixed4 velocity = velm[atom]; if (velocity.w != 0.0) { mixed4 delta = make_mixed4(halfdt*velocity.x, halfdt*velocity.y, halfdt*velocity.z, 0); mixed fscale = dt*velocity.w; velocity.x = vscale*velocity.x + fscale*randomForce[3*numAtoms*stepIndex + atomIndex]; velocity.y = vscale*velocity.y + fscale*randomForce[3*numAtoms*stepIndex + numAtoms + atomIndex]; velocity.z = vscale*velocity.z + fscale*randomForce[3*numAtoms*stepIndex + 2*numAtoms + atomIndex]; velm[atom] = velocity; delta += make_mixed4(halfdt*velocity.x, halfdt*velocity.y, halfdt*velocity.z, 0); posDelta[atom] = delta; oldDelta[atom] = delta; } } } /** * Perform the third part of integration: apply constraint forces to velocities, then record * the constrained positions. */ KERNEL void integrateQTBPart3(int numAtoms, mixed dt, GLOBAL real4* RESTRICT posq, GLOBAL mixed4* RESTRICT velm, GLOBAL const mixed4* RESTRICT posDelta, GLOBAL const mixed4* RESTRICT oldDelta #ifdef USE_MIXED_PRECISION , GLOBAL real4* RESTRICT posqCorrection #endif ) { mixed invDt = 1/dt; for (int index = GLOBAL_ID; index < numAtoms; index += GLOBAL_SIZE) { mixed4 velocity = velm[index]; if (velocity.w != 0.0) { mixed4 delta = posDelta[index]; velocity.x += (delta.x-oldDelta[index].x)*invDt; velocity.y += (delta.y-oldDelta[index].y)*invDt; velocity.z += (delta.z-oldDelta[index].z)*invDt; velm[index] = velocity; #ifdef USE_MIXED_PRECISION real4 pos1 = posq[index]; real4 pos2 = posqCorrection[index]; mixed4 pos = make_mixed4(pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w); #else real4 pos = posq[index]; #endif pos.x += delta.x; pos.y += delta.y; pos.z += delta.z; #ifdef USE_MIXED_PRECISION posq[index] = make_real4((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w); posqCorrection[index] = make_real4(pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0); #else posq[index] = pos; #endif } } } /** * Update the buffer of white noise. */ KERNEL void generateNoise(int numAtoms, int segmentLength, GLOBAL float* RESTRICT noise, GLOBAL const float* RESTRICT random, unsigned int randomIndex) { randomIndex = 4*randomIndex; // Interpret it as float instead of float4 int fftLength = 3*segmentLength; for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) { // Copy segment 2 over to segment 1 for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE) noise[i*fftLength+j] = noise[i*fftLength+j+segmentLength]; SYNC_THREADS // Copy segment 3 over to segment 2 for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE) noise[i*fftLength+j+segmentLength] = noise[i*fftLength+j+2*segmentLength]; SYNC_THREADS // Fill segment 3 with new random values. for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE) noise[i*fftLength+j+2*segmentLength] = random[randomIndex+i*segmentLength+j]; SYNC_THREADS } } DEVICE mixed2 multiplyComplex(mixed2 c1, mixed2 c2) { return make_mixed2(c1.x*c2.x-c1.y*c2.y, c1.x*c2.y+c1.y*c2.x); } DEVICE mixed2 multiplyComplexConj(mixed2 c1, mixed2 c2) { return make_mixed2(c1.x*c2.x+c1.y*c2.y, c1.x*c2.y-c1.y*c2.x); } /** * Generate the random force for the next segment. */ KERNEL void generateRandomForce(int numAtoms, int segmentLength, mixed dt, mixed friction, GLOBAL float* RESTRICT noise, GLOBAL mixed* RESTRICT randomForce, GLOBAL mixed4* RESTRICT velm, GLOBAL mixed* RESTRICT thetad, GLOBAL mixed* RESTRICT cutoffFunction, GLOBAL int* RESTRICT particleType, GLOBAL mixed* RESTRICT adaptedFriction, GLOBAL mixed2* RESTRICT workspace) { const int fftLength = 3*segmentLength; const int numFreq = (fftLength+1)/2; GLOBAL mixed2* data0 = &workspace[GROUP_ID*3*fftLength]; GLOBAL mixed2* data1 = &data0[fftLength]; GLOBAL mixed2* w = &data1[fftLength]; for (int i = LOCAL_ID; i < fftLength; i += LOCAL_SIZE) w[i] = make_mixed2(cos(-i*2*M_PI/fftLength), sin(-i*2*M_PI/fftLength)); for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) { int atom = i/3; int axis = i%3; mixed invMass = velm[atom].w; int type = particleType[atom]; if (invMass != 0) { for (int j = LOCAL_ID; j < fftLength; j += LOCAL_SIZE) data0[j] = make_mixed2(noise[i*fftLength+j], 0); SYNC_THREADS FFT_FORWARD for (int j = LOCAL_ID; j < numFreq; j += LOCAL_SIZE) { mixed f = M_PI*j/(numFreq*dt); mixed gamma = adaptedFriction[type*numFreq+j]; mixed cw = (1 - 2*EXP(-dt*friction)*COS(f*dt) + EXP(-2*friction*dt)) / ((friction*friction+f*f)*dt*dt); mixed2 d = RECIP_DATA[j] * SQRT(cutoffFunction[j]*thetad[j]*cw*gamma/friction); RECIP_DATA[j] = d; if (j != 0 && j*2 != fftLength) RECIP_DATA[fftLength-j] = make_mixed2(d.x, -d.y); } SYNC_THREADS FFT_BACKWARD const mixed scale = SQRT(2*friction/(dt*invMass))/fftLength; for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE) randomForce[numAtoms*(3*j+axis)+atom] = scale*data0[segmentLength+j].x; SYNC_THREADS } } } /** * Update the friction rates used for generating noise, part 1: compute the error in the * fluctuation dissipation theorem. */ KERNEL void adaptFrictionPart1(int numAtoms, int segmentLength, GLOBAL const mixed4* RESTRICT velm, GLOBAL const int* RESTRICT particleType, GLOBAL const mixed* RESTRICT randomForce, GLOBAL const mixed* RESTRICT segmentVelocity, GLOBAL const mixed* RESTRICT adaptedFriction, GLOBAL mm_ulong* RESTRICT dfdt, GLOBAL mixed2* RESTRICT workspace) { const int fftLength = 3*segmentLength; const int numFreq = (fftLength+1)/2; GLOBAL mixed2* data0 = &workspace[GROUP_ID*3*fftLength]; GLOBAL mixed2* data1 = &data0[fftLength]; GLOBAL mixed2* w = &data1[fftLength]; for (int i = LOCAL_ID; i < fftLength; i += LOCAL_SIZE) w[i] = make_mixed2(cos(-i*2*M_PI/fftLength), sin(-i*2*M_PI/fftLength)); for (int i = GROUP_ID; i < 3*numAtoms; i += NUM_GROUPS) { int atom = i/3; int axis = i%3; int type = particleType[atom]; mixed invMass = velm[atom].w; if (invMass != 0) { // Pack the velocities and forces together so we can transform both at once. for (int j = LOCAL_ID; j < segmentLength; j += LOCAL_SIZE) data0[j] = make_mixed2(segmentVelocity[numAtoms*(3*j+axis)+atom], randomForce[numAtoms*(3*j+axis)+atom]); for (int j = segmentLength+LOCAL_ID; j < fftLength; j += LOCAL_SIZE) data0[j] = make_mixed2(0); SYNC_THREADS ADAPTATION_FFT for (int j = LOCAL_ID; j < numFreq; j += LOCAL_SIZE) { mixed2 d1 = ADAPTATION_RECIP[j]; mixed2 d2 = (j == 0 ? ADAPTATION_RECIP[0] : ADAPTATION_RECIP[fftLength-j]); mixed2 v = 0.5f*make_mixed2(d1.x+d2.x, d1.y-d2.y); mixed2 f = 0.5f*make_mixed2(d1.y+d2.y, -d1.x+d2.x); mixed cvv = v.x*v.x + v.y*v.y; mixed2 cvf = multiplyComplexConj(f, v); mixed dfdtinc = adaptedFriction[type*numFreq+j]*cvv/invMass - cvf.x; ATOMIC_ADD(&dfdt[type*numFreq+j], (mm_ulong) realToFixedPoint(dfdtinc)); } SYNC_THREADS } } } /** * Update the friction rates used for generating noise, part 2: update the friction based * on the error. */ KERNEL void adaptFrictionPart2(int numTypes, int segmentLength, mixed dt, GLOBAL const int* RESTRICT typeParticleCount, GLOBAL const float* RESTRICT typeAdaptationRate, GLOBAL mixed* RESTRICT adaptedFriction, GLOBAL const mm_long* RESTRICT dfdt) { int numFreq = (3*segmentLength+1)/2; for (int type = GROUP_ID; type < numTypes; type += NUM_GROUPS) { mixed scale = dt*typeAdaptationRate[type]/(3*typeParticleCount[type]*segmentLength)/(mixed) 0x100000000; for (int i = LOCAL_ID; i < numFreq; i += LOCAL_SIZE) { mixed delta = -scale*dfdt[type*numFreq+i]; adaptedFriction[type*numFreq+i] = max((mixed) 0, adaptedFriction[type*numFreq+i]+delta); } } }