mixed4 multiplyComplexRealPart(mixed2 c1, mixed4 c2r, mixed4 c2i) { return c1.x*c2r-c1.y*c2i; } mixed4 multiplyComplexImagPart(mixed2 c1, mixed4 c2r, mixed4 c2i) { return c1.x*c2i+c1.y*c2r; } mixed4 multiplyComplexRealPartConj(mixed2 c1, mixed4 c2r, mixed4 c2i) { return c1.x*c2r+c1.y*c2i; } mixed4 multiplyComplexImagPartConj(mixed2 c1, mixed4 c2r, mixed4 c2i) { return c1.x*c2i-c1.y*c2r; } /** * Apply the PILE-L thermostat. */ __kernel void applyPileThermostat(__global mixed4* velm, __global float4* random, unsigned int randomIndex, mixed dt, mixed kT, mixed friction) { const int numBlocks = get_global_size(0)/NUM_COPIES; const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES); const int indexInBlock = get_local_id(0)-blockStart; const mixed nkT = NUM_COPIES*kT; const mixed twown = 2.0f*nkT/HBAR; const mixed c1_0 = exp(-0.5f*dt*friction); const mixed c2_0 = sqrt(1.0f-c1_0*c1_0); __local mixed4 v[2*THREAD_BLOCK_SIZE]; __local mixed4 temp[2*THREAD_BLOCK_SIZE]; __local mixed2 w[NUM_COPIES]; __local mixed4* vreal = &v[blockStart]; __local mixed4* vimag = &v[blockStart+get_local_size(0)]; if (get_local_id(0) < NUM_COPIES) w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); barrier(CLK_LOCAL_MEM_FENCE); randomIndex += NUM_COPIES*(get_global_id(0)/NUM_COPIES); for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed invMass = particleVelm.w; mixed c3_0 = c2_0*sqrt(nkT*invMass); // Forward FFT. vreal[indexInBlock] = SCALE*particleVelm; vimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f); barrier(CLK_GLOBAL_MEM_FENCE); FFT_V_FORWARD // Apply the thermostat. if (indexInBlock == 0) { // Apply a local Langevin thermostat to the centroid mode. vreal[0].xyz = vreal[0].xyz*c1_0 + c3_0*convert_mixed4(random[randomIndex]).xyz; } else { // Use critical damping white noise for the remaining modes. int k = (indexInBlock <= NUM_COPIES/2 ? indexInBlock : NUM_COPIES-indexInBlock); const bool isCenter = (NUM_COPIES%2 == 0 && k == NUM_COPIES/2); const mixed wk = twown*sin(k*M_PI/NUM_COPIES); const mixed c1 = exp(-wk*dt); const mixed c2 = sqrt((1.0f-c1*c1)/2.0f) * (isCenter ? sqrt(2.0f) : 1.0f); const mixed c3 = c2*sqrt(nkT*invMass); mixed4 rand1 = c3*convert_mixed4(random[randomIndex+k]); mixed4 rand2 = (isCenter ? 0.0f : c3*convert_mixed4(random[randomIndex+NUM_COPIES-k])); vreal[indexInBlock].xyz = c1*vreal[indexInBlock].xyz + rand1.xyz; vimag[indexInBlock].xyz = c1*vimag[indexInBlock].xyz + (indexInBlock < NUM_COPIES/2 ? rand2.xyz : -rand2.xyz); } barrier(CLK_GLOBAL_MEM_FENCE); // Inverse FFT. FFT_V_BACKWARD if (invMass != 0) velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz; randomIndex += get_global_size(0); } } /** * Advance the positions and velocities. */ __kernel void integrateStep(__global mixed4* posq, __global mixed4* velm, __global real4* force, mixed dt, mixed kT) { const int numBlocks = get_global_size(0)/NUM_COPIES; const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES); const int indexInBlock = get_local_id(0)-blockStart; const mixed nkT = NUM_COPIES*kT; const mixed twown = 2.0f*nkT/HBAR; __local mixed4 q[2*THREAD_BLOCK_SIZE]; __local mixed4 v[2*THREAD_BLOCK_SIZE]; __local mixed4 temp[2*THREAD_BLOCK_SIZE]; __local mixed2 w[NUM_COPIES]; // Update velocities. for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { int index = particle+indexInBlock*PADDED_NUM_ATOMS; mixed4 particleVelm = velm[index]; particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w); if (particleVelm.w != 0) velm[index] = particleVelm; } // Evolve the free ring polymer by transforming to the frequency domain. __local mixed4* qreal = &q[blockStart]; __local mixed4* qimag = &q[blockStart+get_local_size(0)]; __local mixed4* vreal = &v[blockStart]; __local mixed4* vimag = &v[blockStart+get_local_size(0)]; if (get_local_id(0) < NUM_COPIES) w[indexInBlock] = (mixed2) (cos(-indexInBlock*2*M_PI/NUM_COPIES), sin(-indexInBlock*2*M_PI/NUM_COPIES)); barrier(CLK_LOCAL_MEM_FENCE); for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { mixed4 particlePosq = posq[particle+indexInBlock*PADDED_NUM_ATOMS]; mixed4 particleVelm = velm[particle+indexInBlock*PADDED_NUM_ATOMS]; // Forward FFT. qreal[indexInBlock] = SCALE*particlePosq; qimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f); vreal[indexInBlock] = SCALE*particleVelm; vimag[indexInBlock] = (mixed4) (0.0f, 0.0f, 0.0f, 0.0f); barrier(CLK_GLOBAL_MEM_FENCE); FFT_Q_FORWARD FFT_V_FORWARD // Apply the thermostat. if (indexInBlock == 0) { qreal[0].xyz += vreal[0].xyz*dt; qimag[0].xyz += vimag[0].xyz*dt; } else { const mixed wk = twown*sin(indexInBlock*M_PI/NUM_COPIES); const mixed wt = wk*dt; const mixed coswt = cos(wt); const mixed sinwt = sin(wt); const mixed4 vprimereal = vreal[indexInBlock]*coswt - qreal[indexInBlock]*(wk*sinwt); // Advance velocity from t to t+dt const mixed4 vprimeimag = vimag[indexInBlock]*coswt - qimag[indexInBlock]*(wk*sinwt); qreal[indexInBlock] = vreal[indexInBlock]*(sinwt/wk) + qreal[indexInBlock]*coswt; // Advance position from t to t+dt qimag[indexInBlock] = vimag[indexInBlock]*(sinwt/wk) + qimag[indexInBlock]*coswt; vreal[indexInBlock] = vprimereal; vimag[indexInBlock] = vprimeimag; } barrier(CLK_GLOBAL_MEM_FENCE); // Inverse FFT. FFT_Q_BACKWARD FFT_V_BACKWARD if (particleVelm.w != 0) { posq[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*qreal[indexInBlock].xyz; velm[particle+indexInBlock*PADDED_NUM_ATOMS].xyz = SCALE*vreal[indexInBlock].xyz; } } } /** * Advance the velocities by a half step. */ __kernel void advanceVelocities(__global mixed4* velm, __global real4* force, mixed dt) { const int numBlocks = get_global_size(0)/NUM_COPIES; const int blockStart = NUM_COPIES*(get_local_id(0)/NUM_COPIES); const int indexInBlock = get_local_id(0)-blockStart; // Update velocities. for (int particle = get_global_id(0)/NUM_COPIES; particle < NUM_ATOMS; particle += numBlocks) { int index = particle+indexInBlock*PADDED_NUM_ATOMS; mixed4 particleVelm = velm[index]; particleVelm.xyz += convert_mixed4(force[index]).xyz*(0.5f*dt*particleVelm.w); if (particleVelm.w != 0) velm[index] = particleVelm; } } /** * Copy a set of positions and velocities from the integrator's arrays to the context. */ __kernel void copyDataToContext(__global mixed4* srcVel, __global mixed4* dstVel, __global mixed4* srcPos, __global real4* dstPos, __global int* order, int copy) { const int base = copy*PADDED_NUM_ATOMS; for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) { int index = base+order[particle]; dstVel[particle] = srcVel[index]; dstPos[particle].xyz = convert_real4(srcPos[index]).xyz; } } /** * Copy a set of positions, velocities, and forces from the context to the integrator's arrays. */ __kernel void copyDataFromContext(__global real4* srcForce, __global real4* dstForce, __global mixed4* srcVel, __global mixed4* dstVel, __global real4* srcPos, __global mixed4* dstPos, __global int* order, int copy) { const int base = copy*PADDED_NUM_ATOMS; for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) { int index = base+order[particle]; dstForce[index] = srcForce[particle]; dstVel[index] = srcVel[particle]; dstPos[index].xyz = convert_mixed4(srcPos[particle]).xyz; } } /** * Atom positions in one copy have been modified. Apply the same offsets to all the other copies. */ __kernel void applyCellTranslations(__global mixed4* posq, __global real4* movedPos, __global int* order, int movedCopy) { for (int particle = get_global_id(0); particle < NUM_ATOMS; particle += get_global_size(0)) { int index = order[particle]; mixed4 delta = convert_mixed4(movedPos[particle])-posq[movedCopy*PADDED_NUM_ATOMS+index]; for (int copy = 0; copy < NUM_COPIES; copy++) posq[copy*PADDED_NUM_ATOMS+index].xyz += delta.xyz; } }