Commit fee019b0 authored by Peter Eastman's avatar Peter Eastman
Browse files

Converted OpenCL platform to use improved Langevin integrator

parent 8725cede
...@@ -2995,10 +2995,6 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet ...@@ -2995,10 +2995,6 @@ void OpenCLIntegrateVerletStepKernel::execute(ContextImpl& context, const Verlet
OpenCLIntegrateLangevinStepKernel::~OpenCLIntegrateLangevinStepKernel() { OpenCLIntegrateLangevinStepKernel::~OpenCLIntegrateLangevinStepKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (xVector != NULL)
delete xVector;
if (vVector != NULL)
delete vVector;
} }
void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) { void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const LangevinIntegrator& integrator) {
...@@ -3010,12 +3006,7 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L ...@@ -3010,12 +3006,7 @@ void OpenCLIntegrateLangevinStepKernel::initialize(const System& system, const L
cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines); cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines);
kernel1 = cl::Kernel(program, "integrateLangevinPart1"); kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2"); kernel2 = cl::Kernel(program, "integrateLangevinPart2");
kernel3 = cl::Kernel(program, "integrateLangevinPart3"); params = new OpenCLArray<cl_float>(cl, 3, "langevinParams");
params = new OpenCLArray<cl_float>(cl, 11, "langevinParams");
xVector = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "xVector");
vVector = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "vVector");
vector<mm_float4> initialXVector(xVector->getSize(), mm_float4(0.0f, 0.0f, 0.0f, 0.0f));
xVector->upload(initialXVector);
prevStepSize = -1.0; prevStepSize = -1.0;
} }
...@@ -3028,19 +3019,10 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -3028,19 +3019,10 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer());
kernel1.setArg(4, params->getSize()*sizeof(cl_float), NULL); kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, xVector->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(6, vVector->getDeviceBuffer()); kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, params->getDeviceBuffer());
kernel2.setArg(3, params->getSize()*sizeof(cl_float), NULL);
kernel2.setArg<cl::Buffer>(4, xVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, vVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(6,integration.getRandom().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
} }
double temperature = integrator.getTemperature(); double temperature = integrator.getTemperature();
double friction = integrator.getFriction(); double friction = integrator.getFriction();
...@@ -3050,59 +3032,16 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -3050,59 +3032,16 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
double tau = (friction == 0.0 ? 0.0 : 1.0/friction); double tau = (friction == 0.0 ? 0.0 : 1.0/friction);
double kT = BOLTZ*temperature; double kT = BOLTZ*temperature;
double GDT = stepSize/tau; double vscale = std::exp(-stepSize/tau);
double EPH = exp(0.5*GDT); double fscale = (1-vscale)*tau;
double EMH = exp(-0.5*GDT); double noisescale = std::sqrt(2*kT/tau)*std::sqrt(0.5*(1-vscale*vscale)*tau);
double EP = exp(GDT);
double EM = exp(-GDT);
double B, C, D;
if (GDT >= 0.1) {
double term1 = EPH - 1.0;
term1 *= term1;
B = GDT*(EP - 1.0) - 4.0*term1;
C = GDT - 3.0 + 4.0*EMH - EM;
D = 2.0 - EPH - EMH;
}
else {
double term1 = 0.5*GDT;
double term2 = term1*term1;
double term4 = term2*term2;
double third = 1.0/3.0;
double o7_9 = 7.0/9.0;
double o1_12 = 1.0/12.0;
double o17_90 = 17.0/90.0;
double o7_30 = 7.0/30.0;
double o31_1260 = 31.0/1260.0;
double o_360 = 1.0/360.0;
B = term4*(third + term1*(third + term1*(o17_90 + term1*o7_9)));
C = term2*term1*(2.0*third + term1*(-0.5 + term1*(o7_30 + term1*(-o1_12 + term1*o31_1260))));
D = term2*(-1.0 + term2*(-o1_12 - term2*o_360));
}
double DOverTauC = D/(tau*C);
double TauOneMinusEM = tau*(1.0-EM);
double TauDOverEMMinusOne = tau*D/(EM - 1.0);
double fix1 = tau*(EPH - EMH);
if (fix1 == 0.0)
fix1 = stepSize;
double oneOverFix1 = 1.0/fix1;
double V = sqrt(kT*(1.0 - EM));
double X = tau*sqrt(kT*C);
double Yv = sqrt(kT*B/C);
double Yx = tau*sqrt(kT*B/(1.0 - EM));
vector<cl_float> p(params->getSize()); vector<cl_float> p(params->getSize());
p[0] = (cl_float) EM; p[0] = (cl_float) vscale;
p[1] = (cl_float) EM; p[1] = (cl_float) fscale;
p[2] = (cl_float) DOverTauC; p[2] = (cl_float) noisescale;
p[3] = (cl_float) TauOneMinusEM;
p[4] = (cl_float) TauDOverEMMinusOne;
p[5] = (cl_float) V;
p[6] = (cl_float) X;
p[7] = (cl_float) Yv;
p[8] = (cl_float) Yx;
p[9] = (cl_float) fix1;
p[10] = (cl_float) oneOverFix1;
params->upload(p); params->upload(p);
integration.getStepSize()[0].y = stepSize;
integration.getStepSize().upload();
prevTemp = temperature; prevTemp = temperature;
prevFriction = friction; prevFriction = friction;
prevStepSize = stepSize; prevStepSize = stepSize;
...@@ -3110,7 +3049,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -3110,7 +3049,7 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
// Call the first integration kernel. // Call the first integration kernel.
kernel1.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms())); kernel1.setArg<cl_uint>(6, integration.prepareRandomNumbers(cl.getPaddedNumAtoms()));
cl.executeKernel(kernel1, numAtoms); cl.executeKernel(kernel1, numAtoms);
// Apply constraints. // Apply constraints.
...@@ -3119,17 +3058,8 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang ...@@ -3119,17 +3058,8 @@ void OpenCLIntegrateLangevinStepKernel::execute(ContextImpl& context, const Lang
// Call the second integration kernel. // Call the second integration kernel.
kernel2.setArg<cl_uint>(7, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms()));
cl.executeKernel(kernel2, numAtoms); cl.executeKernel(kernel2, numAtoms);
// Reapply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
// Call the third integration kernel.
cl.executeKernel(kernel3, numAtoms);
// Update the time and step count. // Update the time and step count.
cl.setTime(cl.getTime()+stepSize); cl.setTime(cl.getTime()+stepSize);
...@@ -3263,10 +3193,6 @@ void OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons ...@@ -3263,10 +3193,6 @@ void OpenCLIntegrateVariableVerletStepKernel::execute(ContextImpl& context, cons
OpenCLIntegrateVariableLangevinStepKernel::~OpenCLIntegrateVariableLangevinStepKernel() { OpenCLIntegrateVariableLangevinStepKernel::~OpenCLIntegrateVariableLangevinStepKernel() {
if (params != NULL) if (params != NULL)
delete params; delete params;
if (xVector != NULL)
delete xVector;
if (vVector != NULL)
delete vVector;
} }
void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) { void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, const VariableLangevinIntegrator& integrator) {
...@@ -3278,13 +3204,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system, ...@@ -3278,13 +3204,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::initialize(const System& system,
cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines); cl::Program program = cl.createProgram(OpenCLKernelSources::langevin, defines);
kernel1 = cl::Kernel(program, "integrateLangevinPart1"); kernel1 = cl::Kernel(program, "integrateLangevinPart1");
kernel2 = cl::Kernel(program, "integrateLangevinPart2"); kernel2 = cl::Kernel(program, "integrateLangevinPart2");
kernel3 = cl::Kernel(program, "integrateLangevinPart3");
selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize"); selectSizeKernel = cl::Kernel(program, "selectLangevinStepSize");
params = new OpenCLArray<cl_float>(cl, 11, "langevinParams"); params = new OpenCLArray<cl_float>(cl, 3, "langevinParams");
xVector = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "xVector");
vVector = new OpenCLArray<mm_float4>(cl, cl.getPaddedNumAtoms(), "vVector");
vector<mm_float4> initialXVector(xVector->getSize(), mm_float4(0.0f, 0.0f, 0.0f, 0.0f));
xVector->upload(initialXVector);
blockSize = std::min(256, system.getNumParticles()); blockSize = std::min(256, system.getNumParticles());
blockSize = std::max(blockSize, params->getSize()); blockSize = std::max(blockSize, params->getSize());
blockSize = std::min(blockSize, (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>()); blockSize = std::min(blockSize, (int) cl.getDevice().getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>());
...@@ -3299,20 +3220,11 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co ...@@ -3299,20 +3220,11 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(1, cl.getForce().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer()); kernel1.setArg<cl::Buffer>(2, integration.getPosDelta().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(3, params->getDeviceBuffer());
kernel1.setArg(4, params->getSize()*sizeof(cl_float), NULL); kernel1.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(5, xVector->getDeviceBuffer()); kernel1.setArg<cl::Buffer>(5, integration.getRandom().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(6, vVector->getDeviceBuffer()); kernel2.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
kernel1.setArg<cl::Buffer>(7,integration.getRandom().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(0, cl.getVelm().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer()); kernel2.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
kernel2.setArg<cl::Buffer>(2, params->getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(4, integration.getStepSize().getDeviceBuffer());
kernel2.setArg(3, params->getSize()*sizeof(cl_float), NULL);
kernel2.setArg<cl::Buffer>(4, xVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(5, vVector->getDeviceBuffer());
kernel2.setArg<cl::Buffer>(6,integration.getRandom().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(0, cl.getPosq().getDeviceBuffer());
kernel3.setArg<cl::Buffer>(1, integration.getPosDelta().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(4, cl.getIntegrationUtilities().getStepSize().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(5, cl.getVelm().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(6, cl.getForce().getDeviceBuffer());
selectSizeKernel.setArg<cl::Buffer>(7, params->getDeviceBuffer()); selectSizeKernel.setArg<cl::Buffer>(7, params->getDeviceBuffer());
...@@ -3331,7 +3243,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co ...@@ -3331,7 +3243,7 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
// Call the first integration kernel. // Call the first integration kernel.
kernel1.setArg<cl_uint>(8, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms())); kernel1.setArg<cl_uint>(6, integration.prepareRandomNumbers(cl.getPaddedNumAtoms()));
cl.executeKernel(kernel1, numAtoms); cl.executeKernel(kernel1, numAtoms);
// Apply constraints. // Apply constraints.
...@@ -3340,17 +3252,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co ...@@ -3340,17 +3252,8 @@ void OpenCLIntegrateVariableLangevinStepKernel::execute(ContextImpl& context, co
// Call the second integration kernel. // Call the second integration kernel.
kernel2.setArg<cl_uint>(7, integration.prepareRandomNumbers(2*cl.getPaddedNumAtoms()));
cl.executeKernel(kernel2, numAtoms); cl.executeKernel(kernel2, numAtoms);
// Reapply constraints.
integration.applyConstraints(integrator.getConstraintTolerance());
// Call the third integration kernel.
cl.executeKernel(kernel3, numAtoms);
// Update the time and step count. // Update the time and step count.
cl.getIntegrationUtilities().getStepSize().download(); cl.getIntegrationUtilities().getStepSize().download();
......
...@@ -752,7 +752,7 @@ private: ...@@ -752,7 +752,7 @@ private:
class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel { class OpenCLIntegrateLangevinStepKernel : public IntegrateLangevinStepKernel {
public: public:
OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl), OpenCLIntegrateLangevinStepKernel(std::string name, const Platform& platform, OpenCLContext& cl) : IntegrateLangevinStepKernel(name, platform), cl(cl),
hasInitializedKernels(false), params(NULL), xVector(NULL), vVector(NULL) { hasInitializedKernels(false), params(NULL) {
} }
~OpenCLIntegrateLangevinStepKernel(); ~OpenCLIntegrateLangevinStepKernel();
/** /**
...@@ -774,9 +774,7 @@ private: ...@@ -774,9 +774,7 @@ private:
double prevTemp, prevFriction, prevStepSize; double prevTemp, prevFriction, prevStepSize;
bool hasInitializedKernels; bool hasInitializedKernels;
OpenCLArray<cl_float>* params; OpenCLArray<cl_float>* params;
OpenCLArray<mm_float4>* xVector; cl::Kernel kernel1, kernel2;
OpenCLArray<mm_float4>* vVector;
cl::Kernel kernel1, kernel2, kernel3;
}; };
/** /**
...@@ -869,9 +867,7 @@ private: ...@@ -869,9 +867,7 @@ private:
bool hasInitializedKernels; bool hasInitializedKernels;
int blockSize; int blockSize;
OpenCLArray<cl_float>* params; OpenCLArray<cl_float>* params;
OpenCLArray<mm_float4>* xVector; cl::Kernel kernel1, kernel2, selectSizeKernel;
OpenCLArray<mm_float4>* vVector;
cl::Kernel kernel1, kernel2, kernel3, selectSizeKernel;
double prevTemp, prevFriction, prevErrorTol; double prevTemp, prevFriction, prevErrorTol;
}; };
......
enum {EM, EM_V, DOverTauC, TauOneMinusEM_V, TauDOverEMMinusOne, V, X, Yv, Yx, Fix1, OneOverFix1, MaxParams}; enum {VelScale, ForceScale, NoiseScale, MaxParams};
/** /**
* Perform the first step of Langevin integration. * Perform the first step of Langevin integration.
*/ */
__kernel void integrateLangevinPart1(__global float4* velm, __global float4* force, __global float4* posDelta, __kernel void integrateLangevinPart1(__global float4* velm, __global float4* force, __global float4* posDelta,
__global float* paramBuffer, __local float* params, __global float4* xVector, __global float4* vVector, __global float* paramBuffer, __global float2* dt, __global float4* random, unsigned int randomIndex) {
__global float4* random, unsigned int randomIndex) { float vscale = paramBuffer[VelScale];
float fscale = paramBuffer[ForceScale];
// Load the parameters into local memory for faster access. float noisescale = paramBuffer[NoiseScale];
float stepSize = dt[0].y;
if (get_local_id(0) < MaxParams)
params[get_local_id(0)] = paramBuffer[get_local_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
int index = get_global_id(0); int index = get_global_id(0);
randomIndex += index; randomIndex += index;
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
float4 velocity = velm[index]; float4 velocity = velm[index];
float sqrtInvMass = sqrt(velocity.w); float sqrtInvMass = sqrt(velocity.w);
float4 vmh = (float4) (xVector[index].xyz*params[DOverTauC] + sqrtInvMass*params[Yv]*random[randomIndex].xyz, 0.0f); velocity.xyz = vscale*velocity.xyz + fscale*velocity.w*force[index].xyz + noisescale*sqrtInvMass*random[randomIndex].xyz;
float4 vVec = (float4) (sqrtInvMass*params[V]*random[randomIndex+PADDED_NUM_ATOMS].xyz, 0.0f);
randomIndex += get_global_size(0);
vVector[index] = vVec;
velocity.xyz = velocity.xyz*params[EM_V] + velocity.w*force[index].xyz*params[TauOneMinusEM_V] + vVec.xyz - params[EM]*vmh.xyz;
posDelta[index] = velocity*params[Fix1];
velm[index] = velocity; velm[index] = velocity;
index += get_global_size(0); posDelta[index] = stepSize*velocity;
}
}
/**
* Perform the second step of Langevin integration.
*/
__kernel void integrateLangevinPart2(__global float4* velm, __global float4* posDelta, __global float* paramBuffer,
__local float* params, __global float4* xVector, __global float4* vVector, __global float4* random, unsigned int randomIndex) {
// Load the parameters into local memory for faster access.
if (get_local_id(0) < MaxParams)
params[get_local_id(0)] = paramBuffer[get_local_id(0)];
barrier(CLK_LOCAL_MEM_FENCE);
int index = get_global_id(0);
randomIndex += index;
while (index < NUM_ATOMS) {
float4 delta = posDelta[index];
float4 velocity = velm[index];
float sqrtInvMass = sqrt(velocity.w);
velocity.xyz = delta.xyz*params[OneOverFix1];
float4 xmh = (float4) (vVector[index].xyz*params[TauDOverEMMinusOne] + sqrtInvMass*params[Yx]*random[randomIndex].xyz, 0.0f);
float4 xVec = (float4) (sqrtInvMass*params[X]*random[randomIndex+PADDED_NUM_ATOMS].xyz, 0.0f);
randomIndex += get_global_size(0); randomIndex += get_global_size(0);
delta.xyz += xVec.xyz - xmh.xyz;
posDelta[index] = delta;
velm[index] = velocity;
xVector[index] = xVec;
index += get_global_size(0); index += get_global_size(0);
} }
} }
/** /**
* Perform the third step of Langevin integration. * Perform the second step of Langevin integration.
*/ */
__kernel void integrateLangevinPart3(__global float4* posq, __global float4* posDelta) { __kernel void integrateLangevinPart2(__global float4* posq, __global float4* posDelta) {
int index = get_global_id(0); int index = get_global_id(0);
while (index < NUM_ATOMS) { while (index < NUM_ATOMS) {
float4 pos = posq[index]; float4 pos = posq[index];
...@@ -116,51 +80,12 @@ __kernel void selectLangevinStepSize(float maxStepSize, float errorTol, float ta ...@@ -116,51 +80,12 @@ __kernel void selectLangevinStepSize(float maxStepSize, float errorTol, float ta
// Recalculate the integration parameters. // Recalculate the integration parameters.
float gdt = newStepSize/tau; float vscale = exp(-newStepSize/tau);
float eph = exp(0.5f*gdt); float fscale = (1-vscale)*tau;
float emh = exp(-0.5f*gdt); float noisescale = sqrt(2*kT/tau)*sqrt(0.5f*(1-vscale*vscale)*tau);
float ep = exp(gdt); params[VelScale] = vscale;
float em = exp(-gdt); params[ForceScale] = fscale;
float em_v = exp(-0.5f*(oldStepSize+newStepSize)/tau); params[NoiseScale] = noisescale;
float b, c, d;
if (gdt >= 0.1f) {
float term1 = eph-1.0f;
term1 *= term1;
b = gdt*(ep-1.0f) - 4.0f*term1;
c = gdt - 3.0f + 4.0f*emh - em;
d = 2.0f - eph - emh;
}
else {
float term1 = 0.5f*gdt;
float term2 = term1*term1;
float term4 = term2*term2;
float third = 1.0f/3.0f;
float o7_9 = 7.0f/9.0f;
float o1_12 = 1.0f/12.0f;
float o17_90 = 17.0f/90.0f;
float o7_30 = 7.0f/30.0f;
float o31_1260 = 31.0f/1260.0f;
float o_360 = 1.0f/360.0f;
b = term4*(third + term1*(third + term1*(o17_90 + term1*o7_9)));
c = term2*term1*(2.0f*third + term1*(-0.5f + term1*(o7_30 + term1*(-o1_12 + term1*o31_1260))));
d = term2*(-1.0f + term2*(-o1_12 - term2*o_360));
}
float fix1 = tau*(eph - emh);
if (fix1 == 0.0f)
fix1 = newStepSize;
params[EM] = em;
params[EM_V] = em_v;
params[DOverTauC] = d/(tau*c);
params[TauOneMinusEM_V] = tau*(1.0f-em_v);
params[TauDOverEMMinusOne] = tau*d/(em - 1.0f);
params[Fix1] = fix1;
params[OneOverFix1] = 1.0f/fix1;
params[V] = sqrt(kT*(1.0f - em));
params[X] = tau*sqrt(kT*c);
params[Yv] = sqrt(kT*b/c);
params[Yx] = tau*sqrt(kT*b/(1.0f - em));
} }
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if (get_local_id(0) < MaxParams) if (get_local_id(0) < MaxParams)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment