Unverified Commit 98d81730 authored by Peter Eastman's avatar Peter Eastman Committed by GitHub
Browse files

Converted more code to common platform (#3073)

* Converted more code to common platform

* Converted more code to common platform
parent 72c70cfe
......@@ -413,6 +413,8 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines["ACOS"] = "acos";
compilationDefines["ASIN"] = "asin";
compilationDefines["ATAN"] = "atan";
compilationDefines["ERF"] = "erf";
compilationDefines["ERFC"] = "erfc";
// Set defines for applying periodic boundary conditions.
......
......@@ -72,9 +72,9 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == UpdateStateDataKernel::Name())
return new OpenCLUpdateStateDataKernel(name, platform, cl);
if (name == ApplyConstraintsKernel::Name())
return new OpenCLApplyConstraintsKernel(name, platform, cl);
return new CommonApplyConstraintsKernel(name, platform, cl);
if (name == VirtualSitesKernel::Name())
return new OpenCLVirtualSitesKernel(name, platform, cl);
return new CommonVirtualSitesKernel(name, platform, cl);
if (name == CalcHarmonicBondForceKernel::Name())
return new CommonCalcHarmonicBondForceKernel(name, platform, cl, context.getSystem());
if (name == CalcCustomBondForceKernel::Name())
......@@ -134,7 +134,7 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
if (name == IntegrateNoseHooverStepKernel::Name())
return new CommonIntegrateNoseHooverStepKernel(name, platform, cl);
if (name == ApplyMonteCarloBarostatKernel::Name())
return new OpenCLApplyMonteCarloBarostatKernel(name, platform, cl);
return new CommonApplyMonteCarloBarostatKernel(name, platform, cl);
if (name == RemoveCMMotionKernel::Name())
return new CommonRemoveCMMotionKernel(name, platform, cl);
throw OpenMMException((std::string("Tried to create kernel with illegal kernel name '")+name+"'").c_str());
......
This diff is collapsed.
real4 v0 = pos2-pos1;
real4 v1 = pos2-pos3;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
#endif
real4 cp = cross(v0, v1);
real rp = cp.x*cp.x + cp.y*cp.y + cp.z*cp.z;
rp = max(SQRT(rp), (real) 1.0e-06f);
real r21 = v0.x*v0.x + v0.y*v0.y + v0.z*v0.z;
real r23 = v1.x*v1.x + v1.y*v1.y + v1.z*v1.z;
real dot = v0.x*v1.x + v0.y*v1.y + v0.z*v1.z;
real cosine = clamp(dot*RSQRT(r21*r23), (real) -1, (real) 1);
real theta = acos(cosine);
COMPUTE_FORCE
real4 force1 = cross(v0, cp)*(dEdAngle/(r21*rp));
real4 force3 = cross(cp, v1)*(dEdAngle/(r23*rp));
real4 force2 = -force1-force3;
real4 delta = pos2-pos1;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r = SQRT(delta.x*delta.x + delta.y*delta.y + delta.z*delta.z);
COMPUTE_FORCE
dEdR = (r > 0.0f) ? (dEdR / r) : 0.0f;
delta.xyz *= dEdR;
real4 force1 = delta;
real4 force2 = -delta;
\ No newline at end of file
__kernel void applyPositionDeltas(__global real4* restrict posq, __global real4* restrict posqCorrection, __global mixed4* restrict posDelta) {
for (unsigned int index = get_global_id(0); index < NUM_ATOMS; index += get_global_size(0)) {
#ifdef USE_MIXED_PRECISION
real4 pos1 = posq[index];
real4 pos2 = posqCorrection[index];
mixed4 pos = (mixed4) (pos1.x+(mixed)pos2.x, pos1.y+(mixed)pos2.y, pos1.z+(mixed)pos2.z, pos1.w);
#else
mixed4 pos = posq[index];
#endif
pos.xyz += posDelta[index].xyz;
#ifdef USE_MIXED_PRECISION
posq[index] = (real4) ((real) pos.x, (real) pos.y, (real) pos.z, (real) pos.w);
posqCorrection[index] = (real4) (pos.x-(real) pos.x, pos.y-(real) pos.y, pos.z-(real) pos.z, 0);
#else
posq[index] = pos;
#endif
}
}
{
#ifdef USE_DOUBLE_PRECISION
unsigned long includeInteraction;
#else
unsigned int includeInteraction;
#endif
#if USE_EWALD
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
#if HAS_COULOMB
const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2*invR;
#else
const real prefactor = 0.0f;
#endif
#ifdef USE_DOUBLE_PRECISION
const real erfcAlphaR = erfc(alphaR);
#else
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 1.5e-7.
const real t = RECIP(1.0f+0.3275911f*alphaR);
const real erfcAlphaR = (0.254829592f+(-0.284496736f+(1.421413741f+(-1.453152027f+1.061405429f*t)*t)*t)*t)*t*expAlphaRSqr;
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real eps = SIGMA_EPSILON1.y*SIGMA_EPSILON2.y;
real epssig6 = sig6*eps;
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6 - 1.0f);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
#if DO_LJPME
// The multiplicative term to correct for the multiplicative terms that are always
// present in reciprocal space.
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const float2 sigExpProd = SIGMA_EPSILON1*SIGMA_EPSILON2;
const real c6 = 64*sigExpProd.x*sigExpProd.x*sigExpProd.x*sigExpProd.y;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
// The multiplicative grid term
ljEnergy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
// The potential shift accounts for the step at the cutoff introduced by the
// transition from additive to multiplicative combintion rules and is only
// needed for the real (not excluded) terms. By addin these terms to ljEnergy
// instead of tempEnergy here, the includeInteraction mask is correctly applied.
sig2 = sig*sig;
sig6 = sig2*sig2*sig2*INVCUT6;
epssig6 = eps*sig6;
// The additive part of the potential shift
ljEnergy += epssig6*(1.0f - sig6);
// The multiplicative part of the potential shift
ljEnergy += MULTSHIFT6*c6;
#endif
tempForce += prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, ljEnergy + prefactor*erfcAlphaR, includeInteraction);
#else
tempForce = prefactor*(erfcAlphaR+alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
tempEnergy += select((real) 0, prefactor*erfcAlphaR, includeInteraction);
#endif
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#else
#ifdef USE_CUTOFF
includeInteraction = (!isExcluded && r2 < CUTOFF_SQUARED);
#else
includeInteraction = (!isExcluded);
#endif
real tempForce = 0;
#if HAS_LENNARD_JONES
real sig = SIGMA_EPSILON1.x + SIGMA_EPSILON2.x;
real sig2 = invR*sig;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real epssig6 = sig6*(SIGMA_EPSILON1.y*SIGMA_EPSILON2.y);
tempForce = epssig6*(12.0f*sig6 - 6.0f);
real ljEnergy = epssig6*(sig6-1);
#if USE_LJ_SWITCH
if (r > LJ_SWITCH_CUTOFF) {
real x = r-LJ_SWITCH_CUTOFF;
real switchValue = 1+x*x*x*(LJ_SWITCH_C3+x*(LJ_SWITCH_C4+x*LJ_SWITCH_C5));
real switchDeriv = x*x*(3*LJ_SWITCH_C3+x*(4*LJ_SWITCH_C4+x*5*LJ_SWITCH_C5));
tempForce = tempForce*switchValue - ljEnergy*switchDeriv*r;
ljEnergy *= switchValue;
}
#endif
ljEnergy = select((real) 0, ljEnergy, includeInteraction);
tempEnergy += ljEnergy;
#endif
#if HAS_COULOMB
#ifdef USE_CUTOFF
const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2;
tempForce += prefactor*(invR - 2.0f*REACTION_FIELD_K*r2);
tempEnergy += select((real) 0, prefactor*(invR + REACTION_FIELD_K*r2 - REACTION_FIELD_C), includeInteraction);
#else
const real prefactor = ONE_4PI_EPS0*CHARGE1*CHARGE2*invR;
tempForce += prefactor;
tempEnergy += select((real) 0, prefactor, includeInteraction);
#endif
#endif
dEdR += select((real) 0, tempForce*invR*invR, includeInteraction);
#endif
}
/**
* Copy the positions and velocities to the inner context.
*/
__kernel void copyState(__global real4* posq, __global real4* posqCorrection, __global mixed4* velm, __global int* restrict atomOrder,
__global real4* innerPosq, __global real4* innerPosqCorrection, __global mixed4* innerVelm, __global int* restrict innerInvAtomOrder,
int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
int index = innerInvAtomOrder[atomOrder[i]];
innerPosq[index] = posq[i];
innerVelm[index] = velm[i];
#ifdef USE_MIXED_PRECISION
innerPosqCorrection[index] = posqCorrection[i];
#endif
}
}
/**
* Copy the forces back to the main context.
*/
__kernel void copyForces(__global real4* forces, __global int* restrict invAtomOrder, __global real4* innerForces,
__global int* restrict innerAtomOrder, int numAtoms) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
int index = invAtomOrder[innerAtomOrder[i]];
forces[index] = innerForces[i];
}
}
/**
* Add all the forces from the CVs.
*/
__kernel void addForces(__global real4* forces, int numAtoms
PARAMETER_ARGUMENTS) {
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
real4 f = forces[i];
ADD_FORCES
forces[i] = f;
}
}
\ No newline at end of file
real2 multofReal2(real2 a, real2 b) {
return (real2) (a.x*b.x - a.y*b.y, a.x*b.y + a.y*b.x);
}
/**
* Precompute the cosine and sine sums which appear in each force term.
*/
__kernel void calculateEwaldCosSinSums(__global mixed* restrict energyBuffer, __global const real4* restrict posq, __global real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
const unsigned int ksizex = 2*KMAX_X-1;
const unsigned int ksizey = 2*KMAX_Y-1;
const unsigned int ksizez = 2*KMAX_Z-1;
const unsigned int totalK = ksizex*ksizey*ksizez;
unsigned int index = get_global_id(0);
mixed energy = 0;
while (index < (KMAX_Y-1)*ksizez+KMAX_Z)
index += get_global_size(0);
while (index < totalK) {
// Find the wave vector (kx, ky, kz) this index corresponds to.
int rx = index/(ksizey*ksizez);
int remainder = index - rx*ksizey*ksizez;
int ry = remainder/ksizez;
int rz = remainder - ry*ksizez - KMAX_Z + 1;
ry += -KMAX_Y + 1;
real kx = rx*reciprocalPeriodicBoxSize.x;
real ky = ry*reciprocalPeriodicBoxSize.y;
real kz = rz*reciprocalPeriodicBoxSize.z;
// Compute the sum for this wave vector.
real2 sum = 0.0f;
for (int atom = 0; atom < NUM_ATOMS; atom++) {
real4 apos = posq[atom];
real phase = apos.x*kx;
real2 structureFactor = (real2) (cos(phase), sin(phase));
phase = apos.y*ky;
structureFactor = multofReal2(structureFactor, (real2) (cos(phase), sin(phase)));
phase = apos.z*kz;
structureFactor = multofReal2(structureFactor, (real2) (cos(phase), sin(phase)));
sum += apos.w*structureFactor;
}
cosSinSum[index] = sum;
// Compute the contribution to the energy.
real k2 = kx*kx + ky*ky + kz*kz;
real ak = EXP(k2*EXP_COEFFICIENT) / k2;
energy += reciprocalCoefficient*ak*(sum.x*sum.x + sum.y*sum.y);
index += get_global_size(0);
}
energyBuffer[get_global_id(0)] += energy;
}
/**
* Compute the reciprocal space part of the Ewald force, using the precomputed sums from the
* previous routine.
*/
__kernel void calculateEwaldForces(__global real4* restrict forceBuffers, __global const real4* restrict posq, __global const real2* restrict cosSinSum, real4 reciprocalPeriodicBoxSize, real reciprocalCoefficient) {
unsigned int atom = get_global_id(0);
while (atom < NUM_ATOMS) {
real4 force = forceBuffers[atom];
real4 apos = posq[atom];
// Loop over all wave vectors.
int lowry = 0;
int lowrz = 1;
for (int rx = 0; rx < KMAX_X; rx++) {
real kx = rx*reciprocalPeriodicBoxSize.x;
for (int ry = lowry; ry < KMAX_Y; ry++) {
real ky = ry*reciprocalPeriodicBoxSize.y;
real phase = apos.x*kx;
real2 tab_xy = (real2) (cos(phase), sin(phase));
phase = apos.y*ky;
tab_xy = multofReal2(tab_xy, (real2) (cos(phase), sin(phase)));
for (int rz = lowrz; rz < KMAX_Z; rz++) {
real kz = rz*reciprocalPeriodicBoxSize.z;
// Compute the force contribution of this wave vector.
int index = rx*(KMAX_Y*2-1)*(KMAX_Z*2-1) + (ry+KMAX_Y-1)*(KMAX_Z*2-1) + (rz+KMAX_Z-1);
real k2 = kx*kx + ky*ky + kz*kz;
real ak = EXP(k2*EXP_COEFFICIENT)/k2;
phase = apos.z*kz;
real2 structureFactor = multofReal2(tab_xy, (real2) (cos(phase), sin(phase)));
real2 sum = cosSinSum[index];
real dEdR = 2*reciprocalCoefficient*ak*apos.w*(sum.x*structureFactor.y - sum.y*structureFactor.x);
force.x += dEdR*kx;
force.y += dEdR*ky;
force.z += dEdR*kz;
lowrz = 1 - KMAX_Z;
}
lowry = 1 - KMAX_Y;
}
}
// Record the force on the atom.
forceBuffers[atom] = force;
atom += get_global_size(0);
}
}
/**
* Scale the particle positions with each axis independent.
*/
__kernel void scalePositions(float scaleX, float scaleY, float scaleZ, int numMolecules, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, __global real4* restrict posq,
__global const int* restrict moleculeAtoms, __global const int* restrict moleculeStartIndex) {
for (int index = get_global_id(0); index < numMolecules; index += get_global_size(0)) {
int first = moleculeStartIndex[index];
int last = moleculeStartIndex[index+1];
int numAtoms = last-first;
// Find the center of each molecule.
real3 center = (real3) 0;
for (int atom = first; atom < last; atom++)
center += posq[moleculeAtoms[atom]].xyz;
center /= (real) numAtoms;
// Move it into the first periodic box.
real3 oldCenter = center;
APPLY_PERIODIC_TO_POS(center)
real3 delta = oldCenter-center;;
real3 scaleXYZ = (real3) (scaleX, scaleY, scaleZ);
// Now scale the position of the molecule center.
delta = center*(scaleXYZ-1)-delta;
for (int atom = first; atom < last; atom++) {
real4 pos = posq[moleculeAtoms[atom]];
pos.xyz += delta.xyz;
posq[moleculeAtoms[atom]] = pos;
}
}
}
float4 exceptionParams = PARAMS[index];
real4 delta = pos2-pos1;
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real invR = RSQRT(r2);
real sig2 = invR*exceptionParams.y;
sig2 *= sig2;
real sig6 = sig2*sig2*sig2;
real dEdR = exceptionParams.z*(12.0f*sig6-6.0f)*sig6;
real tempEnergy = exceptionParams.z*(sig6-1.0f)*sig6;
dEdR += exceptionParams.x*invR;
dEdR *= invR*invR;
tempEnergy += exceptionParams.x*invR;
energy += tempEnergy;
delta.xyz *= dEdR;
real4 force1 = -delta;
real4 force2 = delta;
/**
* Compute the nonbonded parameters for particles and exceptions.
*/
__kernel void computeParameters(__global mixed* restrict energyBuffer, int includeSelfEnergy, __global real* restrict globalParams,
int numAtoms, __global const float4* restrict baseParticleParams, __global real4* restrict posq, __global real* restrict charge,
__global float2* restrict sigmaEpsilon, __global float4* restrict particleParamOffsets, __global int* restrict particleOffsetIndices
#ifdef HAS_EXCEPTIONS
, int numExceptions, __global const float4* restrict baseExceptionParams, __global float4* restrict exceptionParams,
__global float4* restrict exceptionParamOffsets, __global int* restrict exceptionOffsetIndices
#endif
) {
mixed energy = 0;
// Compute particle parameters.
for (int i = get_global_id(0); i < numAtoms; i += get_global_size(0)) {
float4 params = baseParticleParams[i];
#ifdef HAS_OFFSETS
int start = particleOffsetIndices[i], end = particleOffsetIndices[i+1];
for (int j = start; j < end; j++) {
float4 offset = particleParamOffsets[j];
real value = globalParams[(int) offset.w];
params.x += value*offset.x;
params.y += value*offset.y;
params.z += value*offset.z;
}
#endif
#ifdef USE_POSQ_CHARGES
posq[i].w = params.x;
#else
charge[i] = params.x;
#endif
sigmaEpsilon[i] = (float2) (0.5f*params.y, 2*SQRT(params.z));
#ifdef HAS_OFFSETS
#ifdef INCLUDE_EWALD
energy -= EWALD_SELF_ENERGY_SCALE*params.x*params.x;
#endif
#ifdef INCLUDE_LJPME
real sig3 = params.y*params.y*params.y;
energy += LJPME_SELF_ENERGY_SCALE*sig3*sig3*params.z;
#endif
#endif
}
// Compute exception parameters.
#ifdef HAS_EXCEPTIONS
for (int i = get_global_id(0); i < numExceptions; i += get_global_size(0)) {
float4 params = baseExceptionParams[i];
#ifdef HAS_OFFSETS
int start = exceptionOffsetIndices[i], end = exceptionOffsetIndices[i+1];
for (int j = start; j < end; j++) {
float4 offset = exceptionParamOffsets[j];
real value = globalParams[(int) offset.w];
params.x += value*offset.x;
params.y += value*offset.y;
params.z += value*offset.z;
}
#endif
exceptionParams[i] = (float4) ((float) (ONE_4PI_EPS0*params.x), (float) params.y, (float) (4*params.z), 0);
}
#endif
if (includeSelfEnergy)
energyBuffer[get_global_id(0)] += energy;
}
/**
* Compute parameters for subtracting the reciprocal part of excluded interactions.
*/
__kernel void computeExclusionParameters(__global real4* restrict posq, __global real* restrict charge, __global float2* restrict sigmaEpsilon,
int numExclusions, __global const int2* restrict exclusionAtoms, __global float4* restrict exclusionParams) {
for (int i = get_global_id(0); i < numExclusions; i += get_global_size(0)) {
int2 atoms = exclusionAtoms[i];
#ifdef USE_POSQ_CHARGES
real chargeProd = posq[atoms.x].w*posq[atoms.y].w;
#else
real chargeProd = charge[atoms.x]*charge[atoms.y];
#endif
#ifdef INCLUDE_LJPME
float2 sigEps1 = sigmaEpsilon[atoms.x];
float2 sigEps2 = sigmaEpsilon[atoms.y];
float sigma = sigEps1.x*sigEps2.x;
float epsilon = sigEps1.y*sigEps2.y;
#else
float sigma = 0;
float epsilon = 0;
#endif
exclusionParams[i] = (float4) ((float) (ONE_4PI_EPS0*chargeProd), sigma, epsilon, 0);
}
}
const float4 exclusionParams = PARAMS[index];
real3 delta = (real3) (pos2.x-pos1.x, pos2.y-pos1.y, pos2.z-pos1.z);
#if USE_PERIODIC
APPLY_PERIODIC_TO_DELTA(delta)
#endif
const real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
const real r = SQRT(r2);
const real invR = RECIP(r);
const real alphaR = EWALD_ALPHA*r;
const real expAlphaRSqr = EXP(-alphaR*alphaR);
real tempForce = 0.0f;
if (alphaR > 1e-6f) {
const real erfAlphaR = erf(alphaR);
const real prefactor = exclusionParams.x*invR;
tempForce = -prefactor*(erfAlphaR-alphaR*expAlphaRSqr*TWO_OVER_SQRT_PI);
energy -= prefactor*erfAlphaR;
}
else {
energy -= TWO_OVER_SQRT_PI*EWALD_ALPHA*exclusionParams.x;
}
#if DO_LJPME
const real dispersionAlphaR = EWALD_DISPERSION_ALPHA*r;
const real dar2 = dispersionAlphaR*dispersionAlphaR;
const real dar4 = dar2*dar2;
const real dar6 = dar4*dar2;
const real invR2 = invR*invR;
const real expDar2 = EXP(-dar2);
const real c6 = 64*exclusionParams.y*exclusionParams.y*exclusionParams.y*exclusionParams.z;
const real coef = invR2*invR2*invR2*c6;
const real eprefac = 1.0f + dar2 + 0.5f*dar4;
const real dprefac = eprefac + dar6/6.0f;
energy += coef*(1.0f - expDar2*eprefac);
tempForce += 6.0f*coef*(1.0f - expDar2*dprefac);
#endif
if (r > 0)
delta *= tempForce*invR*invR;
real3 force1 = -delta;
real3 force2 = delta;
const real PI = 3.14159265358979323846f;
real4 v0 = (real4) (pos1.xyz-pos2.xyz, 0.0f);
real4 v1 = (real4) (pos3.xyz-pos2.xyz, 0.0f);
real4 v2 = (real4) (pos3.xyz-pos4.xyz, 0.0f);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA(v0)
APPLY_PERIODIC_TO_DELTA(v1)
APPLY_PERIODIC_TO_DELTA(v2)
#endif
real4 cp0 = cross(v0, v1);
real4 cp1 = cross(v1, v2);
real cosangle = dot(normalize(cp0), normalize(cp1));
real theta;
if (cosangle > 0.99f || cosangle < -0.99f) {
// We're close to the singularity in acos(), so take the cross product and use asin() instead.
real4 cross_prod = cross(cp0, cp1);
real scale = dot(cp0, cp0)*dot(cp1, cp1);
theta = asin(SQRT(dot(cross_prod, cross_prod)/scale));
if (cosangle < 0)
theta = PI-theta;
}
else
theta = acos(cosangle);
theta = (dot(v0, cp1) >= 0 ? theta : -theta);
COMPUTE_FORCE
real normCross1 = dot(cp0, cp0);
real normSqrBC = dot(v1, v1);
real normBC = SQRT(normSqrBC);
real normCross2 = dot(cp1, cp1);
real dp = 1.0f/normSqrBC;
real4 ff = (real4) ((-dEdAngle*normBC)/normCross1, dot(v0, v1)*dp, dot(v2, v1)*dp, (dEdAngle*normBC)/normCross2);
real4 force1 = ff.x*cp0;
real4 force4 = ff.w*cp1;
real4 s = ff.y*force1 - ff.z*force4;
real4 force2 = s-force1;
real4 force3 = -s-force4;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment