Commit 7ed51ce4 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Bonded forces ok

parent 08c2e027
......@@ -99,6 +99,17 @@ class OpenMMBrookInterface {
int getNumberOfParticles( void ) const;
/**
* Set number of particles
*
* @param numberOfParticles number of particles
*
* @return DefaultReturnValue
*
*/
int setNumberOfParticles( int numberOfParticles );
/**
* Get particle stream width
*
......@@ -166,10 +177,13 @@ class OpenMMBrookInterface {
* Compute energy
*
* @param context OpenMMContextImpl context
* @param system system reference
*
* @return potential energy
*
*/
double computeEnergy( OpenMMContextImpl& context );
double computeEnergy( OpenMMContextImpl& context, System& system );
/**
* Set trigger Force Kernel
......@@ -207,6 +221,24 @@ class OpenMMBrookInterface {
void* getTriggerEnergyKernel( void ) const;
/**
* Get BrookNonBonded reference
*
* @return BrookNonBonded reference
*
*/
BrookNonBonded& getBrookNonBonded( void );
/**
* Get BrookGbsa reference
*
* @return BrookGbsa reference
*
*/
BrookGbsa& getBrookGbsa( void );
/**
* Get BrookBondParameters for harmonic bond force
*
......@@ -392,9 +424,9 @@ class OpenMMBrookInterface {
// Brook bonded, nonbonded, Gbsa
BrookBonded* _brookBonded;
BrookNonBonded* _brookNonBonded;
BrookGbsa* _brookGbsa;
BrookBonded _brookBonded;
BrookNonBonded _brookNonBonded;
BrookGbsa _brookGbsa;
void* _triggerForceKernel;
void* _triggerEnergyKernel;
......
......@@ -61,6 +61,7 @@ kernel float4 get_r2_CDLJ( float3 d1, float3 d2, float3 d3, float3 d4 ) {
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
return r2;
}
kernel void knbforce_CDLJ (
float natoms,
float dupfac,
......@@ -270,177 +271,7 @@ if( exclmask.w > 0.5f ){
}
kernel void knbforce_CDLJ2(
float natoms,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float4 posq[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2;
float isig1, isig2;
float ieps1, ieps2;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
outforce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
a_iatom = 2.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
linind = fmod( a_iatom, natoms );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
jpos = posq[ iatom ];
ipos1 = jpos.xyz;
qi1 = jpos.w;
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
iatom.x += 1;
jpos = posq[ iatom ];
ipos2 = jpos.xyz;
qi2 = jpos.w;
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
breakflag = 1.0f;
jatom.y = 0.0f;
which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
jend = 1 + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1 - 0.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind;
exclind.y = jstart * strwidth/4.0f;
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos = posq[ jatom ];
jpos1 = jpos.xyz;
qj.x = jpos.w;
jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos2 = jpos.xyz;
qj.y = jpos.w;
jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos3 = jpos.xyz;
qj.z = jpos.w;
jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos4 = jpos.xyz;
qj.w = jpos.w;
jatom.x += 1.0f;
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
exclind.x += 1.0f;
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
exclind.x -= 1.0f;
exclind.y += 1.0f;
if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
breakflag = -1.0f;
}
jatom.y += 1.0f;
}
}
kernel void knbforce_CDLJ4Ex(
kernel void knbforce_CDLJ4(
float natoms,
float nAtomsCeiling,
float dupfac,
......@@ -489,13 +320,16 @@ kernel void knbforce_CDLJ4Ex(
float4 fs;
float jend, jstart;
float which_rep;
float maskMultiplier;
maskMultiplier = 1.0e+06f;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
outforce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
outforce3 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
outforce4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
outforce1 = float3( 0.0f, 0.0f, 0.0f );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
......@@ -540,6 +374,7 @@ kernel void knbforce_CDLJ4Ex(
ipos4 = posq[ iatom ];
qi4 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
......@@ -552,28 +387,31 @@ kernel void knbforce_CDLJ4Ex(
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
jend = 1 + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1 - 0.5f ) {
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
}
jend += jstart;
// exclind.x = linind;
// exclind.y = jstart * strwidth/4.0f;
exclind.x = linind;
exclind.y = jstart * strwidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
//exclusions = excl[ exclind ];
//exclmask = fmod( exclusions, exclconst );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
/* --------------------------------------------------------------------- */
......@@ -582,19 +420,19 @@ kernel void knbforce_CDLJ4Ex(
jpos1 = posq[ jatom ];
qj.x = charge[ jatom ];
jatom.x += 1.0f;
jatom.x = floor( jatom.x + 1.2f );
jpos2 = posq[ jatom ];
qj.y = charge[ jatom ];
jatom.x += 1.0f;
jatom.x = floor( jatom.x + 1.2f );
jpos3 = posq[ jatom ];
qj.z = charge[ jatom ];
jatom.x += 1.0f;
jatom.x = floor( jatom.x + 1.2f );
jpos4 = posq[ jatom ];
qj.w = charge[ jatom ];
jatom.x += 1.0f;
jatom.x = floor( jatom.x + 1.2f );
/* --------------------------------------------------------------------- */
......@@ -607,7 +445,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
......@@ -618,8 +456,9 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
//exclusions = excl[ exclind ];
//exclmask = fmod( exclusions, exclconst );
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
......@@ -630,7 +469,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
......@@ -641,8 +480,9 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
//exclusions = excl[ exclind ];
//exclmask = fmod( exclusions, exclconst );
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig3 + jsig;
eps = ieps3 * jeps;
......@@ -653,7 +493,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce3 += fs.x * d1;
......@@ -664,8 +504,9 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
//exclusions = excl[ exclind ];
//exclmask = fmod( exclusions, exclconst );
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig4 + jsig;
eps = ieps4 * jeps;
......@@ -676,7 +517,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce4 += fs.x * d1;
......@@ -687,1133 +528,62 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
breakflag = -1.0f;
if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
}
jatom.y += 1.0f;
//jatom.y += 1.0f;
jatom.y = floor( jatom.y + 1.2f );
}
}
kernel void knbforce_CDLJ4(
kernel void kforce14_CDLJ(
float natoms,
float nAtomsCeiling,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float3 posq[][],
float charge[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<>,
out float3 outforce3<>,
out float3 outforce4<> ) {
float2 atoms<>,
float4 posq[][],
float2 sigeps<>,
out float3 fi<>,
out float3 fj<>
)
{
float2 ai, aj;
float3 d1;
float r2;
float qq, fs;
float4 jsig, jeps;
float3 ipos1, ipos2, ipos3, ipos4;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2, qi3, qi4;
//ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
float isig1, isig2, isig3, isig4;
float ieps1, ieps2, ieps3, ieps4;
//aj.y = floor( atoms.y / strwidth );
aj.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj.x = atoms.y - aj.y * strwidth;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
d1 = posq[ai].xyz - posq[aj].xyz;
qq = posq[ai].w * posq[aj].w;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
float maskMultiplier;
maskMultiplier = 1.0e+06f;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float3( 0.0f, 0.0f, 0.0f );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
// linind = fmod( a_iatom, natoms );
linind = fmod( a_iatom, nAtomsCeiling );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
/* --------------------------------------------------------------------- */
ipos1 = posq[ iatom ];
qi1 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos2 = posq[ iatom ];
qi2 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos3 = posq[ iatom ];
qi3 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig3 = jstrindex.x;
ieps3 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos4 = posq[ iatom ];
qi4 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
/* --------------------------------------------------------------------- */
breakflag = 1.0f;
jatom.y = 0.0f;
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind;
exclind.y = jstart * strwidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
/* --------------------------------------------------------------------- */
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos1 = posq[ jatom ];
qj.x = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos2 = posq[ jatom ];
qj.y = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos3 = posq[ jatom ];
qj.z = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos4 = posq[ jatom ];
qj.w = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
/* --------------------------------------------------------------------- */
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig3 + jsig;
eps = ieps3 * jeps;
qq = qi3 * qj;
d1 = ipos3 - jpos1;
d2 = ipos3 - jpos2;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce3 += fs.x * d1;
outforce3 += fs.y * d2;
outforce3 += fs.z * d3;
outforce3 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig4 + jsig;
eps = ieps4 * jeps;
qq = qi4 * qj;
d1 = ipos4 - jpos1;
d2 = ipos4 - jpos2;
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce4 += fs.x * d1;
outforce4 += fs.y * d2;
outforce4 += fs.z * d3;
outforce4 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
}
//jatom.y += 1.0f;
jatom.y = floor( jatom.y + 1.2f );
}
}
kernel void knbforce_CDLJ_1(
float natoms,
float nAtomsCeiling,
float strwidth,
float epsfac,
float4 posq[][],
float2 isigeps[][],
float excl[][],
out float3 outforce<> ){
float jsig, jeps;
float3 ipos;
float3 jpos;
float qi, qj;
float3 d1;
float sig, eps;
float isig, ieps;
float2 exclind;
float exclusion;
float r2, invr, invrsig2, invrsig6;
float4 exclmask;
float2 iatom;
float a_iatom;
float linind;
float2 jatom;
float fs;
outforce = float3( 0.0f, 0.0f, 0.0f );
iatom.x = (indexof outforce).x;
iatom.y = (indexof outforce).y;
a_iatom = iatom.x + iatom.y*strwidth;
/* --------------------------------------------------------------------- */
ipos = posq[ iatom ].xyz;
qi = epsfac*posq[ iatom ].w;
isig = isigeps[ iatom ].x;
ieps = isigeps[ iatom ].y;
/* --------------------------------------------------------------------- */
jatom = float2( 0.0f, 0.0f );
linind = 0.0f;
exclind.x = 0.0f;
exclind.y = a_iatom;
while ( linind < natoms ){
exclusion = excl[ exclind ];
/* --------------------------------------------------------------------- */
jsig = isigeps[ jatom ].x;
jeps = isigeps[ jatom ].y;
jpos = posq[ jatom ].xyz;
qj = posq[ jatom ].w;
/* --------------------------------------------------------------------- */
sig = isig + jsig;
eps = ieps * jeps;
d1 = ipos - jpos;
r2 = dot( d1, d1 ) + exclusion*10000.0f;
invr = rsqrt( r2 );
invrsig2 = invr*sig;
invrsig2 = invrsig2*invrsig2;
invrsig6 = invrsig2*invrsig2*invrsig2;
fs = eps* ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
fs += qi*qj*invr;
fs *= invr*invr;
outforce += fs*d1;
/* --------------------------------------------------------------------- */
exclind.x = floor( exclind.x + 1.05f );
jatom.x += 1.0f;
if( abs( jatom.x - strwidth ) < 0.1f ){
jatom.x = 0.0f;
jatom.y += 1.0f;
}
linind += 1.0f;
}
}
kernel void knbforce_CDLJ_1z(
float natoms,
float nAtomsCeiling,
float strwidth,
float epsfac,
float4 posq[][],
float2 isigeps[][],
float excl[][],
out float3 outforce<> ){
float jsig, jeps;
float3 ipos;
float3 jpos;
float qi, qj;
float3 d1;
float sig, eps;
float isig, ieps;
float2 exclind;
float exclusion;
float r2, invr, invrsig2, invrsig6;
float4 exclmask;
float2 iatom;
float a_iatom;
float linind;
float2 jatom;
float fs;
outforce = float3( 0.0f, 0.0f, 0.0f );
iatom.x = (indexof outforce).x;
iatom.y = (indexof outforce).y;
a_iatom = iatom.x + iatom.y*strwidth;
/* --------------------------------------------------------------------- */
ipos = posq[ iatom ].xyz;
qi = epsfac*posq[ iatom ].w;
isig = isigeps[ iatom ].x;
ieps = isigeps[ iatom ].y;
/* --------------------------------------------------------------------- */
jatom = float2( 0.0f, 0.0f );
linind = 0.0f;
exclind.x = 0.0f;
exclind.y = a_iatom;
//outforce.y = exclind.y;
//outforce.z = a_iatom;
while ( linind < natoms ){
exclusion = excl[ exclind ];
/* --------------------------------------------------------------------- */
jsig = isigeps[ jatom ].x;
jeps = isigeps[ jatom ].y;
jpos = posq[ jatom ].xyz;
qj = posq[ jatom ].w;
/* --------------------------------------------------------------------- */
eps = ieps * jeps;
d1 = ipos - jpos;
r2 = dot( d1, d1 ) + exclusion*10000.0f;
invr = rsqrt( r2 );
invrsig2 = invr*sig;
invrsig2 = invrsig2*invrsig2;
invrsig6 = invrsig2*invrsig2*invrsig2;
fs = eps* ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
fs += qi*qj*invr;
fs *= invr*invr;
outforce += fs*d1;
//outforce.x += exclusion;
/* --------------------------------------------------------------------- */
exclind.x = floor( exclind.x + 1.05f );
jatom.x += 1.0f;
if( abs( jatom.x - strwidth ) < 0.1f ){
jatom.x = 0.0f;
jatom.y += 1.0f;
}
linind += 1.0f;
}
}
kernel void knbforce_CDLJ4Debug(
float natoms,
float nAtomsCeiling,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float4 posq[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<>,
out float3 outforce3<>,
out float3 outforce4<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2, ipos3, ipos4;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2, qi3, qi4;
float isig1, isig2, isig3, isig4;
float ieps1, ieps2, ieps3, ieps4;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float linind1;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
float4 one4;
float4 zero4;
float4 frac4;
float sum;
float bigValue;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
bigValue = 1000000.0f;
outforce1 = float3( 0.0f, 0.0f, 0.0f );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
frac4 = float4( 0.2f, 0.2f, 0.2f, 0.2f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
// linind = fmod( a_iatom, natoms );
linind = fmod( a_iatom, nAtomsCeiling );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
/* --------------------------------------------------------------------- */
jpos = posq[ iatom ];
ipos1 = jpos.xyz;
qi1 = jpos.w;
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
jpos = posq[ iatom ];
ipos2 = jpos.xyz;
qi2 = jpos.w;
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
jpos = posq[ iatom ];
ipos3 = jpos.xyz;
qi3 = jpos.w;
jstrindex = isigeps[ iatom ];
isig3 = jstrindex.x;
ieps3 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
jpos = posq[ iatom ];
ipos4 = jpos.xyz;
qi4 = jpos.w;
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
/* --------------------------------------------------------------------- */
breakflag = 1.0f;
jatom.y = 0.0f;
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep*jend;
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind;
linind1 = linind;
exclind.y = jstart*strwidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
/*
outforce1 = float3( linind, exclind.y, jstart );
outforce2 = float3( linind, exclind.y, jend );
outforce3 = float3( linind, exclind.y, which_rep );
outforce4 = float3( linind, exclind.y, exclind.y );
*/
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y*jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
// ---------------------------------------------------------------------
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos = posq[ jatom ];
jpos1 = jpos.xyz;
qj.x = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
jpos = posq[ jatom ];
jpos2 = jpos.xyz;
qj.y = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
jpos = posq[ jatom ];
jpos3 = jpos.xyz;
qj.z = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
jpos = posq[ jatom ];
jpos4 = jpos.xyz;
qj.w = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
// ---------------------------------------------------------------------
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 += exclmask*10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 = 1.0f/r2;
//outforce1.x += r2.x + r2.y + r2.z + r2.w;
//outforce1.y += 4.0f;
// outforce1 += fs.x * d1;
// outforce1 += fs.y * d2;
// outforce1 += fs.z * d3;
// outforce1 += fs.w * d4;
// outforce1 += float3( 4.0, 4.0, 4.0);
// outforce1 += 1.0/r2;
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
if( sum > 0.25f ){
outforce1.x += 1.0f;
outforce1.y += exclusions;
//outforce1.y += linind;
outforce1.z += sum;
//outforce1.z += exclind.x + exclind.y*576;
}
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 += exclmask*10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 =1.0f/r2;
//outforce2.x += r2.x + r2.y + r2.z + r2.w;
//outforce2.y += 4.0f;
//
// outforce2 += fs.x * d1;
// outforce2 += fs.y * d2;
// outforce2 += fs.z * d3;
// outforce2 += fs.w * d4;
//
// outforce2 += float3( 4.0f, 4.0f, 4.0f);
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
outforce2.x += 1.0f;
outforce2.y += linind;
// outforce2.z += sum;
outforce2.z += exclind.x + exclind.y*576;
}
//
// sum = abs( fs.x ) + abs( fs.y ) + abs( fs.z ) + abs( fs.w );
// if( sum > bigValue ){
// outforce2.x = linind;
// outforce2.y = linind1;
// outforce2.z = sum;
// }
//
//
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig3 + jsig;
eps = ieps3 * jeps;
qq = qi3 * qj;
d1 = ipos3 - jpos1;
d2 = ipos3 - jpos2;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 += exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 =1.0f/r2;
//outforce3.x += r2.x + r2.y + r2.z + r2.w;
//outforce3.y += 4.0f;
// outforce3 += fs.x * d1;
// outforce3 += fs.y * d2;
// outforce3 += fs.z * d3;
// outforce3 += fs.w * d4;
// outforce3 += float3( 4.0f, 4.0f, 4.0f);
// outforce3 += 1.0f/r2;
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
outforce3.x += 1.0f;
outforce3.y += linind;
//outforce3.z += sum;
outforce3.z += exclind.x + strwidth*exclind.y;
//outforce3.y += 4.0;
}
//
// sum = abs( fs.x ) + abs( fs.y ) + abs( fs.z ) + abs( fs.w );
// if( sum > bigValue ){
// outforce3.x = linind;
// outforce3.y = linind1;
// outforce3.z = sum;
// }
//
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig4 + jsig;
eps = ieps4 * jeps;
qq = qi4 * qj;
d1 = ipos4 - jpos1;
d2 = ipos4 - jpos2;
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 =1.0f/r2;
//outforce4.x += r2.x + r2.y + r2.z + r2.w;
//outforce4.y += 4.0f;
// outforce4 += fs.x * d1;
// outforce4 += fs.y * d2;
// outforce4 += fs.z * d3;
// outforce4 += fs.w * d4;
// outforce4 += float3( 4.0f, 4.0f, 4.0f);
// outforce4 += 1.0f/r2;
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
outforce4.x += 1.0f;
//outforce4.y += exclusions;
outforce4.y += linind;
//outforce4.z += sum;
outforce4.z += exclind.x + strwidth*exclind.y;
//outforce4.y += 4.0;
}
//
// sum = abs( fs.x ) + abs( fs.y ) + abs( fs.z ) + abs( fs.w );
// if( sum > bigValue ){
// outforce4.x = linind;
// outforce4.y = linind1;
// outforce4.z = sum;
// }
//
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x - 2.9f );
exclind.y = floor( exclind.y + 1.2f );
if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
breakflag = -1.0f;
}
jatom.y = floor( jatom.y + 1.2f );
}
}
kernel void kforce14_CDLJ(
float natoms,
float strwidth,
float epsfac,
float4 params,
float2 atoms<>,
float4 posq[][],
float2 sigeps<>,
out float3 fi<>,
out float3 fj<>
)
{
float2 ai, aj;
float3 d1;
float r2;
float qq, fs;
//ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
//aj.y = floor( atoms.y / strwidth );
aj.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj.x = atoms.y - aj.y * strwidth;
d1 = posq[ai].xyz - posq[aj].xyz;
qq = posq[ai].w * posq[aj].w;
r2 = dot( d1, d1 );
fs = scalar_force_single_CDLJ( qq, epsfac, sigeps.x, sigeps.y, r2, params );
fi = fs.x * d1;
fj = -fi;
}
kernel void kbonded_CDLJ (
float epsfac,
float xstrwidth,
float4 nbparams,
float3 posq[][],
float charge[][],
float4 atoms<>,
float4 parm0<>,
float4 parm1<>,
float4 parm2<>,
float4 parm3<>,
float4 parm4<>,
out float3 fi<>,
out float3 fj<>,
out float3 fk<>,
out float3 fl<>
)
{
float3 rij, rkj, rkl, ril;
float2 ai, aj, ak, al;
float3 m, n;
float sgnphi;
float cosfac;
float ddphi, ddphi_rb;
float3 u, v, s;
float invlkj, invlij, invlkl, msq, nsq, cos_phi, sin_phi;
float3 uij, ukj, ukl;
float phi, mdphi;
float rij_d_ukj, ukj_d_rkl;
float normfac;
float qq;
float3 fi_ang, fj_ang, fk_ang;
float3 fi_bond;
float cii, ckk, cik;
float fs, st, sth;
float3 fi_pair;
float r2 ;
float torsion_numerator;
float theta1, costheta1, invsintheta1;
float theta2, costheta2, invsintheta2;
float3 posqi, posqj, posqk, posql;
float chargei, chargel;
float sig, eps;
ai.y = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
ai.x = atoms.x - ai.y * xstrwidth;
posqi = posq[ ai ];
chargei = charge[ ai ];
if ( atoms.y > -0.5f ) {
aj.y = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
posqj = posq[ aj ];
}
else
posqj = float4( 0.0f, 0.0f, 1.0f, 0.0f );
if ( atoms.z > -0.5f ) {
ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
ak.x = atoms.z - ak.y * xstrwidth;
posqk = posq[ ak ];
}
else
posqk = float4( 0.0f, 1.0f, 1.0f, 0.0f );
if ( atoms.w > -0.5f ) {
al.y = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
al.x = atoms.w - al.y * xstrwidth;
posql = posq[ al ];
chargel = charge[ al ];
} else {
posql = float4( 1.0f, 1.0f, 1.0f, 0.0f );
chargel = 0.0f;
}
qq = chargei*chargel;
rij = posqi.xyz - posqj.xyz;
rkj = posqk.xyz - posqj.xyz;
rkl = posqk.xyz - posql.xyz;
ril = posqi.xyz - posql.xyz;
invlij = rsqrt( dot( rij, rij ) );
invlkj = rsqrt( dot( rkj, rkj ) );
invlkl = rsqrt( dot( rkl, rkl ) );
uij = rij * invlij;
ukj = rkj * invlkj;
ukl = rkl * invlkl;
rij_d_ukj = dot( rij, ukj );
ukj_d_rkl = dot( ukj, rkl );
m = cross( uij, ukj );
n = cross( ukj, ukl );
costheta1 = clamp( rij_d_ukj * invlij, -1.0f, 1.0f );
theta1 = acos( costheta1 );
invsintheta1 = rsqrt( 1.0f - costheta1 * costheta1 );
costheta2 = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
theta2 = acos( costheta2 );
invsintheta2 = rsqrt( 1.0f - costheta2 * costheta2 );
cos_phi = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
sgnphi = sign( dot(rij, n) );
phi = sgnphi * acos( cos_phi );
mdphi = parm1.w * phi - parm1.z;
ddphi = -parm1.y * parm1.w * sin( mdphi );
cos_phi = -cos_phi;
sin_phi = -sgnphi*sqrt( clamp( 1.0f - cos_phi * cos_phi, 0.0f, 1.0f) );
ddphi_rb = 5.0f * parm1.x;
ddphi_rb = 4.0f * parm0.w + ddphi_rb * cos_phi;
ddphi_rb = 3.0f * parm0.z + ddphi_rb * cos_phi;
ddphi_rb = 2.0f * parm0.y + ddphi_rb * cos_phi;
ddphi_rb = parm0.x + ddphi_rb * cos_phi;
ddphi_rb = -ddphi_rb * sin_phi;
ddphi += ddphi_rb;
fi = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
fl = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
u = rij_d_ukj * invlkj * fi;
v = ukj_d_rkl * invlkj * fl;
s = u - v;
fj = s - fi;
fk = -(s + fl);
fs = -parm2.y * ( theta1 - parm2.x );
st = fs * invsintheta1;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta1;
cik = st * invlkj * invlij;
cii = sth * invlij;
ckk = sth * invlkj;
fi_ang = -( cik * rkj - cii * uij );
fk_ang = -( cik * rij - ckk * ukj );
fj_ang = -fi_ang - fk_ang;
fi += fi_ang;
fj += fj_ang;
fk += fk_ang;
fs = -parm2.w * ( theta2 - parm2.z );
st = fs * invsintheta2;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta2;
cik = st * invlkj * invlkl;
cii = sth * invlkj;
ckk = sth * invlkl;
fi_ang = ( cik * rkl - cii * ukj );
fk_ang = ( cik * rkj - ckk * ukl );
fj_ang = -fi_ang - fk_ang;
fj += fi_ang;
fk += fj_ang;
fl += fk_ang;
fi_bond = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
fi += fi_bond;
fj += -fi_bond;
fi_bond = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
fj += fi_bond;
fk += -fi_bond;
fi_bond = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
fk += fi_bond;
fl += -fi_bond;
r2 = dot( d1, d1 );
if ( parm4.z > -0.5f ) {
r2 = dot( ril, ril );
fs = scalar_force_single_CDLJ( qq, epsfac, parm4.z, parm4.w, r2, nbparams );
fi_pair = fs * ril;
fi += fi_pair;
fl -= fi_pair;
}
fs = scalar_force_single_CDLJ( qq, epsfac, sigeps.x, sigeps.y, r2, params );
fi = fs.x * d1;
fj = -fi;
}
kernel void kbonded_CDLJDebug (
kernel void kbonded_CDLJ (
float epsfac,
float xstrwidth,
float4 nbparams,
float4 posq[][],
float3 posq[][],
float charge[][],
float4 atoms<>,
float4 parm0<>,
float4 parm1<>,
......@@ -1842,482 +612,188 @@ kernel void kbonded_CDLJDebug (
float3 fi_ang, fj_ang, fk_ang;
float3 fi_bond;
float cii, ckk, cik;
float fs, st, sth;
float3 fi_pair;
float r2 ;
float torsion_numerator;
float theta1, costheta1, invsintheta1;
float theta2, costheta2, invsintheta2;
float4 posqi, posqj, posqk, posql;
float sig, eps;
ai.y = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
ai.x = atoms.x - ai.y * xstrwidth;
posqi = posq[ ai ];
if ( atoms.y > -0.5f ) {
aj.y = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
posqj = posq[ aj ];
} else {
posqj = float4( 0.0f, 0.0f, 1.0f, 0.0f );
}
if ( atoms.z > -0.5f ) {
ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
ak.x = atoms.z - ak.y * xstrwidth;
posqk = posq[ ak ];
} else {
posqk = float4( 0.0f, 1.0f, 1.0f, 0.0f );
}
if ( atoms.w > -0.5f ) {
al.y = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
al.x = atoms.w - al.y * xstrwidth;
posql = posq[ al ];
} else {
posql = float4( 1.0f, 1.0f, 1.0f, 0.0f );
}
qq = posqi.w * posql.w;
rij = posqi.xyz - posqj.xyz;
rkj = posqk.xyz - posqj.xyz;
rkl = posqk.xyz - posql.xyz;
ril = posqi.xyz - posql.xyz;
invlij = rsqrt( dot( rij, rij ) );
invlkj = rsqrt( dot( rkj, rkj ) );
invlkl = rsqrt( dot( rkl, rkl ) );
uij = rij * invlij;
ukj = rkj * invlkj;
ukl = rkl * invlkl;
rij_d_ukj = dot( rij, ukj );
ukj_d_rkl = dot( ukj, rkl );
m = cross( uij, ukj );
n = cross( ukj, ukl );
costheta1 = clamp( rij_d_ukj * invlij, -1.0f, 1.0f );
theta1 = acos( costheta1 );
invsintheta1 = rsqrt( 1.0f - costheta1 * costheta1 );
costheta2 = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
theta2 = acos( costheta2 );
invsintheta2 = rsqrt( 1.0f - costheta2 * costheta2 );
cos_phi = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
sgnphi = sign( dot(rij, n) );
phi = sgnphi * acos( cos_phi );
mdphi = parm1.w * phi - parm1.z;
ddphi = -parm1.y * parm1.w * sin( mdphi );
cos_phi = -cos_phi;
sin_phi = -sgnphi*sqrt( clamp( 1.0f - cos_phi * cos_phi, 0.0f, 1.0f) );
ddphi_rb = 5.0f * parm1.x;
ddphi_rb = 4.0f * parm0.w + ddphi_rb * cos_phi;
ddphi_rb = 3.0f * parm0.z + ddphi_rb * cos_phi;
ddphi_rb = 2.0f * parm0.y + ddphi_rb * cos_phi;
ddphi_rb = parm0.x + ddphi_rb * cos_phi;
ddphi_rb = -ddphi_rb * sin_phi;
ddphi += ddphi_rb;
//ddphi = ddphi_rb;
//ddphi = 0.0f;
fi = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
fl = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
u = rij_d_ukj * invlkj * fi;
v = ukj_d_rkl * invlkj * fl;
s = u - v;
fj = s - fi;
fk = -(s + fl);
fs = -parm2.y * ( theta1 - parm2.x );
st = fs * invsintheta1;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta1;
cik = st * invlkj * invlij;
cii = sth * invlij;
ckk = sth * invlkj;
fi_ang = -( cik * rkj - cii * uij );
fk_ang = -( cik * rij - ckk * ukj );
fj_ang = -fi_ang - fk_ang;
/*
fi += fi_ang;
fj += fj_ang;
fk += fk_ang;
*/
fs = -parm2.w * ( theta2 - parm2.z );
st = fs * invsintheta2;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta2;
cik = st * invlkj * invlkl;
cii = sth * invlkj;
ckk = sth * invlkl;
fi_ang = ( cik * rkl - cii * ukj );
fk_ang = ( cik * rkj - ckk * ukl );
fj_ang = -fi_ang - fk_ang;
/*
fj += fi_ang;
fk += fj_ang;
fl += fk_ang;
*/
// bonded terms
// k*(r - d)*deltaX/r = k*(1 - d/r)*deltaX
/*
fi = float3( 0.0f, 0.0f, 0.0f );
fj = float3( 0.0f, 0.0f, 0.0f );
fk = float3( 0.0f, 0.0f, 0.0f );
fl = float3( 0.0f, 0.0f, 0.0f );
*/
/*
// ------------------------------------------------------------
fi_bond = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
fi += fi_bond;
fj += -fi_bond;
// ------------------------------------------------------------
fi_bond = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
fj += fi_bond;
fk += -fi_bond;
// ------------------------------------------------------------
fi_bond = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
fk += fi_bond;
fl += -fi_bond;
*/
// ------------------------------------------------------------
// LJ14 terms
//fi = float3( 0.0f, 0.0f, 0.0f);
//fj = float3( 0.0f, 0.0f, 0.0f);
//fk = float3( 0.0f, 0.0f, 0.0f);
//fl = float3( 0.0f, 0.0f, 0.0f);
/*
if ( parm4.z > -0.5f ) {
r2 = dot( ril, ril );
fs = scalar_force_single_CDLJ( qq, epsfac, parm4.z, parm4.w, r2, nbparams );
fi_pair = fs * ril;
//fi_pair = float3( posqi.w + posql.w, posqi.w, posql.w );
//fi_pair = float3( posqi.w + posql.w, posqi.w, posql.w );
//fi_pair = float3( atoms.x, parm4.z, parm4.w );
//fi_pair = float3( 0.0f, 0.0f, 0.0f);
fi += fi_pair;
fl -= fi_pair;
//fi = fi_pair;
//fl = float3( atoms.w, parm4.z, parm4.w );
}
*/
}
kernel void knbforce_CDLJ4NoEx(
float natoms,
float nAtomsCeiling,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float4 posq[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<>,
out float3 outforce3<>,
out float3 outforce4<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2, ipos3, ipos4;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2, qi3, qi4;
float isig1, isig2, isig3, isig4;
float ieps1, ieps2, ieps3, ieps4;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float fs, st, sth;
float3 fi_pair;
float r2 ;
float torsion_numerator;
float theta1, costheta1, invsintheta1;
float theta2, costheta2, invsintheta2;
float3 posqi, posqj, posqk, posql;
float chargei, chargel;
float sig, eps;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float3( 0.0f, 0.0f, 0.0f );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
// linind = fmod( a_iatom, natoms );
linind = fmod( a_iatom, nAtomsCeiling );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
ai.x = atoms.x - ai.y * xstrwidth;
posqi = posq[ ai ];
chargei = charge[ ai ];
/* --------------------------------------------------------------------- */
jpos = posq[ iatom ];
ipos1 = jpos.xyz;
qi1 = jpos.w;
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1.0f;
jpos = posq[ iatom ];
ipos2 = jpos.xyz;
qi2 = jpos.w;
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
if ( atoms.y > -0.5f ) {
/* --------------------------------------------------------------------- */
aj.y = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
posqj = posq[ aj ];
}
else
posqj = float4( 0.0f, 0.0f, 1.0f, 0.0f );
iatom.x += 1.0f;
jpos = posq[ iatom ];
ipos3 = jpos.xyz;
qi3 = jpos.w;
if ( atoms.z > -0.5f ) {
jstrindex = isigeps[ iatom ];
isig3 = jstrindex.x;
ieps3 = jstrindex.y;
ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
ak.x = atoms.z - ak.y * xstrwidth;
posqk = posq[ ak ];
}
else
posqk = float4( 0.0f, 1.0f, 1.0f, 0.0f );
/* --------------------------------------------------------------------- */
if ( atoms.w > -0.5f ) {
iatom.x += 1.0f;
jpos = posq[ iatom ];
ipos4 = jpos.xyz;
qi4 = jpos.w;
al.y = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
al.x = atoms.w - al.y * xstrwidth;
posql = posq[ al ];
chargel = charge[ al ];
} else {
posql = float4( 1.0f, 1.0f, 1.0f, 0.0f );
chargel = 0.0f;
}
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
qq = chargei*chargel;
rij = posqi.xyz - posqj.xyz;
rkj = posqk.xyz - posqj.xyz;
rkl = posqk.xyz - posql.xyz;
ril = posqi.xyz - posql.xyz;
/* --------------------------------------------------------------------- */
breakflag = 1.0f;
jatom.y = 0.0f;
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
invlij = rsqrt( dot( rij, rij ) );
invlkj = rsqrt( dot( rkj, rkj ) );
invlkl = rsqrt( dot( rkl, rkl ) );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
}
jend += jstart;
uij = rij * invlij;
ukj = rkj * invlkj;
ukl = rkl * invlkl;
//exclind.x = linind;
//exclind.y = jstart * strwidth/4.0f;
//exclind.y = floor( exclind.y + 0.25f );
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
rij_d_ukj = dot( rij, ukj );
ukj_d_rkl = dot( ukj, rkl );
// exclusions = excl[ exclind ];
// exclmask = fmod( exclusions, exclconst );
m = cross( uij, ukj );
n = cross( ukj, ukl );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
costheta1 = clamp( rij_d_ukj * invlij, -1.0f, 1.0f );
theta1 = acos( costheta1 );
invsintheta1 = rsqrt( 1.0f - costheta1 * costheta1 );
/* --------------------------------------------------------------------- */
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
costheta2 = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
theta2 = acos( costheta2 );
invsintheta2 = rsqrt( 1.0f - costheta2 * costheta2 );
jpos = posq[ jatom ];
jpos1 = jpos.xyz;
qj.x = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
cos_phi = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
jpos = posq[ jatom ];
jpos2 = jpos.xyz;
qj.y = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
sgnphi = sign( dot(rij, n) );
jpos = posq[ jatom ];
jpos3 = jpos.xyz;
qj.z = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos4 = jpos.xyz;
qj.w = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
phi = sgnphi * acos( cos_phi );
mdphi = parm1.w * phi - parm1.z;
ddphi = -parm1.y * parm1.w * sin( mdphi );
/* --------------------------------------------------------------------- */
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
cos_phi = -cos_phi;
sin_phi = -sgnphi*sqrt( clamp( 1.0f - cos_phi * cos_phi, 0.0f, 1.0f) );
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
ddphi_rb = 5.0f * parm1.x;
ddphi_rb = 4.0f * parm0.w + ddphi_rb * cos_phi;
ddphi_rb = 3.0f * parm0.z + ddphi_rb * cos_phi;
ddphi_rb = 2.0f * parm0.y + ddphi_rb * cos_phi;
ddphi_rb = parm0.x + ddphi_rb * cos_phi;
ddphi_rb = -ddphi_rb * sin_phi;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
ddphi += ddphi_rb;
/* --------------------------------------------------------------------- */
fi = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
fl = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
//exclind.x += 1.0f;
/*
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
*/
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
u = rij_d_ukj * invlkj * fi;
v = ukj_d_rkl * invlkj * fl;
s = u - v;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
fj = s - fi;
fk = -(s + fl);
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
/*
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
*/
sig = isig3 + jsig;
eps = ieps3 * jeps;
qq = qi3 * qj;
fs = -parm2.y * ( theta1 - parm2.x );
st = fs * invsintheta1;
st = clamp( st, -100000.0f, 100000.0f );
d1 = ipos3 - jpos1;
d2 = ipos3 - jpos2;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
sth = st * costheta1;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
cik = st * invlkj * invlij;
cii = sth * invlij;
ckk = sth * invlkj;
outforce3 += fs.x * d1;
outforce3 += fs.y * d2;
outforce3 += fs.z * d3;
outforce3 += fs.w * d4;
fi_ang = -( cik * rkj - cii * uij );
fk_ang = -( cik * rij - ckk * ukj );
fj_ang = -fi_ang - fk_ang;
/* --------------------------------------------------------------------- */
fi += fi_ang;
fj += fj_ang;
fk += fk_ang;
fs = -parm2.w * ( theta2 - parm2.z );
st = fs * invsintheta2;
st = clamp( st, -100000.0f, 100000.0f );
//exclind.x += 1.0f;
/*
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
*/
sth = st * costheta2;
sig = isig4 + jsig;
eps = ieps4 * jeps;
qq = qi4 * qj;
cik = st * invlkj * invlkl;
cii = sth * invlkj;
ckk = sth * invlkl;
d1 = ipos4 - jpos1;
d2 = ipos4 - jpos2;
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
fi_ang = ( cik * rkl - cii * ukj );
fk_ang = ( cik * rkj - ckk * ukl );
fj_ang = -fi_ang - fk_ang;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
fj += fi_ang;
fk += fj_ang;
fl += fk_ang;
outforce4 += fs.x * d1;
outforce4 += fs.y * d2;
outforce4 += fs.z * d3;
outforce4 += fs.w * d4;
/* --------------------------------------------------------------------- */
fi_bond = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
fi += fi_bond;
fj += -fi_bond;
//exclind.x -= 3.0f;
//exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
//exclind.y = floor( exclind.y + 1.25f );
fi_bond = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
fj += fi_bond;
fk += -fi_bond;
if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
}
//jatom.y += 1.0f;
jatom.y = floor( jatom.y + 1.2f );
fi_bond = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
fk += fi_bond;
fl += -fi_bond;
if ( parm4.z > -0.5f ) {
r2 = dot( ril, ril );
fs = scalar_force_single_CDLJ( qq, epsfac, parm4.z, parm4.w, r2, nbparams );
fi_pair = fs * ril;
fi += fi_pair;
fl -= fi_pair;
}
}
......@@ -304,7 +304,6 @@ kernel void kinvmap_gather3_4(
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
}
//Takes three + five inverse maps
kernel void kinvmap_gather3_5(
float strwidth, //stream width of the dihedral forces
......@@ -337,6 +336,25 @@ kernel void kinvmap_gather3_5(
}
//Takes one + one inverse maps
kernel void kinvmap_gather1_1(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1[][], //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap5_1[][], //indices into the dihedral forces
float3 forces5[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
float2 idx = indexof(outforce);
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 );
outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 );
}
//Takes five + two inverse maps
kernel void kinvmap_gather5_2(
float strwidth, //stream width of the dihedral forces
......
......@@ -121,6 +121,15 @@ void kinvmap_gather3_5 (const float strwidth,
::brook::stream outforce);
void kinvmap_gather1_1 (const float strwidth,
::brook::stream invmap3_1,
::brook::stream forces3,
::brook::stream invmap5_1,
::brook::stream forces5,
::brook::stream inforce,
::brook::stream outforce);
void kinvmap_gather5_2 (const float strwidth,
::brook::stream invmap5_1,
::brook::stream invmap5_2,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment