Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
7ed51ce4
Commit
7ed51ce4
authored
Dec 17, 2008
by
Mark Friedrichs
Browse files
Bonded forces ok
parent
08c2e027
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
256 additions
and
1721 deletions
+256
-1721
platforms/brook/src/OpenMMBrookInterface.h
platforms/brook/src/OpenMMBrookInterface.h
+36
-4
platforms/brook/src/gpu/kforce_CDLJ.br
platforms/brook/src/gpu/kforce_CDLJ.br
+192
-1716
platforms/brook/src/gpu/kinvmap_gather.br
platforms/brook/src/gpu/kinvmap_gather.br
+19
-1
platforms/brook/src/gpu/kinvmap_gather.h
platforms/brook/src/gpu/kinvmap_gather.h
+9
-0
No files found.
platforms/brook/src/OpenMMBrookInterface.h
View file @
7ed51ce4
...
@@ -99,6 +99,17 @@ class OpenMMBrookInterface {
...
@@ -99,6 +99,17 @@ class OpenMMBrookInterface {
int
getNumberOfParticles
(
void
)
const
;
int
getNumberOfParticles
(
void
)
const
;
/**
* Set number of particles
*
* @param numberOfParticles number of particles
*
* @return DefaultReturnValue
*
*/
int
setNumberOfParticles
(
int
numberOfParticles
);
/**
/**
* Get particle stream width
* Get particle stream width
*
*
...
@@ -166,10 +177,13 @@ class OpenMMBrookInterface {
...
@@ -166,10 +177,13 @@ class OpenMMBrookInterface {
* Compute energy
* Compute energy
*
*
* @param context OpenMMContextImpl context
* @param context OpenMMContextImpl context
* @param system system reference
*
* @return potential energy
*
*
*/
*/
double
computeEnergy
(
OpenMMContextImpl
&
context
);
double
computeEnergy
(
OpenMMContextImpl
&
context
,
System
&
system
);
/**
/**
* Set trigger Force Kernel
* Set trigger Force Kernel
...
@@ -207,6 +221,24 @@ class OpenMMBrookInterface {
...
@@ -207,6 +221,24 @@ class OpenMMBrookInterface {
void
*
getTriggerEnergyKernel
(
void
)
const
;
void
*
getTriggerEnergyKernel
(
void
)
const
;
/**
* Get BrookNonBonded reference
*
* @return BrookNonBonded reference
*
*/
BrookNonBonded
&
getBrookNonBonded
(
void
);
/**
* Get BrookGbsa reference
*
* @return BrookGbsa reference
*
*/
BrookGbsa
&
getBrookGbsa
(
void
);
/**
/**
* Get BrookBondParameters for harmonic bond force
* Get BrookBondParameters for harmonic bond force
*
*
...
@@ -392,9 +424,9 @@ class OpenMMBrookInterface {
...
@@ -392,9 +424,9 @@ class OpenMMBrookInterface {
// Brook bonded, nonbonded, Gbsa
// Brook bonded, nonbonded, Gbsa
BrookBonded
*
_brookBonded
;
BrookBonded
_brookBonded
;
BrookNonBonded
*
_brookNonBonded
;
BrookNonBonded
_brookNonBonded
;
BrookGbsa
*
_brookGbsa
;
BrookGbsa
_brookGbsa
;
void
*
_triggerForceKernel
;
void
*
_triggerForceKernel
;
void
*
_triggerEnergyKernel
;
void
*
_triggerEnergyKernel
;
...
...
platforms/brook/src/gpu/kforce_CDLJ.br
View file @
7ed51ce4
...
@@ -61,6 +61,7 @@ kernel float4 get_r2_CDLJ( float3 d1, float3 d2, float3 d3, float3 d4 ) {
...
@@ -61,6 +61,7 @@ kernel float4 get_r2_CDLJ( float3 d1, float3 d2, float3 d3, float3 d4 ) {
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
r2 = float4( dot(d1, d1), dot( d2, d2 ), dot( d3, d3 ), dot( d4, d4 ) );
return r2;
return r2;
}
}
kernel void knbforce_CDLJ (
kernel void knbforce_CDLJ (
float natoms,
float natoms,
float dupfac,
float dupfac,
...
@@ -270,177 +271,7 @@ if( exclmask.w > 0.5f ){
...
@@ -270,177 +271,7 @@ if( exclmask.w > 0.5f ){
}
}
kernel void knbforce_CDLJ2(
kernel void knbforce_CDLJ4(
float natoms,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float4 posq[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2;
float isig1, isig2;
float ieps1, ieps2;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
outforce2 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
a_iatom = 2.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
linind = fmod( a_iatom, natoms );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
jpos = posq[ iatom ];
ipos1 = jpos.xyz;
qi1 = jpos.w;
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
iatom.x += 1;
jpos = posq[ iatom ];
ipos2 = jpos.xyz;
qi2 = jpos.w;
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
breakflag = 1.0f;
jatom.y = 0.0f;
which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
jend = 1 + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1 - 0.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind;
exclind.y = jstart * strwidth/4.0f;
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos = posq[ jatom ];
jpos1 = jpos.xyz;
qj.x = jpos.w;
jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos2 = jpos.xyz;
qj.y = jpos.w;
jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos3 = jpos.xyz;
qj.z = jpos.w;
jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos4 = jpos.xyz;
qj.w = jpos.w;
jatom.x += 1.0f;
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
exclind.x += 1.0f;
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
exclind.x -= 1.0f;
exclind.y += 1.0f;
if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
breakflag = -1.0f;
}
jatom.y += 1.0f;
}
}
kernel void knbforce_CDLJ4Ex(
float natoms,
float natoms,
float nAtomsCeiling,
float nAtomsCeiling,
float dupfac,
float dupfac,
...
@@ -489,13 +320,16 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -489,13 +320,16 @@ kernel void knbforce_CDLJ4Ex(
float4 fs;
float4 fs;
float jend, jstart;
float jend, jstart;
float which_rep;
float which_rep;
float maskMultiplier;
maskMultiplier = 1.0e+06f;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float
4( 0.0f,
0.0f, 0.0f, 0.0f );
outforce1 = float
3(
0.0f, 0.0f, 0.0f );
outforce2 = float
4( 0.0f,
0.0f, 0.0f, 0.0f );
outforce2 = float
3(
0.0f, 0.0f, 0.0f );
outforce3 = float
4( 0.0f,
0.0f, 0.0f, 0.0f );
outforce3 = float
3(
0.0f, 0.0f, 0.0f );
outforce4 = float
4( 0.0f,
0.0f, 0.0f, 0.0f );
outforce4 = float
3(
0.0f, 0.0f, 0.0f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
...
@@ -540,6 +374,7 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -540,6 +374,7 @@ kernel void knbforce_CDLJ4Ex(
ipos4 = posq[ iatom ];
ipos4 = posq[ iatom ];
qi4 = charge[ iatom ];
qi4 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
ieps4 = jstrindex.y;
...
@@ -552,28 +387,31 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -552,28 +387,31 @@ kernel void knbforce_CDLJ4Ex(
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
jend = 1 + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jend = 1
.0f
+ round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
jstart = which_rep * jend;
if ( which_rep > dupfac - 1
- 0
.5f ) {
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
jend = 1e6f;
}
}
jend += jstart;
jend += jstart;
// exclind.x = linind;
exclind.x = linind;
// exclind.y = jstart * strwidth/4.0f;
exclind.y = jstart * strwidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
jatom.y = jstart;
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
while ( jatom.x < strwidth && breakflag > 0.0f ) {
//
exclusions = excl[ exclind ];
exclusions = excl[ exclind ];
//
exclmask = fmod( exclusions, exclconst );
exclmask = fmod( exclusions, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jstrindex.x = linind - jstrindex.y * jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */
...
@@ -582,19 +420,19 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -582,19 +420,19 @@ kernel void knbforce_CDLJ4Ex(
jpos1 = posq[ jatom ];
jpos1 = posq[ jatom ];
qj.x = charge[ jatom ];
qj.x = charge[ jatom ];
jatom.x
+= 1.0f
;
jatom.x
= floor( jatom.x + 1.2f )
;
jpos2 = posq[ jatom ];
jpos2 = posq[ jatom ];
qj.y = charge[ jatom ];
qj.y = charge[ jatom ];
jatom.x
+= 1.0f
;
jatom.x
= floor( jatom.x + 1.2f )
;
jpos3 = posq[ jatom ];
jpos3 = posq[ jatom ];
qj.z = charge[ jatom ];
qj.z = charge[ jatom ];
jatom.x
+= 1.0f
;
jatom.x
= floor( jatom.x + 1.2f )
;
jpos4 = posq[ jatom ];
jpos4 = posq[ jatom ];
qj.w = charge[ jatom ];
qj.w = charge[ jatom ];
jatom.x
+= 1.0f
;
jatom.x
= floor( jatom.x + 1.2f )
;
/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */
...
@@ -607,7 +445,7 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -607,7 +445,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos1 - jpos3;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 )
+ (exclmask*maskMultiplier)
;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
outforce1 += fs.x * d1;
...
@@ -618,8 +456,9 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -618,8 +456,9 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
//exclind.x += 1.0f;
//exclusions = excl[ exclind ];
exclind.x = floor( exclind.x + 1.2f );
//exclmask = fmod( exclusions, exclconst );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
sig = isig2 + jsig;
eps = ieps2 * jeps;
eps = ieps2 * jeps;
...
@@ -630,7 +469,7 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -630,7 +469,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos2 - jpos3;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 )
+ (exclmask*maskMultiplier)
;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
outforce2 += fs.x * d1;
...
@@ -641,8 +480,9 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -641,8 +480,9 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
//exclind.x += 1.0f;
//exclusions = excl[ exclind ];
exclind.x = floor( exclind.x + 1.2f );
//exclmask = fmod( exclusions, exclconst );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig3 + jsig;
sig = isig3 + jsig;
eps = ieps3 * jeps;
eps = ieps3 * jeps;
...
@@ -653,7 +493,7 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -653,7 +493,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos3 - jpos3;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 )
+ (exclmask*maskMultiplier)
;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce3 += fs.x * d1;
outforce3 += fs.x * d1;
...
@@ -664,8 +504,9 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -664,8 +504,9 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
//exclind.x += 1.0f;
//exclusions = excl[ exclind ];
exclind.x = floor( exclind.x + 1.2f );
//exclmask = fmod( exclusions, exclconst );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig4 + jsig;
sig = isig4 + jsig;
eps = ieps4 * jeps;
eps = ieps4 * jeps;
...
@@ -676,7 +517,7 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -676,7 +517,7 @@ kernel void knbforce_CDLJ4Ex(
d3 = ipos4 - jpos3;
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = get_r2_CDLJ( d1, d2, d3, d4 )
+ (exclmask*maskMultiplier)
;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce4 += fs.x * d1;
outforce4 += fs.x * d1;
...
@@ -687,1133 +528,62 @@ kernel void knbforce_CDLJ4Ex(
...
@@ -687,1133 +528,62 @@ kernel void knbforce_CDLJ4Ex(
/* --------------------------------------------------------------------- */
/* --------------------------------------------------------------------- */
//exclind.x -= 3.0f;
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
breakflag = -1.0f;
}
}
jatom.y += 1.0f;
//jatom.y += 1.0f;
jatom.y = floor( jatom.y + 1.2f );
}
}
}
}
kernel void k
nb
force_CDLJ
4
(
kernel void kforce
14
_CDLJ(
float natoms,
float natoms,
float nAtomsCeiling,
float dupfac,
float strheight,
float strwidth,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float epsfac,
float4 params,
float4 params,
float3 posq[][],
float2 atoms<>,
float charge[][],
float4 posq[][],
float4 isigeps[][],
float2 sigeps<>,
float4 sigma[][],
out float3 fi<>,
float4 epsilon[][],
out float3 fj<>
float excl[][],
)
out float3 outforce1<>,
{
out float3 outforce2<>,
float2 ai, aj;
out float3 outforce3<>,
float3 d1;
out float3 outforce4<> ) {
float r2;
float qq, fs;
float4 jsig, jeps;
float3 ipos1, ipos2, ipos3, ipos4;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2, qi3, qi4;
float isig1, isig2, isig3, isig4;
float ieps1, ieps2, ieps3, ieps4;
float4 qj, qq;
//ai.y = floor( atoms.x / strwidth );
float3 d1, d2, d3, d4;
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
float2 exclind;
ai.x = atoms.x - ai.y * strwidth;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float breakflag;
//aj.y = floor( atoms.y / strwidth );
float2 jatom;
aj.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
float4 fs;
aj.x = atoms.y - aj.y * strwidth;
float jend, jstart;
float which_rep;
float maskMultiplier;
maskMultiplier = 1.0e+06f;
d1 = posq[ai].xyz - posq[aj].xyz;
qq = posq[ai].w * posq[aj].w;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f
);
r2 = dot( d1, d1
);
outforce1 = float3( 0.0f, 0.0f, 0.0f );
fs = scalar_force_single_CDLJ( qq, epsfac, sigeps.x, sigeps.y, r2, params );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
fi = fs.x * d1;
fj = -fi;
}
// linind = fmod( a_iatom, natoms );
kernel void kbonded_CDLJ (
linind = fmod( a_iatom, nAtomsCeiling );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
/* --------------------------------------------------------------------- */
ipos1 = posq[ iatom ];
qi1 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos2 = posq[ iatom ];
qi2 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos3 = posq[ iatom ];
qi3 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig3 = jstrindex.x;
ieps3 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
ipos4 = posq[ iatom ];
qi4 = charge[ iatom ];
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
/* --------------------------------------------------------------------- */
breakflag = 1.0f;
jatom.y = 0.0f;
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep * jend;
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind;
exclind.y = jstart * strwidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y * jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
/* --------------------------------------------------------------------- */
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos1 = posq[ jatom ];
qj.x = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos2 = posq[ jatom ];
qj.y = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos3 = posq[ jatom ];
qj.z = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
jpos4 = posq[ jatom ];
qj.w = charge[ jatom ];
jatom.x = floor( jatom.x + 1.2f );
/* --------------------------------------------------------------------- */
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig3 + jsig;
eps = ieps3 * jeps;
qq = qi3 * qj;
d1 = ipos3 - jpos1;
d2 = ipos3 - jpos2;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce3 += fs.x * d1;
outforce3 += fs.y * d2;
outforce3 += fs.z * d3;
outforce3 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig4 + jsig;
eps = ieps4 * jeps;
qq = qi4 * qj;
d1 = ipos4 - jpos1;
d2 = ipos4 - jpos2;
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 ) + (exclmask*maskMultiplier);
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce4 += fs.x * d1;
outforce4 += fs.y * d2;
outforce4 += fs.z * d3;
outforce4 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x -= 3.0f;
exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
exclind.y = floor( exclind.y + 1.25f );
if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
}
//jatom.y += 1.0f;
jatom.y = floor( jatom.y + 1.2f );
}
}
kernel void knbforce_CDLJ_1(
float natoms,
float nAtomsCeiling,
float strwidth,
float epsfac,
float4 posq[][],
float2 isigeps[][],
float excl[][],
out float3 outforce<> ){
float jsig, jeps;
float3 ipos;
float3 jpos;
float qi, qj;
float3 d1;
float sig, eps;
float isig, ieps;
float2 exclind;
float exclusion;
float r2, invr, invrsig2, invrsig6;
float4 exclmask;
float2 iatom;
float a_iatom;
float linind;
float2 jatom;
float fs;
outforce = float3( 0.0f, 0.0f, 0.0f );
iatom.x = (indexof outforce).x;
iatom.y = (indexof outforce).y;
a_iatom = iatom.x + iatom.y*strwidth;
/* --------------------------------------------------------------------- */
ipos = posq[ iatom ].xyz;
qi = epsfac*posq[ iatom ].w;
isig = isigeps[ iatom ].x;
ieps = isigeps[ iatom ].y;
/* --------------------------------------------------------------------- */
jatom = float2( 0.0f, 0.0f );
linind = 0.0f;
exclind.x = 0.0f;
exclind.y = a_iatom;
while ( linind < natoms ){
exclusion = excl[ exclind ];
/* --------------------------------------------------------------------- */
jsig = isigeps[ jatom ].x;
jeps = isigeps[ jatom ].y;
jpos = posq[ jatom ].xyz;
qj = posq[ jatom ].w;
/* --------------------------------------------------------------------- */
sig = isig + jsig;
eps = ieps * jeps;
d1 = ipos - jpos;
r2 = dot( d1, d1 ) + exclusion*10000.0f;
invr = rsqrt( r2 );
invrsig2 = invr*sig;
invrsig2 = invrsig2*invrsig2;
invrsig6 = invrsig2*invrsig2*invrsig2;
fs = eps* ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
fs += qi*qj*invr;
fs *= invr*invr;
outforce += fs*d1;
/* --------------------------------------------------------------------- */
exclind.x = floor( exclind.x + 1.05f );
jatom.x += 1.0f;
if( abs( jatom.x - strwidth ) < 0.1f ){
jatom.x = 0.0f;
jatom.y += 1.0f;
}
linind += 1.0f;
}
}
kernel void knbforce_CDLJ_1z(
float natoms,
float nAtomsCeiling,
float strwidth,
float epsfac,
float4 posq[][],
float2 isigeps[][],
float excl[][],
out float3 outforce<> ){
float jsig, jeps;
float3 ipos;
float3 jpos;
float qi, qj;
float3 d1;
float sig, eps;
float isig, ieps;
float2 exclind;
float exclusion;
float r2, invr, invrsig2, invrsig6;
float4 exclmask;
float2 iatom;
float a_iatom;
float linind;
float2 jatom;
float fs;
outforce = float3( 0.0f, 0.0f, 0.0f );
iatom.x = (indexof outforce).x;
iatom.y = (indexof outforce).y;
a_iatom = iatom.x + iatom.y*strwidth;
/* --------------------------------------------------------------------- */
ipos = posq[ iatom ].xyz;
qi = epsfac*posq[ iatom ].w;
isig = isigeps[ iatom ].x;
ieps = isigeps[ iatom ].y;
/* --------------------------------------------------------------------- */
jatom = float2( 0.0f, 0.0f );
linind = 0.0f;
exclind.x = 0.0f;
exclind.y = a_iatom;
//outforce.y = exclind.y;
//outforce.z = a_iatom;
while ( linind < natoms ){
exclusion = excl[ exclind ];
/* --------------------------------------------------------------------- */
jsig = isigeps[ jatom ].x;
jeps = isigeps[ jatom ].y;
jpos = posq[ jatom ].xyz;
qj = posq[ jatom ].w;
/* --------------------------------------------------------------------- */
eps = ieps * jeps;
d1 = ipos - jpos;
r2 = dot( d1, d1 ) + exclusion*10000.0f;
invr = rsqrt( r2 );
invrsig2 = invr*sig;
invrsig2 = invrsig2*invrsig2;
invrsig6 = invrsig2*invrsig2*invrsig2;
fs = eps* ( 12.0f*invrsig6 - 6.0f ) * invrsig6;
fs += qi*qj*invr;
fs *= invr*invr;
outforce += fs*d1;
//outforce.x += exclusion;
/* --------------------------------------------------------------------- */
exclind.x = floor( exclind.x + 1.05f );
jatom.x += 1.0f;
if( abs( jatom.x - strwidth ) < 0.1f ){
jatom.x = 0.0f;
jatom.y += 1.0f;
}
linind += 1.0f;
}
}
kernel void knbforce_CDLJ4Debug(
float natoms,
float nAtomsCeiling,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float4 posq[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<>,
out float3 outforce3<>,
out float3 outforce4<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2, ipos3, ipos4;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2, qi3, qi4;
float isig1, isig2, isig3, isig4;
float ieps1, ieps2, ieps3, ieps4;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float linind1;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
float4 one4;
float4 zero4;
float4 frac4;
float sum;
float bigValue;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
bigValue = 1000000.0f;
outforce1 = float3( 0.0f, 0.0f, 0.0f );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
one4 = float4( 1.0f, 1.0f, 1.0f, 1.0f );
zero4 = float4( 0.0f, 0.0f, 0.0f, 0.0f );
frac4 = float4( 0.2f, 0.2f, 0.2f, 0.2f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
// linind = fmod( a_iatom, natoms );
linind = fmod( a_iatom, nAtomsCeiling );
iatom.x = fmod( linind, strwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
/* --------------------------------------------------------------------- */
jpos = posq[ iatom ];
ipos1 = jpos.xyz;
qi1 = jpos.w;
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
jpos = posq[ iatom ];
ipos2 = jpos.xyz;
qi2 = jpos.w;
jstrindex = isigeps[ iatom ];
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
jpos = posq[ iatom ];
ipos3 = jpos.xyz;
qi3 = jpos.w;
jstrindex = isigeps[ iatom ];
isig3 = jstrindex.x;
ieps3 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1;
jpos = posq[ iatom ];
ipos4 = jpos.xyz;
qi4 = jpos.w;
jstrindex = isigeps[ iatom ];
isig4 = jstrindex.x;
ieps4 = jstrindex.y;
/* --------------------------------------------------------------------- */
breakflag = 1.0f;
jatom.y = 0.0f;
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
jstart = which_rep*jend;
if ( which_rep > dupfac - 1.5f ) {
jend = 1e6f;
}
jend += jstart;
exclind.x = linind;
linind1 = linind;
exclind.y = jstart*strwidth/4.0f;
exclind.y = floor( exclind.y + 0.25f );
/*
outforce1 = float3( linind, exclind.y, jstart );
outforce2 = float3( linind, exclind.y, jend );
outforce3 = float3( linind, exclind.y, which_rep );
outforce4 = float3( linind, exclind.y, exclind.y );
*/
jatom.y = jstart;
while ( jatom.y < jend && breakflag > 0.0f ) {
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth );
jstrindex.x = linind - jstrindex.y*jstrwidth;
jstrindex.x = floor( jstrindex.x + 0.25f );
// ---------------------------------------------------------------------
jsig = sigma[ jstrindex ];
jeps = epsilon[ jstrindex ];
jpos = posq[ jatom ];
jpos1 = jpos.xyz;
qj.x = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
jpos = posq[ jatom ];
jpos2 = jpos.xyz;
qj.y = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
jpos = posq[ jatom ];
jpos3 = jpos.xyz;
qj.z = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
jpos = posq[ jatom ];
jpos4 = jpos.xyz;
qj.w = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
// ---------------------------------------------------------------------
sig = isig1 + jsig;
eps = ieps1 * jeps;
qq = qi1*qj;
d1 = ipos1 - jpos1;
d2 = ipos1 - jpos2;
d3 = ipos1 - jpos3;
d4 = ipos1 - jpos4;
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 += exclmask*10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 = 1.0f/r2;
//outforce1.x += r2.x + r2.y + r2.z + r2.w;
//outforce1.y += 4.0f;
// outforce1 += fs.x * d1;
// outforce1 += fs.y * d2;
// outforce1 += fs.z * d3;
// outforce1 += fs.w * d4;
// outforce1 += float3( 4.0, 4.0, 4.0);
// outforce1 += 1.0/r2;
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
//if( exclind.y < 5.9f && exclind.y > 4.1f ){
if( sum > 0.25f ){
outforce1.x += 1.0f;
outforce1.y += exclusions;
//outforce1.y += linind;
outforce1.z += sum;
//outforce1.z += exclind.x + exclind.y*576;
}
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig2 + jsig;
eps = ieps2 * jeps;
qq = qi2 * qj;
d1 = ipos2 - jpos1;
d2 = ipos2 - jpos2;
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 += exclmask*10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 =1.0f/r2;
//outforce2.x += r2.x + r2.y + r2.z + r2.w;
//outforce2.y += 4.0f;
//
// outforce2 += fs.x * d1;
// outforce2 += fs.y * d2;
// outforce2 += fs.z * d3;
// outforce2 += fs.w * d4;
//
// outforce2 += float3( 4.0f, 4.0f, 4.0f);
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
outforce2.x += 1.0f;
outforce2.y += linind;
// outforce2.z += sum;
outforce2.z += exclind.x + exclind.y*576;
}
//
// sum = abs( fs.x ) + abs( fs.y ) + abs( fs.z ) + abs( fs.w );
// if( sum > bigValue ){
// outforce2.x = linind;
// outforce2.y = linind1;
// outforce2.z = sum;
// }
//
//
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig3 + jsig;
eps = ieps3 * jeps;
qq = qi3 * qj;
d1 = ipos3 - jpos1;
d2 = ipos3 - jpos2;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 += exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 =1.0f/r2;
//outforce3.x += r2.x + r2.y + r2.z + r2.w;
//outforce3.y += 4.0f;
// outforce3 += fs.x * d1;
// outforce3 += fs.y * d2;
// outforce3 += fs.z * d3;
// outforce3 += fs.w * d4;
// outforce3 += float3( 4.0f, 4.0f, 4.0f);
// outforce3 += 1.0f/r2;
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
outforce3.x += 1.0f;
outforce3.y += linind;
//outforce3.z += sum;
outforce3.z += exclind.x + strwidth*exclind.y;
//outforce3.y += 4.0;
}
//
// sum = abs( fs.x ) + abs( fs.y ) + abs( fs.z ) + abs( fs.w );
// if( sum > bigValue ){
// outforce3.x = linind;
// outforce3.y = linind1;
// outforce3.z = sum;
// }
//
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
sig = isig4 + jsig;
eps = ieps4 * jeps;
qq = qi4 * qj;
d1 = ipos4 - jpos1;
d2 = ipos4 - jpos2;
d3 = ipos4 - jpos3;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
r2 = exclmask * 10000.0f;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
//r2 =1.0f/r2;
//outforce4.x += r2.x + r2.y + r2.z + r2.w;
//outforce4.y += 4.0f;
// outforce4 += fs.x * d1;
// outforce4 += fs.y * d2;
// outforce4 += fs.z * d3;
// outforce4 += fs.w * d4;
// outforce4 += float3( 4.0f, 4.0f, 4.0f);
// outforce4 += 1.0f/r2;
exclmask = exclmask > frac4 ? one4 : zero4;
sum = floor( exclmask.x + exclmask.y + exclmask.z + exclmask.w + 0.25f );
if( sum > 0.25f ){
outforce4.x += 1.0f;
//outforce4.y += exclusions;
outforce4.y += linind;
//outforce4.z += sum;
outforce4.z += exclind.x + strwidth*exclind.y;
//outforce4.y += 4.0;
}
//
// sum = abs( fs.x ) + abs( fs.y ) + abs( fs.z ) + abs( fs.w );
// if( sum > bigValue ){
// outforce4.x = linind;
// outforce4.y = linind1;
// outforce4.z = sum;
// }
//
// ---------------------------------------------------------------------
exclind.x = floor( exclind.x - 2.9f );
exclind.y = floor( exclind.y + 1.2f );
if ( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )
breakflag = -1.0f;
}
jatom.y = floor( jatom.y + 1.2f );
}
}
kernel void kforce14_CDLJ(
float natoms,
float strwidth,
float epsfac,
float4 params,
float2 atoms<>,
float4 posq[][],
float2 sigeps<>,
out float3 fi<>,
out float3 fj<>
)
{
float2 ai, aj;
float3 d1;
float r2;
float qq, fs;
//ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
//aj.y = floor( atoms.y / strwidth );
aj.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj.x = atoms.y - aj.y * strwidth;
d1 = posq[ai].xyz - posq[aj].xyz;
qq = posq[ai].w * posq[aj].w;
r2 = dot( d1, d1 );
fs = scalar_force_single_CDLJ( qq, epsfac, sigeps.x, sigeps.y, r2, params );
fi = fs.x * d1;
fj = -fi;
}
kernel void kbonded_CDLJ (
float epsfac,
float xstrwidth,
float4 nbparams,
float3 posq[][],
float charge[][],
float4 atoms<>,
float4 parm0<>,
float4 parm1<>,
float4 parm2<>,
float4 parm3<>,
float4 parm4<>,
out float3 fi<>,
out float3 fj<>,
out float3 fk<>,
out float3 fl<>
)
{
float3 rij, rkj, rkl, ril;
float2 ai, aj, ak, al;
float3 m, n;
float sgnphi;
float cosfac;
float ddphi, ddphi_rb;
float3 u, v, s;
float invlkj, invlij, invlkl, msq, nsq, cos_phi, sin_phi;
float3 uij, ukj, ukl;
float phi, mdphi;
float rij_d_ukj, ukj_d_rkl;
float normfac;
float qq;
float3 fi_ang, fj_ang, fk_ang;
float3 fi_bond;
float cii, ckk, cik;
float fs, st, sth;
float3 fi_pair;
float r2 ;
float torsion_numerator;
float theta1, costheta1, invsintheta1;
float theta2, costheta2, invsintheta2;
float3 posqi, posqj, posqk, posql;
float chargei, chargel;
float sig, eps;
ai.y = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
ai.x = atoms.x - ai.y * xstrwidth;
posqi = posq[ ai ];
chargei = charge[ ai ];
if ( atoms.y > -0.5f ) {
aj.y = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
posqj = posq[ aj ];
}
else
posqj = float4( 0.0f, 0.0f, 1.0f, 0.0f );
if ( atoms.z > -0.5f ) {
ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
ak.x = atoms.z - ak.y * xstrwidth;
posqk = posq[ ak ];
}
else
posqk = float4( 0.0f, 1.0f, 1.0f, 0.0f );
if ( atoms.w > -0.5f ) {
al.y = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
al.x = atoms.w - al.y * xstrwidth;
posql = posq[ al ];
chargel = charge[ al ];
} else {
posql = float4( 1.0f, 1.0f, 1.0f, 0.0f );
chargel = 0.0f;
}
qq = chargei*chargel;
rij = posqi.xyz - posqj.xyz;
rkj = posqk.xyz - posqj.xyz;
rkl = posqk.xyz - posql.xyz;
ril = posqi.xyz - posql.xyz;
invlij = rsqrt( dot( rij, rij ) );
invlkj = rsqrt( dot( rkj, rkj ) );
invlkl = rsqrt( dot( rkl, rkl ) );
uij = rij * invlij;
ukj = rkj * invlkj;
ukl = rkl * invlkl;
rij_d_ukj = dot( rij, ukj );
ukj_d_rkl = dot( ukj, rkl );
m = cross( uij, ukj );
n = cross( ukj, ukl );
costheta1 = clamp( rij_d_ukj * invlij, -1.0f, 1.0f );
theta1 = acos( costheta1 );
invsintheta1 = rsqrt( 1.0f - costheta1 * costheta1 );
costheta2 = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
theta2 = acos( costheta2 );
invsintheta2 = rsqrt( 1.0f - costheta2 * costheta2 );
cos_phi = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
sgnphi = sign( dot(rij, n) );
phi = sgnphi * acos( cos_phi );
mdphi = parm1.w * phi - parm1.z;
ddphi = -parm1.y * parm1.w * sin( mdphi );
cos_phi = -cos_phi;
sin_phi = -sgnphi*sqrt( clamp( 1.0f - cos_phi * cos_phi, 0.0f, 1.0f) );
ddphi_rb = 5.0f * parm1.x;
ddphi_rb = 4.0f * parm0.w + ddphi_rb * cos_phi;
ddphi_rb = 3.0f * parm0.z + ddphi_rb * cos_phi;
ddphi_rb = 2.0f * parm0.y + ddphi_rb * cos_phi;
ddphi_rb = parm0.x + ddphi_rb * cos_phi;
ddphi_rb = -ddphi_rb * sin_phi;
ddphi += ddphi_rb;
fi = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
fl = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
u = rij_d_ukj * invlkj * fi;
v = ukj_d_rkl * invlkj * fl;
s = u - v;
fj = s - fi;
fk = -(s + fl);
fs = -parm2.y * ( theta1 - parm2.x );
st = fs * invsintheta1;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta1;
cik = st * invlkj * invlij;
cii = sth * invlij;
ckk = sth * invlkj;
fi_ang = -( cik * rkj - cii * uij );
fk_ang = -( cik * rij - ckk * ukj );
fj_ang = -fi_ang - fk_ang;
fi += fi_ang;
fj += fj_ang;
fk += fk_ang;
fs = -parm2.w * ( theta2 - parm2.z );
st = fs * invsintheta2;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta2;
cik = st * invlkj * invlkl;
cii = sth * invlkj;
ckk = sth * invlkl;
fi_ang = ( cik * rkl - cii * ukj );
fk_ang = ( cik * rkj - ckk * ukl );
fj_ang = -fi_ang - fk_ang;
fj += fi_ang;
fk += fj_ang;
fl += fk_ang;
fi_bond = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
fi += fi_bond;
fj += -fi_bond;
fi_bond = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
fj += fi_bond;
fk += -fi_bond;
fi_bond = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
fk += fi_bond;
fl += -fi_bond;
if ( parm4.z > -0.5f ) {
r2 = dot( ril, ril );
fs = scalar_force_single_CDLJ( qq, epsfac, parm4.z, parm4.w, r2, nbparams );
fi_pair = fs * ril;
fi += fi_pair;
fl -= fi_pair;
}
}
kernel void kbonded_CDLJDebug (
float epsfac,
float epsfac,
float xstrwidth,
float xstrwidth,
float4 nbparams,
float4 nbparams,
float4 posq[][],
float3 posq[][],
float charge[][],
float4 atoms<>,
float4 atoms<>,
float4 parm0<>,
float4 parm0<>,
float4 parm1<>,
float4 parm1<>,
...
@@ -1841,483 +611,189 @@ kernel void kbonded_CDLJDebug (
...
@@ -1841,483 +611,189 @@ kernel void kbonded_CDLJDebug (
float qq;
float qq;
float3 fi_ang, fj_ang, fk_ang;
float3 fi_ang, fj_ang, fk_ang;
float3 fi_bond;
float3 fi_bond;
float cii, ckk, cik;
float cii, ckk, cik;
float fs, st, sth;
float fs, st, sth;
float3 fi_pair;
float3 fi_pair;
float r2 ;
float r2 ;
float torsion_numerator;
float torsion_numerator;
float theta1, costheta1, invsintheta1;
float theta1, costheta1, invsintheta1;
float theta2, costheta2, invsintheta2;
float theta2, costheta2, invsintheta2;
float4 posqi, posqj, posqk, posql;
float3 posqi, posqj, posqk, posql;
float sig, eps;
float chargei, chargel;
float sig, eps;
ai.y = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
ai.x = atoms.x - ai.y * xstrwidth;
posqi = posq[ ai ];
if ( atoms.y > -0.5f ) {
aj.y = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
posqj = posq[ aj ];
} else {
posqj = float4( 0.0f, 0.0f, 1.0f, 0.0f );
}
if ( atoms.z > -0.5f ) {
ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
ak.x = atoms.z - ak.y * xstrwidth;
posqk = posq[ ak ];
} else {
posqk = float4( 0.0f, 1.0f, 1.0f, 0.0f );
}
if ( atoms.w > -0.5f ) {
al.y = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
al.x = atoms.w - al.y * xstrwidth;
posql = posq[ al ];
} else {
posql = float4( 1.0f, 1.0f, 1.0f, 0.0f );
}
qq = posqi.w * posql.w;
rij = posqi.xyz - posqj.xyz;
rkj = posqk.xyz - posqj.xyz;
rkl = posqk.xyz - posql.xyz;
ril = posqi.xyz - posql.xyz;
invlij = rsqrt( dot( rij, rij ) );
invlkj = rsqrt( dot( rkj, rkj ) );
invlkl = rsqrt( dot( rkl, rkl ) );
uij = rij * invlij;
ukj = rkj * invlkj;
ukl = rkl * invlkl;
rij_d_ukj = dot( rij, ukj );
ukj_d_rkl = dot( ukj, rkl );
m = cross( uij, ukj );
n = cross( ukj, ukl );
costheta1 = clamp( rij_d_ukj * invlij, -1.0f, 1.0f );
theta1 = acos( costheta1 );
invsintheta1 = rsqrt( 1.0f - costheta1 * costheta1 );
costheta2 = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
theta2 = acos( costheta2 );
invsintheta2 = rsqrt( 1.0f - costheta2 * costheta2 );
cos_phi = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
sgnphi = sign( dot(rij, n) );
phi = sgnphi * acos( cos_phi );
mdphi = parm1.w * phi - parm1.z;
ddphi = -parm1.y * parm1.w * sin( mdphi );
cos_phi = -cos_phi;
sin_phi = -sgnphi*sqrt( clamp( 1.0f - cos_phi * cos_phi, 0.0f, 1.0f) );
ddphi_rb = 5.0f * parm1.x;
ddphi_rb = 4.0f * parm0.w + ddphi_rb * cos_phi;
ddphi_rb = 3.0f * parm0.z + ddphi_rb * cos_phi;
ddphi_rb = 2.0f * parm0.y + ddphi_rb * cos_phi;
ddphi_rb = parm0.x + ddphi_rb * cos_phi;
ddphi_rb = -ddphi_rb * sin_phi;
ddphi += ddphi_rb;
//ddphi = ddphi_rb;
//ddphi = 0.0f;
fi = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
fl = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
u = rij_d_ukj * invlkj * fi;
v = ukj_d_rkl * invlkj * fl;
s = u - v;
fj = s - fi;
fk = -(s + fl);
fs = -parm2.y * ( theta1 - parm2.x );
st = fs * invsintheta1;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta1;
cik = st * invlkj * invlij;
cii = sth * invlij;
ckk = sth * invlkj;
fi_ang = -( cik * rkj - cii * uij );
fk_ang = -( cik * rij - ckk * ukj );
fj_ang = -fi_ang - fk_ang;
/*
fi += fi_ang;
fj += fj_ang;
fk += fk_ang;
*/
fs = -parm2.w * ( theta2 - parm2.z );
st = fs * invsintheta2;
st = clamp( st, -100000.0f, 100000.0f );
sth = st * costheta2;
cik = st * invlkj * invlkl;
cii = sth * invlkj;
ckk = sth * invlkl;
fi_ang = ( cik * rkl - cii * ukj );
fk_ang = ( cik * rkj - ckk * ukl );
fj_ang = -fi_ang - fk_ang;
/*
fj += fi_ang;
fk += fj_ang;
fl += fk_ang;
*/
// bonded terms
// k*(r - d)*deltaX/r = k*(1 - d/r)*deltaX
/*
fi = float3( 0.0f, 0.0f, 0.0f );
fj = float3( 0.0f, 0.0f, 0.0f );
fk = float3( 0.0f, 0.0f, 0.0f );
fl = float3( 0.0f, 0.0f, 0.0f );
*/
/*
// ------------------------------------------------------------
fi_bond = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
fi += fi_bond;
fj += -fi_bond;
// ------------------------------------------------------------
fi_bond = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
fj += fi_bond;
fk += -fi_bond;
// ------------------------------------------------------------
fi_bond = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
fk += fi_bond;
fl += -fi_bond;
*/
// ------------------------------------------------------------
// LJ14 terms
//fi = float3( 0.0f, 0.0f, 0.0f);
//fj = float3( 0.0f, 0.0f, 0.0f);
//fk = float3( 0.0f, 0.0f, 0.0f);
//fl = float3( 0.0f, 0.0f, 0.0f);
/*
if ( parm4.z > -0.5f ) {
r2 = dot( ril, ril );
fs = scalar_force_single_CDLJ( qq, epsfac, parm4.z, parm4.w, r2, nbparams );
fi_pair = fs * ril;
//fi_pair = float3( posqi.w + posql.w, posqi.w, posql.w );
//fi_pair = float3( posqi.w + posql.w, posqi.w, posql.w );
//fi_pair = float3( atoms.x, parm4.z, parm4.w );
//fi_pair = float3( 0.0f, 0.0f, 0.0f);
fi += fi_pair;
fl -= fi_pair;
//fi = fi_pair;
//fl = float3( atoms.w, parm4.z, parm4.w );
}
*/
}
kernel void knbforce_CDLJ4NoEx(
float natoms,
float nAtomsCeiling,
float dupfac,
float strheight,
float strwidth,
float jstrwidth,
float fstrwidth,
float epsfac,
float4 params,
float4 posq[][],
float4 isigeps[][],
float4 sigma[][],
float4 epsilon[][],
float excl[][],
out float3 outforce1<>,
out float3 outforce2<>,
out float3 outforce3<>,
out float3 outforce4<> ) {
float4 jsig, jeps;
float3 ipos1, ipos2, ipos3, ipos4;
float4 jpos;
float3 jpos1, jpos2, jpos3, jpos4;
float qi1, qi2, qi3, qi4;
float isig1, isig2, isig3, isig4;
float ieps1, ieps2, ieps3, ieps4;
float4 qj, qq;
float3 d1, d2, d3, d4;
float2 exclind;
float4 sig, eps;
float exclusions;
float4 r2;
float3 pad;
float4 exclmask;
float4 exclconst;
float2 iatom;
float2 jstrindex;
float a_iatom;
float linind;
float breakflag;
float2 jatom;
float4 fs;
float jend, jstart;
float which_rep;
exclconst = float4( 2.0f, 3.0f, 5.0f, 7.0f );
outforce1 = float3( 0.0f, 0.0f, 0.0f );
outforce2 = float3( 0.0f, 0.0f, 0.0f );
outforce3 = float3( 0.0f, 0.0f, 0.0f );
outforce4 = float3( 0.0f, 0.0f, 0.0f );
a_iatom = 4.0f*((indexof outforce1).x + (indexof outforce1).y * fstrwidth);
// linind = fmod( a_iatom, natoms );
linind = fmod( a_iatom, nAtomsCeiling );
iatom.x = fmod( linind, strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, xstrwidth ))/xstrwidth );
iatom.y = round( (linind - fmod(linind, strwidth))/strwidth );
ai.x = atoms.x - ai.y * xstrwidth;
posqi = posq[ ai ];
chargei = charge[ ai ];
/* --------------------------------------------------------------------- */
jpos = posq[ iatom ];
ipos1 = jpos.xyz;
qi1 = jpos.w;
jstrindex = isigeps[ iatom ];
isig1 = jstrindex.x;
ieps1 = jstrindex.y;
/* --------------------------------------------------------------------- */
iatom.x += 1.0f;
jpos = posq[ iatom ];
ipos2 = jpos.xyz;
qi2 = jpos.w;
jstrindex = isigeps[ iatom ];
if ( atoms.y > -0.5f ) {
isig2 = jstrindex.x;
ieps2 = jstrindex.y;
/* --------------------------------------------------------------------- */
aj.y = round( (atoms.y - fmod( atoms.y, xstrwidth ))/xstrwidth );
aj.x = atoms.y - aj.y * xstrwidth;
posqj = posq[ aj ];
}
else
posqj = float4( 0.0f, 0.0f, 1.0f, 0.0f );
iatom.x += 1.0f;
if ( atoms.z > -0.5f ) {
jpos = posq[ iatom ];
ipos3 = jpos.xyz;
qi3 = jpos.w;
jstrindex = isigeps[ iatom ];
ak.y = round( (atoms.z - fmod( atoms.z, xstrwidth ))/xstrwidth );
isig3 = jstrindex.x;
ak.x = atoms.z - ak.y * xstrwidth;
ieps3 = jstrindex.y;
posqk = posq[ ak ];
}
else
posqk = float4( 0.0f, 1.0f, 1.0f, 0.0f );
/* --------------------------------------------------------------------- */
if ( atoms.w > -0.5f ) {
iatom.x += 1.0f;
al.y = round( (atoms.w - fmod( atoms.w, xstrwidth ))/xstrwidth );
jpos = posq[ iatom ];
al.x = atoms.w - al.y * xstrwidth;
ipos4 = jpos.xyz;
posql = posq[ al ];
qi4 = jpos.w;
chargel = charge[ al ];
} else {
posql = float4( 1.0f, 1.0f, 1.0f, 0.0f );
chargel = 0.0f;
}
jstrindex = isigeps[ iatom ];
qq = chargei*chargel;
isig4 = jstrindex.x;
rij = posqi.xyz - posqj.xyz;
ieps4 = jstrindex.y;
rkj = posqk.xyz - posqj.xyz;
rkl = posqk.xyz - posql.xyz;
ril = posqi.xyz - posql.xyz;
/* --------------------------------------------------------------------- */
breakflag = 1.0f;
jatom.y = 0.0f;
// which_rep = round( (a_iatom - fmod(a_iatom, natoms))/natoms );
invlij = rsqrt( dot( rij, rij ) );
which_rep = round( (a_iatom - fmod(a_iatom, nAtomsCeiling ))/nAtomsCeiling );
invlkj = rsqrt( dot( rkj, rkj ) );
invlkl = rsqrt( dot( rkl, rkl ) );
jend = 1.0f + round( (natoms - fmod(natoms, (dupfac * strwidth )))/(dupfac * strwidth ) );
uij = rij * invlij;
jstart = which_rep * jend;
ukj = rkj * invlkj;
if ( which_rep > dupfac - 1.5f ) {
ukl = rkl * invlkl;
jend = 1e6f;
}
jend += jstart;
//exclind.x = linind;
//exclind.y = jstart * strwidth/4.0f;
//exclind.y = floor( exclind.y + 0.25f );
jatom.y = jstart;
rij_d_ukj = dot( rij, ukj );
while ( jatom.y < jend && breakflag > 0.0f ) {
ukj_d_rkl = dot( ukj, rkl );
jatom.x = 0;
while ( jatom.x < strwidth && breakflag > 0.0f ) {
// exclusions = excl[ exclind ]
;
m = cross( uij, ukj )
;
// exclmask = fmod( exclusions, exclconst
);
n = cross( ukj, ukl
);
linind = (jatom.x + jatom.y*strwidth)/4.0f;
linind = floor( linind + 0.25f );
jstrindex.y = round( (linind - fmod(linind, jstrwidth))/jstrwidth
);
costheta1 = clamp( rij_d_ukj * invlij, -1.0f, 1.0f
);
jstrindex.x = linind - jstrindex.y * jstrwidth
;
theta1 = acos( costheta1 )
;
jstrindex.x = floor( jstrindex.x + 0.25f
);
invsintheta1 = rsqrt( 1.0f - costheta1 * costheta1
);
/* --------------------------------------------------------------------- */
jsig = sigma[ jstrindex ];
costheta2 = clamp( ukj_d_rkl * invlkl, -1.0f, 1.0f );
jeps = epsilon[ jstrindex ];
theta2 = acos( costheta2 );
invsintheta2 = rsqrt( 1.0f - costheta2 * costheta2 );
jpos = posq[ jatom ];
cos_phi = clamp( dot(m,n) * invsintheta1 * invsintheta2, -1.0f, 1.0f );
jpos1 = jpos.xyz;
qj.x = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
jpos = posq[ jatom ];
sgnphi = sign( dot(rij, n) );
jpos2 = jpos.xyz;
qj.y = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
jpos = posq[ jatom ];
jpos3 = jpos.xyz;
qj.z = jpos.w;
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
jpos = posq[ jatom ];
phi = sgnphi * acos( cos_phi );
jpos4 = jpos.xyz;
mdphi = parm1.w * phi - parm1.z;
qj.w = jpos.w;
ddphi = -parm1.y * parm1.w * sin( mdphi );
jatom.x = floor( jatom.x + 1.2f );
//jatom.x += 1.0f;
/* --------------------------------------------------------------------- */
sig = isig1 + jsig;
cos_phi = -cos_phi;
eps = ieps1 * jeps;
sin_phi = -sgnphi*sqrt( clamp( 1.0f - cos_phi * cos_phi, 0.0f, 1.0f) );
qq = qi1*qj;
d1 = ipos1 - jpos1;
ddphi_rb = 5.0f * parm1.x;
d2 = ipos1 - jpos2;
ddphi_rb = 4.0f * parm0.w + ddphi_rb * cos_phi;
d3 = ipos1 - jpos3;
ddphi_rb = 3.0f * parm0.z + ddphi_rb * cos_phi;
d4 = ipos1 - jpos4;
ddphi_rb = 2.0f * parm0.y + ddphi_rb * cos_phi;
ddphi_rb = parm0.x + ddphi_rb * cos_phi;
ddphi_rb = -ddphi_rb * sin_phi;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce1 += fs.x * d1;
ddphi += ddphi_rb;
outforce1 += fs.y * d2;
outforce1 += fs.z * d3;
outforce1 += fs.w * d4;
/* --------------------------------------------------------------------- */
fi = (-ddphi * invlij * invsintheta1 * invsintheta1 ) * m;
fl = ( ddphi * invlkl * invsintheta2 * invsintheta2 ) * n;
//exclind.x += 1.0f;
/*
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
*/
sig = isig2 + jsig
;
u = rij_d_ukj * invlkj * fi
;
eps = ieps2 * jeps
;
v = ukj_d_rkl * invlkj * fl
;
qq = qi2 * qj
;
s = u - v
;
d1 = ipos2 - jpos1;
fj = s - fi;
d2 = ipos2 - jpos2;
fk = -(s + fl);
d3 = ipos2 - jpos3;
d4 = ipos2 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
outforce2 += fs.x * d1;
outforce2 += fs.y * d2;
outforce2 += fs.z * d3;
outforce2 += fs.w * d4;
/* --------------------------------------------------------------------- */
//exclind.x += 1.0f;
/*
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
*/
sig = isig3 + jsig
;
fs = -parm2.y * ( theta1 - parm2.x )
;
eps = ieps3 * jeps
;
st = fs * invsintheta1
;
qq = qi3 * qj
;
st = clamp( st, -100000.0f, 100000.0f )
;
d1 = ipos3 - jpos1;
sth = st * costheta1;
d2 = ipos3 - jpos2;
d3 = ipos3 - jpos3;
d4 = ipos3 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
cik = st * invlkj * invlij;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
cii = sth * invlij;
ckk = sth * invlkj;
outforce3 += fs.x * d1;
fi_ang = -( cik * rkj - cii * uij );
outforce3 += fs.y * d2;
fk_ang = -( cik * rij - ckk * ukj );
outforce3 += fs.z * d3;
fj_ang = -fi_ang - fk_ang;
outforce3 += fs.w * d4;
/* --------------------------------------------------------------------- */
fi += fi_ang;
fj += fj_ang;
fk += fk_ang;
fs = -parm2.w * ( theta2 - parm2.z );
st = fs * invsintheta2;
st = clamp( st, -100000.0f, 100000.0f );
//exclind.x += 1.0f;
sth = st * costheta2;
/*
exclind.x = floor( exclind.x + 1.2f );
exclusions = excl[ exclind ];
exclmask = fmod( exclusions, exclconst );
*/
sig = isig4 + jsig
;
cik = st * invlkj * invlkl
;
eps = ieps4 * jeps
;
cii = sth * invlkj
;
qq = qi4 * qj
;
ckk = sth * invlkl
;
d1 = ipos4 - jpos1;
fi_ang = ( cik * rkl - cii * ukj );
d2 = ipos4 - jpos2;
fk_ang = ( cik * rkj - ckk * ukl );
d3 = ipos4 - jpos3;
fj_ang = -fi_ang - fk_ang;
d4 = ipos4 - jpos4;
r2 = get_r2_CDLJ( d1, d2, d3, d4 );
fj += fi_ang;
fs = scalar_force_CDLJ( qq, epsfac, sig, eps, r2, params );
fk += fj_ang;
fl += fk_ang;
outforce4 += fs.x * d1;
outforce4 += fs.y * d2;
outforce4 += fs.z * d3;
outforce4 += fs.w * d4;
/* --------------------------------------------------------------------- */
fi_bond = -parm3.y * ( 1.0f - parm3.x * invlij ) * rij;
fi += fi_bond;
fj += -fi_bond;
//exclind.x -= 3.0f;
//exclind.x = floor( exclind.x - 2.9f );
//exclind.y += 1.0f;
fi_bond = parm3.w * ( 1.0f - parm3.z * invlkj ) * rkj;
//exclind.y = floor( exclind.y + 1.25f );
fj += fi_bond;
fk += -fi_bond;
if( natoms - (jatom.y*strwidth + jatom.x) < 0.9f )breakflag = -1.0f;
}
//jatom.y += 1.0f;
fi_bond = -parm4.y * ( 1.0f - parm4.x * invlkl ) * rkl;
jatom.y = floor( jatom.y + 1.2f );
fk += fi_bond;
fl += -fi_bond;
if ( parm4.z > -0.5f ) {
r2 = dot( ril, ril );
fs = scalar_force_single_CDLJ( qq, epsfac, parm4.z, parm4.w, r2, nbparams );
fi_pair = fs * ril;
fi += fi_pair;
fl -= fi_pair;
}
}
}
}
platforms/brook/src/gpu/kinvmap_gather.br
View file @
7ed51ce4
...
@@ -304,7 +304,6 @@ kernel void kinvmap_gather3_4(
...
@@ -304,7 +304,6 @@ kernel void kinvmap_gather3_4(
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
outforce += do_gather_nobranch( strwidth, invmap4_4, forces4 );
}
}
//Takes three + five inverse maps
//Takes three + five inverse maps
kernel void kinvmap_gather3_5(
kernel void kinvmap_gather3_5(
float strwidth, //stream width of the dihedral forces
float strwidth, //stream width of the dihedral forces
...
@@ -337,6 +336,25 @@ kernel void kinvmap_gather3_5(
...
@@ -337,6 +336,25 @@ kernel void kinvmap_gather3_5(
}
}
//Takes one + one inverse maps
kernel void kinvmap_gather1_1(
float strwidth, //stream width of the dihedral forces
float4 invmap3_1[][], //indices into the dihedral forces
float3 forces3[][], //dihedral forces
float4 invmap5_1[][], //indices into the dihedral forces
float3 forces5[][], //dihedral forces
float3 inforce<>, //particle forces before
out float3 outforce<> //particle forces after
)
{
float2 idx = indexof(outforce);
outforce = inforce;
outforce += do_gather_nobranch( strwidth, invmap3_1[idx], forces3 );
outforce += do_gather_nobranch( strwidth, invmap5_1[idx], forces5 );
}
//Takes five + two inverse maps
//Takes five + two inverse maps
kernel void kinvmap_gather5_2(
kernel void kinvmap_gather5_2(
float strwidth, //stream width of the dihedral forces
float strwidth, //stream width of the dihedral forces
...
...
platforms/brook/src/gpu/kinvmap_gather.h
View file @
7ed51ce4
...
@@ -121,6 +121,15 @@ void kinvmap_gather3_5 (const float strwidth,
...
@@ -121,6 +121,15 @@ void kinvmap_gather3_5 (const float strwidth,
::
brook
::
stream
outforce
);
::
brook
::
stream
outforce
);
void
kinvmap_gather1_1
(
const
float
strwidth
,
::
brook
::
stream
invmap3_1
,
::
brook
::
stream
forces3
,
::
brook
::
stream
invmap5_1
,
::
brook
::
stream
forces5
,
::
brook
::
stream
inforce
,
::
brook
::
stream
outforce
);
void
kinvmap_gather5_2
(
const
float
strwidth
,
void
kinvmap_gather5_2
(
const
float
strwidth
,
::
brook
::
stream
invmap5_1
,
::
brook
::
stream
invmap5_1
,
::
brook
::
stream
invmap5_2
,
::
brook
::
stream
invmap5_2
,
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment