Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ef5f9282
Commit
ef5f9282
authored
Dec 29, 2008
by
Mark Friedrichs
Browse files
Mods
parent
b7c8afa9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
80 additions
and
257 deletions
+80
-257
platforms/brook/src/gpu/kshakeh.br
platforms/brook/src/gpu/kshakeh.br
+77
-254
platforms/brook/src/gpu/kshakeh.h
platforms/brook/src/gpu/kshakeh.h
+3
-3
No files found.
platforms/brook/src/gpu/kshakeh.br
View file @
ef5f9282
...
@@ -50,9 +50,10 @@
...
@@ -50,9 +50,10 @@
* */
* */
kernel void
kernel void
kshakeh_fix1(
kshakeh_fix1(
float
nit
, //number of iterations
float
maxIterations
, //number of iterations
float strwidth, //stream width of posq
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float invmH, //inverse mass of hydrogen
float inputTolerance, //tolerance
float4 atoms<>, //heavy0, h1, h2, h3
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float3 posqp[][], //changes to positions
...
@@ -67,7 +68,7 @@ kshakeh_fix1(
...
@@ -67,7 +68,7 @@ kshakeh_fix1(
float3 xi, xj1, xj2, xj3; //coordinates
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor;
float rpsqij, rrpr, acor
, tolerance, converged
;
float mask2, mask3;
float mask2, mask3;
float diff;
float diff;
float rij1sq, rij2sq, rij3sq;
float rij1sq, rij2sq, rij3sq;
...
@@ -85,285 +86,108 @@ kshakeh_fix1(
...
@@ -85,285 +86,108 @@ kshakeh_fix1(
//just set to the coordinates of the first
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
//or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
if( atoms.z > -0.5f ){
// aj2.y = floor( atoms.z / strwidth );
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
mask2 = 1.0f;
}
} else {
else {
aj2 = aj1;
aj2 = aj1;
mask2 = 0.0f;
mask2 = 0.0f;
}
}
if
( atoms.w > -0.5f )
{
if( atoms.w > -0.5f ){
// aj3.y = floor( atoms.w / strwidth );
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
mask3 = 1.0f;
}
} else {
else {
aj3 = aj1;
aj3 = aj1;
mask3 = 0.0f;
mask3 = 0.0f;
}
}
cposq0 = posq[ai];
cposq0
= posq[ai];
cposq1 = posq[aj1];
cposq1
= posq[aj1];
cposq2 = posq[aj2];
cposq2
= posq[aj2];
cposq3 = posq[aj3];
cposq3
= posq[aj3];
xi = cposq0;
xi
= cposq0;
xj1 = cposq1;
xj1
= cposq1;
xj2 = cposq2;
xj2
= cposq2;
xj3 = cposq3;
xj3
= cposq3;
rij1 = xi - xj1;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
/*
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
*/
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
xpi = posqp[ai] - xi;
ld1 = params.z - rij1sq;
xpj1 = posqp[aj1] - xj1;
ld2 = params.z - rij2sq;
xpj2 = posqp[aj2] - xj2;
ld3 = params.z - rij3sq;
xpj3 = posqp[aj3] - xj3;
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
// XXX
// cposq0 = posqp[ai];
// cposq1 = posqp[aj1];
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
i = 0.0f;
converged = 1.0f;
tolerance = 2.0f*inputTolerance;
while( i < maxIterations && converged > 0.0f ){
i = 0.0f;
while ( i < 15.0f ) {
//First hydrogen
//First hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Second hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rpsqij = dot( rpij, rpij );
rrpr = dot( rij1, rpij ); //This is r.deltar
rrpr = dot( rij2, rpij );
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
//for debugging only
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * tolerance );
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
acor = (diff < 1.0f) ? 0.0f : acor;
if ( diff < 1.0f )
converged = acor;
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
//Third hydrogen
dr = rij1 * acor;
rpij = xpi - xpj3;
xpi += dr * params.x;
rpsqij = dot( rpij, rpij );
xpj1 -= dr * invmH;
rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
// for debugging only
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * invmH;
i += 1.0f;
}
//Output modified delta's
//Second hydrogen
cposq0 = xpi;
cposq1 = xpj1;
cposq2 = xpj2;
cposq3 = xpj3;
}
kernel void
kshakeh_fix2(
float nit, //number of iterations
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float4 params<>, // (1/m0, mu/2, blensq, 0.0f )
out float3 cposq0<>, //constrained position for heavy atom
out float3 cposq1<>, //ditto for h1
out float3 cposq2<>, //ditto for h2
out float3 cposq3<> //ditto for h3
) {
float2 ai, aj1, aj2, aj3; //2d indices, can be precalc.
float i; //iteration count
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor;
float mask2, mask3;
float diff;
float rij1sq, rij2sq, rij3sq;
float ld1, ld2, ld3;
// ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
// aj1.y = floor( atoms.y / strwidth );
aj1.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj1.x = atoms.y - aj1.y * strwidth;
//If further hydrogens are absent,
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
}
else {
aj2 = aj1;
mask2 = 0.0f;
}
if ( atoms.w > -0.5f ) {
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
}
else {
aj3 = aj1;
mask3 = 0.0f;
}
cposq0 = posq[ai];
cposq1 = posq[aj1];
cposq2 = posq[aj2];
cposq3 = posq[aj3];
xi = cposq0;
xj1 = cposq1;
xj2 = cposq2;
xj3 = cposq3;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
/*
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
*/
// XXX
rpij = xpi - xpj2;
// cposq0 = posqp[ai];
rpsqij = dot( rpij, rpij );
// cposq1 = posqp[aj1];
rrpr = dot( rij2, rpij );
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
diff = abs( ld2 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
acor = mask2 * ( ld2 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
acor = (diff < 1.0f) ? 0.0f : acor;
converged += acor;
i = 0.0f;
dr = rij2 * acor;
while ( i < 15.0f ) {
xpi += dr * params.x;
//First hydrogen
xpj2 -= dr * invmH;
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Second hydrogen
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij );
//for debugging only
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
//Third hydrogen
//Third hydrogen
rpij = xpi - xpj3;
rp
sq
ij
= dot( rpij, rpij )
;
rpij
= xpi - xpj3
;
r
rpr
= dot( rij
3
, rpij );
r
psqij
= dot( r
p
ij, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f
);
rrpr = dot( rij3, rpij
);
// for debugging only
diff = abs( ld3 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
if ( diff < 1.0f )
acor = mask3 * ( ld3 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
acor
= 0.0f
;
acor
= (diff < 1.0f) ? 0.0f : acor
;
else
converged += acor;
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor;
dr
= rij3 * acor;
xpi += dr * params.x;
xpi
+= dr * params.x;
xpj3 -= dr * invmH;
xpj3
-= dr * invmH;
i += 1.0f;
i += 1.0f;
}
}
//Output modified delta's
//Output modified delta's
cposq0 = xpi;
cposq0 = xpi;
cposq1 = xpj1;
cposq1 = xpj1;
cposq2 = xpj2;
cposq2 = xpj2;
...
@@ -371,7 +195,6 @@ kshakeh_fix2(
...
@@ -371,7 +195,6 @@ kshakeh_fix2(
}
}
kernel void kshakeh_update2_fix1(
kernel void kshakeh_update2_fix1(
float strwidth, //width of cposq streams
float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map
float2 invmap<>, //shakeh inverse map
...
@@ -391,25 +214,24 @@ kernel void kshakeh_update2_fix1(
...
@@ -391,25 +214,24 @@ kernel void kshakeh_update2_fix1(
if ( invmap.y < 0 ){
if ( invmap.y < 0 ){
oposq = posqp - posq;
oposq = posqp - posq;
} else if ( invmap.y < 0.5f )
} else if ( invmap.y < 0.5f )
{
oposq = cposq0[ atom ];
oposq = cposq0[ atom ];
else if ( invmap.y < 1.5f)
}
else if ( invmap.y < 1.5f)
{
oposq = cposq1[ atom ];
oposq = cposq1[ atom ];
else if ( invmap.y < 2.5f )
}
else if ( invmap.y < 2.5f )
{
oposq = cposq2[ atom ];
oposq = cposq2[ atom ];
else if ( invmap.y < 3.5f )
}
else if ( invmap.y < 3.5f )
{
oposq = cposq3[ atom ];
oposq = cposq3[ atom ];
}
oposq += posq;
oposq += posq;
}
}
kernel void kshakeh_update1_fix1(
kernel void kshakeh_update1_fix1(
float strwidth, //width of cposq streams
float strwidth, //width of cposq streams
float sdFactor,
float2 invmap<>, //shakeh inverse map
float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions
float3 posq<>, //old positions
float3 posqp<>, //deltas from sd2
float3 posqp<>, //deltas from sd2
float3 vp<>, //deltas from sd2
float3 cposq0[][], //constrained delta for heavy atom
float3 cposq0[][], //constrained delta for heavy atom
float3 cposq1[][], //ditto for h1
float3 cposq1[][], //ditto for h1
float3 cposq2[][], //ditto for h2
float3 cposq2[][], //ditto for h2
...
@@ -425,13 +247,14 @@ kernel void kshakeh_update1_fix1(
...
@@ -425,13 +247,14 @@ kernel void kshakeh_update1_fix1(
oposq = posq;
oposq = posq;
if ( invmap.y < 0 ){
if ( invmap.y < 0 ){
oposq = posqp;
oposq = posqp;
} else if ( invmap.y < 0.5f )
} else if ( invmap.y < 0.5f )
{
oposq += cposq0[ atom ];
oposq += cposq0[ atom ];
else if ( invmap.y < 1.5f )
}
else if ( invmap.y < 1.5f )
{
oposq += cposq1[ atom ];
oposq += cposq1[ atom ];
else if ( invmap.y < 2.5f )
}
else if ( invmap.y < 2.5f )
{
oposq += cposq2[ atom ];
oposq += cposq2[ atom ];
else if ( invmap.y < 3.5f )
}
else if ( invmap.y < 3.5f )
{
oposq += cposq3[ atom ];
oposq += cposq3[ atom ];
}
}
}
platforms/brook/src/gpu/kshakeh.h
View file @
ef5f9282
...
@@ -29,9 +29,11 @@
...
@@ -29,9 +29,11 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
void
kshakeh_fix1
(
const
float
nit
,
void
kshakeh_fix1
(
const
float
maxIterations
,
const
float
strwidth
,
const
float
strwidth
,
const
float
invmH
,
const
float
invmH
,
const
float
tolerance
,
::
brook
::
stream
atoms
,
::
brook
::
stream
atoms
,
::
brook
::
stream
posq
,
::
brook
::
stream
posq
,
::
brook
::
stream
posqp
,
::
brook
::
stream
posqp
,
...
@@ -43,11 +45,9 @@ void kshakeh_fix1 (const float nit,
...
@@ -43,11 +45,9 @@ void kshakeh_fix1 (const float nit,
void
kshakeh_update1_fix1
(
void
kshakeh_update1_fix1
(
const
float
strwidth
,
const
float
strwidth
,
const
float
sdpc1
,
::
brook
::
stream
invmap
,
::
brook
::
stream
invmap
,
::
brook
::
stream
posq
,
::
brook
::
stream
posq
,
::
brook
::
stream
posqp
,
::
brook
::
stream
posqp
,
::
brook
::
stream
vPrime
,
::
brook
::
stream
cposq0
,
::
brook
::
stream
cposq0
,
::
brook
::
stream
cposq1
,
::
brook
::
stream
cposq1
,
::
brook
::
stream
cposq2
,
::
brook
::
stream
cposq2
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment