Commit ef5f9282 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Mods

parent b7c8afa9
...@@ -50,9 +50,10 @@ ...@@ -50,9 +50,10 @@
* */ * */
kernel void kernel void
kshakeh_fix1( kshakeh_fix1(
float nit, //number of iterations float maxIterations, //number of iterations
float strwidth, //stream width of posq float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen float invmH, //inverse mass of hydrogen
float inputTolerance, //tolerance
float4 atoms<>, //heavy0, h1, h2, h3 float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update float3 posq[][], //positions before update
float3 posqp[][], //changes to positions float3 posqp[][], //changes to positions
...@@ -67,7 +68,7 @@ kshakeh_fix1( ...@@ -67,7 +68,7 @@ kshakeh_fix1(
float3 xi, xj1, xj2, xj3; //coordinates float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr; float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor; float rpsqij, rrpr, acor, tolerance, converged;
float mask2, mask3; float mask2, mask3;
float diff; float diff;
float rij1sq, rij2sq, rij3sq; float rij1sq, rij2sq, rij3sq;
...@@ -85,24 +86,23 @@ kshakeh_fix1( ...@@ -85,24 +86,23 @@ kshakeh_fix1(
//just set to the coordinates of the first //just set to the coordinates of the first
//so we don't have any junk memory accesses //so we don't have any junk memory accesses
//or nans/infs in the calcs. //or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
if( atoms.z > -0.5f ){
// aj2.y = floor( atoms.z / strwidth ); // aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth ); aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth; aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f; mask2 = 1.0f;
} } else {
else {
aj2 = aj1; aj2 = aj1;
mask2 = 0.0f; mask2 = 0.0f;
} }
if ( atoms.w > -0.5f ) { if( atoms.w > -0.5f ){
// aj3.y = floor( atoms.w / strwidth ); // aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth ); aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth; aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f; mask3 = 1.0f;
} } else {
else {
aj3 = aj1; aj3 = aj1;
mask3 = 0.0f; mask3 = 0.0f;
} }
...@@ -129,233 +129,56 @@ kshakeh_fix1( ...@@ -129,233 +129,56 @@ kshakeh_fix1(
ld2 = params.z - rij2sq; ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq; ld3 = params.z - rij3sq;
/*
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
*/
xpi = posqp[ai] - xi; xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1; xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2; xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3; xpj3 = posqp[aj3] - xj3;
// XXX
// cposq0 = posqp[ai];
// cposq1 = posqp[aj1];
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
i = 0.0f; i = 0.0f;
while ( i < 15.0f ) { converged = 1.0f;
tolerance = 2.0f*inputTolerance;
while( i < maxIterations && converged > 0.0f ){
//First hydrogen //First hydrogen
rpij = xpi - xpj1; //This is really rpij - rij rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2 rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ; acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * tolerance );
acor = (diff < 1.0f) ? 0.0f : acor;
converged = acor;
dr = rij1 * acor; dr = rij1 * acor;
xpi += dr * params.x; xpi += dr * params.x;
xpj1 -= dr * invmH; xpj1 -= dr * invmH;
//Second hydrogen //Second hydrogen
rpij = xpi - xpj2; rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij ); rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij ); rrpr = dot( rij2, rpij );
//for debugging only
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
//Third hydrogen
rpij = xpi - xpj3;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
// for debugging only
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * invmH;
i += 1.0f;
}
//Output modified delta's diff = abs( ld2 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
cposq0 = xpi; acor = mask2 * ( ld2 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
cposq1 = xpj1; acor = (diff < 1.0f) ? 0.0f : acor;
cposq2 = xpj2; converged += acor;
cposq3 = xpj3;
}
kernel void
kshakeh_fix2(
float nit, //number of iterations
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float4 params<>, // (1/m0, mu/2, blensq, 0.0f )
out float3 cposq0<>, //constrained position for heavy atom
out float3 cposq1<>, //ditto for h1
out float3 cposq2<>, //ditto for h2
out float3 cposq3<> //ditto for h3
) {
float2 ai, aj1, aj2, aj3; //2d indices, can be precalc.
float i; //iteration count
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor;
float mask2, mask3;
float diff;
float rij1sq, rij2sq, rij3sq;
float ld1, ld2, ld3;
// ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
// aj1.y = floor( atoms.y / strwidth );
aj1.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj1.x = atoms.y - aj1.y * strwidth;
//If further hydrogens are absent,
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
}
else {
aj2 = aj1;
mask2 = 0.0f;
}
if ( atoms.w > -0.5f ) {
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
}
else {
aj3 = aj1;
mask3 = 0.0f;
}
cposq0 = posq[ai];
cposq1 = posq[aj1];
cposq2 = posq[aj2];
cposq3 = posq[aj3];
xi = cposq0;
xj1 = cposq1;
xj2 = cposq2;
xj3 = cposq3;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
/*
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
*/
// XXX
// cposq0 = posqp[ai];
// cposq1 = posqp[aj1];
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
i = 0.0f;
while ( i < 15.0f ) {
//First hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Second hydrogen
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij );
//for debugging only
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor; dr = rij2 * acor;
xpi += dr * params.x; xpi += dr * params.x;
xpj2 -= dr * invmH; xpj2 -= dr * invmH;
//Third hydrogen //Third hydrogen
rpij = xpi - xpj3; rpij = xpi - xpj3;
rpsqij = dot( rpij, rpij ); rpsqij = dot( rpij, rpij );
rrpr = dot( rij3, rpij ); rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f ); diff = abs( ld3 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
// for debugging only acor = mask3 * ( ld3 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
if ( diff < 1.0f ) acor = (diff < 1.0f) ? 0.0f : acor;
acor = 0.0f; converged += acor;
else
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor; dr = rij3 * acor;
xpi += dr * params.x; xpi += dr * params.x;
xpj3 -= dr * invmH; xpj3 -= dr * invmH;
...@@ -364,6 +187,7 @@ kshakeh_fix2( ...@@ -364,6 +187,7 @@ kshakeh_fix2(
} }
//Output modified delta's //Output modified delta's
cposq0 = xpi; cposq0 = xpi;
cposq1 = xpj1; cposq1 = xpj1;
cposq2 = xpj2; cposq2 = xpj2;
...@@ -371,7 +195,6 @@ kshakeh_fix2( ...@@ -371,7 +195,6 @@ kshakeh_fix2(
} }
kernel void kshakeh_update2_fix1( kernel void kshakeh_update2_fix1(
float strwidth, //width of cposq streams float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map float2 invmap<>, //shakeh inverse map
...@@ -391,25 +214,24 @@ kernel void kshakeh_update2_fix1( ...@@ -391,25 +214,24 @@ kernel void kshakeh_update2_fix1(
if ( invmap.y < 0 ){ if ( invmap.y < 0 ){
oposq = posqp - posq; oposq = posqp - posq;
} else if ( invmap.y < 0.5f ) } else if ( invmap.y < 0.5f ){
oposq = cposq0[ atom ]; oposq = cposq0[ atom ];
else if ( invmap.y < 1.5f) } else if ( invmap.y < 1.5f){
oposq = cposq1[ atom ]; oposq = cposq1[ atom ];
else if ( invmap.y < 2.5f ) } else if ( invmap.y < 2.5f ){
oposq = cposq2[ atom ]; oposq = cposq2[ atom ];
else if ( invmap.y < 3.5f ) } else if ( invmap.y < 3.5f ){
oposq = cposq3[ atom ]; oposq = cposq3[ atom ];
}
oposq += posq; oposq += posq;
} }
kernel void kshakeh_update1_fix1( kernel void kshakeh_update1_fix1(
float strwidth, //width of cposq streams float strwidth, //width of cposq streams
float sdFactor,
float2 invmap<>, //shakeh inverse map float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions float3 posq<>, //old positions
float3 posqp<>, //deltas from sd2 float3 posqp<>, //deltas from sd2
float3 vp<>, //deltas from sd2
float3 cposq0[][], //constrained delta for heavy atom float3 cposq0[][], //constrained delta for heavy atom
float3 cposq1[][], //ditto for h1 float3 cposq1[][], //ditto for h1
float3 cposq2[][], //ditto for h2 float3 cposq2[][], //ditto for h2
...@@ -425,13 +247,14 @@ kernel void kshakeh_update1_fix1( ...@@ -425,13 +247,14 @@ kernel void kshakeh_update1_fix1(
oposq = posq; oposq = posq;
if ( invmap.y < 0 ){ if ( invmap.y < 0 ){
oposq = posqp; oposq = posqp;
} else if ( invmap.y < 0.5f ) } else if ( invmap.y < 0.5f ){
oposq += cposq0[ atom ]; oposq += cposq0[ atom ];
else if ( invmap.y < 1.5f ) } else if ( invmap.y < 1.5f ){
oposq += cposq1[ atom ]; oposq += cposq1[ atom ];
else if ( invmap.y < 2.5f ) } else if ( invmap.y < 2.5f ){
oposq += cposq2[ atom ]; oposq += cposq2[ atom ];
else if ( invmap.y < 3.5f ) } else if ( invmap.y < 3.5f ){
oposq += cposq3[ atom ]; oposq += cposq3[ atom ];
}
} }
...@@ -29,9 +29,11 @@ ...@@ -29,9 +29,11 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. * * USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
void kshakeh_fix1 (const float nit, void kshakeh_fix1 (
const float maxIterations,
const float strwidth, const float strwidth,
const float invmH, const float invmH,
const float tolerance,
::brook::stream atoms, ::brook::stream atoms,
::brook::stream posq, ::brook::stream posq,
::brook::stream posqp, ::brook::stream posqp,
...@@ -43,11 +45,9 @@ void kshakeh_fix1 (const float nit, ...@@ -43,11 +45,9 @@ void kshakeh_fix1 (const float nit,
void kshakeh_update1_fix1 ( void kshakeh_update1_fix1 (
const float strwidth, const float strwidth,
const float sdpc1,
::brook::stream invmap, ::brook::stream invmap,
::brook::stream posq, ::brook::stream posq,
::brook::stream posqp, ::brook::stream posqp,
::brook::stream vPrime,
::brook::stream cposq0, ::brook::stream cposq0,
::brook::stream cposq1, ::brook::stream cposq1,
::brook::stream cposq2, ::brook::stream cposq2,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment