Commit ef5f9282 authored by Mark Friedrichs's avatar Mark Friedrichs
Browse files

Mods

parent b7c8afa9
......@@ -50,9 +50,10 @@
* */
kernel void
kshakeh_fix1(
float nit, //number of iterations
float maxIterations, //number of iterations
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float inputTolerance, //tolerance
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
......@@ -67,7 +68,7 @@ kshakeh_fix1(
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor;
float rpsqij, rrpr, acor, tolerance, converged;
float mask2, mask3;
float diff;
float rij1sq, rij2sq, rij3sq;
......@@ -85,285 +86,108 @@ kshakeh_fix1(
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
if( atoms.z > -0.5f ){
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
}
else {
} else {
aj2 = aj1;
mask2 = 0.0f;
}
if ( atoms.w > -0.5f ) {
if( atoms.w > -0.5f ){
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
}
else {
} else {
aj3 = aj1;
mask3 = 0.0f;
}
cposq0 = posq[ai];
cposq1 = posq[aj1];
cposq2 = posq[aj2];
cposq3 = posq[aj3];
cposq0 = posq[ai];
cposq1 = posq[aj1];
cposq2 = posq[aj2];
cposq3 = posq[aj3];
xi = cposq0;
xj1 = cposq1;
xj2 = cposq2;
xj3 = cposq3;
xi = cposq0;
xj1 = cposq1;
xj2 = cposq2;
xj3 = cposq3;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
/*
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
*/
rij1 = xi - xj1;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
// XXX
// cposq0 = posqp[ai];
// cposq1 = posqp[aj1];
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
i = 0.0f;
converged = 1.0f;
tolerance = 2.0f*inputTolerance;
while( i < maxIterations && converged > 0.0f ){
i = 0.0f;
while ( i < 15.0f ) {
//First hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Second hydrogen
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij );
//for debugging only
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * tolerance );
acor = (diff < 1.0f) ? 0.0f : acor;
converged = acor;
//Third hydrogen
rpij = xpi - xpj3;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
// for debugging only
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * invmH;
i += 1.0f;
}
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Output modified delta's
cposq0 = xpi;
cposq1 = xpj1;
cposq2 = xpj2;
cposq3 = xpj3;
}
kernel void
kshakeh_fix2(
float nit, //number of iterations
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float4 params<>, // (1/m0, mu/2, blensq, 0.0f )
out float3 cposq0<>, //constrained position for heavy atom
out float3 cposq1<>, //ditto for h1
out float3 cposq2<>, //ditto for h2
out float3 cposq3<> //ditto for h3
) {
float2 ai, aj1, aj2, aj3; //2d indices, can be precalc.
float i; //iteration count
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor;
float mask2, mask3;
float diff;
float rij1sq, rij2sq, rij3sq;
float ld1, ld2, ld3;
// ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
// aj1.y = floor( atoms.y / strwidth );
aj1.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj1.x = atoms.y - aj1.y * strwidth;
//If further hydrogens are absent,
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
}
else {
aj2 = aj1;
mask2 = 0.0f;
}
if ( atoms.w > -0.5f ) {
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
}
else {
aj3 = aj1;
mask3 = 0.0f;
}
cposq0 = posq[ai];
cposq1 = posq[aj1];
cposq2 = posq[aj2];
cposq3 = posq[aj3];
xi = cposq0;
xj1 = cposq1;
xj2 = cposq2;
xj3 = cposq3;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
/*
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
*/
//Second hydrogen
// XXX
// cposq0 = posqp[ai];
// cposq1 = posqp[aj1];
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij );
diff = abs( ld2 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
acor = mask2 * ( ld2 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
acor = (diff < 1.0f) ? 0.0f : acor;
converged += acor;
i = 0.0f;
while ( i < 15.0f ) {
//First hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Second hydrogen
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij );
//for debugging only
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
//Third hydrogen
rpij = xpi - xpj3;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
// for debugging only
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * invmH;
rpij = xpi - xpj3;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
acor = mask3 * ( ld3 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
acor = (diff < 1.0f) ? 0.0f : acor;
converged += acor;
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * invmH;
i += 1.0f;
}
//Output modified delta's
cposq0 = xpi;
cposq1 = xpj1;
cposq2 = xpj2;
......@@ -371,7 +195,6 @@ kshakeh_fix2(
}
kernel void kshakeh_update2_fix1(
float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map
......@@ -391,25 +214,24 @@ kernel void kshakeh_update2_fix1(
if ( invmap.y < 0 ){
oposq = posqp - posq;
} else if ( invmap.y < 0.5f )
} else if ( invmap.y < 0.5f ){
oposq = cposq0[ atom ];
else if ( invmap.y < 1.5f)
} else if ( invmap.y < 1.5f){
oposq = cposq1[ atom ];
else if ( invmap.y < 2.5f )
} else if ( invmap.y < 2.5f ){
oposq = cposq2[ atom ];
else if ( invmap.y < 3.5f )
} else if ( invmap.y < 3.5f ){
oposq = cposq3[ atom ];
}
oposq += posq;
}
kernel void kshakeh_update1_fix1(
float strwidth, //width of cposq streams
float sdFactor,
float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions
float3 posqp<>, //deltas from sd2
float3 vp<>, //deltas from sd2
float3 cposq0[][], //constrained delta for heavy atom
float3 cposq1[][], //ditto for h1
float3 cposq2[][], //ditto for h2
......@@ -425,13 +247,14 @@ kernel void kshakeh_update1_fix1(
oposq = posq;
if ( invmap.y < 0 ){
oposq = posqp;
} else if ( invmap.y < 0.5f )
} else if ( invmap.y < 0.5f ){
oposq += cposq0[ atom ];
else if ( invmap.y < 1.5f )
} else if ( invmap.y < 1.5f ){
oposq += cposq1[ atom ];
else if ( invmap.y < 2.5f )
} else if ( invmap.y < 2.5f ){
oposq += cposq2[ atom ];
else if ( invmap.y < 3.5f )
} else if ( invmap.y < 3.5f ){
oposq += cposq3[ atom ];
}
}
......@@ -29,9 +29,11 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
void kshakeh_fix1 (const float nit,
void kshakeh_fix1 (
const float maxIterations,
const float strwidth,
const float invmH,
const float tolerance,
::brook::stream atoms,
::brook::stream posq,
::brook::stream posqp,
......@@ -43,11 +45,9 @@ void kshakeh_fix1 (const float nit,
void kshakeh_update1_fix1 (
const float strwidth,
const float sdpc1,
::brook::stream invmap,
::brook::stream posq,
::brook::stream posqp,
::brook::stream vPrime,
::brook::stream cposq0,
::brook::stream cposq1,
::brook::stream cposq2,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment