Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ef5f9282
"platforms/reference/tests/TestReferenceCheckpoints.cpp" did not exist on "27a2456b0fa62eb3df0c3dedbcd3af9ff86a1ec8"
Commit
ef5f9282
authored
Dec 29, 2008
by
Mark Friedrichs
Browse files
Mods
parent
b7c8afa9
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
80 additions
and
257 deletions
+80
-257
platforms/brook/src/gpu/kshakeh.br
platforms/brook/src/gpu/kshakeh.br
+77
-254
platforms/brook/src/gpu/kshakeh.h
platforms/brook/src/gpu/kshakeh.h
+3
-3
No files found.
platforms/brook/src/gpu/kshakeh.br
View file @
ef5f9282
...
@@ -50,9 +50,10 @@
...
@@ -50,9 +50,10 @@
* */
* */
kernel void
kernel void
kshakeh_fix1(
kshakeh_fix1(
float
nit
, //number of iterations
float
maxIterations
, //number of iterations
float strwidth, //stream width of posq
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float invmH, //inverse mass of hydrogen
float inputTolerance, //tolerance
float4 atoms<>, //heavy0, h1, h2, h3
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float3 posqp[][], //changes to positions
...
@@ -67,7 +68,7 @@ kshakeh_fix1(
...
@@ -67,7 +68,7 @@ kshakeh_fix1(
float3 xi, xj1, xj2, xj3; //coordinates
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor;
float rpsqij, rrpr, acor
, tolerance, converged
;
float mask2, mask3;
float mask2, mask3;
float diff;
float diff;
float rij1sq, rij2sq, rij3sq;
float rij1sq, rij2sq, rij3sq;
...
@@ -85,285 +86,108 @@ kshakeh_fix1(
...
@@ -85,285 +86,108 @@ kshakeh_fix1(
//just set to the coordinates of the first
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
//or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
if( atoms.z > -0.5f ){
// aj2.y = floor( atoms.z / strwidth );
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
mask2 = 1.0f;
}
} else {
else {
aj2 = aj1;
aj2 = aj1;
mask2 = 0.0f;
mask2 = 0.0f;
}
}
if
( atoms.w > -0.5f )
{
if( atoms.w > -0.5f ){
// aj3.y = floor( atoms.w / strwidth );
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
mask3 = 1.0f;
}
} else {
else {
aj3 = aj1;
aj3 = aj1;
mask3 = 0.0f;
mask3 = 0.0f;
}
}
cposq0 = posq[ai];
cposq0
= posq[ai];
cposq1 = posq[aj1];
cposq1
= posq[aj1];
cposq2 = posq[aj2];
cposq2
= posq[aj2];
cposq3 = posq[aj3];
cposq3
= posq[aj3];
xi = cposq0;
xi
= cposq0;
xj1 = cposq1;
xj1
= cposq1;
xj2 = cposq2;
xj2
= cposq2;
xj3 = cposq3;
xj3
= cposq3;
rij1 = xi - xj1;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
/*
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
*/
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
xpi = posqp[ai] - xi;
ld1 = params.z - rij1sq;
xpj1 = posqp[aj1] - xj1;
ld2 = params.z - rij2sq;
xpj2 = posqp[aj2] - xj2;
ld3 = params.z - rij3sq;
xpj3 = posqp[aj3] - xj3;
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
// XXX
// cposq0 = posqp[ai];
// cposq1 = posqp[aj1];
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
i = 0.0f;
converged = 1.0f;
tolerance = 2.0f*inputTolerance;
while( i < maxIterations && converged > 0.0f ){
i = 0.0f;
while ( i < 15.0f ) {
//First hydrogen
//First hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Second hydrogen
rpij = xpi - xpj1; //This is really rpij - rij
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rpsqij = dot( rpij, rpij );
rrpr = dot( rij1, rpij ); //This is r.deltar
rrpr = dot( rij2, rpij );
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
//for debugging only
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * tolerance );
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
acor = (diff < 1.0f) ? 0.0f : acor;
if ( diff < 1.0f )
converged = acor;
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
//Third hydrogen
dr = rij1 * acor;
rpij = xpi - xpj3;
xpi += dr * params.x;
rpsqij = dot( rpij, rpij );
xpj1 -= dr * invmH;
rrpr = dot( rij3, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
// for debugging only
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor;
xpi += dr * params.x;
xpj3 -= dr * invmH;
i += 1.0f;
}
//Output modified delta's
//Second hydrogen
cposq0 = xpi;
cposq1 = xpj1;
cposq2 = xpj2;
cposq3 = xpj3;
}
kernel void
kshakeh_fix2(
float nit, //number of iterations
float strwidth, //stream width of posq
float invmH, //inverse mass of hydrogen
float4 atoms<>, //heavy0, h1, h2, h3
float3 posq[][], //positions before update
float3 posqp[][], //changes to positions
float4 params<>, // (1/m0, mu/2, blensq, 0.0f )
out float3 cposq0<>, //constrained position for heavy atom
out float3 cposq1<>, //ditto for h1
out float3 cposq2<>, //ditto for h2
out float3 cposq3<> //ditto for h3
) {
float2 ai, aj1, aj2, aj3; //2d indices, can be precalc.
float i; //iteration count
float3 xi, xj1, xj2, xj3; //coordinates
float3 xpi, xpj1, xpj2, xpj3; //coordinates
float3 rij1, rij2, rij3, rpij, dr;
float rpsqij, rrpr, acor;
float mask2, mask3;
float diff;
float rij1sq, rij2sq, rij3sq;
float ld1, ld2, ld3;
// ai.y = floor( atoms.x / strwidth );
ai.y = round( (atoms.x - fmod( atoms.x, strwidth ))/strwidth );
ai.x = atoms.x - ai.y * strwidth;
// aj1.y = floor( atoms.y / strwidth );
aj1.y = round( (atoms.y - fmod( atoms.y, strwidth ))/strwidth );
aj1.x = atoms.y - aj1.y * strwidth;
//If further hydrogens are absent,
//just set to the coordinates of the first
//so we don't have any junk memory accesses
//or nans/infs in the calcs.
if ( atoms.z > -0.5f ) {
// aj2.y = floor( atoms.z / strwidth );
aj2.y = round( (atoms.z - fmod( atoms.z, strwidth ))/strwidth );
aj2.x = atoms.z - aj2.y * strwidth;
mask2 = 1.0f;
}
else {
aj2 = aj1;
mask2 = 0.0f;
}
if ( atoms.w > -0.5f ) {
// aj3.y = floor( atoms.w / strwidth );
aj3.y = round( (atoms.w - fmod( atoms.w, strwidth ))/strwidth );
aj3.x = atoms.w - aj3.y * strwidth;
mask3 = 1.0f;
}
else {
aj3 = aj1;
mask3 = 0.0f;
}
cposq0 = posq[ai];
cposq1 = posq[aj1];
cposq2 = posq[aj2];
cposq3 = posq[aj3];
xi = cposq0;
xj1 = cposq1;
xj2 = cposq2;
xj3 = cposq3;
rij1 = xi - xj1;
rij2 = xi - xj2;
rij3 = xi - xj3;
rij1sq = dot( rij1, rij1 );
rij2sq = dot( rij2, rij2 );
rij3sq = dot( rij3, rij3 );
ld1 = params.z - rij1sq;
ld2 = params.z - rij2sq;
ld3 = params.z - rij3sq;
xpi = posqp[ai];
xpj1 = posqp[aj1];
xpj2 = posqp[aj2];
xpj3 = posqp[aj3];
/*
xpi = posqp[ai] - xi;
xpj1 = posqp[aj1] - xj1;
xpj2 = posqp[aj2] - xj2;
xpj3 = posqp[aj3] - xj3;
*/
// XXX
rpij = xpi - xpj2;
// cposq0 = posqp[ai];
rpsqij = dot( rpij, rpij );
// cposq1 = posqp[aj1];
rrpr = dot( rij2, rpij );
// cposq2 = posqp[aj2];
// cposq3 = posqp[aj3];
// OK
// XXX
diff = abs( ld2 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
acor = mask2 * ( ld2 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
acor = (diff < 1.0f) ? 0.0f : acor;
converged += acor;
i = 0.0f;
dr = rij2 * acor;
while ( i < 15.0f ) {
xpi += dr * params.x;
//First hydrogen
xpj2 -= dr * invmH;
rpij = xpi - xpj1; //This is really rpij - rij
rpsqij = dot( rpij, rpij ); //This is really deltar ^ 2
rrpr = dot( rij1, rpij ); //This is r.deltar
//for debugging only
//params.y = mu/2, params.z = blen*blen
diff = abs( ld1 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = ( ld1 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij1sq ) ;
dr = rij1 * acor;
xpi += dr * params.x;
xpj1 -= dr * invmH;
//Second hydrogen
rpij = xpi - xpj2;
rpsqij = dot( rpij, rpij );
rrpr = dot( rij2, rpij );
//for debugging only
diff = abs( ld2 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f );
if ( diff < 1.0f )
acor = 0.0f;
else
acor = mask2 * ( ld2 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij2sq ) ;
dr = rij2 * acor;
xpi += dr * params.x;
xpj2 -= dr * invmH;
//Third hydrogen
//Third hydrogen
rpij = xpi - xpj3;
rp
sq
ij
= dot( rpij, rpij )
;
rpij
= xpi - xpj3
;
r
rpr
= dot( rij
3
, rpij );
r
psqij
= dot( r
p
ij, rpij );
diff = abs( ld3 - 2 * rrpr - rpsqij ) / (params.z * 2 * 1e-4f
);
rrpr = dot( rij3, rpij
);
// for debugging only
diff = abs( ld3 - 2.0f * rrpr - rpsqij ) / (params.z * tolerance );
if ( diff < 1.0f )
acor = mask3 * ( ld3 - 2.0f * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
acor
= 0.0f
;
acor
= (diff < 1.0f) ? 0.0f : acor
;
else
converged += acor;
acor = mask3 * ( ld3 - 2 * rrpr - rpsqij ) * params.y / ( rrpr + rij3sq ) ;
dr = rij3 * acor;
dr
= rij3 * acor;
xpi += dr * params.x;
xpi
+= dr * params.x;
xpj3 -= dr * invmH;
xpj3
-= dr * invmH;
i += 1.0f;
i += 1.0f;
}
}
//Output modified delta's
//Output modified delta's
cposq0 = xpi;
cposq0 = xpi;
cposq1 = xpj1;
cposq1 = xpj1;
cposq2 = xpj2;
cposq2 = xpj2;
...
@@ -371,7 +195,6 @@ kshakeh_fix2(
...
@@ -371,7 +195,6 @@ kshakeh_fix2(
}
}
kernel void kshakeh_update2_fix1(
kernel void kshakeh_update2_fix1(
float strwidth, //width of cposq streams
float strwidth, //width of cposq streams
float2 invmap<>, //shakeh inverse map
float2 invmap<>, //shakeh inverse map
...
@@ -391,25 +214,24 @@ kernel void kshakeh_update2_fix1(
...
@@ -391,25 +214,24 @@ kernel void kshakeh_update2_fix1(
if ( invmap.y < 0 ){
if ( invmap.y < 0 ){
oposq = posqp - posq;
oposq = posqp - posq;
} else if ( invmap.y < 0.5f )
} else if ( invmap.y < 0.5f )
{
oposq = cposq0[ atom ];
oposq = cposq0[ atom ];
else if ( invmap.y < 1.5f)
}
else if ( invmap.y < 1.5f)
{
oposq = cposq1[ atom ];
oposq = cposq1[ atom ];
else if ( invmap.y < 2.5f )
}
else if ( invmap.y < 2.5f )
{
oposq = cposq2[ atom ];
oposq = cposq2[ atom ];
else if ( invmap.y < 3.5f )
}
else if ( invmap.y < 3.5f )
{
oposq = cposq3[ atom ];
oposq = cposq3[ atom ];
}
oposq += posq;
oposq += posq;
}
}
kernel void kshakeh_update1_fix1(
kernel void kshakeh_update1_fix1(
float strwidth, //width of cposq streams
float strwidth, //width of cposq streams
float sdFactor,
float2 invmap<>, //shakeh inverse map
float2 invmap<>, //shakeh inverse map
float3 posq<>, //old positions
float3 posq<>, //old positions
float3 posqp<>, //deltas from sd2
float3 posqp<>, //deltas from sd2
float3 vp<>, //deltas from sd2
float3 cposq0[][], //constrained delta for heavy atom
float3 cposq0[][], //constrained delta for heavy atom
float3 cposq1[][], //ditto for h1
float3 cposq1[][], //ditto for h1
float3 cposq2[][], //ditto for h2
float3 cposq2[][], //ditto for h2
...
@@ -425,13 +247,14 @@ kernel void kshakeh_update1_fix1(
...
@@ -425,13 +247,14 @@ kernel void kshakeh_update1_fix1(
oposq = posq;
oposq = posq;
if ( invmap.y < 0 ){
if ( invmap.y < 0 ){
oposq = posqp;
oposq = posqp;
} else if ( invmap.y < 0.5f )
} else if ( invmap.y < 0.5f )
{
oposq += cposq0[ atom ];
oposq += cposq0[ atom ];
else if ( invmap.y < 1.5f )
}
else if ( invmap.y < 1.5f )
{
oposq += cposq1[ atom ];
oposq += cposq1[ atom ];
else if ( invmap.y < 2.5f )
}
else if ( invmap.y < 2.5f )
{
oposq += cposq2[ atom ];
oposq += cposq2[ atom ];
else if ( invmap.y < 3.5f )
}
else if ( invmap.y < 3.5f )
{
oposq += cposq3[ atom ];
oposq += cposq3[ atom ];
}
}
}
platforms/brook/src/gpu/kshakeh.h
View file @
ef5f9282
...
@@ -29,9 +29,11 @@
...
@@ -29,9 +29,11 @@
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
void
kshakeh_fix1
(
const
float
nit
,
void
kshakeh_fix1
(
const
float
maxIterations
,
const
float
strwidth
,
const
float
strwidth
,
const
float
invmH
,
const
float
invmH
,
const
float
tolerance
,
::
brook
::
stream
atoms
,
::
brook
::
stream
atoms
,
::
brook
::
stream
posq
,
::
brook
::
stream
posq
,
::
brook
::
stream
posqp
,
::
brook
::
stream
posqp
,
...
@@ -43,11 +45,9 @@ void kshakeh_fix1 (const float nit,
...
@@ -43,11 +45,9 @@ void kshakeh_fix1 (const float nit,
void
kshakeh_update1_fix1
(
void
kshakeh_update1_fix1
(
const
float
strwidth
,
const
float
strwidth
,
const
float
sdpc1
,
::
brook
::
stream
invmap
,
::
brook
::
stream
invmap
,
::
brook
::
stream
posq
,
::
brook
::
stream
posq
,
::
brook
::
stream
posqp
,
::
brook
::
stream
posqp
,
::
brook
::
stream
vPrime
,
::
brook
::
stream
cposq0
,
::
brook
::
stream
cposq0
,
::
brook
::
stream
cposq1
,
::
brook
::
stream
cposq1
,
::
brook
::
stream
cposq2
,
::
brook
::
stream
cposq2
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment