Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
d3469b87
Commit
d3469b87
authored
Jan 08, 2014
by
peastman
Browse files
Merge pull request #279 from peastman/master
Optimizations to CPU GBSA
parents
6d356ddc
cf52e69f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
42 additions
and
34 deletions
+42
-34
platforms/cpu/src/CpuGBSAOBCForce.cpp
platforms/cpu/src/CpuGBSAOBCForce.cpp
+42
-34
No files found.
platforms/cpu/src/CpuGBSAOBCForce.cpp
View file @
d3469b87
...
...
@@ -249,7 +249,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomCharge
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
fvec4
blockAtomForce
[
4
]
;
fvec4
blockAtomForce
X
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
),
blockAtomBornForce
(
0.0
f
)
;
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomx
[
i
]
=
posq
[
4
*
atomIndex
];
...
...
@@ -257,7 +257,6 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
atomCharge
[
i
]
=
preFactor
*
posq
[
4
*
atomIndex
+
3
];
blockMask
[
i
]
=
0xFFFFFFFF
;
blockAtomForce
[
i
]
=
0.0
f
;
}
fvec4
radii
(
&
bornRadii
[
blockStart
]);
fvec4
x
(
atomx
);
...
...
@@ -277,35 +276,40 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
fvec4
r
=
sqrt
(
r2
);
fvec4
alpha2_ij
=
radii
*
bornRadii
[
atomJ
];
fvec4
D_ij
=
r2
/
(
4.0
f
*
alpha2_ij
);
fvec4
expTerm
(
exp
(
-
D_ij
[
0
]),
exp
(
-
D_ij
[
1
]),
exp
(
-
D_ij
[
2
]),
exp
(
-
D_ij
[
3
]));
fvec4
expTerm
(
exp
f
(
-
D_ij
[
0
]),
exp
f
(
-
D_ij
[
1
]),
exp
f
(
-
D_ij
[
2
]),
exp
f
(
-
D_ij
[
3
]));
fvec4
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
fvec4
denominator
=
sqrt
(
denominator2
);
fvec4
Gpol
=
(
partialChargeI
*
posJ
[
3
])
/
denominator
;
fvec4
dGpol_dr
=
-
Gpol
*
(
1.0
f
-
0.25
f
*
expTerm
)
/
denominator2
;
fvec4
dGpol_dalpha2_ij
=
-
0.5
f
*
Gpol
*
expTerm
*
(
1.0
f
+
D_ij
)
/
denominator2
;
fvec4
result
[
4
]
=
{
dx
*
dGpol_dr
,
dy
*
dGpol_dr
,
dz
*
dGpol_dr
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atomJ
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
float
termEnergy
=
Gpol
[
j
];
if
(
blockStart
+
j
!=
atomJ
)
{
if
(
cutoff
)
termEnergy
-=
partialChargeI
[
j
]
*
posJ
[
3
]
/
cutoffDistance
;
bornForces
[
atomJ
]
+=
dGpol_dalpha2_ij
[
j
]
*
radii
[
j
];
blockAtomForce
[
j
]
-=
result
[
j
];
(
fvec4
(
forces
+
4
*
atomJ
)
+
result
[
j
]).
store
(
forces
+
4
*
atomJ
);
}
else
termEnergy
*=
0.5
f
;
energy
+=
termEnergy
;
bornForces
[
blockStart
+
j
]
+=
dGpol_dalpha2_ij
[
j
]
*
bornRadii
[
atomJ
];
}
}
dGpol_dr
=
blend
(
0.0
f
,
dGpol_dr
,
include
);
dGpol_dalpha2_ij
=
blend
(
0.0
f
,
dGpol_dalpha2_ij
,
include
);
fvec4
fx
=
dx
*
dGpol_dr
;
fvec4
fy
=
dy
*
dGpol_dr
;
fvec4
fz
=
dz
*
dGpol_dr
;
blockAtomForceX
-=
fx
;
blockAtomForceY
-=
fy
;
blockAtomForceZ
-=
fz
;
blockAtomBornForce
+=
dGpol_dalpha2_ij
*
bornRadii
[
atomJ
];
float
*
atomForce
=
forces
+
4
*
atomJ
;
fvec4
one
(
1.0
f
);
atomForce
[
0
]
+=
dot4
(
fx
,
one
);
atomForce
[
1
]
+=
dot4
(
fy
,
one
);
atomForce
[
2
]
+=
dot4
(
fz
,
one
);
ivec4
atomJMask
=
include
&
(
blockAtomIndex
!=
ivec4
(
atomJ
));
fvec4
termEnergy
=
blend
(
0.0
f
,
Gpol
,
include
);
if
(
cutoff
)
termEnergy
-=
blend
(
0.0
f
,
partialChargeI
*
posJ
[
3
]
/
cutoffDistance
,
atomJMask
);
termEnergy
*=
blend
(
0.5
f
,
1.0
f
,
atomJMask
);
energy
+=
dot4
(
termEnergy
,
one
);
bornForces
[
atomJ
]
+=
dot4
(
blend
(
0.0
f
,
dGpol_dalpha2_ij
,
atomJMask
),
radii
);
}
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
(
fvec4
(
forces
+
4
*
atomIndex
)
+
blockAtomForce
[
i
]).
store
(
forces
+
4
*
atomIndex
);
(
fvec4
(
forces
+
4
*
atomIndex
)
+
f
[
i
]).
store
(
forces
+
4
*
atomIndex
);
bornForces
[
atomIndex
]
+=
blockAtomBornForce
[
i
];
}
}
threads
.
syncThreads
();
...
...
@@ -325,7 +329,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
ivec4
blockAtomIndex
(
blockStart
,
blockStart
+
1
,
blockStart
+
2
,
blockStart
+
3
);
float
atomRadius
[
4
],
atomx
[
4
],
atomy
[
4
],
atomz
[
4
];
int
blockMask
[
4
]
=
{
0
,
0
,
0
,
0
};
fvec4
blockAtomForce
[
4
]
;
fvec4
blockAtomForce
X
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
)
;
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
atomRadius
[
i
]
=
particleParams
[
atomIndex
].
first
;
...
...
@@ -333,7 +337,6 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
atomy
[
i
]
=
posq
[
4
*
atomIndex
+
1
];
atomz
[
i
]
=
posq
[
4
*
atomIndex
+
2
];
blockMask
[
i
]
=
0xFFFFFFFF
;
blockAtomForce
[
i
]
=
0.0
f
;
}
fvec4
offsetRadiusI
(
atomRadius
);
fvec4
x
(
atomx
);
...
...
@@ -364,18 +367,23 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
fvec4
t3
=
0.125
f
*
(
1.0
f
+
scaledRadiusJ2
*
r2Inverse
)
*
(
l_ij2
-
u_ij2
)
+
0.25
f
*
logRatio
*
r2Inverse
;
fvec4
de
=
bornForce
*
t3
*
rInverse
;
de
=
blend
(
0.0
f
,
de
,
include
);
fvec4
result
[
4
]
=
{
dx
*
de
,
dy
*
de
,
dz
*
de
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atomJ
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
atomForce
.
store
(
forces
+
4
*
atomJ
);
fvec4
fx
=
dx
*
de
;
fvec4
fy
=
dy
*
de
;
fvec4
fz
=
dz
*
de
;
blockAtomForceX
+=
fx
;
blockAtomForceY
+=
fy
;
blockAtomForceZ
+=
fz
;
float
*
atomForce
=
forces
+
4
*
atomJ
;
fvec4
one
(
1.0
f
);
atomForce
[
0
]
-=
dot4
(
fx
,
one
);
atomForce
[
1
]
-=
dot4
(
fy
,
one
);
atomForce
[
2
]
-=
dot4
(
fz
,
one
);
}
fvec4
f
[
4
]
=
{
blockAtomForceX
,
blockAtomForceY
,
blockAtomForceZ
,
0.0
f
};
transpose
(
f
[
0
],
f
[
1
],
f
[
2
],
f
[
3
]);
for
(
int
i
=
0
;
i
<
numInBlock
;
i
++
)
{
int
atomIndex
=
blockStart
+
i
;
(
fvec4
(
forces
+
4
*
atomIndex
)
+
blockAtomForce
[
i
]).
store
(
forces
+
4
*
atomIndex
);
(
fvec4
(
forces
+
4
*
atomIndex
)
+
f
[
i
]).
store
(
forces
+
4
*
atomIndex
);
}
}
threadEnergy
[
threadIndex
]
=
energy
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment