Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
fd2708b4
Commit
fd2708b4
authored
Nov 07, 2013
by
peastman
Browse files
Merge pull request #212 from peastman/master
Tabulated log(x) to improve GB performance
parents
59893e57
f39a5545
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
53 additions
and
4 deletions
+53
-4
platforms/cpu/include/CpuGBSAOBCForce.h
platforms/cpu/include/CpuGBSAOBCForce.h
+9
-0
platforms/cpu/src/CpuGBSAOBCForce.cpp
platforms/cpu/src/CpuGBSAOBCForce.cpp
+44
-4
No files found.
platforms/cpu/include/CpuGBSAOBCForce.h
View file @
fd2708b4
...
@@ -101,16 +101,25 @@ private:
...
@@ -101,16 +101,25 @@ private:
std
::
vector
<
std
::
vector
<
float
>
>
threadBornForces
;
std
::
vector
<
std
::
vector
<
float
>
>
threadBornForces
;
std
::
vector
<
float
>
obcChain
;
std
::
vector
<
float
>
obcChain
;
std
::
vector
<
double
>
threadEnergy
;
std
::
vector
<
double
>
threadEnergy
;
std
::
vector
<
float
>
logTable
;
float
logDX
,
logDXInv
;
// The following variables are used to make information accessible to the individual threads.
// The following variables are used to make information accessible to the individual threads.
float
const
*
posq
;
float
const
*
posq
;
std
::
vector
<
std
::
vector
<
float
>
>*
threadForce
;
std
::
vector
<
std
::
vector
<
float
>
>*
threadForce
;
bool
includeEnergy
;
bool
includeEnergy
;
static
const
int
NUM_TABLE_POINTS
;
/**
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* periodic boundary conditions.
*/
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Evaluate log(x) using a lookup table for speed.
*/
fvec4
fastLog
(
fvec4
x
);
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cpu/src/CpuGBSAOBCForce.cpp
View file @
fd2708b4
...
@@ -24,12 +24,15 @@
...
@@ -24,12 +24,15 @@
#include "CpuGBSAOBCForce.h"
#include "CpuGBSAOBCForce.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h"
#include "openmm/internal/vectorize.h"
#include <cmath>
#include <cmath>
using
namespace
std
;
using
namespace
std
;
using
namespace
OpenMM
;
using
namespace
OpenMM
;
const
int
CpuGBSAOBCForce
::
NUM_TABLE_POINTS
=
1025
;
class
CpuGBSAOBCForce
::
ComputeTask
:
public
ThreadPool
::
Task
{
class
CpuGBSAOBCForce
::
ComputeTask
:
public
ThreadPool
::
Task
{
public:
public:
ComputeTask
(
CpuGBSAOBCForce
&
owner
)
:
owner
(
owner
)
{
ComputeTask
(
CpuGBSAOBCForce
&
owner
)
:
owner
(
owner
)
{
...
@@ -41,6 +44,23 @@ public:
...
@@ -41,6 +44,23 @@ public:
};
};
CpuGBSAOBCForce
::
CpuGBSAOBCForce
()
:
cutoff
(
false
),
periodic
(
false
)
{
CpuGBSAOBCForce
::
CpuGBSAOBCForce
()
:
cutoff
(
false
),
periodic
(
false
)
{
logDX
=
0.5
/
NUM_TABLE_POINTS
;
logDXInv
=
1.0
f
/
logDX
;
vector
<
double
>
x
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
deriv
;
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
x
[
i
]
=
0.5
+
i
*
0.5
/
NUM_TABLE_POINTS
;
y
[
i
]
=
log
(
x
[
i
]);
}
SplineFitter
::
createNaturalSpline
(
x
,
y
,
deriv
);
logTable
.
resize
(
4
*
NUM_TABLE_POINTS
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
logTable
[
4
*
i
]
=
(
float
)
y
[
i
];
logTable
[
4
*
i
+
1
]
=
(
float
)
y
[
i
+
1
];
logTable
[
4
*
i
+
2
]
=
(
float
)
(
deriv
[
i
]
*
logDX
*
logDX
/
6
);
logTable
[
4
*
i
+
3
]
=
(
float
)
(
deriv
[
i
+
1
]
*
logDX
*
logDX
/
6
);
}
}
}
void
CpuGBSAOBCForce
::
setUseCutoff
(
float
distance
)
{
void
CpuGBSAOBCForce
::
setUseCutoff
(
float
distance
)
{
...
@@ -161,8 +181,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
...
@@ -161,8 +181,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
ratio
=
u_ij
/
l_ij
;
fvec4
logRatio
=
fastLog
(
u_ij
/
l_ij
);
fvec4
logRatio
(
logf
(
ratio
[
0
]),
logf
(
ratio
[
1
]),
logf
(
ratio
[
2
]),
logf
(
ratio
[
3
]));
fvec4
term
=
l_ij
-
u_ij
+
0.25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0.5
f
*
rInverse
*
logRatio
)
+
(
0.25
f
*
scaledRadiusJ
*
scaledRadiusJ
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
fvec4
term
=
l_ij
-
u_ij
+
0.25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0.5
f
*
rInverse
*
logRatio
)
+
(
0.25
f
*
scaledRadiusJ
*
scaledRadiusJ
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
if
(
include
[
j
])
{
...
@@ -329,8 +348,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
...
@@ -329,8 +348,7 @@ void CpuGBSAOBCForce::threadComputeForce(ThreadPool& threads, int threadIndex) {
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
u_ij2
=
u_ij
*
u_ij
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
rInverse
=
1.0
f
/
r
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
r2Inverse
=
rInverse
*
rInverse
;
fvec4
ratio
=
u_ij
/
l_ij
;
fvec4
logRatio
=
fastLog
(
u_ij
/
l_ij
);
fvec4
logRatio
(
logf
(
ratio
[
0
]),
logf
(
ratio
[
1
]),
logf
(
ratio
[
2
]),
logf
(
ratio
[
3
]));
fvec4
t3
=
0.125
f
*
(
1.0
f
+
scaledRadiusJ2
*
r2Inverse
)
*
(
l_ij2
-
u_ij2
)
+
0.25
f
*
logRatio
*
r2Inverse
;
fvec4
t3
=
0.125
f
*
(
1.0
f
+
scaledRadiusJ2
*
r2Inverse
)
*
(
l_ij2
-
u_ij2
)
+
0.25
f
*
logRatio
*
r2Inverse
;
fvec4
de
=
bornForce
*
t3
*
rInverse
;
fvec4
de
=
bornForce
*
t3
*
rInverse
;
fvec4
result
[
4
]
=
{
dx
*
de
,
dy
*
de
,
dz
*
de
,
0.0
f
};
fvec4
result
[
4
]
=
{
dx
*
de
,
dy
*
de
,
dz
*
de
,
0.0
f
};
...
@@ -363,3 +381,25 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4&
...
@@ -363,3 +381,25 @@ void CpuGBSAOBCForce::getDeltaR(const fvec4& posI, const fvec4& x, const fvec4&
}
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
}
fvec4
CpuGBSAOBCForce
::
fastLog
(
fvec4
x
)
{
// Evaluate log(x) using a lookup table for speed.
float
y
[
4
];
fvec4
x1
=
(
x
-
0.5
f
)
*
logDXInv
;
ivec4
index
=
floor
(
x1
);
fvec4
coeff
[
4
];
coeff
[
1
]
=
x1
-
index
;
coeff
[
0
]
=
1.0
f
-
coeff
[
1
];
coeff
[
2
]
=
coeff
[
0
]
*
coeff
[
0
]
*
coeff
[
0
]
-
coeff
[
0
];
coeff
[
3
]
=
coeff
[
1
]
*
coeff
[
1
]
*
coeff
[
1
]
-
coeff
[
1
];
transpose
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
static
float
maxdiff
=
0.0
f
;
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
if
(
index
[
i
]
>=
0
&&
index
[
i
]
<
NUM_TABLE_POINTS
)
y
[
i
]
=
dot4
(
coeff
[
i
],
fvec4
(
&
logTable
[
4
*
index
[
i
]]));
else
y
[
i
]
=
logf
(
x
[
i
]);
}
return
fvec4
(
y
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment