Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b278bb6d
Commit
b278bb6d
authored
Oct 11, 2013
by
peastman
Browse files
Direct space nonbonded is multithreaded
parent
3c435fb8
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
167 additions
and
89 deletions
+167
-89
platforms/cpu/include/CpuKernels.h
platforms/cpu/include/CpuKernels.h
+2
-0
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+42
-35
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+7
-9
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+116
-45
No files found.
platforms/cpu/include/CpuKernels.h
View file @
b278bb6d
...
...
@@ -34,6 +34,7 @@
#include "CpuPlatform.h"
#include "CpuNeighborList.h"
#include "CpuNonbondedForce.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
...
...
@@ -87,6 +88,7 @@ private:
std
::
vector
<
float
>
forces
;
NonbondedMethod
nonbondedMethod
;
CpuNeighborList
neighborList
;
CpuNonbondedForce
nonbonded
;
Kernel
optimizedPme
;
};
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
b278bb6d
...
...
@@ -26,6 +26,7 @@
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "ReferencePairIxn.h"
#include <pthread.h>
#include <set>
#include <utility>
#include <vector>
...
...
@@ -33,25 +34,8 @@
// ---------------------------------------------------------------------------------------
class
CpuNonbondedForce
{
private:
bool
cutoff
;
bool
useSwitch
;
bool
periodic
;
bool
ewald
;
bool
pme
;
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
__m128
boxSize
,
invBoxSize
,
half
;
static
float
TWO_OVER_SQRT_PI
;
public:
class
ThreadData
;
/**---------------------------------------------------------------------------------------
...
...
@@ -59,7 +43,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
(
);
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
...
...
@@ -67,7 +51,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
~
CpuNonbondedForce
(
);
~
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
...
...
@@ -79,7 +63,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setUseCutoff
(
float
distance
,
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
neighbors
,
float
solventDielectric
);
void
setUseCutoff
(
float
distance
,
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
neighbors
,
float
solventDielectric
);
/**---------------------------------------------------------------------------------------
...
...
@@ -89,7 +73,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setUseSwitchingFunction
(
float
distance
);
void
setUseSwitchingFunction
(
float
distance
);
/**---------------------------------------------------------------------------------------
...
...
@@ -101,7 +85,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setPeriodic
(
float
*
periodicBoxSize
);
void
setPeriodic
(
float
*
periodicBoxSize
);
/**---------------------------------------------------------------------------------------
...
...
@@ -165,9 +149,40 @@ class CpuNonbondedForce {
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
)
const
;
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
);
/**
* This routine contains the code executed by each thread.
*/
void
runThread
(
int
index
,
std
::
vector
<
float
>&
threadForce
,
double
&
threadEnergy
);
private:
bool
cutoff
;
bool
useSwitch
;
bool
periodic
;
bool
ewald
;
bool
pme
;
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
__m128
boxSize
,
invBoxSize
,
half
;
bool
isDeleted
;
int
numThreads
,
waitCount
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_mutex_t
lock
;
// The following variables are used to make information accessible to the individual threads.
float
*
posq
;
std
::
vector
<
std
::
pair
<
float
,
float
>
>
atomParameters
;
std
::
vector
<
std
::
set
<
int
>
>
exclusions
;
bool
includeEnergy
;
static
float
TWO_OVER_SQRT_PI
;
/**---------------------------------------------------------------------------------------
...
...
@@ -175,16 +190,12 @@ private:
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
posq
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
;
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
);
/**---------------------------------------------------------------------------------------
...
...
@@ -192,16 +203,12 @@ private:
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateOneEwaldIxn
(
int
atom1
,
int
atom2
,
float
*
posq
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
;
void
calculateOneEwaldIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
);
void
getDeltaR
(
const
__m128
&
posI
,
const
__m128
&
posJ
,
__m128
&
deltaR
,
float
&
r2
,
bool
periodic
)
const
;
...
...
platforms/cpu/src/CpuKernels.cpp
View file @
b278bb6d
...
...
@@ -30,7 +30,6 @@
* -------------------------------------------------------------------------- */
#include "CpuKernels.h"
#include "CpuNonbondedForce.h"
#include "ReferenceBondForce.h"
#include "ReferenceLJCoulomb14.h"
#include "openmm/Context.h"
...
...
@@ -197,7 +196,6 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
RealVec
boxSize
=
extractBoxSize
(
context
);
float
floatBoxSize
[
3
]
=
{(
float
)
boxSize
[
0
],
(
float
)
boxSize
[
1
],
(
float
)
boxSize
[
2
]};
double
energy
=
ewaldSelfEnergy
;
CpuNonbondedForce
clj
;
bool
periodic
=
(
nonbondedMethod
==
CutoffPeriodic
);
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
pme
=
(
nonbondedMethod
==
PME
);
...
...
@@ -221,23 +219,23 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
forces
[
i
]
=
0.0
f
;
if
(
nonbondedMethod
!=
NoCutoff
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
);
clj
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
.
getNeighbors
(),
rfDielectric
);
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
.
getNeighbors
(),
rfDielectric
);
}
if
(
periodic
||
ewald
||
pme
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
clj
.
setPeriodic
(
floatBoxSize
);
nonbonded
.
setPeriodic
(
floatBoxSize
);
}
if
(
ewald
)
clj
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
nonbonded
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
if
(
pme
)
clj
.
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
.
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
useSwitchingFunction
)
clj
.
setUseSwitchingFunction
(
switchingDistance
);
nonbonded
.
setUseSwitchingFunction
(
switchingDistance
);
float
nonbondedEnergy
=
0
;
if
(
includeDirect
)
clj
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
particleParams
,
exclusions
,
0
,
&
forces
[
0
],
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
particleParams
,
exclusions
,
0
,
&
forces
[
0
],
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
forces
[
0
],
numParticles
);
...
...
@@ -246,7 +244,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
else
clj
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
0
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
0
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
}
energy
+=
nonbondedEnergy
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
b278bb6d
...
...
@@ -30,6 +30,7 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/hardware.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
...
...
@@ -39,6 +40,24 @@ using namespace std;
float
CpuNonbondedForce
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
class
CpuNonbondedForce
::
ThreadData
{
public:
ThreadData
(
int
index
,
CpuNonbondedForce
&
owner
)
:
index
(
index
),
owner
(
owner
)
{
}
int
index
;
CpuNonbondedForce
&
owner
;
vector
<
float
>
threadForce
;
double
threadEnergy
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuNonbondedForce
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNonbondedForce
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
,
data
.
threadForce
,
data
.
threadEnergy
);
delete
&
data
;
return
0
;
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
...
...
@@ -46,13 +65,17 @@ float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
{
// ---------------------------------------------------------------------------------------
// static const char* methodName = "\nCpuNonbondedForce::CpuNonbondedForce";
// ---------------------------------------------------------------------------------------
isDeleted
=
false
;
numThreads
=
getNumProcessors
();
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
i
,
*
this
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
}
}
/**---------------------------------------------------------------------------------------
...
...
@@ -62,13 +85,15 @@ CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), period
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::~
CpuNonbondedForce
(){
// ---------------------------------------------------------------------------------------
// static const char* methodName = "\nCpuNonbondedForce::~CpuNonbondedForce";
// ---------------------------------------------------------------------------------------
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
}
/**---------------------------------------------------------------------------------------
...
...
@@ -280,18 +305,36 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
)
const
{
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
)
{
// Record the parameters for the threads.
double
directEnergy
=
0
;
double
*
energyPtr
=
(
to
talEnergy
==
NULL
?
NULL
:
&
directEnergy
)
;
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
this
->
posq
=
posq
;
this
->
atomParameters
=
a
to
mParameters
;
this
->
exclusions
=
exclusions
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
->
size
();
i
++
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneEwaldIxn
(
pair
.
first
,
pair
.
second
,
posq
,
atomParameters
,
forces
,
energyPtr
);
// Signal the threads to start running and wait for them to finish.
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
// Combine the results from all the threads.
double
directEnergy
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadData
[
i
]
->
threadEnergy
;
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
{
__m128
f
=
_mm_loadu_ps
(
forces
+
4
*
i
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
=
_mm_add_ps
(
f
,
_mm_loadu_ps
(
&
threadData
[
j
]
->
threadForce
[
4
*
i
]));
_mm_storeu_ps
(
forces
+
4
*
i
,
f
);
}
if
(
ewald
||
pme
)
{
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
...
...
@@ -315,36 +358,66 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq,
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
));
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
),
result
));
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
if
(
energyPtr
!=
NULL
)
if
(
includeEnergy
)
directEnergy
-=
chargeProd
*
inverseR
*
erfAlphaR
;
}
}
}
}
if
(
totalEnergy
!=
NULL
)
*
totalEnergy
+=
(
float
)
directEnergy
;
}
void
CpuNonbondedForce
::
runThread
(
int
index
,
vector
<
float
>&
threadForce
,
double
&
threadEnergy
)
{
while
(
true
)
{
// Wait for the signal to start running.
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
if
(
isDeleted
)
break
;
// Compute this thread's subset of interactions.
threadEnergy
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
:
NULL
);
int
numberOfAtoms
=
atomParameters
.
size
();
threadForce
.
resize
(
4
*
numberOfAtoms
,
0.0
f
);
for
(
int
i
=
0
;
i
<
4
*
numberOfAtoms
;
i
++
)
threadForce
[
i
]
=
0.0
f
;
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneEwaldIxn
(
pair
.
first
,
pair
.
second
,
&
threadForce
[
0
],
energyPtr
);
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
->
size
();
i
++
)
{
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
posq
,
atomParameters
,
f
orce
s
,
energyPtr
);
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
&
threadF
orce
[
0
]
,
energyPtr
);
}
}
else
{
// Loop over all atom pairs
for
(
int
ii
=
0
;
ii
<
numberOfAtoms
;
ii
++
){
for
(
int
jj
=
ii
+
1
;
jj
<
numberOfAtoms
;
jj
++
)
if
(
exclusions
[
jj
].
find
(
ii
)
==
exclusions
[
jj
].
end
())
calculateOneIxn
(
ii
,
jj
,
posq
,
atomParameters
,
forces
,
energyPtr
);
for
(
int
i
=
index
;
i
<
numberOfAtoms
;
i
+=
numThreads
){
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
&
threadForce
[
0
],
energyPtr
);
}
}
}
if
(
totalEnergy
!=
NULL
)
*
totalEnergy
+=
(
float
)
directEnergy
;
}
void
CpuNonbondedForce
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
{
void
CpuNonbondedForce
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
)
{
// get deltaR, R2, and R between 2 atoms
__m128
deltaR
;
...
...
@@ -396,9 +469,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq,
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
}
void
CpuNonbondedForce
::
calculateOneEwaldIxn
(
int
ii
,
int
jj
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
{
void
CpuNonbondedForce
::
calculateOneEwaldIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
)
{
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
posq
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
posq
+
4
*
jj
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment