Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b278bb6d
"plugins/rpmd/platforms/hip/tests/TestHipRpmd.cpp" did not exist on "26a34a73b9eefc3f696750a07a3e19ead8bcc072"
Commit
b278bb6d
authored
Oct 11, 2013
by
peastman
Browse files
Direct space nonbonded is multithreaded
parent
3c435fb8
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
167 additions
and
89 deletions
+167
-89
platforms/cpu/include/CpuKernels.h
platforms/cpu/include/CpuKernels.h
+2
-0
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+42
-35
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+7
-9
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+116
-45
No files found.
platforms/cpu/include/CpuKernels.h
View file @
b278bb6d
...
...
@@ -34,6 +34,7 @@
#include "CpuPlatform.h"
#include "CpuNeighborList.h"
#include "CpuNonbondedForce.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
...
...
@@ -87,6 +88,7 @@ private:
std
::
vector
<
float
>
forces
;
NonbondedMethod
nonbondedMethod
;
CpuNeighborList
neighborList
;
CpuNonbondedForce
nonbonded
;
Kernel
optimizedPme
;
};
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
b278bb6d
...
...
@@ -26,6 +26,7 @@
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "ReferencePairIxn.h"
#include <pthread.h>
#include <set>
#include <utility>
#include <vector>
...
...
@@ -33,25 +34,8 @@
// ---------------------------------------------------------------------------------------
class
CpuNonbondedForce
{
private:
bool
cutoff
;
bool
useSwitch
;
bool
periodic
;
bool
ewald
;
bool
pme
;
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
__m128
boxSize
,
invBoxSize
,
half
;
static
float
TWO_OVER_SQRT_PI
;
public:
class
ThreadData
;
/**---------------------------------------------------------------------------------------
...
...
@@ -59,7 +43,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
(
);
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
...
...
@@ -67,7 +51,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
~
CpuNonbondedForce
(
);
~
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
...
...
@@ -79,7 +63,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setUseCutoff
(
float
distance
,
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
neighbors
,
float
solventDielectric
);
void
setUseCutoff
(
float
distance
,
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
neighbors
,
float
solventDielectric
);
/**---------------------------------------------------------------------------------------
...
...
@@ -89,7 +73,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setUseSwitchingFunction
(
float
distance
);
void
setUseSwitchingFunction
(
float
distance
);
/**---------------------------------------------------------------------------------------
...
...
@@ -101,7 +85,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setPeriodic
(
float
*
periodicBoxSize
);
void
setPeriodic
(
float
*
periodicBoxSize
);
/**---------------------------------------------------------------------------------------
...
...
@@ -165,9 +149,40 @@ class CpuNonbondedForce {
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
)
const
;
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
);
/**
* This routine contains the code executed by each thread.
*/
void
runThread
(
int
index
,
std
::
vector
<
float
>&
threadForce
,
double
&
threadEnergy
);
private:
bool
cutoff
;
bool
useSwitch
;
bool
periodic
;
bool
ewald
;
bool
pme
;
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
__m128
boxSize
,
invBoxSize
,
half
;
bool
isDeleted
;
int
numThreads
,
waitCount
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_mutex_t
lock
;
// The following variables are used to make information accessible to the individual threads.
float
*
posq
;
std
::
vector
<
std
::
pair
<
float
,
float
>
>
atomParameters
;
std
::
vector
<
std
::
set
<
int
>
>
exclusions
;
bool
includeEnergy
;
static
float
TWO_OVER_SQRT_PI
;
/**---------------------------------------------------------------------------------------
...
...
@@ -175,16 +190,12 @@ private:
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
posq
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
;
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
);
/**---------------------------------------------------------------------------------------
...
...
@@ -192,16 +203,12 @@ private:
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param posq atom coordinates and charges
@param atomParameters atom parameters (sigma/2, 2*sqrt(epsilon))
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateOneEwaldIxn
(
int
atom1
,
int
atom2
,
float
*
posq
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
;
void
calculateOneEwaldIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
);
void
getDeltaR
(
const
__m128
&
posI
,
const
__m128
&
posJ
,
__m128
&
deltaR
,
float
&
r2
,
bool
periodic
)
const
;
...
...
platforms/cpu/src/CpuKernels.cpp
View file @
b278bb6d
...
...
@@ -30,7 +30,6 @@
* -------------------------------------------------------------------------- */
#include "CpuKernels.h"
#include "CpuNonbondedForce.h"
#include "ReferenceBondForce.h"
#include "ReferenceLJCoulomb14.h"
#include "openmm/Context.h"
...
...
@@ -197,7 +196,6 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
RealVec
boxSize
=
extractBoxSize
(
context
);
float
floatBoxSize
[
3
]
=
{(
float
)
boxSize
[
0
],
(
float
)
boxSize
[
1
],
(
float
)
boxSize
[
2
]};
double
energy
=
ewaldSelfEnergy
;
CpuNonbondedForce
clj
;
bool
periodic
=
(
nonbondedMethod
==
CutoffPeriodic
);
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
pme
=
(
nonbondedMethod
==
PME
);
...
...
@@ -221,23 +219,23 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
forces
[
i
]
=
0.0
f
;
if
(
nonbondedMethod
!=
NoCutoff
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
);
clj
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
.
getNeighbors
(),
rfDielectric
);
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
.
getNeighbors
(),
rfDielectric
);
}
if
(
periodic
||
ewald
||
pme
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
if
(
boxSize
[
0
]
<
minAllowedSize
||
boxSize
[
1
]
<
minAllowedSize
||
boxSize
[
2
]
<
minAllowedSize
)
throw
OpenMMException
(
"The periodic box size has decreased to less than twice the nonbonded cutoff."
);
clj
.
setPeriodic
(
floatBoxSize
);
nonbonded
.
setPeriodic
(
floatBoxSize
);
}
if
(
ewald
)
clj
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
nonbonded
.
setUseEwald
(
ewaldAlpha
,
kmax
[
0
],
kmax
[
1
],
kmax
[
2
]);
if
(
pme
)
clj
.
setUsePME
(
ewaldAlpha
,
gridSize
);
nonbonded
.
setUsePME
(
ewaldAlpha
,
gridSize
);
if
(
useSwitchingFunction
)
clj
.
setUseSwitchingFunction
(
switchingDistance
);
nonbonded
.
setUseSwitchingFunction
(
switchingDistance
);
float
nonbondedEnergy
=
0
;
if
(
includeDirect
)
clj
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
particleParams
,
exclusions
,
0
,
&
forces
[
0
],
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
particleParams
,
exclusions
,
0
,
&
forces
[
0
],
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
forces
[
0
],
numParticles
);
...
...
@@ -246,7 +244,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
optimizedPme
.
getAs
<
CalcPmeReciprocalForceKernel
>
().
finishComputation
(
io
);
}
else
clj
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
0
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
.
calculateReciprocalIxn
(
numParticles
,
&
posq
[
0
],
posData
,
particleParams
,
exclusions
,
0
,
forceData
,
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
}
energy
+=
nonbondedEnergy
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
b278bb6d
...
...
@@ -30,6 +30,7 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/hardware.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
...
...
@@ -39,6 +40,24 @@ using namespace std;
float
CpuNonbondedForce
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
class
CpuNonbondedForce
::
ThreadData
{
public:
ThreadData
(
int
index
,
CpuNonbondedForce
&
owner
)
:
index
(
index
),
owner
(
owner
)
{
}
int
index
;
CpuNonbondedForce
&
owner
;
vector
<
float
>
threadForce
;
double
threadEnergy
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuNonbondedForce
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNonbondedForce
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
,
data
.
threadForce
,
data
.
threadEnergy
);
delete
&
data
;
return
0
;
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
...
...
@@ -46,13 +65,17 @@ float CpuNonbondedForce::TWO_OVER_SQRT_PI = (float) (2/sqrt(PI_M));
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
{
// ---------------------------------------------------------------------------------------
// static const char* methodName = "\nCpuNonbondedForce::CpuNonbondedForce";
// ---------------------------------------------------------------------------------------
isDeleted
=
false
;
numThreads
=
getNumProcessors
();
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
i
,
*
this
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
}
}
/**---------------------------------------------------------------------------------------
...
...
@@ -62,13 +85,15 @@ CpuNonbondedForce::CpuNonbondedForce() : cutoff(false), useSwitch(false), period
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::~
CpuNonbondedForce
(){
// ---------------------------------------------------------------------------------------
// static const char* methodName = "\nCpuNonbondedForce::~CpuNonbondedForce";
// ---------------------------------------------------------------------------------------
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
}
/**---------------------------------------------------------------------------------------
...
...
@@ -280,18 +305,36 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
)
const
{
float
*
fixedParameters
,
float
*
forces
,
float
*
totalEnergy
)
{
// Record the parameters for the threads.
double
directEnergy
=
0
;
double
*
energyPtr
=
(
to
talEnergy
==
NULL
?
NULL
:
&
directEnergy
)
;
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
this
->
posq
=
posq
;
this
->
atomParameters
=
a
to
mParameters
;
this
->
exclusions
=
exclusions
;
includeEnergy
=
(
totalEnergy
!=
NULL
);
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
->
size
();
i
++
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneEwaldIxn
(
pair
.
first
,
pair
.
second
,
posq
,
atomParameters
,
forces
,
energyPtr
);
// Signal the threads to start running and wait for them to finish.
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
// Combine the results from all the threads.
double
directEnergy
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadData
[
i
]
->
threadEnergy
;
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
{
__m128
f
=
_mm_loadu_ps
(
forces
+
4
*
i
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
=
_mm_add_ps
(
f
,
_mm_loadu_ps
(
&
threadData
[
j
]
->
threadForce
[
4
*
i
]));
_mm_storeu_ps
(
forces
+
4
*
i
,
f
);
}
if
(
ewald
||
pme
)
{
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
...
...
@@ -315,36 +358,66 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq,
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
));
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
),
result
));
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
if
(
energyPtr
!=
NULL
)
if
(
includeEnergy
)
directEnergy
-=
chargeProd
*
inverseR
*
erfAlphaR
;
}
}
}
}
if
(
totalEnergy
!=
NULL
)
*
totalEnergy
+=
(
float
)
directEnergy
;
}
void
CpuNonbondedForce
::
runThread
(
int
index
,
vector
<
float
>&
threadForce
,
double
&
threadEnergy
)
{
while
(
true
)
{
// Wait for the signal to start running.
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
if
(
isDeleted
)
break
;
// Compute this thread's subset of interactions.
threadEnergy
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
:
NULL
);
int
numberOfAtoms
=
atomParameters
.
size
();
threadForce
.
resize
(
4
*
numberOfAtoms
,
0.0
f
);
for
(
int
i
=
0
;
i
<
4
*
numberOfAtoms
;
i
++
)
threadForce
[
i
]
=
0.0
f
;
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneEwaldIxn
(
pair
.
first
,
pair
.
second
,
&
threadForce
[
0
],
energyPtr
);
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
->
size
();
i
++
)
{
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
posq
,
atomParameters
,
f
orce
s
,
energyPtr
);
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
&
threadF
orce
[
0
]
,
energyPtr
);
}
}
else
{
// Loop over all atom pairs
for
(
int
ii
=
0
;
ii
<
numberOfAtoms
;
ii
++
){
for
(
int
jj
=
ii
+
1
;
jj
<
numberOfAtoms
;
jj
++
)
if
(
exclusions
[
jj
].
find
(
ii
)
==
exclusions
[
jj
].
end
())
calculateOneIxn
(
ii
,
jj
,
posq
,
atomParameters
,
forces
,
energyPtr
);
for
(
int
i
=
index
;
i
<
numberOfAtoms
;
i
+=
numThreads
){
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
&
threadForce
[
0
],
energyPtr
);
}
}
}
if
(
totalEnergy
!=
NULL
)
*
totalEnergy
+=
(
float
)
directEnergy
;
}
void
CpuNonbondedForce
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
{
void
CpuNonbondedForce
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
)
{
// get deltaR, R2, and R between 2 atoms
__m128
deltaR
;
...
...
@@ -396,9 +469,7 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* posq,
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
}
void
CpuNonbondedForce
::
calculateOneEwaldIxn
(
int
ii
,
int
jj
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
float
*
forces
,
double
*
totalEnergy
)
const
{
void
CpuNonbondedForce
::
calculateOneEwaldIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
)
{
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
posq
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
posq
+
4
*
jj
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment