Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
eda69200
Commit
eda69200
authored
Oct 30, 2013
by
peastman
Browse files
Merge pull request #191 from peastman/master
Further improvements to CPU platform
parents
9c656d86
a69227ca
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
479 additions
and
293 deletions
+479
-293
CMakeLists.txt
CMakeLists.txt
+2
-2
openmmapi/include/openmm/internal/ThreadPool.h
openmmapi/include/openmm/internal/ThreadPool.h
+104
-0
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+4
-0
openmmapi/src/ThreadPool.cpp
openmmapi/src/ThreadPool.cpp
+127
-0
platforms/cpu/include/CpuKernels.h
platforms/cpu/include/CpuKernels.h
+2
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+36
-19
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+13
-18
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+2
-2
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+86
-114
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+100
-137
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+3
-1
No files found.
CMakeLists.txt
View file @
eda69200
...
...
@@ -296,9 +296,9 @@ ENDIF(OPENMM_BUILD_C_AND_FORTRAN_WRAPPERS)
# On Linux need to link to libdl
FIND_LIBRARY
(
DL_LIBRARY dl
)
IF
(
DL_LIBRARY
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
DL_LIBRARY
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
DL_LIBRARY
}
${
PTHREADS_LIB
}
)
IF
(
OPENMM_BUILD_STATIC_LIB
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
DL_LIBRARY
}
)
TARGET_LINK_LIBRARIES
(
${
STATIC_TARGET
}
${
DL_LIBRARY
}
${
PTHREADS_LIB
}
)
ENDIF
(
OPENMM_BUILD_STATIC_LIB
)
ENDIF
(
DL_LIBRARY
)
IF
(
WIN32
)
...
...
openmmapi/include/openmm/internal/ThreadPool.h
0 → 100644
View file @
eda69200
#ifndef OPENMM_THREAD_POOL_H_
#define OPENMM_THREAD_POOL_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "windowsExport.h"
#include <pthread.h>
#include <vector>
namespace
OpenMM
{
/**
* A ThreadPool creates a set of worker threads that can be used to execute tasks in parallel.
* After creating a ThreadPool, call execute() to start a task running then waitForThreads()
* to block until all threads have finished. You also can synchronize the threads in the middle
* of the task by having them call syncThreads(). In this case, the parent thread should call
* waitForThreads() an additional time; each call waits until all worker threads have reached the
* next syncThreads(), and the final call waits until they exit from the Task's execute() method.
* After calling waitForThreads() to block at a synchronization point, the parent thread should
* call resumeThreads() to instruct the worker threads to resume.
*/
class
OPENMM_EXPORT
ThreadPool
{
public:
class
Task
;
class
ThreadData
;
ThreadPool
();
~
ThreadPool
();
/**
* Get the number of worker threads in the pool.
*/
int
getNumThreads
()
const
;
/**
* Execute a Task in parallel on the worker threads.
*/
void
execute
(
Task
&
task
);
/**
* This is called by the worker threads to block until all threads have reached the same point
* and the master thread instructs them to continue by calling resumeThreads().
*/
void
syncThreads
();
/**
* This is called by the master thread to wait until all threads have completed the Task. Alternatively,
* if the threads call syncThreads(), this blocks until all threads have reached the synchronization point.
*/
void
waitForThreads
();
/**
* Instruct the threads to resume running after blocking at a synchronization point.
*/
void
resumeThreads
();
private:
bool
isDeleted
;
int
numThreads
,
waitCount
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_mutex_t
lock
;
};
/**
* This defines a task that can be executed in parallel by the worker threads.
*/
class
OPENMM_EXPORT
ThreadPool
::
Task
{
public:
/**
* Execute the task on each thread.
*
* @param pool the ThreadPool being used to execute the task
* @param threadIndex the index of the thread invoking this method
*/
virtual
void
execute
(
ThreadPool
&
pool
,
int
threadIndex
)
=
0
;
};
}
// namespace OpenMM
#endif // OPENMM_THREAD_POOL_H_
openmmapi/include/openmm/internal/vectorize.h
View file @
eda69200
...
...
@@ -212,6 +212,10 @@ static inline float dot4(fvec4 v1, fvec4 v2) {
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
static
inline
void
transpose
(
fvec4
&
v1
,
fvec4
&
v2
,
fvec4
&
v3
,
fvec4
&
v4
)
{
_MM_TRANSPOSE4_PS
(
v1
,
v2
,
v3
,
v4
);
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
...
...
openmmapi/src/ThreadPool.cpp
0 → 100644
View file @
eda69200
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/hardware.h"
using
namespace
std
;
namespace
OpenMM
{
class
ThreadPool
::
ThreadData
{
public:
ThreadData
(
ThreadPool
&
owner
,
int
index
)
:
owner
(
owner
),
index
(
index
),
isDeleted
(
false
)
{
}
ThreadPool
&
owner
;
int
index
;
bool
isDeleted
;
Task
*
currentTask
;
};
static
void
*
threadBody
(
void
*
args
)
{
ThreadPool
::
ThreadData
&
data
=
*
reinterpret_cast
<
ThreadPool
::
ThreadData
*>
(
args
);
while
(
true
)
{
// Wait for the signal to start running.
data
.
owner
.
syncThreads
();
if
(
data
.
isDeleted
)
break
;
data
.
currentTask
->
execute
(
data
.
owner
,
data
.
index
);
}
delete
&
data
;
return
0
;
}
ThreadPool
::
ThreadPool
()
{
numThreads
=
getNumProcessors
();
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
*
this
,
i
);
data
->
isDeleted
=
false
;
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
}
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
ThreadPool
::~
ThreadPool
()
{
for
(
int
i
=
0
;
i
<
(
int
)
threadData
.
size
();
i
++
)
threadData
[
i
]
->
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
}
int
ThreadPool
::
getNumThreads
()
const
{
return
numThreads
;
}
void
ThreadPool
::
execute
(
Task
&
task
)
{
for
(
int
i
=
0
;
i
<
(
int
)
threadData
.
size
();
i
++
)
threadData
[
i
]
->
currentTask
=
&
task
;
resumeThreads
();
}
void
ThreadPool
::
syncThreads
()
{
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
void
ThreadPool
::
waitForThreads
()
{
pthread_mutex_lock
(
&
lock
);
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
void
ThreadPool
::
resumeThreads
()
{
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
}
}
// namespace OpenMM
platforms/cpu/include/CpuKernels.h
View file @
eda69200
...
...
@@ -37,6 +37,7 @@
#include "CpuNonbondedForce.h"
#include "openmm/kernels.h"
#include "openmm/System.h"
#include "openmm/internal/ThreadPool.h"
namespace
OpenMM
{
...
...
@@ -90,6 +91,7 @@ private:
NonbondedMethod
nonbondedMethod
;
CpuNeighborList
neighborList
;
CpuNonbondedForce
nonbonded
;
ThreadPool
threads
;
Kernel
optimizedPme
;
};
...
...
platforms/cpu/include/CpuNeighborList.h
View file @
eda69200
#ifndef OPENMM_CPU_NEIGHBORLIST_H_
#define OPENMM_CPU_NEIGHBORLIST_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "windowsExportCpu.h"
#include
<pt
hread.h
>
#include
"openmm/internal/T
hread
Pool
.h
"
#include <set>
#include <utility>
#include <vector>
...
...
@@ -11,13 +42,12 @@ namespace OpenMM {
class
OPENMM_EXPORT_CPU
CpuNeighborList
{
public:
class
Thread
Data
;
class
Thread
Task
;
class
Voxels
;
static
const
int
BlockSize
;
CpuNeighborList
();
~
CpuNeighborList
();
void
computeNeighborList
(
int
numAtoms
,
const
std
::
vector
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
);
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
);
int
getNumBlocks
()
const
;
const
std
::
vector
<
int
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
>&
getBlockNeighbors
(
int
blockIndex
)
const
;
...
...
@@ -25,25 +55,12 @@ public:
/**
* This routine contains the code executed by each thread.
*/
void
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
);
void
runThread
(
int
index
);
private:
/**
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void
threadWait
();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void
advanceThreads
();
bool
isDeleted
;
int
numThreads
,
waitCount
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_mutex_t
lock
;
// The following variables are used to make information accessible to the individual threads.
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
atomBins
;
...
...
@@ -58,4 +75,4 @@ private:
}
// namespace OpenMM
#endif // OPENMM_
REFERENCE
_NEIGHBORLIST_H_
#endif // OPENMM_
CPU
_NEIGHBORLIST_H_
platforms/cpu/include/CpuNonbondedForce.h
View file @
eda69200
...
...
@@ -27,8 +27,8 @@
#include "CpuNeighborList.h"
#include "ReferencePairIxn.h"
#include "openmm/internal/ThreadPool.h"
#include "openmm/internal/vectorize.h"
#include <pthread.h>
#include <set>
#include <utility>
#include <vector>
...
...
@@ -38,7 +38,7 @@ namespace OpenMM {
class
CpuNonbondedForce
{
public:
class
ThreadData
;
class
ComputeDirectTask
;
/**---------------------------------------------------------------------------------------
...
...
@@ -48,14 +48,6 @@ class CpuNonbondedForce {
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
Destructor
--------------------------------------------------------------------------------------- */
~
CpuNonbondedForce
();
/**---------------------------------------------------------------------------------------
Set the force to use a cutoff.
...
...
@@ -145,16 +137,17 @@ class CpuNonbondedForce {
exclusions[atomIndex] contains the list of exclusions for that atom
@param forces force array (forces added)
@param totalEnergy total energy
@param threads the thread pool to use
--------------------------------------------------------------------------------------- */
void
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
float
*
forces
,
float
*
totalEnergy
);
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
float
*
forces
,
float
*
totalEnergy
,
ThreadPool
&
threads
);
/**
* This routine contains the code executed by each thread.
*/
void
runT
hread
(
int
index
,
std
::
vector
<
float
>&
threadForce
,
double
&
thread
Energy
);
void
t
hread
ComputeDirect
(
ThreadPool
&
threads
,
int
thread
Index
);
private:
bool
cutoff
;
...
...
@@ -171,12 +164,8 @@ private:
int
meshDim
[
3
];
std
::
vector
<
float
>
ewaldScaleTable
;
float
ewaldDX
,
ewaldDXInv
;
bool
isDeleted
;
int
numThreads
,
waitCount
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_mutex_t
lock
;
std
::
vector
<
std
::
vector
<
float
>
>
threadForce
;
std
::
vector
<
double
>
threadEnergy
;
// The following variables are used to make information accessible to the individual threads.
int
numberOfAtoms
;
float
*
posq
;
...
...
@@ -230,6 +219,12 @@ private:
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
...
...
platforms/cpu/src/CpuKernels.cpp
View file @
eda69200
...
...
@@ -260,7 +260,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
}
}
if
(
needRecompute
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
+
padding
);
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
+
padding
,
threads
);
lastPositions
=
posData
;
}
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
,
rfDielectric
);
...
...
@@ -279,7 +279,7 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
nonbonded
.
setUseSwitchingFunction
(
switchingDistance
);
float
nonbondedEnergy
=
0
;
if
(
includeDirect
)
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
particleParams
,
exclusions
,
&
forces
[
0
],
includeEnergy
?
&
nonbondedEnergy
:
NULL
);
nonbonded
.
calculateDirectIxn
(
numParticles
,
&
posq
[
0
],
particleParams
,
exclusions
,
&
forces
[
0
],
includeEnergy
?
&
nonbondedEnergy
:
NULL
,
threads
);
if
(
includeReciprocal
)
{
if
(
useOptimizedPme
)
{
PmeIO
io
(
&
posq
[
0
],
&
forces
[
0
],
numParticles
);
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
eda69200
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include "CpuNeighborList.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h"
...
...
@@ -255,54 +286,21 @@ private:
vector
<
vector
<
vector
<
pair
<
float
,
int
>
>
>
>
bins
;
};
class
CpuNeighborList
::
Thread
Data
{
class
CpuNeighborList
::
Thread
Task
:
public
ThreadPool
::
Task
{
public:
ThreadData
(
int
index
,
CpuNeighborList
&
owner
)
:
index
(
index
),
owner
(
owner
)
{
ThreadTask
(
CpuNeighborList
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeNeighborList
(
threads
,
threadIndex
);
}
int
index
;
CpuNeighborList
&
owner
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuNeighborList
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNeighborList
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
);
delete
&
data
;
return
0
;
}
CpuNeighborList
::
CpuNeighborList
()
{
isDeleted
=
false
;
numThreads
=
getNumProcessors
();
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
i
,
*
this
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
}
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
CpuNeighborList
::~
CpuNeighborList
()
{
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
}
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
vector
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
)
{
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
,
ThreadPool
&
threads
)
{
int
numBlocks
=
(
numAtoms
+
BlockSize
-
1
)
/
BlockSize
;
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
...
...
@@ -336,8 +334,9 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
// Sort the atoms based on a Hilbert curve.
atomBins
.
resize
(
numAtoms
);
pthread_mutex_lock
(
&
lock
);
advanceThreads
();
ThreadTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
sort
(
atomBins
.
begin
(),
atomBins
.
end
());
// Build the voxel hash.
...
...
@@ -360,8 +359,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
// Signal the threads to start running and wait for them to finish.
advanc
eThreads
();
p
thread
_mutex_unlock
(
&
lock
);
threads
.
resum
eThreads
();
thread
s
.
waitForThreads
(
);
// Add padding atoms to fill up the last block.
...
...
@@ -393,82 +392,55 @@ const std::vector<char>& CpuNeighborList::getBlockExclusions(int blockIndex) con
}
void
CpuNeighborList
::
runThread
(
int
index
)
{
while
(
true
)
{
// Wait for the signal to start running.
threadWait
();
if
(
isDeleted
)
break
;
// Compute the positions of atoms along the Hilbert curve.
void
CpuNeighborList
::
threadComputeNeighborList
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute the positions of atoms along the Hilbert curve.
float
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
f
;
float
invBinWidth
=
1.0
f
/
binWidth
;
bitmask_t
coords
[
3
];
for
(
int
i
=
index
;
i
<
numAtoms
;
i
+=
numThreads
)
{
const
float
*
pos
=
&
atomLocations
[
4
*
i
];
coords
[
0
]
=
(
bitmask_t
)
((
pos
[
0
]
-
minx
)
*
invBinWidth
);
coords
[
1
]
=
(
bitmask_t
)
((
pos
[
1
]
-
miny
)
*
invBinWidth
);
coords
[
2
]
=
(
bitmask_t
)
((
pos
[
2
]
-
minz
)
*
invBinWidth
);
int
bin
=
(
int
)
hilbert_c2i
(
3
,
8
,
coords
);
atomBins
[
i
]
=
pair
<
int
,
int
>
(
bin
,
i
);
}
threadWait
();
// Compute this thread's subset of neighbors.
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
for
(
int
i
=
index
;
i
<
numBlocks
;
i
+=
numThreads
)
{
{
int
firstIndex
=
BlockSize
*
i
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
}
float
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
f
;
float
invBinWidth
=
1.0
f
/
binWidth
;
bitmask_t
coords
[
3
];
int
numThreads
=
threads
.
getNumThreads
();
for
(
int
i
=
threadIndex
;
i
<
numAtoms
;
i
+=
numThreads
)
{
const
float
*
pos
=
&
atomLocations
[
4
*
i
];
coords
[
0
]
=
(
bitmask_t
)
((
pos
[
0
]
-
minx
)
*
invBinWidth
);
coords
[
1
]
=
(
bitmask_t
)
((
pos
[
1
]
-
miny
)
*
invBinWidth
);
coords
[
2
]
=
(
bitmask_t
)
((
pos
[
2
]
-
minz
)
*
invBinWidth
);
int
bin
=
(
int
)
hilbert_c2i
(
3
,
8
,
coords
);
atomBins
[
i
]
=
pair
<
int
,
int
>
(
bin
,
i
);
}
threads
.
syncThreads
();
int
firstIndex
=
BlockSize
*
i
;
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
// Record the exclusions for this block.
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
const
set
<
int
>&
atomExclusions
=
(
*
exclusions
)[
sortedAtoms
[
firstIndex
+
j
]];
char
mask
=
1
<<
j
;
for
(
int
k
=
0
;
k
<
(
int
)
blockNeighbors
[
i
].
size
();
k
++
)
{
int
atomIndex
=
blockNeighbors
[
i
][
k
];
if
(
atomExclusions
.
find
(
atomIndex
)
!=
atomExclusions
.
end
())
blockExclusions
[
i
][
k
]
|=
mask
;
}
}
// Compute this thread's subset of neighbors.
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
int
firstIndex
=
BlockSize
*
i
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
}
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
void
CpuNeighborList
::
threadWait
()
{
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
// Record the exclusions for this block.
void
CpuNeighborList
::
advanceThreads
()
{
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
{
pthread_cond_wait
(
&
endCondition
,
&
lock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
const
set
<
int
>&
atomExclusions
=
(
*
exclusions
)[
sortedAtoms
[
firstIndex
+
j
]];
char
mask
=
1
<<
j
;
for
(
int
k
=
0
;
k
<
(
int
)
blockNeighbors
[
i
].
size
();
k
++
)
{
int
atomIndex
=
blockNeighbors
[
i
][
k
];
if
(
atomExclusions
.
find
(
atomIndex
)
!=
atomExclusions
.
end
())
blockExclusions
[
i
][
k
]
|=
mask
;
}
}
}
}
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
eda69200
...
...
@@ -30,7 +30,6 @@
#include "CpuNonbondedForce.h"
#include "ReferenceForce.h"
#include "ReferencePME.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h"
...
...
@@ -44,23 +43,16 @@ using namespace OpenMM;
const
float
CpuNonbondedForce
::
TWO_OVER_SQRT_PI
=
(
float
)
(
2
/
sqrt
(
PI_M
));
const
int
CpuNonbondedForce
::
NUM_TABLE_POINTS
=
1025
;
class
CpuNonbondedForce
::
ThreadData
{
class
CpuNonbondedForce
::
ComputeDirectTask
:
public
ThreadPool
::
Task
{
public:
ThreadData
(
int
index
,
CpuNonbondedForce
&
owner
)
:
index
(
index
),
owner
(
owner
)
{
ComputeDirectTask
(
CpuNonbondedForce
&
owner
)
:
owner
(
owner
)
{
}
void
execute
(
ThreadPool
&
threads
,
int
threadIndex
)
{
owner
.
threadComputeDirect
(
threads
,
threadIndex
);
}
int
index
;
CpuNonbondedForce
&
owner
;
vector
<
float
>
threadForce
;
double
threadEnergy
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuNonbondedForce
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNonbondedForce
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
,
data
.
threadForce
,
data
.
threadEnergy
);
delete
&
data
;
return
0
;
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce constructor
...
...
@@ -68,40 +60,6 @@ static void* threadBody(void* args) {
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::
CpuNonbondedForce
()
:
cutoff
(
false
),
useSwitch
(
false
),
periodic
(
false
),
ewald
(
false
),
pme
(
false
)
{
isDeleted
=
false
;
numThreads
=
getNumProcessors
();
pthread_cond_init
(
&
startCondition
,
NULL
);
pthread_cond_init
(
&
endCondition
,
NULL
);
pthread_mutex_init
(
&
lock
,
NULL
);
thread
.
resize
(
numThreads
);
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
{
ThreadData
*
data
=
new
ThreadData
(
i
,
*
this
);
threadData
.
push_back
(
data
);
pthread_create
(
&
thread
[
i
],
NULL
,
threadBody
,
data
);
}
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
}
/**---------------------------------------------------------------------------------------
CpuNonbondedForce destructor
--------------------------------------------------------------------------------------- */
CpuNonbondedForce
::~
CpuNonbondedForce
(){
isDeleted
=
true
;
pthread_mutex_lock
(
&
lock
);
pthread_cond_broadcast
(
&
startCondition
);
pthread_mutex_unlock
(
&
lock
);
for
(
int
i
=
0
;
i
<
(
int
)
thread
.
size
();
i
++
)
pthread_join
(
thread
[
i
],
NULL
);
pthread_mutex_destroy
(
&
lock
);
pthread_cond_destroy
(
&
startCondition
);
pthread_cond_destroy
(
&
endCondition
);
}
/**---------------------------------------------------------------------------------------
...
...
@@ -334,7 +292,7 @@ void CpuNonbondedForce::calculateReciprocalIxn(int numberOfAtoms, float* posq, v
void
CpuNonbondedForce
::
calculateDirectIxn
(
int
numberOfAtoms
,
float
*
posq
,
const
vector
<
pair
<
float
,
float
>
>&
atomParameters
,
const
vector
<
set
<
int
>
>&
exclusions
,
float
*
forces
,
float
*
totalEnergy
)
{
const
vector
<
set
<
int
>
>&
exclusions
,
float
*
forces
,
float
*
totalEnergy
,
ThreadPool
&
threads
)
{
// Record the parameters for the threads.
this
->
numberOfAtoms
=
numberOfAtoms
;
...
...
@@ -342,25 +300,25 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
this
->
atomParameters
=
&
atomParameters
[
0
];
this
->
exclusions
=
&
exclusions
[
0
];
includeEnergy
=
(
totalEnergy
!=
NULL
);
threadEnergy
.
resize
(
threads
.
getNumThreads
());
threadForce
.
resize
(
threads
.
getNumThreads
());
// Signal the threads to start running and wait for them to finish.
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
ComputeDirectTask
task
(
*
this
);
threads
.
execute
(
task
);
threads
.
waitForThreads
();
// Combine the results from all the threads.
double
directEnergy
=
0
;
int
numThreads
=
threads
.
getNumThreads
();
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadData
[
i
]
->
threadEnergy
;
directEnergy
+=
threadEnergy
[
i
]
;
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
{
fvec4
f
(
forces
+
4
*
i
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
+=
fvec4
(
&
threadData
[
j
]
->
threadForce
[
4
*
i
]);
f
+=
fvec4
(
&
threadForce
[
j
][
4
*
i
]);
f
.
store
(
forces
+
4
*
i
);
}
...
...
@@ -368,76 +326,65 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
*
totalEnergy
+=
(
float
)
directEnergy
;
}
void
CpuNonbondedForce
::
threadComputeDirect
(
ThreadPool
&
threads
,
int
threadIndex
)
{
// Compute this thread's subset of interactions.
int
numThreads
=
threads
.
getNumThreads
();
threadEnergy
[
threadIndex
]
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
[
threadIndex
]
:
NULL
);
threadForce
[
threadIndex
].
resize
(
4
*
numberOfAtoms
,
0.0
f
);
float
*
forces
=
&
threadForce
[
threadIndex
][
0
];
for
(
int
i
=
0
;
i
<
4
*
numberOfAtoms
;
i
++
)
forces
[
i
]
=
0.0
f
;
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
threadIndex
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockEwaldIxn
(
i
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
void
CpuNonbondedForce
::
runThread
(
int
index
,
vector
<
float
>&
threadForce
,
double
&
threadEnergy
)
{
while
(
true
)
{
// Wait for the signal to start running.
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
if
(
isDeleted
)
break
;
// Compute this thread's subset of interactions.
threadEnergy
=
0
;
double
*
energyPtr
=
(
includeEnergy
?
&
threadEnergy
:
NULL
);
threadForce
.
resize
(
4
*
numberOfAtoms
,
0.0
f
);
for
(
int
i
=
0
;
i
<
4
*
numberOfAtoms
;
i
++
)
threadForce
[
i
]
=
0.0
f
;
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockEwaldIxn
(
i
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
for
(
int
i
=
index
;
i
<
numberOfAtoms
;
i
+=
numThreads
)
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
j
=
*
iter
;
fvec4
deltaR
;
fvec4
posI
(
posq
+
4
*
i
);
fvec4
posJ
(
posq
+
4
*
j
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
)[
0
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
&
threadForce
[
4
*
i
])
-
result
).
store
(
&
threadForce
[
4
*
i
]);
(
fvec4
(
&
threadForce
[
4
*
j
])
+
result
).
store
(
&
threadForce
[
4
*
j
]);
if
(
includeEnergy
)
threadEnergy
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
for
(
int
i
=
threadIndex
;
i
<
numberOfAtoms
;
i
+=
numThreads
)
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
j
=
*
iter
;
fvec4
deltaR
;
fvec4
posI
(
posq
+
4
*
i
);
fvec4
posJ
(
posq
+
4
*
j
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
)[
0
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
i
)
-
result
).
store
(
forces
+
4
*
i
);
(
fvec4
(
forces
+
4
*
j
)
+
result
).
store
(
forces
+
4
*
j
);
if
(
includeEnergy
)
threadEnergy
[
threadIndex
]
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
i
ndex
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockIxn
(
i
,
&
threadF
orce
[
0
]
,
energyPtr
,
boxSize
,
invBoxSize
);
}
else
{
// Loop over all atom pairs
for
(
int
i
=
threadI
ndex
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockIxn
(
i
,
f
orce
s
,
energyPtr
,
boxSize
,
invBoxSize
);
}
else
{
// Loop over all atom pairs
for
(
int
i
=
index
;
i
<
numberOfAtoms
;
i
+=
numThreads
){
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
}
for
(
int
i
=
threadIndex
;
i
<
numberOfAtoms
;
i
+=
numThreads
){
for
(
int
j
=
i
+
1
;
j
<
numberOfAtoms
;
j
++
)
if
(
exclusions
[
j
].
find
(
i
)
==
exclusions
[
j
].
end
())
calculateOneIxn
(
i
,
j
,
forces
,
energyPtr
,
boxSize
,
invBoxSize
);
}
}
}
...
...
@@ -507,6 +454,9 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
...
...
@@ -515,9 +465,7 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
...
...
@@ -527,9 +475,10 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
any
|=
include
[
j
];
}
if
(
!
any
)
...
...
@@ -537,7 +486,6 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
...
...
@@ -578,12 +526,13 @@ void CpuNonbondedForce::calculateBlockIxn(int blockIndex, float* forces, double*
// Accumulate forces.
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
...
...
@@ -606,6 +555,9 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
...
...
@@ -614,9 +566,7 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
...
...
@@ -626,9 +576,10 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Compute the distances to the block atoms.
bool
any
=
false
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
(
atomPosq
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
r2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
any
|=
include
[
j
];
}
if
(
!
any
)
...
...
@@ -636,7 +587,6 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
...
...
@@ -671,12 +621,13 @@ void CpuNonbondedForce::calculateBlockEwaldIxn(int blockIndex, float* forces, do
// Accumulate forces.
fvec4
result
[
4
]
=
{
dx
*
dEdR
,
dy
*
dEdR
,
dz
*
dEdR
,
0.0
f
};
transpose
(
result
[
0
],
result
[
1
],
result
[
2
],
result
[
3
]);
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
blockAtomForce
[
j
]
+=
result
[
j
];
atomForce
-=
result
[
j
];
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
...
...
@@ -697,6 +648,18 @@ void CpuNonbondedForce::getDeltaR(const fvec4& posI, const fvec4& posJ, fvec4& d
r2
=
dot3
(
deltaR
,
deltaR
);
}
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
fvec4
CpuNonbondedForce
::
erfcApprox
(
fvec4
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
...
...
@@ -720,7 +683,7 @@ fvec4 CpuNonbondedForce::ewaldScaleFunction(fvec4 x) {
coeff
[
0
]
=
1.0
f
-
coeff
[
1
];
coeff
[
2
]
=
coeff
[
0
]
*
coeff
[
0
]
*
coeff
[
0
]
-
coeff
[
0
];
coeff
[
3
]
=
coeff
[
1
]
*
coeff
[
1
]
*
coeff
[
1
]
-
coeff
[
1
];
_MM_TRANSPOSE4_PS
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
transpose
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
if
(
index
[
i
]
<
NUM_TABLE_POINTS
)
y
[
i
]
=
dot4
(
coeff
[
i
],
fvec4
(
&
ewaldScaleTable
[
4
*
index
[
i
]]));
...
...
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
eda69200
...
...
@@ -34,6 +34,7 @@
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/ThreadPool.h"
#include "CpuNeighborList.h"
#include "CpuPlatform.h"
#include "sfmt/SFMT.h"
...
...
@@ -63,8 +64,9 @@ void testNeighborList(bool periodic) {
exclusions
[
i
-
j
].
insert
(
i
);
}
}
ThreadPool
threads
;
CpuNeighborList
neighborList
;
neighborList
.
computeNeighborList
(
numParticles
,
positions
,
exclusions
,
boxSize
,
periodic
,
cutoff
);
neighborList
.
computeNeighborList
(
numParticles
,
positions
,
exclusions
,
boxSize
,
periodic
,
cutoff
,
threads
);
// Convert the neighbor list to a set for faster lookup.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment