Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
8167c79b
Commit
8167c79b
authored
Oct 30, 2013
by
leeping
Browse files
Merge branch 'master' of github.com:leeping/openmm
parents
855ece90
99ef4344
Changes
31
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1054 additions
and
270 deletions
+1054
-270
CMakeLists.txt
CMakeLists.txt
+1
-0
libraries/sfmt/include/sfmt/SFMT-sse2.h
libraries/sfmt/include/sfmt/SFMT-sse2.h
+16
-14
libraries/sfmt/src/SFMT.cpp
libraries/sfmt/src/SFMT.cpp
+2
-2
openmmapi/include/openmm/internal/vectorize.h
openmmapi/include/openmm/internal/vectorize.h
+248
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+21
-5
platforms/cpu/include/CpuNonbondedForce.h
platforms/cpu/include/CpuNonbondedForce.h
+30
-14
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+33
-5
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+325
-118
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+253
-103
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+16
-5
plugins/amoeba/openmmapi/include/openmm/AmoebaMultipoleForce.h
...ns/amoeba/openmmapi/include/openmm/AmoebaMultipoleForce.h
+8
-0
plugins/amoeba/openmmapi/include/openmm/amoebaKernels.h
plugins/amoeba/openmmapi/include/openmm/amoebaKernels.h
+2
-0
plugins/amoeba/openmmapi/include/openmm/internal/AmoebaMultipoleForceImpl.h
...nmmapi/include/openmm/internal/AmoebaMultipoleForceImpl.h
+2
-0
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
+4
-0
plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
+4
-0
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
+18
-0
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
+7
-0
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaMultipoleForce.cpp
...eba/platforms/cuda/tests/TestCudaAmoebaMultipoleForce.cpp
+38
-0
plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.cpp
...amoeba/platforms/reference/src/AmoebaReferenceKernels.cpp
+19
-4
plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.h
...s/amoeba/platforms/reference/src/AmoebaReferenceKernels.h
+7
-0
No files found.
CMakeLists.txt
View file @
8167c79b
...
...
@@ -252,6 +252,7 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
## OpenMM was previously installed there.
INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_CURRENT_SOURCE_DIR
}
/
${
subdir
}
/include
)
ENDFOREACH
(
subdir
)
SET_SOURCE_FILES_PROPERTIES
(
${
CMAKE_SOURCE_DIR
}
/libraries/sfmt/src/SFMT.cpp PROPERTIES COMPILE_FLAGS
"-msse2 -DHAVE_SSE2=1"
)
# If API wrappers are being generated, and add them to the build.
FIND_PROGRAM
(
GCCXML_PATH gccxml PATH
...
...
libraries/sfmt/include/sfmt/SFMT-sse2.h
View file @
8167c79b
...
...
@@ -49,22 +49,23 @@ PRE_ALWAYS static __m128i mm_recursion(__m128i *a, __m128i *b,
* This function fills the internal state array with pseudorandom
* integers.
*/
inline
static
void
gen_rand_all
(
void
)
{
inline
static
void
gen_rand_all
(
SFMT
&
sfmt
)
{
int
i
;
__m128i
r
,
r1
,
r2
,
mask
;
mask
=
_mm_set_epi32
(
MSK4
,
MSK3
,
MSK2
,
MSK1
);
r1
=
_mm_load_si128
(
&
sfmt
[
N
-
2
].
si
);
r2
=
_mm_load_si128
(
&
sfmt
[
N
-
1
].
si
);
SFMTData
&
data
=
*
sfmt
.
data
;
r1
=
_mm_load_si128
(
&
data
.
sfmt
[
N
-
2
].
si
);
r2
=
_mm_load_si128
(
&
data
.
sfmt
[
N
-
1
].
si
);
for
(
i
=
0
;
i
<
N
-
POS1
;
i
++
)
{
r
=
mm_recursion
(
&
sfmt
[
i
].
si
,
&
sfmt
[
i
+
POS1
].
si
,
r1
,
r2
,
mask
);
_mm_store_si128
(
&
sfmt
[
i
].
si
,
r
);
r
=
mm_recursion
(
&
data
.
sfmt
[
i
].
si
,
&
data
.
sfmt
[
i
+
POS1
].
si
,
r1
,
r2
,
mask
);
_mm_store_si128
(
&
data
.
sfmt
[
i
].
si
,
r
);
r1
=
r2
;
r2
=
r
;
}
for
(;
i
<
N
;
i
++
)
{
r
=
mm_recursion
(
&
sfmt
[
i
].
si
,
&
sfmt
[
i
+
POS1
-
N
].
si
,
r1
,
r2
,
mask
);
_mm_store_si128
(
&
sfmt
[
i
].
si
,
r
);
r
=
mm_recursion
(
&
data
.
sfmt
[
i
].
si
,
&
data
.
sfmt
[
i
+
POS1
-
N
].
si
,
r1
,
r2
,
mask
);
_mm_store_si128
(
&
data
.
sfmt
[
i
].
si
,
r
);
r1
=
r2
;
r2
=
r
;
}
...
...
@@ -77,21 +78,22 @@ inline static void gen_rand_all(void) {
* @param array an 128-bit array to be filled by pseudorandom numbers.
* @param size number of 128-bit pesudorandom numbers to be generated.
*/
inline
static
void
gen_rand_array
(
w128_t
*
array
,
int
size
)
{
inline
static
void
gen_rand_array
(
w128_t
*
array
,
int
size
,
SFMT
&
sfmt
)
{
int
i
,
j
;
__m128i
r
,
r1
,
r2
,
mask
;
mask
=
_mm_set_epi32
(
MSK4
,
MSK3
,
MSK2
,
MSK1
);
r1
=
_mm_load_si128
(
&
sfmt
[
N
-
2
].
si
);
r2
=
_mm_load_si128
(
&
sfmt
[
N
-
1
].
si
);
SFMTData
&
data
=
*
sfmt
.
data
;
r1
=
_mm_load_si128
(
&
data
.
sfmt
[
N
-
2
].
si
);
r2
=
_mm_load_si128
(
&
data
.
sfmt
[
N
-
1
].
si
);
for
(
i
=
0
;
i
<
N
-
POS1
;
i
++
)
{
r
=
mm_recursion
(
&
sfmt
[
i
].
si
,
&
sfmt
[
i
+
POS1
].
si
,
r1
,
r2
,
mask
);
r
=
mm_recursion
(
&
data
.
sfmt
[
i
].
si
,
&
data
.
sfmt
[
i
+
POS1
].
si
,
r1
,
r2
,
mask
);
_mm_store_si128
(
&
array
[
i
].
si
,
r
);
r1
=
r2
;
r2
=
r
;
}
for
(;
i
<
N
;
i
++
)
{
r
=
mm_recursion
(
&
sfmt
[
i
].
si
,
&
array
[
i
+
POS1
-
N
].
si
,
r1
,
r2
,
mask
);
r
=
mm_recursion
(
&
data
.
sfmt
[
i
].
si
,
&
array
[
i
+
POS1
-
N
].
si
,
r1
,
r2
,
mask
);
_mm_store_si128
(
&
array
[
i
].
si
,
r
);
r1
=
r2
;
r2
=
r
;
...
...
@@ -106,13 +108,13 @@ inline static void gen_rand_array(w128_t *array, int size) {
}
for
(
j
=
0
;
j
<
2
*
N
-
size
;
j
++
)
{
r
=
_mm_load_si128
(
&
array
[
j
+
size
-
N
].
si
);
_mm_store_si128
(
&
sfmt
[
j
].
si
,
r
);
_mm_store_si128
(
&
data
.
sfmt
[
j
].
si
,
r
);
}
for
(;
i
<
size
;
i
++
)
{
r
=
mm_recursion
(
&
array
[
i
-
N
].
si
,
&
array
[
i
+
POS1
-
N
].
si
,
r1
,
r2
,
mask
);
_mm_store_si128
(
&
array
[
i
].
si
,
r
);
_mm_store_si128
(
&
sfmt
[
j
++
].
si
,
r
);
_mm_store_si128
(
&
data
.
sfmt
[
j
++
].
si
,
r
);
r1
=
r2
;
r2
=
r
;
}
...
...
libraries/sfmt/src/SFMT.cpp
View file @
8167c79b
...
...
@@ -144,9 +144,9 @@ inline static void swap(w128_t *array, int size);
#endif
#if defined(HAVE_ALTIVEC)
#include "SFMT-alti.h"
#include "
sfmt/
SFMT-alti.h"
#elif defined(HAVE_SSE2)
#include "SFMT-sse2.h"
#include "
sfmt/
SFMT-sse2.h"
#endif
/**
...
...
openmmapi/include/openmm/internal/vectorize.h
0 → 100644
View file @
8167c79b
#ifndef OPENMM_VECTORIZE_H_
#define OPENMM_VECTORIZE_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2013 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <smmintrin.h>
// This file defines classes and functions to simplify vectorizing code with SSE.
class
ivec4
;
/**
* A four element vector of floats.
*/
class
fvec4
{
public:
__m128
val
;
fvec4
()
{}
fvec4
(
float
v
)
:
val
(
_mm_set1_ps
(
v
))
{}
fvec4
(
float
v1
,
float
v2
,
float
v3
,
float
v4
)
:
val
(
_mm_set_ps
(
v4
,
v3
,
v2
,
v1
))
{}
fvec4
(
__m128
v
)
:
val
(
v
)
{}
fvec4
(
const
float
*
v
)
:
val
(
_mm_loadu_ps
(
v
))
{}
operator
__m128
()
const
{
return
val
;
}
float
operator
[](
int
i
)
const
{
int
resultBits
=
_mm_extract_ps
(
val
,
i
);
return
*
((
float
*
)
&
resultBits
);
}
void
store
(
float
*
v
)
const
{
_mm_storeu_ps
(
v
,
val
);
}
fvec4
operator
+
(
fvec4
other
)
const
{
return
_mm_add_ps
(
val
,
other
);
}
fvec4
operator
-
(
fvec4
other
)
const
{
return
_mm_sub_ps
(
val
,
other
);
}
fvec4
operator
*
(
fvec4
other
)
const
{
return
_mm_mul_ps
(
val
,
other
);
}
fvec4
operator
/
(
fvec4
other
)
const
{
return
_mm_div_ps
(
val
,
other
);
}
void
operator
+=
(
fvec4
other
)
{
val
=
_mm_add_ps
(
val
,
other
);
}
void
operator
-=
(
fvec4
other
)
{
val
=
_mm_sub_ps
(
val
,
other
);
}
void
operator
*=
(
fvec4
other
)
{
val
=
_mm_mul_ps
(
val
,
other
);
}
void
operator
/=
(
fvec4
other
)
{
val
=
_mm_div_ps
(
val
,
other
);
}
fvec4
operator
-
()
const
{
return
_mm_sub_ps
(
_mm_set1_ps
(
0.0
f
),
val
);
}
fvec4
operator
&
(
fvec4
other
)
const
{
return
_mm_and_ps
(
val
,
other
);
}
fvec4
operator
==
(
fvec4
other
)
const
{
return
_mm_cmpeq_ps
(
val
,
other
);
}
fvec4
operator
!=
(
fvec4
other
)
const
{
return
_mm_cmpneq_ps
(
val
,
other
);
}
fvec4
operator
>
(
fvec4
other
)
const
{
return
_mm_cmpgt_ps
(
val
,
other
);
}
fvec4
operator
<
(
fvec4
other
)
const
{
return
_mm_cmplt_ps
(
val
,
other
);
}
fvec4
operator
>=
(
fvec4
other
)
const
{
return
_mm_cmpge_ps
(
val
,
other
);
}
fvec4
operator
<=
(
fvec4
other
)
const
{
return
_mm_cmple_ps
(
val
,
other
);
}
operator
ivec4
()
const
;
};
/**
* A four element vector of ints.
*/
class
ivec4
{
public:
__m128i
val
;
ivec4
()
{}
ivec4
(
int
v
)
:
val
(
_mm_set1_epi32
(
v
))
{}
ivec4
(
int
v1
,
int
v2
,
int
v3
,
int
v4
)
:
val
(
_mm_set_epi32
(
v4
,
v3
,
v2
,
v1
))
{}
ivec4
(
__m128i
v
)
:
val
(
v
)
{}
ivec4
(
const
int
*
v
)
:
val
(
_mm_loadu_si128
((
const
__m128i
*
)
v
))
{}
operator
__m128i
()
const
{
return
val
;
}
int
operator
[](
int
i
)
const
{
return
_mm_extract_epi32
(
val
,
i
);
}
void
store
(
int
*
v
)
const
{
_mm_storeu_si128
((
__m128i
*
)
v
,
val
);
}
ivec4
operator
+
(
ivec4
other
)
const
{
return
_mm_add_epi32
(
val
,
other
);
}
ivec4
operator
-
(
ivec4
other
)
const
{
return
_mm_sub_epi32
(
val
,
other
);
}
ivec4
operator
*
(
ivec4
other
)
const
{
return
_mm_mul_epi32
(
val
,
other
);
}
void
operator
+=
(
ivec4
other
)
{
val
=
_mm_add_epi32
(
val
,
other
);
}
void
operator
-=
(
ivec4
other
)
{
val
=
_mm_sub_epi32
(
val
,
other
);
}
void
operator
*=
(
ivec4
other
)
{
val
=
_mm_mul_epi32
(
val
,
other
);
}
ivec4
operator
-
()
const
{
return
_mm_sub_epi32
(
_mm_set1_epi32
(
0
),
val
);
}
ivec4
operator
&
(
ivec4
other
)
const
{
return
_mm_and_si128
(
val
,
other
);
}
ivec4
operator
==
(
ivec4
other
)
const
{
return
_mm_cmpeq_epi32
(
val
,
other
);
}
operator
fvec4
()
const
;
};
// Conversion operators.
inline
fvec4
::
operator
ivec4
()
const
{
return
_mm_cvttps_epi32
(
val
);
}
inline
ivec4
::
operator
fvec4
()
const
{
return
_mm_cvtepi32_ps
(
val
);
}
// Functions that operate on fvec4s.
static
inline
fvec4
floor
(
fvec4
v
)
{
return
fvec4
(
_mm_floor_ps
(
v
.
val
));
}
static
inline
fvec4
ceil
(
fvec4
v
)
{
return
fvec4
(
_mm_ceil_ps
(
v
.
val
));
}
static
inline
fvec4
round
(
fvec4
v
)
{
return
fvec4
(
_mm_round_ps
(
v
.
val
,
_MM_FROUND_TO_NEAREST_INT
));
}
static
inline
fvec4
min
(
fvec4
v1
,
fvec4
v2
)
{
return
fvec4
(
_mm_min_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
max
(
fvec4
v1
,
fvec4
v2
)
{
return
fvec4
(
_mm_max_ps
(
v1
.
val
,
v2
.
val
));
}
static
inline
fvec4
abs
(
fvec4
v
)
{
static
const
__m128
mask
=
_mm_castsi128_ps
(
_mm_set1_epi32
(
0x7FFFFFFF
));
return
fvec4
(
_mm_and_ps
(
v
.
val
,
mask
));
}
static
inline
fvec4
sqrt
(
fvec4
v
)
{
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
static
inline
float
dot3
(
fvec4
v1
,
fvec4
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
static
inline
float
dot4
(
fvec4
v1
,
fvec4
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0xF1
));
}
// Functions that operate on ivec4s.
static
inline
ivec4
min
(
ivec4
v1
,
ivec4
v2
)
{
return
ivec4
(
_mm_min_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
max
(
ivec4
v1
,
ivec4
v2
)
{
return
ivec4
(
_mm_max_epi32
(
v1
.
val
,
v2
.
val
));
}
static
inline
ivec4
abs
(
ivec4
v
)
{
return
ivec4
(
_mm_abs_epi32
(
v
.
val
));
}
// Mathematical operators involving a scalar and a vector.
static
inline
fvec4
operator
+
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
+
v2
;
}
static
inline
fvec4
operator
-
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
-
v2
;
}
static
inline
fvec4
operator
*
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
*
v2
;
}
static
inline
fvec4
operator
/
(
float
v1
,
fvec4
v2
)
{
return
fvec4
(
v1
)
/
v2
;
}
#endif
/*OPENMM_VECTORIZE_H_*/
platforms/cpu/include/CpuNeighborList.h
View file @
8167c79b
...
...
@@ -12,26 +12,42 @@ namespace OpenMM {
class
OPENMM_EXPORT_CPU
CpuNeighborList
{
public:
class
ThreadData
;
class
VoxelHash
;
class
Voxels
;
static
const
int
BlockSize
;
CpuNeighborList
();
~
CpuNeighborList
();
void
computeNeighborList
(
int
numAtoms
,
const
std
::
vector
<
float
>&
atomLocations
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
);
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
getNeighbors
();
int
getNumBlocks
()
const
;
const
std
::
vector
<
int
>&
getSortedAtoms
()
const
;
const
std
::
vector
<
int
>&
getBlockNeighbors
(
int
blockIndex
)
const
;
const
std
::
vector
<
char
>&
getBlockExclusions
(
int
blockIndex
)
const
;
/**
* This routine contains the code executed by each thread.
*/
void
runThread
(
int
index
,
std
::
vector
<
std
::
pair
<
int
,
int
>
>&
threadNeighbors
);
void
runThread
(
int
index
);
private:
/**
* This is called by the worker threads to wait until the master thread instructs them to advance.
*/
void
threadWait
();
/**
* This is called by the master thread to instruct all the worker threads to advance.
*/
void
advanceThreads
();
bool
isDeleted
;
int
numThreads
,
waitCount
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
neighbors
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
pthread_t
>
thread
;
std
::
vector
<
ThreadData
*>
threadData
;
pthread_cond_t
startCondition
,
endCondition
;
pthread_mutex_t
lock
;
// The following variables are used to make information accessible to the individual threads.
VoxelHash
*
voxelHash
;
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
atomBins
;
Voxels
*
voxels
;
const
std
::
vector
<
std
::
set
<
int
>
>*
exclusions
;
const
float
*
atomLocations
;
const
float
*
periodicBoxSize
;
...
...
platforms/cpu/include/CpuNonbondedForce.h
View file @
8167c79b
...
...
@@ -25,14 +25,17 @@
#ifndef OPENMM_CPU_NONBONDED_FORCE_H__
#define OPENMM_CPU_NONBONDED_FORCE_H__
#include "CpuNeighborList.h"
#include "ReferencePairIxn.h"
#include "openmm/internal/vectorize.h"
#include <pthread.h>
#include <set>
#include <utility>
#include <vector>
#include <smmintrin.h>
// ---------------------------------------------------------------------------------------
namespace
OpenMM
{
class
CpuNonbondedForce
{
public:
class
ThreadData
;
...
...
@@ -63,7 +66,7 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
setUseCutoff
(
float
distance
,
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>
&
neighbors
,
float
solventDielectric
);
void
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
);
/**---------------------------------------------------------------------------------------
...
...
@@ -127,9 +130,9 @@ class CpuNonbondedForce {
--------------------------------------------------------------------------------------- */
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
std
::
vector
<
OpenMM
::
RealVec
>&
atomCoordinates
,
void
calculateReciprocalIxn
(
int
numberOfAtoms
,
float
*
posq
,
std
::
vector
<
RealVec
>&
atomCoordinates
,
const
std
::
vector
<
std
::
pair
<
float
,
float
>
>&
atomParameters
,
const
std
::
vector
<
std
::
set
<
int
>
>&
exclusions
,
std
::
vector
<
OpenMM
::
RealVec
>&
forces
,
float
*
totalEnergy
)
const
;
std
::
vector
<
RealVec
>&
forces
,
float
*
totalEnergy
)
const
;
/**---------------------------------------------------------------------------------------
...
...
@@ -159,14 +162,14 @@ private:
bool
periodic
;
bool
ewald
;
bool
pme
;
const
std
::
vector
<
std
::
pair
<
int
,
int
>
>
*
neighborList
;
const
CpuNeighborList
*
neighborList
;
float
periodicBoxSize
[
3
];
float
cutoffDistance
,
switchingDistance
;
float
krf
,
crf
;
float
alphaEwald
;
int
numRx
,
numRy
,
numRz
;
int
meshDim
[
3
];
std
::
vector
<
float
>
ewaldScale
X
,
ewaldScaleY
,
ewaldScaleDeriv
;
std
::
vector
<
float
>
ewaldScale
Table
;
float
ewaldDX
,
ewaldDXInv
;
bool
isDeleted
;
int
numThreads
,
waitCount
;
...
...
@@ -195,31 +198,42 @@ private:
--------------------------------------------------------------------------------------- */
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
,
const
__m128
&
boxSize
,
const
__m128
&
invBoxSize
);
void
calculateOneIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate
LJ Coulomb pair ixn between two atoms
Calculate
all the interactions for one atom block.
@param atom1 the index of the first atom
@param atom2 the index of the second atom
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**---------------------------------------------------------------------------------------
Calculate all the interactions for one atom block.
@param blockIndex the index of the atom block
@param forces force array (forces added)
@param totalEnergy total energy
--------------------------------------------------------------------------------------- */
void
calculate
One
EwaldIxn
(
int
atom1
,
int
atom2
,
float
*
forces
,
double
*
totalEnergy
,
const
__m128
&
boxSize
,
const
__m128
&
invBoxSize
);
void
calculate
Block
EwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
/**
* Compute the displacement and squared distance between two points, optionally using
* periodic boundary conditions.
*/
void
getDeltaR
(
const
__m128
&
posI
,
const
__m128
&
posJ
,
__m128
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
__m128
&
boxSize
,
const
__m128
&
invBoxSize
)
const
;
void
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
* Compute a fast approximation to erfc(x).
*/
static
f
loat
erfcApprox
(
f
loat
x
);
static
f
vec4
erfcApprox
(
f
vec4
x
);
/**
* Create a lookup table for the scale factor used with Ewald and PME.
...
...
@@ -229,9 +243,11 @@ private:
/**
* Evaluate the scale factor used with Ewald and PME: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
*/
f
loat
ewaldScaleFunction
(
f
loat
x
);
f
vec4
ewaldScaleFunction
(
f
vec4
x
);
};
}
// namespace OpenMM
// ---------------------------------------------------------------------------------------
#endif // OPENMM_CPU_NONBONDED_FORCE_H__
platforms/cpu/src/CpuKernels.cpp
View file @
8167c79b
...
...
@@ -221,20 +221,48 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
if
(
nonbondedMethod
!=
NoCutoff
)
{
// Determine whether we need to recompute the neighbor list.
double
padding
=
0.1
*
nonbondedCutoff
;
double
padding
=
0.1
5
*
nonbondedCutoff
;
bool
needRecompute
=
false
;
double
closeCutoff2
=
0.25
*
padding
*
padding
;
double
farCutoff2
=
0.5
*
padding
*
padding
;
int
maxNumMoved
=
numParticles
/
10
;
vector
<
int
>
moved
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
RealVec
delta
=
posData
[
i
]
-
lastPositions
[
i
];
if
(
delta
.
dot
(
delta
)
>
0.25
*
padding
*
padding
)
{
double
dist2
=
delta
.
dot
(
delta
);
if
(
dist2
>
closeCutoff2
)
{
moved
.
push_back
(
i
);
if
(
dist2
>
farCutoff2
||
moved
.
size
()
>
maxNumMoved
)
{
needRecompute
=
true
;
break
;
}
}
}
if
(
!
needRecompute
&&
moved
.
size
()
>
0
)
{
// Some particles have moved further than half the padding distance. Look for pairs
// that are missing from the neighbor list.
int
numMoved
=
moved
.
size
();
double
cutoff2
=
nonbondedCutoff
*
nonbondedCutoff
;
for
(
int
i
=
1
;
i
<
numMoved
&&
!
needRecompute
;
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
RealVec
delta
=
posData
[
moved
[
i
]]
-
posData
[
moved
[
j
]];
if
(
delta
.
dot
(
delta
)
<
cutoff2
)
{
// These particles should interact. See if they are in the neighbor list.
RealVec
oldDelta
=
lastPositions
[
moved
[
i
]]
-
lastPositions
[
moved
[
j
]];
if
(
oldDelta
.
dot
(
oldDelta
)
>
cutoff2
)
{
needRecompute
=
true
;
break
;
}
}
}
}
if
(
needRecompute
)
{
neighborList
.
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
floatBoxSize
,
periodic
||
ewald
||
pme
,
nonbondedCutoff
+
padding
);
lastPositions
=
posData
;
}
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
.
getNeighbors
()
,
rfDielectric
);
nonbonded
.
setUseCutoff
(
nonbondedCutoff
,
neighborList
,
rfDielectric
);
}
if
(
periodic
||
ewald
||
pme
)
{
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
8167c79b
#include "CpuNeighborList.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h"
#include "hilbert.h"
#include <algorithm>
#include <set>
#include <map>
#include <cmath>
...
...
@@ -9,137 +12,236 @@ using namespace std;
namespace
OpenMM
{
const
int
CpuNeighborList
::
BlockSize
=
4
;
class
VoxelIndex
{
public:
VoxelIndex
(
int
xx
,
int
yy
,
int
zz
)
:
x
(
xx
),
y
(
yy
),
z
(
zz
)
{
}
// operator<() needed for map
bool
operator
<
(
const
VoxelIndex
&
other
)
const
{
if
(
x
<
other
.
x
)
return
true
;
else
if
(
x
>
other
.
x
)
return
false
;
else
if
(
y
<
other
.
y
)
return
true
;
else
if
(
y
>
other
.
y
)
return
false
;
else
if
(
z
<
other
.
z
)
return
true
;
else
return
false
;
VoxelIndex
(
int
x
,
int
y
)
:
x
(
x
),
y
(
y
)
{
}
int
x
;
int
y
;
int
z
;
};
typedef
pair
<
const
float
*
,
int
>
VoxelItem
;
typedef
vector
<
VoxelItem
>
Voxel
;
class
CpuNeighborList
::
VoxelHash
{
/**
* This data structure organizes the particles spatially. It divides them into bins along the x and y axes,
* then sorts each bin along the z axis so ranges can be identified quickly with a binary search.
*/
class
CpuNeighborList
::
Voxels
{
public:
Voxel
Hash
(
float
vsx
,
float
vsy
,
float
vsz
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
voxelSizeZ
(
vsz
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
Voxel
s
(
float
vsx
,
float
vsy
,
float
minx
,
float
maxx
,
float
miny
,
float
maxy
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
)
:
voxelSizeX
(
vsx
),
voxelSizeY
(
vsy
),
minx
(
minx
),
maxx
(
maxx
),
miny
(
miny
),
maxy
(
maxy
),
periodicBoxSize
(
periodicBoxSize
),
usePeriodic
(
usePeriodic
)
{
if
(
usePeriodic
)
{
nx
=
(
int
)
floorf
(
periodicBoxSize
[
0
]
/
voxelSizeX
+
0.5
f
);
ny
=
(
int
)
floorf
(
periodicBoxSize
[
1
]
/
voxelSizeY
+
0.5
f
);
nz
=
(
int
)
floorf
(
periodicBoxSize
[
2
]
/
voxelSizeZ
+
0.5
f
);
voxelSizeX
=
periodicBoxSize
[
0
]
/
nx
;
voxelSizeY
=
periodicBoxSize
[
1
]
/
ny
;
voxelSizeZ
=
periodicBoxSize
[
2
]
/
nz
;
}
else
{
nx
=
max
(
1
,
(
int
)
floorf
((
maxx
-
minx
)
/
voxelSizeX
+
0.5
f
));
ny
=
max
(
1
,
(
int
)
floorf
((
maxy
-
miny
)
/
voxelSizeY
+
0.5
f
));
if
(
maxx
>
minx
)
voxelSizeX
=
(
maxx
-
minx
)
/
nx
;
if
(
maxy
>
miny
)
voxelSizeY
=
(
maxy
-
miny
)
/
ny
;
}
bins
.
resize
(
nx
);
for
(
int
i
=
0
;
i
<
nx
;
i
++
)
{
bins
[
i
].
resize
(
ny
);
for
(
int
j
=
0
;
j
<
ny
;
j
++
)
bins
[
i
][
j
].
resize
(
0
);
}
}
void
insert
(
const
int
&
item
,
const
float
*
location
)
{
/**
* Insert a particle into the voxel data structure.
*/
void
insert
(
const
int
&
atom
,
const
float
*
location
)
{
VoxelIndex
voxelIndex
=
getVoxelIndex
(
location
);
if
(
voxelMap
.
find
(
voxelIndex
)
==
voxelMap
.
end
())
voxelMap
[
voxelIndex
]
=
Voxel
();
Voxel
&
voxel
=
voxelMap
.
find
(
voxelIndex
)
->
second
;
voxel
.
push_back
(
VoxelItem
(
location
,
item
));
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
].
push_back
(
make_pair
(
location
[
2
],
VoxelItem
(
location
,
atom
)));
}
/**
* Sort the particles in each voxel by z coordinate.
*/
void
sortItems
()
{
for
(
int
i
=
0
;
i
<
nx
;
i
++
)
for
(
int
j
=
0
;
j
<
ny
;
j
++
)
sort
(
bins
[
i
][
j
].
begin
(),
bins
[
i
][
j
].
end
());
}
/**
* Find the index of the first particle in voxel (x,y) whose z coordinate in >= the specified value.
*/
int
findLowerBound
(
int
x
,
int
y
,
double
z
)
const
{
const
vector
<
pair
<
float
,
VoxelItem
>
>&
bin
=
bins
[
x
][
y
];
int
lower
=
0
;
int
upper
=
bin
.
size
();
while
(
lower
<
upper
)
{
int
middle
=
(
lower
+
upper
)
/
2
;
if
(
bin
[
middle
].
first
<
z
)
lower
=
middle
+
1
;
else
upper
=
middle
;
}
return
lower
;
}
/**
* Find the index of the first particle in voxel (x,y) whose z coordinate in greater than the specified value.
*/
int
findUpperBound
(
int
x
,
int
y
,
double
z
)
const
{
const
vector
<
pair
<
float
,
VoxelItem
>
>&
bin
=
bins
[
x
][
y
];
int
lower
=
0
;
int
upper
=
bin
.
size
();
while
(
lower
<
upper
)
{
int
middle
=
(
lower
+
upper
)
/
2
;
if
(
bin
[
middle
].
first
>
z
)
upper
=
middle
;
else
lower
=
middle
+
1
;
}
return
upper
;
}
/**
* Get the voxel index containing a particular location.
*/
VoxelIndex
getVoxelIndex
(
const
float
*
location
)
const
{
float
xperiodic
,
yperiodic
,
zperiodic
;
float
xperiodic
,
yperiodic
;
if
(
!
usePeriodic
)
{
xperiodic
=
location
[
0
];
yperiodic
=
location
[
1
];
zperiodic
=
location
[
2
];
xperiodic
=
location
[
0
]
-
minx
;
yperiodic
=
location
[
1
]
-
miny
;
}
else
{
xperiodic
=
location
[
0
]
-
periodicBoxSize
[
0
]
*
floorf
(
location
[
0
]
/
periodicBoxSize
[
0
]);
yperiodic
=
location
[
1
]
-
periodicBoxSize
[
1
]
*
floorf
(
location
[
1
]
/
periodicBoxSize
[
1
]);
zperiodic
=
location
[
2
]
-
periodicBoxSize
[
2
]
*
floorf
(
location
[
2
]
/
periodicBoxSize
[
2
]);
}
int
x
=
int
(
floorf
(
xperiodic
/
voxelSizeX
));
int
y
=
int
(
floorf
(
yperiodic
/
voxelSizeY
));
int
z
=
int
(
floorf
(
zperiodic
/
voxelSizeZ
));
int
x
=
min
(
nx
-
1
,
int
(
floorf
(
xperiodic
/
voxelSizeX
)));
int
y
=
min
(
ny
-
1
,
int
(
floorf
(
yperiodic
/
voxelSizeY
)));
return
VoxelIndex
(
x
,
y
,
z
);
return
VoxelIndex
(
x
,
y
);
}
void
getNeighbors
(
vector
<
pair
<
int
,
int
>
>&
neighbors
,
const
VoxelItem
&
referencePoint
,
const
vector
<
set
<
int
>
>&
exclusions
,
float
maxDistance
)
const
{
// Loop over neighboring voxels
// TODO use more clever selection of neighboring voxels
const
int
atomI
=
referencePoint
.
second
;
const
float
*
locationI
=
referencePoint
.
first
;
__m128
posI
=
_mm_loadu_ps
(
locationI
);
__m128
boxSize
=
_mm_set_ps
(
0
,
periodicBoxSize
[
2
],
periodicBoxSize
[
1
],
periodicBoxSize
[
0
]);
__m128
invBoxSize
=
_mm_set_ps
(
0
,
(
1
/
periodicBoxSize
[
2
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
0
]));
__m128
half
=
_mm_set1_ps
(
0.5
);
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
fvec4
blockCenter
,
fvec4
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>
blockAtoms
,
const
float
*
atomLocations
)
const
{
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
(
1
/
periodicBoxSize
[
0
],
1
/
periodicBoxSize
[
1
],
1
/
periodicBoxSize
[
2
],
0
);
float
maxDistanceSquared
=
maxDistance
*
maxDistance
;
int
dIndexX
=
int
(
maxDistance
/
voxelSizeX
)
+
1
;
// How may voxels away do we have to look?
int
dIndexY
=
int
(
maxDistance
/
voxelSizeY
)
+
1
;
int
dIndexZ
=
int
(
maxDistance
/
voxelSizeZ
)
+
1
;
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
locationI
);
int
lastx
=
centerVoxelIndex
.
x
+
dIndexX
;
int
lasty
=
centerVoxelIndex
.
y
+
dIndexY
;
int
lastz
=
centerVoxelIndex
.
z
+
dIndexZ
;
float
refineCutoff
=
maxDistance
-
max
(
max
(
blockWidth
[
0
],
blockWidth
[
1
]),
blockWidth
[
2
]);
float
refineCutoffSquared
=
refineCutoff
*
refineCutoff
;
int
dIndexX
=
min
(
nx
/
2
,
int
((
maxDistance
+
blockWidth
[
0
])
/
voxelSizeX
)
+
1
);
// How may voxels away do we have to look?
int
dIndexY
=
min
(
ny
/
2
,
int
((
maxDistance
+
blockWidth
[
1
])
/
voxelSizeY
)
+
1
);
float
centerPos
[
4
];
blockCenter
.
store
(
centerPos
);
VoxelIndex
centerVoxelIndex
=
getVoxelIndex
(
centerPos
);
int
startx
=
centerVoxelIndex
.
x
-
dIndexX
;
int
starty
=
centerVoxelIndex
.
y
-
dIndexY
;
int
endx
=
centerVoxelIndex
.
x
+
dIndexX
;
int
endy
=
centerVoxelIndex
.
y
+
dIndexY
;
int
numRanges
;
if
(
usePeriodic
)
{
last
x
=
min
(
last
x
,
centerVoxelIndex
.
x
-
dIndexX
+
nx
-
1
);
last
y
=
min
(
last
y
,
centerVoxelIndex
.
y
-
dIndexY
+
ny
-
1
);
lastz
=
min
(
lastz
,
centerVoxelIndex
.
z
-
dIndexZ
+
nz
-
1
)
;
end
x
=
min
(
end
x
,
centerVoxelIndex
.
x
-
dIndexX
+
nx
-
1
);
end
y
=
min
(
end
y
,
centerVoxelIndex
.
y
-
dIndexY
+
ny
-
1
);
numRanges
=
2
;
}
VoxelIndex
voxelIndex
(
0
,
0
,
0
);
for
(
int
x
=
centerVoxelIndex
.
x
-
dIndexX
;
x
<=
lastx
;
++
x
)
{
else
{
startx
=
max
(
startx
,
0
);
starty
=
max
(
starty
,
0
);
endx
=
min
(
endx
,
nx
-
1
);
endy
=
min
(
endy
,
ny
-
1
);
numRanges
=
1
;
}
int
lastSortedIndex
=
BlockSize
*
(
blockIndex
+
1
);
VoxelIndex
voxelIndex
(
0
,
0
);
for
(
int
x
=
startx
;
x
<=
endx
;
++
x
)
{
voxelIndex
.
x
=
x
;
if
(
usePeriodic
)
voxelIndex
.
x
=
(
x
<
0
?
x
+
nx
:
(
x
>=
nx
?
x
-
nx
:
x
));
for
(
int
y
=
centerVoxelIndex
.
y
-
dIndexY
;
y
<=
lasty
;
++
y
)
{
float
dx
=
max
(
0.0
f
,
voxelSizeX
*
max
(
0
,
abs
(
centerVoxelIndex
.
x
-
x
)
-
1
)
-
blockWidth
[
0
]);
for
(
int
y
=
starty
;
y
<=
endy
;
++
y
)
{
voxelIndex
.
y
=
y
;
if
(
usePeriodic
)
voxelIndex
.
y
=
(
y
<
0
?
y
+
ny
:
(
y
>=
ny
?
y
-
ny
:
y
));
for
(
int
z
=
centerVoxelIndex
.
z
-
dIndexZ
;
z
<=
lastz
;
++
z
)
{
voxelIndex
.
z
=
z
;
if
(
usePeriodic
)
voxelIndex
.
z
=
(
z
<
0
?
z
+
nz
:
(
z
>=
nz
?
z
-
nz
:
z
));
const
map
<
VoxelIndex
,
Voxel
>::
const_iterator
voxelEntry
=
voxelMap
.
find
(
voxelIndex
);
if
(
voxelEntry
==
voxelMap
.
end
())
continue
;
// no such voxel; skip
const
Voxel
&
voxel
=
voxelEntry
->
second
;
for
(
Voxel
::
const_iterator
itemIter
=
voxel
.
begin
();
itemIter
!=
voxel
.
end
();
++
itemIter
)
{
const
int
atomJ
=
itemIter
->
second
;
float
dy
=
max
(
0.0
f
,
voxelSizeY
*
max
(
0
,
abs
(
centerVoxelIndex
.
y
-
y
)
-
1
)
-
blockWidth
[
1
]);
// Identify the range of atoms within this bin we need to search. When using periodic boundary
// conditions, there may be two separate ranges.
float
dz
=
maxDistance
+
blockWidth
[
2
];
dz
=
sqrtf
(
max
(
0.0
f
,
dz
*
dz
-
dx
*
dx
-
dy
*
dy
));
int
rangeStart
[
2
];
int
rangeEnd
[
2
];
rangeStart
[
0
]
=
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
-
dz
);
if
(
usePeriodic
)
{
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
dz
);
if
(
rangeStart
[
0
]
>
0
)
{
rangeStart
[
1
]
=
0
;
rangeEnd
[
1
]
=
min
(
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
dz
-
periodicBoxSize
[
2
]),
rangeStart
[
0
]);
}
else
{
rangeStart
[
1
]
=
max
(
findLowerBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
-
dz
+
periodicBoxSize
[
2
]),
rangeEnd
[
0
]);
rangeEnd
[
1
]
=
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
].
size
();
}
}
else
rangeEnd
[
0
]
=
findUpperBound
(
voxelIndex
.
x
,
voxelIndex
.
y
,
centerPos
[
2
]
+
dz
);
// Loop over atoms and check to see if they are neighbors of this block.
for
(
int
range
=
0
;
range
<
numRanges
;
range
++
)
{
for
(
int
item
=
rangeStart
[
range
];
item
<
rangeEnd
[
range
];
item
++
)
{
const
int
sortedIndex
=
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
][
item
].
second
.
second
;
// Avoid duplicate entries.
if
(
atomJ
>=
atomI
)
break
;
if
(
sortedIndex
>=
lastSortedIndex
)
continue
;
__m128
posJ
=
_mm_loadu_ps
(
itemIter
->
first
);
__m128
delta
=
_mm_sub_ps
(
posJ
,
posI
)
;
fvec4
atomPos
(
bins
[
voxelIndex
.
x
][
voxelIndex
.
y
][
item
].
second
.
first
);
fvec4
delta
=
atomPos
-
centerPos
;
if
(
usePeriodic
)
{
__m128
base
=
_mm_mul_ps
(
_mm_floor_ps
(
_mm_add_ps
(
_mm_mul_ps
(
delta
,
invBoxSize
)
,
half
)),
boxSize
)
;
delta
=
_mm_sub_ps
(
delta
,
base
)
;
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
float
dSquared
=
_mm_cvtss_f32
(
_mm_dp_ps
(
delta
,
delta
,
0x71
));
delta
=
max
(
0.0
f
,
abs
(
delta
)
-
blockWidth
);
float
dSquared
=
dot3
(
delta
,
delta
);
if
(
dSquared
>
maxDistanceSquared
)
continue
;
// Ignore exclusions.
if
(
exclusions
[
atomI
].
find
(
atomJ
)
!=
exclusions
[
atomI
].
end
())
if
(
dSquared
>
refineCutoffSquared
)
{
// The distance is large enough that there might not be any actual interactions.
// Check individual atom pairs to be sure.
bool
any
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
fvec4
pos1
(
&
atomLocations
[
4
*
blockAtoms
[
k
]]);
delta
=
atomPos
-
pos1
;
if
(
usePeriodic
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
float
r2
=
dot3
(
delta
,
delta
);
if
(
r2
<
maxDistanceSquared
)
{
any
=
true
;
break
;
}
}
if
(
!
any
)
continue
;
}
// Add this atom to the list of neighbors.
neighbors
.
push_back
(
make_pair
(
atomI
,
atomJ
));
neighbors
.
push_back
(
sortedAtoms
[
sortedIndex
]);
if
(
sortedIndex
<
BlockSize
*
blockIndex
)
exclusions
.
push_back
(
0
);
else
exclusions
.
push_back
(
0xF
&
(
0xF
<<
(
sortedIndex
-
BlockSize
*
blockIndex
)));
}
}
}
...
...
@@ -147,11 +249,12 @@ public:
}
private:
float
voxelSizeX
,
voxelSizeY
,
voxelSizeZ
;
int
nx
,
ny
,
nz
;
float
voxelSizeX
,
voxelSizeY
;
float
minx
,
maxx
,
miny
,
maxy
;
int
nx
,
ny
;
const
float
*
periodicBoxSize
;
const
bool
usePeriodic
;
map
<
VoxelIndex
,
Voxel
>
voxelMap
;
vector
<
vector
<
vector
<
pair
<
float
,
VoxelItem
>
>
>
>
bins
;
};
class
CpuNeighborList
::
ThreadData
{
...
...
@@ -160,12 +263,11 @@ public:
}
int
index
;
CpuNeighborList
&
owner
;
vector
<
pair
<
int
,
int
>
>
threadNeighbors
;
};
static
void
*
threadBody
(
void
*
args
)
{
CpuNeighborList
::
ThreadData
&
data
=
*
reinterpret_cast
<
CpuNeighborList
::
ThreadData
*>
(
args
);
data
.
owner
.
runThread
(
data
.
index
,
data
.
threadNeighbors
);
data
.
owner
.
runThread
(
data
.
index
);
delete
&
data
;
return
0
;
}
...
...
@@ -203,23 +305,13 @@ CpuNeighborList::~CpuNeighborList() {
void
CpuNeighborList
::
computeNeighborList
(
int
numAtoms
,
const
vector
<
float
>&
atomLocations
,
const
vector
<
set
<
int
>
>&
exclusions
,
const
float
*
periodicBoxSize
,
bool
usePeriodic
,
float
maxDistance
)
{
// Build the voxel hash.
float
edgeSizeX
,
edgeSizeY
,
edgeSizeZ
;
if
(
!
usePeriodic
)
edgeSizeX
=
edgeSizeY
=
edgeSizeZ
=
maxDistance
;
// TODO - adjust this as needed
else
{
edgeSizeX
=
0.5
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.5
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
edgeSizeZ
=
0.5
f
*
periodicBoxSize
[
2
]
/
floorf
(
periodicBoxSize
[
2
]
/
maxDistance
);
}
VoxelHash
voxelHash
(
edgeSizeX
,
edgeSizeY
,
edgeSizeZ
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
voxelHash
.
insert
(
i
,
&
atomLocations
[
4
*
i
]);
int
numBlocks
=
(
numAtoms
+
BlockSize
-
1
)
/
BlockSize
;
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
// Record the parameters for the threads.
this
->
voxelHash
=
&
voxelHash
;
this
->
exclusions
=
&
exclusions
;
this
->
atomLocations
=
&
atomLocations
[
0
];
this
->
periodicBoxSize
=
periodicBoxSize
;
...
...
@@ -227,43 +319,158 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const vector<float>& ato
this
->
usePeriodic
=
usePeriodic
;
this
->
maxDistance
=
maxDistance
;
// Signal the threads to start running and wait for them to finish.
// Identify the range of atom positions along each axis.
fvec4
minPos
(
&
atomLocations
[
0
]);
fvec4
maxPos
=
minPos
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
i
]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
minx
=
minPos
[
0
];
maxx
=
maxPos
[
0
];
miny
=
minPos
[
1
];
maxy
=
maxPos
[
1
];
minz
=
minPos
[
2
];
maxz
=
maxPos
[
2
];
// Sort the atoms based on a Hilbert curve.
atomBins
.
resize
(
numAtoms
);
pthread_mutex_lock
(
&
lock
);
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
pthread_cond_wait
(
&
endCondition
,
&
lock
);
advanceThreads
();
sort
(
atomBins
.
begin
(),
atomBins
.
end
());
// Build the voxel hash.
float
edgeSizeX
,
edgeSizeY
;
if
(
!
usePeriodic
)
edgeSizeX
=
edgeSizeY
=
maxDistance
;
// TODO - adjust this as needed
else
{
edgeSizeX
=
0.5
f
*
periodicBoxSize
[
0
]
/
floorf
(
periodicBoxSize
[
0
]
/
maxDistance
);
edgeSizeY
=
0.5
f
*
periodicBoxSize
[
1
]
/
floorf
(
periodicBoxSize
[
1
]
/
maxDistance
);
}
Voxels
voxels
(
edgeSizeX
,
edgeSizeY
,
minx
,
maxx
,
miny
,
maxy
,
periodicBoxSize
,
usePeriodic
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
voxels
.
insert
(
i
,
&
atomLocations
[
4
*
atomIndex
]);
}
voxels
.
sortItems
();
this
->
voxels
=
&
voxels
;
// Signal the threads to start running and wait for them to finish.
advanceThreads
();
pthread_mutex_unlock
(
&
lock
);
// Combine the results from all the threads.
// Add padding atoms to fill up the last block.
int
numPadding
=
numBlocks
*
BlockSize
-
numAtoms
;
if
(
numPadding
>
0
)
{
char
mask
=
(
0xF0
>>
numPadding
)
&
0xF
;
for
(
int
i
=
0
;
i
<
numPadding
;
i
++
)
sortedAtoms
.
push_back
(
0
);
vector
<
char
>&
exc
=
blockExclusions
[
blockExclusions
.
size
()
-
1
];
for
(
int
i
=
0
;
i
<
(
int
)
exc
.
size
();
i
++
)
exc
[
i
]
|=
mask
;
}
}
int
CpuNeighborList
::
getNumBlocks
()
const
{
return
sortedAtoms
.
size
()
/
BlockSize
;
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
return
sortedAtoms
;
}
neighbors
.
clear
();
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
neighbors
.
insert
(
neighbors
.
end
(),
threadData
[
i
]
->
threadNeighbors
.
begin
(),
threadData
[
i
]
->
threadNeighbors
.
end
());
const
std
::
vector
<
int
>&
CpuNeighborList
::
getBlockNeighbors
(
int
blockIndex
)
const
{
return
blockNeighbors
[
blockIndex
];
}
const
vector
<
pair
<
int
,
int
>
>&
CpuNeighborList
::
getNeighbors
()
{
return
neighbors
;
const
std
::
vector
<
char
>&
CpuNeighborList
::
getBlockExclusions
(
int
blockIndex
)
const
{
return
blockExclusions
[
blockIndex
];
}
void
CpuNeighborList
::
runThread
(
int
index
,
vector
<
pair
<
int
,
int
>
>&
threadNeighbors
)
{
void
CpuNeighborList
::
runThread
(
int
index
)
{
while
(
true
)
{
// Wait for the signal to start running.
threadWait
();
if
(
isDeleted
)
break
;
// Compute the positions of atoms along the Hilbert curve.
float
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
f
;
float
invBinWidth
=
1.0
f
/
binWidth
;
bitmask_t
coords
[
3
];
for
(
int
i
=
index
;
i
<
numAtoms
;
i
+=
numThreads
)
{
const
float
*
pos
=
&
atomLocations
[
4
*
i
];
coords
[
0
]
=
(
bitmask_t
)
((
pos
[
0
]
-
minx
)
*
invBinWidth
);
coords
[
1
]
=
(
bitmask_t
)
((
pos
[
1
]
-
miny
)
*
invBinWidth
);
coords
[
2
]
=
(
bitmask_t
)
((
pos
[
2
]
-
minz
)
*
invBinWidth
);
int
bin
=
(
int
)
hilbert_c2i
(
3
,
8
,
coords
);
atomBins
[
i
]
=
pair
<
int
,
int
>
(
bin
,
i
);
}
threadWait
();
// Compute this thread's subset of neighbors.
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
for
(
int
i
=
index
;
i
<
numBlocks
;
i
+=
numThreads
)
{
{
int
firstIndex
=
BlockSize
*
i
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
blockAtoms
.
resize
(
atomsInBlock
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
}
int
firstIndex
=
BlockSize
*
i
;
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]]);
fvec4
maxPos
=
minPos
;
int
atomsInBlock
=
min
(
BlockSize
,
numAtoms
-
firstIndex
);
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]]);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
);
// Record the exclusions for this block.
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
const
set
<
int
>&
atomExclusions
=
(
*
exclusions
)[
sortedAtoms
[
firstIndex
+
j
]];
char
mask
=
1
<<
j
;
for
(
int
k
=
0
;
k
<
(
int
)
blockNeighbors
[
i
].
size
();
k
++
)
{
int
atomIndex
=
blockNeighbors
[
i
][
k
];
if
(
atomExclusions
.
find
(
atomIndex
)
!=
atomExclusions
.
end
())
blockExclusions
[
i
][
k
]
|=
mask
;
}
}
}
}
}
void
CpuNeighborList
::
threadWait
()
{
pthread_mutex_lock
(
&
lock
);
waitCount
++
;
pthread_cond_signal
(
&
endCondition
);
pthread_cond_wait
(
&
startCondition
,
&
lock
);
pthread_mutex_unlock
(
&
lock
);
if
(
isDeleted
)
break
;
// Compute this thread's subset of neighbors.
}
threadNeighbors
.
clear
();
for
(
int
i
=
index
;
i
<
numAtoms
;
i
+=
numThreads
)
voxelHash
->
getNeighbors
(
threadNeighbors
,
VoxelItem
(
&
atomLocations
[
4
*
i
],
i
),
*
exclusions
,
maxDistance
);
void
CpuNeighborList
::
advanceThreads
()
{
waitCount
=
0
;
pthread_cond_broadcast
(
&
startCondition
);
while
(
waitCount
<
numThreads
)
{
pthread_cond_wait
(
&
endCondition
,
&
lock
);
}
}
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
8167c79b
...
...
@@ -32,6 +32,7 @@
#include "ReferencePME.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/SplineFitter.h"
#include "openmm/internal/vectorize.h"
// In case we're using some primitive version of Visual Studio this will
// make sure that erf() and erfc() are defined.
...
...
@@ -113,7 +114,7 @@ CpuNonbondedForce::~CpuNonbondedForce(){
--------------------------------------------------------------------------------------- */
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
vector
<
pair
<
int
,
int
>
>
&
neighbors
,
float
solventDielectric
)
{
void
CpuNonbondedForce
::
setUseCutoff
(
float
distance
,
const
CpuNeighborList
&
neighbors
,
float
solventDielectric
)
{
cutoff
=
true
;
cutoffDistance
=
distance
;
...
...
@@ -199,23 +200,22 @@ void CpuNonbondedForce::setUseSwitchingFunction(float distance) {
void
CpuNonbondedForce
::
tabulateEwaldScaleFactor
()
{
ewaldDX
=
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
ewaldDXInv
=
1.0
f
/
ewaldDX
;
vector
<
double
>
x
(
NUM_TABLE_POINTS
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
);
vector
<
double
>
x
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
y
(
NUM_TABLE_POINTS
+
1
);
vector
<
double
>
deriv
;
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
+
1
;
i
++
)
{
double
r
=
i
*
cutoffDistance
/
(
NUM_TABLE_POINTS
-
2
);
double
alphaR
=
alphaEwald
*
r
;
x
[
i
]
=
r
;
y
[
i
]
=
erfc
(
alphaR
)
+
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
);
}
SplineFitter
::
createNaturalSpline
(
x
,
y
,
deriv
);
ewaldScaleX
.
resize
(
NUM_TABLE_POINTS
);
ewaldScaleY
.
resize
(
NUM_TABLE_POINTS
);
ewaldScaleDeriv
.
resize
(
NUM_TABLE_POINTS
);
ewaldScaleTable
.
resize
(
4
*
NUM_TABLE_POINTS
);
for
(
int
i
=
0
;
i
<
NUM_TABLE_POINTS
;
i
++
)
{
ewaldScaleX
[
i
]
=
(
float
)
x
[
i
];
ewaldScaleY
[
i
]
=
(
float
)
y
[
i
];
ewaldScaleDeriv
[
i
]
=
(
float
)
(
deriv
[
i
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
4
*
i
]
=
(
float
)
y
[
i
];
ewaldScaleTable
[
4
*
i
+
1
]
=
(
float
)
y
[
i
+
1
];
ewaldScaleTable
[
4
*
i
+
2
]
=
(
float
)
(
deriv
[
i
]
*
ewaldDX
*
ewaldDX
/
6
);
ewaldScaleTable
[
4
*
i
+
3
]
=
(
float
)
(
deriv
[
i
+
1
]
*
ewaldDX
*
ewaldDX
/
6
);
}
}
...
...
@@ -358,42 +358,12 @@ void CpuNonbondedForce::calculateDirectIxn(int numberOfAtoms, float* posq, const
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
directEnergy
+=
threadData
[
i
]
->
threadEnergy
;
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
{
__m128
f
=
_mm_loadu_ps
(
forces
+
4
*
i
);
fvec4
f
(
forces
+
4
*
i
);
for
(
int
j
=
0
;
j
<
numThreads
;
j
++
)
f
=
_mm_add_ps
(
f
,
_mm_loadu_ps
(
&
threadData
[
j
]
->
threadForce
[
4
*
i
])
)
;
_mm_
store
u_ps
(
forces
+
4
*
i
,
f
);
f
+
=
fvec4
(
&
threadData
[
j
]
->
threadForce
[
4
*
i
]);
f
.
store
(
forces
+
4
*
i
);
}
if
(
ewald
||
pme
)
{
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
__m128
boxSize
=
_mm_set_ps
(
0
,
periodicBoxSize
[
2
],
periodicBoxSize
[
1
],
periodicBoxSize
[
0
]);
__m128
invBoxSize
=
_mm_set_ps
(
0
,
(
1
/
periodicBoxSize
[
2
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
0
]));
for
(
int
i
=
0
;
i
<
numberOfAtoms
;
i
++
)
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
ii
=
i
;
int
jj
=
*
iter
;
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
posq
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
posq
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
ii
+
3
]
*
posq
[
4
*
jj
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
));
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
),
result
));
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
if
(
includeEnergy
)
directEnergy
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
}
}
if
(
totalEnergy
!=
NULL
)
*
totalEnergy
+=
(
float
)
directEnergy
;
}
...
...
@@ -418,23 +388,47 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
threadForce
.
resize
(
4
*
numberOfAtoms
,
0.0
f
);
for
(
int
i
=
0
;
i
<
4
*
numberOfAtoms
;
i
++
)
threadForce
[
i
]
=
0.0
f
;
__m128
boxSize
=
_mm_set_ps
(
0
,
periodicBoxSize
[
2
],
periodicBoxSize
[
1
],
periodicBoxSize
[
0
]
);
__m128
invBoxSize
=
_mm_set_ps
(
0
,
(
1
/
periodicBoxSize
[
2
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
0
]));
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
(
(
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
])
,
0
);
if
(
ewald
||
pme
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneEwaldIxn
(
pair
.
first
,
pair
.
second
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
for
(
int
i
=
index
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockEwaldIxn
(
i
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
// Now subtract off the exclusions, since they were implicitly included in the reciprocal space sum.
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
invBoxSize
((
1
/
periodicBoxSize
[
0
]),
(
1
/
periodicBoxSize
[
1
]),
(
1
/
periodicBoxSize
[
2
]),
0
);
for
(
int
i
=
index
;
i
<
numberOfAtoms
;
i
+=
numThreads
)
for
(
set
<
int
>::
const_iterator
iter
=
exclusions
[
i
].
begin
();
iter
!=
exclusions
[
i
].
end
();
++
iter
)
{
if
(
*
iter
>
i
)
{
int
j
=
*
iter
;
fvec4
deltaR
;
fvec4
posI
(
posq
+
4
*
i
);
fvec4
posJ
(
posq
+
4
*
j
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
false
,
boxSize
,
invBoxSize
);
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
erfcAlphaR
=
erfcApprox
(
alphaR
)[
0
];
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erfcAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
&
threadForce
[
4
*
i
])
-
result
).
store
(
&
threadForce
[
4
*
i
]);
(
fvec4
(
&
threadForce
[
4
*
j
])
+
result
).
store
(
&
threadForce
[
4
*
j
]);
if
(
includeEnergy
)
threadEnergy
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erfcAlphaR
);
}
}
}
else
if
(
cutoff
)
{
// Compute the interactions from the neighbor list.
for
(
int
i
=
index
;
i
<
(
int
)
neighborList
->
size
();
i
+=
numThreads
)
{
pair
<
int
,
int
>
pair
=
(
*
neighborList
)[
i
];
calculateOneIxn
(
pair
.
first
,
pair
.
second
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
}
for
(
int
i
=
index
;
i
<
neighborList
->
getNumBlocks
();
i
+=
numThreads
)
calculateBlockIxn
(
i
,
&
threadForce
[
0
],
energyPtr
,
boxSize
,
invBoxSize
);
}
else
{
// Loop over all atom pairs
...
...
@@ -448,12 +442,12 @@ void CpuNonbondedForce::runThread(int index, vector<float>& threadForce, double&
}
}
void
CpuNonbondedForce
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
,
const
__m128
&
boxSize
,
const
__m128
&
invBoxSize
)
{
void
CpuNonbondedForce
::
calculateOneIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// get deltaR, R2, and R between 2 atoms
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
posq
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
posq
+
4
*
jj
);
fvec4
deltaR
;
fvec4
posI
(
posq
+
4
*
ii
);
fvec4
posJ
(
posq
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
periodic
,
boxSize
,
invBoxSize
);
if
(
cutoff
&&
r2
>=
cutoffDistance
*
cutoffDistance
)
...
...
@@ -497,83 +491,239 @@ void CpuNonbondedForce::calculateOneIxn(int ii, int jj, float* forces, double* t
// accumulate forces
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
))
;
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
)
,
result
))
;
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
)
,
result
))
;
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
ii
)
+
result
).
store
(
forces
+
4
*
ii
);
(
fvec4
(
forces
+
4
*
jj
)
-
result
).
store
(
forces
+
4
*
jj
);
}
void
CpuNonbondedForce
::
calculateOneEwaldIxn
(
int
ii
,
int
jj
,
float
*
forces
,
double
*
totalEnergy
,
const
__m128
&
boxSize
,
const
__m128
&
invBoxSize
)
{
__m128
deltaR
;
__m128
posI
=
_mm_loadu_ps
(
posq
+
4
*
ii
);
__m128
posJ
=
_mm_loadu_ps
(
posq
+
4
*
jj
);
float
r2
;
getDeltaR
(
posJ
,
posI
,
deltaR
,
r2
,
true
,
boxSize
,
invBoxSize
);
if
(
r2
<
cutoffDistance
*
cutoffDistance
)
{
float
r
=
sqrtf
(
r2
);
float
inverseR
=
1
/
r
;
float
switchValue
=
1
,
switchDeriv
=
0
;
if
(
useSwitch
&&
r
>
switchingDistance
)
{
float
t
=
(
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
);
switchValue
=
1
+
t
*
t
*
t
*
(
-
10
+
t
*
(
15
-
t
*
6
));
switchDeriv
=
t
*
t
*
(
-
30
+
t
*
(
60
-
t
*
30
))
/
(
cutoffDistance
-
switchingDistance
);
void
CpuNonbondedForce
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
ii
+
3
]
*
posq
[
4
*
jj
+
3
];
float
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
float
sig
=
atomParameters
[
ii
].
first
+
atomParameters
[
jj
].
first
;
float
sig2
=
inverseR
*
sig
;
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
bool
any
=
false
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
(
!
cutoff
||
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
));
any
|=
include
[
j
];
}
if
(
!
any
)
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
}
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
atomParameters
[
ii
].
second
*
atomParameters
[
jj
].
second
;
dEdR
+=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
fvec4
dEdR
=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
if
(
cutoff
)
dEdR
+=
chargeProd
*
(
inverseR
-
2.0
f
*
krf
*
r2
);
else
dEdR
+=
chargeProd
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
f
loat
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
f
vec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// accumulate forces
// Accumulate energies.
if
(
totalEnergy
)
{
if
(
cutoff
)
energy
+=
chargeProd
*
(
inverseR
+
krf
*
r2
-
crf
);
else
energy
+=
chargeProd
*
inverseR
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
}
__m128
result
=
_mm_mul_ps
(
deltaR
,
_mm_set1_ps
(
dEdR
));
_mm_storeu_ps
(
forces
+
4
*
ii
,
_mm_add_ps
(
_mm_loadu_ps
(
forces
+
4
*
ii
),
result
));
_mm_storeu_ps
(
forces
+
4
*
jj
,
_mm_sub_ps
(
_mm_loadu_ps
(
forces
+
4
*
jj
),
result
));
// Accumulate forces.
// accumulate energies
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
// Load the positions and parameters of the atoms in the block.
int
blockAtom
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForce
[
4
];
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtom
[
i
]
=
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
+
i
];
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomForce
[
i
]
=
fvec4
(
0.0
f
);
}
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
// Loop over neighbors for this block.
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
float
blockAtomR2
[
4
];
bool
include
[
4
];
fvec4
blockAtomDelta
[
4
];
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
// Load the next neighbor.
int
atom
=
neighbors
[
i
];
fvec4
atomPosq
(
posq
+
4
*
atom
);
// Compute the distances to the block atoms.
bool
any
=
false
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
getDeltaR
(
atomPosq
,
blockAtomPosq
[
j
],
blockAtomDelta
[
j
],
blockAtomR2
[
j
],
periodic
,
boxSize
,
invBoxSize
);
include
[
j
]
=
(((
exclusions
[
i
]
>>
j
)
&
1
)
==
0
&&
blockAtomR2
[
j
]
<
cutoffDistance
*
cutoffDistance
);
any
|=
include
[
j
];
}
if
(
!
any
)
continue
;
// No interactions to compute.
// Compute the interactions.
fvec4
r2
(
blockAtomR2
);
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
switchValue
(
1.0
f
),
switchDeriv
(
0.0
f
);
if
(
useSwitch
)
{
fvec4
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
/
(
cutoffDistance
-
switchingDistance
));
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
/
(
cutoffDistance
-
switchingDistance
);
}
fvec4
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec4
dEdR
=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
fvec4
sig
=
blockAtomSigma
+
atomParameters
[
atom
].
first
;
fvec4
sig2
=
inverseR
*
sig
;
sig2
*=
sig2
;
fvec4
sig6
=
sig2
*
sig2
*
sig2
;
fvec4
eps
=
blockAtomEpsilon
*
atomParameters
[
atom
].
second
;
dEdR
+=
switchValue
*
eps
*
(
12.0
f
*
sig6
-
6.0
f
)
*
sig6
;
dEdR
*=
inverseR
*
inverseR
;
fvec4
energy
=
eps
*
(
sig6
-
1.0
f
)
*
sig6
;
if
(
useSwitch
)
{
dEdR
-=
energy
*
switchDeriv
*
inverseR
;
energy
*=
switchValue
;
}
// Accumulate energies.
if
(
totalEnergy
)
{
energy
+=
(
float
)
(
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
));
*
totalEnergy
+=
energy
;
energy
+=
chargeProd
*
inverseR
*
erfcApprox
(
alphaEwald
*
r
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
if
(
include
[
j
])
*
totalEnergy
+=
energy
[
j
];
}
// Accumulate forces.
fvec4
atomForce
(
forces
+
4
*
atom
);
for
(
int
j
=
0
;
j
<
4
;
j
++
)
{
if
(
include
[
j
])
{
fvec4
result
=
blockAtomDelta
[
j
]
*
dEdR
[
j
];
blockAtomForce
[
j
]
+=
result
;
atomForce
-=
result
;
}
}
atomForce
.
store
(
forces
+
4
*
atom
);
}
// Record the forces on the block atoms.
for
(
int
j
=
0
;
j
<
4
;
j
++
)
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
blockAtomForce
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
void
CpuNonbondedForce
::
getDeltaR
(
const
__m128
&
posI
,
const
__m128
&
posJ
,
__m128
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
__m128
&
boxSize
,
const
__m128
&
invBoxSize
)
const
{
deltaR
=
_mm_sub_ps
(
posJ
,
posI
)
;
void
CpuNonbondedForce
::
getDeltaR
(
const
fvec4
&
posI
,
const
fvec4
&
posJ
,
fvec4
&
deltaR
,
float
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
deltaR
=
posJ
-
posI
;
if
(
periodic
)
{
__m128
base
=
_mm_mul_ps
(
_mm_floor_ps
(
_mm_add_ps
(
_mm_mul_ps
(
deltaR
,
invBoxSize
)
,
_mm_set1_ps
(
0.5
))),
boxSize
)
;
deltaR
=
_mm_sub_ps
(
deltaR
,
base
)
;
fvec4
base
=
round
(
deltaR
*
invBoxSize
)
*
boxSize
;
deltaR
=
deltaR
-
base
;
}
r2
=
_mm_cvtss_f32
(
_mm_dp_ps
(
deltaR
,
deltaR
,
0x71
)
);
r2
=
dot3
(
deltaR
,
deltaR
);
}
f
loat
CpuNonbondedForce
::
erfcApprox
(
f
loat
x
)
{
f
vec4
CpuNonbondedForce
::
erfcApprox
(
f
vec4
x
)
{
// This approximation for erfc is from Abramowitz and Stegun (1964) p. 299. They cite the following as
// the original source: C. Hastings, Jr., Approximations for Digital Computers (1955). It has a maximum
// error of 3e-7.
f
loat
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
f
vec4
t
=
1.0
f
+
(
0.0705230784
f
+
(
0.0422820123
f
+
(
0.0092705272
f
+
(
0.0001520143
f
+
(
0.0002765672
f
+
0.0000430638
f
*
x
)
*
x
)
*
x
)
*
x
)
*
x
)
*
x
;
t
*=
t
;
t
*=
t
;
t
*=
t
;
return
1.0
f
/
(
t
*
t
);
}
f
loat
CpuNonbondedForce
::
ewaldScaleFunction
(
f
loat
x
)
{
f
vec4
CpuNonbondedForce
::
ewaldScaleFunction
(
f
vec4
x
)
{
// Compute the tabulated Ewald scale factor: erfc(alpha*r) + 2*alpha*r*exp(-alpha*alpha*r*r)/sqrt(PI)
int
lower
=
(
int
)
(
x
*
ewaldDXInv
);
int
upper
=
lower
+
1
;
float
a
=
(
ewaldScaleX
[
upper
]
-
x
)
*
ewaldDXInv
;
float
b
=
1.0
f
-
a
;
return
a
*
ewaldScaleY
[
lower
]
+
b
*
ewaldScaleY
[
upper
]
+
((
a
*
a
*
a
-
a
)
*
ewaldScaleDeriv
[
lower
]
+
(
b
*
b
*
b
-
b
)
*
ewaldScaleDeriv
[
upper
]);
float
y
[
4
];
fvec4
x1
=
x
*
ewaldDXInv
;
ivec4
index
=
floor
(
x1
);
fvec4
coeff
[
4
];
coeff
[
1
]
=
x1
-
index
;
coeff
[
0
]
=
1.0
f
-
coeff
[
1
];
coeff
[
2
]
=
coeff
[
0
]
*
coeff
[
0
]
*
coeff
[
0
]
-
coeff
[
0
];
coeff
[
3
]
=
coeff
[
1
]
*
coeff
[
1
]
*
coeff
[
1
]
-
coeff
[
1
];
_MM_TRANSPOSE4_PS
(
coeff
[
0
],
coeff
[
1
],
coeff
[
2
],
coeff
[
3
]);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
if
(
index
[
i
]
<
NUM_TABLE_POINTS
)
y
[
i
]
=
dot4
(
coeff
[
i
],
fvec4
(
&
ewaldScaleTable
[
4
*
index
[
i
]]));
}
return
fvec4
(
y
);
}
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
8167c79b
...
...
@@ -39,6 +39,7 @@
#include "sfmt/SFMT.h"
#include <iostream>
#include <set>
#include <utility>
#include <vector>
using
namespace
OpenMM
;
...
...
@@ -68,11 +69,20 @@ void testNeighborList(bool periodic) {
// Convert the neighbor list to a set for faster lookup.
set
<
pair
<
int
,
int
>
>
neighbors
;
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
.
getNeighbors
().
size
();
i
++
)
{
pair
<
int
,
int
>
entry
=
neighborList
.
getNeighbors
()[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
neighborList
.
getSortedAtoms
().
size
();
i
++
)
{
int
blockIndex
=
i
/
CpuNeighborList
::
BlockSize
;
int
indexInBlock
=
i
-
blockIndex
*
CpuNeighborList
::
BlockSize
;
char
mask
=
1
<<
indexInBlock
;
for
(
int
j
=
0
;
j
<
(
int
)
neighborList
.
getBlockExclusions
(
blockIndex
).
size
();
j
++
)
{
if
((
neighborList
.
getBlockExclusions
(
blockIndex
)[
j
]
&
mask
)
==
0
)
{
int
atom1
=
neighborList
.
getSortedAtoms
()[
i
];
int
atom2
=
neighborList
.
getBlockNeighbors
(
blockIndex
)[
j
];
pair
<
int
,
int
>
entry
=
make_pair
(
min
(
atom1
,
atom2
),
max
(
atom1
,
atom2
));
ASSERT
(
neighbors
.
find
(
entry
)
==
neighbors
.
end
()
&&
neighbors
.
find
(
make_pair
(
entry
.
second
,
entry
.
first
))
==
neighbors
.
end
());
// No duplicates
neighbors
.
insert
(
entry
);
}
}
}
// Check each particle pair and figure out whether they should be in the neighbor list.
...
...
@@ -90,7 +100,8 @@ void testNeighborList(bool periodic) {
if
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
)
shouldInclude
=
false
;
bool
isIncluded
=
(
neighbors
.
find
(
make_pair
(
i
,
j
))
!=
neighbors
.
end
()
||
neighbors
.
find
(
make_pair
(
j
,
i
))
!=
neighbors
.
end
());
ASSERT_EQUAL
(
shouldInclude
,
isIncluded
);
if
(
shouldInclude
)
ASSERT
(
isIncluded
);
}
}
...
...
plugins/amoeba/openmmapi/include/openmm/AmoebaMultipoleForce.h
View file @
8167c79b
...
...
@@ -301,6 +301,14 @@ public:
*/
void
setEwaldErrorTolerance
(
double
tol
);
/**
* Get the induced dipole moments of all particles.
*
* @param context the Context for which to get the induced dipoles
* @param dipoles the induced dipole moment of particle i is stored into the i'th element
*/
void
getInducedDipoles
(
Context
&
context
,
std
::
vector
<
Vec3
>&
dipoles
);
/**
* Get the electrostatic potential.
*
...
...
plugins/amoeba/openmmapi/include/openmm/amoebaKernels.h
View file @
8167c79b
...
...
@@ -348,6 +348,8 @@ public:
*/
virtual
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
=
0
;
virtual
void
getInducedDipoles
(
ContextImpl
&
context
,
std
::
vector
<
Vec3
>&
dipoles
)
=
0
;
virtual
void
getElectrostaticPotential
(
ContextImpl
&
context
,
const
std
::
vector
<
Vec3
>&
inputGrid
,
std
::
vector
<
double
>&
outputElectrostaticPotential
)
=
0
;
...
...
plugins/amoeba/openmmapi/include/openmm/internal/AmoebaMultipoleForceImpl.h
View file @
8167c79b
...
...
@@ -82,6 +82,8 @@ public:
*/
static
void
getCovalentDegree
(
const
AmoebaMultipoleForce
&
force
,
std
::
vector
<
int
>&
covalentDegree
);
void
getInducedDipoles
(
ContextImpl
&
context
,
std
::
vector
<
Vec3
>&
dipoles
);
void
getElectrostaticPotential
(
ContextImpl
&
context
,
const
std
::
vector
<
Vec3
>&
inputGrid
,
std
::
vector
<
double
>&
outputElectrostaticPotential
);
...
...
plugins/amoeba/openmmapi/src/AmoebaMultipoleForce.cpp
View file @
8167c79b
...
...
@@ -226,6 +226,10 @@ void AmoebaMultipoleForce::getCovalentMaps(int index, std::vector< std::vector<i
}
}
void
AmoebaMultipoleForce
::
getInducedDipoles
(
Context
&
context
,
vector
<
Vec3
>&
dipoles
)
{
dynamic_cast
<
AmoebaMultipoleForceImpl
&>
(
getImplInContext
(
context
)).
getInducedDipoles
(
getContextImpl
(
context
),
dipoles
);
}
void
AmoebaMultipoleForce
::
getElectrostaticPotential
(
const
std
::
vector
<
Vec3
>&
inputGrid
,
Context
&
context
,
std
::
vector
<
double
>&
outputElectrostaticPotential
){
dynamic_cast
<
AmoebaMultipoleForceImpl
&>
(
getImplInContext
(
context
)).
getElectrostaticPotential
(
getContextImpl
(
context
),
inputGrid
,
outputElectrostaticPotential
);
}
...
...
plugins/amoeba/openmmapi/src/AmoebaMultipoleForceImpl.cpp
View file @
8167c79b
...
...
@@ -183,6 +183,10 @@ void AmoebaMultipoleForceImpl::getCovalentDegree( const AmoebaMultipoleForce& fo
return
;
}
void
AmoebaMultipoleForceImpl
::
getInducedDipoles
(
ContextImpl
&
context
,
vector
<
Vec3
>&
dipoles
)
{
kernel
.
getAs
<
CalcAmoebaMultipoleForceKernel
>
().
getInducedDipoles
(
context
,
dipoles
);
}
void
AmoebaMultipoleForceImpl
::
getElectrostaticPotential
(
ContextImpl
&
context
,
const
std
::
vector
<
Vec3
>&
inputGrid
,
std
::
vector
<
double
>&
outputElectrostaticPotential
){
kernel
.
getAs
<
CalcAmoebaMultipoleForceKernel
>
().
getElectrostaticPotential
(
context
,
inputGrid
,
outputElectrostaticPotential
);
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.cpp
View file @
8167c79b
...
...
@@ -1639,6 +1639,24 @@ void CudaCalcAmoebaMultipoleForceKernel::ensureMultipolesValid(ContextImpl& cont
context
.
calcForcesAndEnergy
(
false
,
false
,
-
1
);
}
void
CudaCalcAmoebaMultipoleForceKernel
::
getInducedDipoles
(
ContextImpl
&
context
,
vector
<
Vec3
>&
dipoles
)
{
ensureMultipolesValid
(
context
);
int
numParticles
=
cu
.
getNumAtoms
();
dipoles
.
resize
(
numParticles
);
if
(
cu
.
getUseDoublePrecision
())
{
vector
<
double
>
d
;
inducedDipole
->
download
(
d
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
dipoles
[
i
]
=
Vec3
(
d
[
3
*
i
],
d
[
3
*
i
+
1
],
d
[
3
*
i
+
2
]);
}
else
{
vector
<
float
>
d
;
inducedDipole
->
download
(
d
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
dipoles
[
i
]
=
Vec3
(
d
[
3
*
i
],
d
[
3
*
i
+
1
],
d
[
3
*
i
+
2
]);
}
}
void
CudaCalcAmoebaMultipoleForceKernel
::
getElectrostaticPotential
(
ContextImpl
&
context
,
const
vector
<
Vec3
>&
inputGrid
,
vector
<
double
>&
outputElectrostaticPotential
)
{
ensureMultipolesValid
(
context
);
int
numPoints
=
inputGrid
.
size
();
...
...
plugins/amoeba/platforms/cuda/src/AmoebaCudaKernels.h
View file @
8167c79b
...
...
@@ -327,6 +327,13 @@ public:
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Get the induced dipole moments of all particles.
*
* @param context the Context for which to get the induced dipoles
* @param dipoles the induced dipole moment of particle i is stored into the i'th element
*/
void
getInducedDipoles
(
ContextImpl
&
context
,
std
::
vector
<
Vec3
>&
dipoles
);
/**
* Execute the kernel to calculate the electrostatic potential
*
...
...
plugins/amoeba/platforms/cuda/tests/TestCudaAmoebaMultipoleForce.cpp
View file @
8167c79b
...
...
@@ -2700,6 +2700,40 @@ static void testPMEMutualPolarizationLargeWater( FILE* log ) {
}
// test querying particle induced dipoles
static
void
testParticleInducedDipoles
()
{
int
numberOfParticles
=
8
;
int
inputPmeGridDimension
=
0
;
double
cutoff
=
9000000.0
;
std
::
vector
<
Vec3
>
forces
;
double
energy
;
System
system
;
AmoebaMultipoleForce
*
amoebaMultipoleForce
=
new
AmoebaMultipoleForce
();;
setupMultipoleAmmonia
(
system
,
amoebaMultipoleForce
,
AmoebaMultipoleForce
::
NoCutoff
,
AmoebaMultipoleForce
::
Mutual
,
cutoff
,
inputPmeGridDimension
);
LangevinIntegrator
integrator
(
0.0
,
0.1
,
0.01
);
Context
context
(
system
,
integrator
,
Platform
::
getPlatformByName
(
"CUDA"
));
getForcesEnergyMultipoleAmmonia
(
context
,
forces
,
energy
);
std
::
vector
<
Vec3
>
dipole
;
amoebaMultipoleForce
->
getInducedDipoles
(
context
,
dipole
);
// Compare to values calculated by TINKER.
std
::
vector
<
Vec3
>
expectedDipole
(
numberOfParticles
);
expectedDipole
[
0
]
=
Vec3
(
0.0031710288
,
9.3687453e-7
,
-
0.0006919963
);
expectedDipole
[
1
]
=
Vec3
(
8.0279737504e-5
,
-
0.000279376
,
4.778060103e-5
);
expectedDipole
[
2
]
=
Vec3
(
0.000079322
,
0.0002789804
,
4.8696656126e-5
);
expectedDipole
[
3
]
=
Vec3
(
-
0.0001407394
,
1.540638116e-6
,
-
0.0007077775
);
expectedDipole
[
4
]
=
Vec3
(
0.0019564439
,
-
1.0409717e-7
,
0.0007332188
);
expectedDipole
[
5
]
=
Vec3
(
0.0008213891
,
-
0.0007749618
,
-
0.0003883865
);
expectedDipole
[
6
]
=
Vec3
(
0.0046133992
,
-
7.2868019e-7
,
0.0002500622
);
expectedDipole
[
7
]
=
Vec3
(
0.0008204731
,
0.0007772727
,
-
0.0003856176
);
for
(
int
i
=
0
;
i
<
numberOfParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
expectedDipole
[
i
],
dipole
[
i
],
1e-4
);
}
// test computation of system multipole moments
static
void
testSystemMultipoleMoments
(
FILE
*
log
)
{
...
...
@@ -2963,6 +2997,10 @@ int main(int argc, char* argv[]) {
testMultipoleIonsAndWaterPMEMutualPolarization
(
log
);
testMultipoleIonsAndWaterPMEDirectPolarization
(
log
);
// test querying induced dipoles
testParticleInducedDipoles
();
// test computation of system multipole moments
testSystemMultipoleMoments
(
log
);
...
...
plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.cpp
View file @
8167c79b
...
...
@@ -683,6 +683,25 @@ double ReferenceCalcAmoebaMultipoleForceKernel::execute(ContextImpl& context, bo
return
static_cast
<
double
>
(
energy
);
}
void
ReferenceCalcAmoebaMultipoleForceKernel
::
getInducedDipoles
(
ContextImpl
&
context
,
vector
<
Vec3
>&
outputDipoles
)
{
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
outputDipoles
.
resize
(
numParticles
);
// Create an AmoebaReferenceMultipoleForce to do the calculation.
AmoebaReferenceMultipoleForce
*
amoebaReferenceMultipoleForce
=
setupAmoebaReferenceMultipoleForce
(
context
);
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
// Retrieve the induced dipoles.
vector
<
RealVec
>
inducedDipoles
;
amoebaReferenceMultipoleForce
->
calculateInducedDipoles
(
posData
,
charges
,
dipoles
,
quadrupoles
,
tholes
,
dampingFactors
,
polarity
,
axisTypes
,
multipoleAtomZs
,
multipoleAtomXs
,
multipoleAtomYs
,
multipoleAtomCovalentInfo
,
inducedDipoles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
outputDipoles
[
i
]
=
inducedDipoles
[
i
];
delete
amoebaReferenceMultipoleForce
;
}
void
ReferenceCalcAmoebaMultipoleForceKernel
::
getElectrostaticPotential
(
ContextImpl
&
context
,
const
std
::
vector
<
Vec3
>&
inputGrid
,
std
::
vector
<
double
>&
outputElectrostaticPotential
){
...
...
@@ -704,8 +723,6 @@ void ReferenceCalcAmoebaMultipoleForceKernel::getElectrostaticPotential(ContextI
}
delete
amoebaReferenceMultipoleForce
;
return
;
}
void
ReferenceCalcAmoebaMultipoleForceKernel
::
getSystemMultipoleMoments
(
ContextImpl
&
context
,
std
::
vector
<
double
>&
outputMultipoleMoments
){
...
...
@@ -726,8 +743,6 @@ void ReferenceCalcAmoebaMultipoleForceKernel::getSystemMultipoleMoments(ContextI
multipoleAtomCovalentInfo
,
outputMultipoleMoments
);
delete
amoebaReferenceMultipoleForce
;
return
;
}
void
ReferenceCalcAmoebaMultipoleForceKernel
::
copyParametersToContext
(
ContextImpl
&
context
,
const
AmoebaMultipoleForce
&
force
)
{
...
...
plugins/amoeba/platforms/reference/src/AmoebaReferenceKernels.h
View file @
8167c79b
...
...
@@ -366,6 +366,13 @@ public:
* @return the potential energy due to the force
*/
double
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
);
/**
* Get the induced dipole moments of all particles.
*
* @param context the Context for which to get the induced dipoles
* @param dipoles the induced dipole moment of particle i is stored into the i'th element
*/
void
getInducedDipoles
(
ContextImpl
&
context
,
std
::
vector
<
Vec3
>&
dipoles
);
/**
* Calculate the electrostatic potential given vector of grid coordinates.
*
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment