Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
59bccb15
Commit
59bccb15
authored
May 15, 2015
by
peastman
Browse files
Merge branch 'master' into pthreads
Conflicts: libraries/pthreads/include/pthread.h
parents
6cf75568
4bdbcf4d
Changes
79
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
445 additions
and
196 deletions
+445
-196
openmmapi/include/openmm/internal/vectorize_neon.h
openmmapi/include/openmm/internal/vectorize_neon.h
+6
-2
openmmapi/include/openmm/internal/vectorize_pnacl.h
openmmapi/include/openmm/internal/vectorize_pnacl.h
+6
-2
openmmapi/include/openmm/internal/vectorize_sse.h
openmmapi/include/openmm/internal/vectorize_sse.h
+12
-0
platforms/cpu/include/CpuNeighborList.h
platforms/cpu/include/CpuNeighborList.h
+2
-1
platforms/cpu/include/CpuNonbondedForceVec4.h
platforms/cpu/include/CpuNonbondedForceVec4.h
+6
-6
platforms/cpu/include/CpuNonbondedForceVec8.h
platforms/cpu/include/CpuNonbondedForceVec8.h
+6
-6
platforms/cpu/src/CpuCustomManyParticleForce.cpp
platforms/cpu/src/CpuCustomManyParticleForce.cpp
+1
-1
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+71
-41
platforms/cpu/src/CpuNonbondedForce.cpp
platforms/cpu/src/CpuNonbondedForce.cpp
+5
-5
platforms/cpu/src/CpuNonbondedForceVec4.cpp
platforms/cpu/src/CpuNonbondedForceVec4.cpp
+137
-43
platforms/cpu/src/CpuNonbondedForceVec8.cpp
platforms/cpu/src/CpuNonbondedForceVec8.cpp
+139
-45
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+3
-2
platforms/cuda/src/CudaExpressionUtilities.cpp
platforms/cuda/src/CudaExpressionUtilities.cpp
+3
-0
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+20
-23
platforms/cuda/src/kernels/customIntegratorGlobal.cu
platforms/cuda/src/kernels/customIntegratorGlobal.cu
+1
-1
platforms/cuda/src/kernels/customIntegratorPerDof.cu
platforms/cuda/src/kernels/customIntegratorPerDof.cu
+1
-1
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
+1
-1
platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
+8
-8
platforms/opencl/include/OpenCLFFT3D.h
platforms/opencl/include/OpenCLFFT3D.h
+14
-6
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+3
-2
No files found.
openmmapi/include/openmm/internal/vectorize_neon.h
View file @
59bccb15
...
@@ -251,11 +251,15 @@ static inline fvec4 abs(const fvec4& v) {
...
@@ -251,11 +251,15 @@ static inline fvec4 abs(const fvec4& v) {
return
vabsq_f32
(
v
);
return
vabsq_f32
(
v
);
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
r
sqrt
(
const
fvec4
&
v
)
{
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
float32x4_t
recipSqrt
=
vrsqrteq_f32
(
v
);
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
recipSqrt
=
vmulq_f32
(
recipSqrt
,
vrsqrtsq_f32
(
vmulq_f32
(
recipSqrt
,
v
),
recipSqrt
));
return
vmulq_f32
(
v
,
recipSqrt
);
return
recipSqrt
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
rsqrt
(
v
)
*
v
;
}
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
...
...
openmmapi/include/openmm/internal/vectorize_pnacl.h
View file @
59bccb15
...
@@ -330,7 +330,7 @@ static inline fvec4 ceil(const fvec4& v) {
...
@@ -330,7 +330,7 @@ static inline fvec4 ceil(const fvec4& v) {
return
truncated
+
blend
(
0.0
f
,
1.0
f
,
truncated
<
v
);
return
truncated
+
blend
(
0.0
f
,
1.0
f
,
truncated
<
v
);
}
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
static
inline
fvec4
r
sqrt
(
const
fvec4
&
v
)
{
// Initial estimate of rsqrt().
// Initial estimate of rsqrt().
ivec4
i
=
(
__m128i
)
v
;
ivec4
i
=
(
__m128i
)
v
;
...
@@ -343,7 +343,11 @@ static inline fvec4 sqrt(const fvec4& v) {
...
@@ -343,7 +343,11 @@ static inline fvec4 sqrt(const fvec4& v) {
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
y
*=
1.5
f
-
x2
*
y
*
y
;
return
y
*
v
;
return
y
;
}
static
inline
fvec4
sqrt
(
const
fvec4
&
v
)
{
return
rsqrt
(
v
)
*
v
;
}
}
#endif
/*OPENMM_VECTORIZE_PNACL_H_*/
#endif
/*OPENMM_VECTORIZE_PNACL_H_*/
...
...
openmmapi/include/openmm/internal/vectorize_sse.h
View file @
59bccb15
...
@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
...
@@ -241,6 +241,18 @@ static inline fvec4 sqrt(const fvec4& v) {
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
return
fvec4
(
_mm_sqrt_ps
(
v
.
val
));
}
}
static
inline
fvec4
rsqrt
(
const
fvec4
&
v
)
{
// Initial estimate of rsqrt().
fvec4
y
(
_mm_rsqrt_ps
(
v
.
val
));
// Perform an iteration of Newton refinement.
fvec4
x2
=
v
*
0.5
f
;
y
*=
fvec4
(
1.5
f
)
-
x2
*
y
*
y
;
return
y
;
}
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
static
inline
float
dot3
(
const
fvec4
&
v1
,
const
fvec4
&
v2
)
{
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
return
_mm_cvtss_f32
(
_mm_dp_ps
(
v1
,
v2
,
0x71
));
}
}
...
...
platforms/cpu/include/CpuNeighborList.h
View file @
59bccb15
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2013 Stanford University and the Authors.
*
* Portions copyright (c) 2013
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -61,6 +61,7 @@ public:
...
@@ -61,6 +61,7 @@ public:
private:
private:
int
blockSize
;
int
blockSize
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
int
>
sortedAtoms
;
std
::
vector
<
float
>
sortedPositions
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
int
>
>
blockNeighbors
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
std
::
vector
<
std
::
vector
<
char
>
>
blockExclusions
;
// The following variables are used to make information accessible to the individual threads.
// The following variables are used to make information accessible to the individual threads.
...
...
platforms/cpu/include/CpuNonbondedForceVec4.h
View file @
59bccb15
...
@@ -56,8 +56,8 @@ protected:
...
@@ -56,8 +56,8 @@ protected:
/**
/**
* Templatized implementation of calculateBlockIxn.
* Templatized implementation of calculateBlockIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -74,15 +74,15 @@ protected:
...
@@ -74,15 +74,15 @@ protected:
/**
/**
* Templatized implementation of calculateBlockEwaldIxn.
* Templatized implementation of calculateBlockEwaldIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* periodic boundary conditions.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/include/CpuNonbondedForceVec8.h
View file @
59bccb15
...
@@ -55,8 +55,8 @@ protected:
...
@@ -55,8 +55,8 @@ protected:
/**
/**
* Templatized implementation of calculateBlockIxn.
* Templatized implementation of calculateBlockIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**---------------------------------------------------------------------------------------
/**---------------------------------------------------------------------------------------
...
@@ -73,15 +73,15 @@ protected:
...
@@ -73,15 +73,15 @@ protected:
/**
/**
* Templatized implementation of calculateBlockEwaldIxn.
* Templatized implementation of calculateBlockEwaldIxn.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
);
void
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
);
/**
/**
* Compute the displacement and squared distance between a collection of points, optionally using
* Compute the displacement and squared distance between a collection of points, optionally using
* periodic boundary conditions.
* periodic boundary conditions.
*/
*/
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
void
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
;
/**
/**
* Compute a fast approximation to erfc(x).
* Compute a fast approximation to erfc(x).
...
...
platforms/cpu/src/CpuCustomManyParticleForce.cpp
View file @
59bccb15
...
@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
...
@@ -199,7 +199,7 @@ void CpuCustomManyParticleForce::setUseCutoff(RealOpenMM distance) {
}
}
void
CpuCustomManyParticleForce
::
setPeriodic
(
RealVec
*
periodicBoxVectors
)
{
void
CpuCustomManyParticleForce
::
setPeriodic
(
RealVec
*
periodicBoxVectors
)
{
assert
(
c
utoff
);
assert
(
useC
utoff
);
assert
(
periodicBoxVectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
0
][
0
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
1
][
1
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
1
][
1
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
2
][
2
]
>=
2.0
*
cutoffDistance
);
assert
(
periodicBoxVectors
[
2
][
2
]
>=
2.0
*
cutoffDistance
);
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
59bccb15
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2013 Stanford University and the Authors.
*
* Portions copyright (c) 2013
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -164,8 +164,8 @@ public:
...
@@ -164,8 +164,8 @@ public:
return
VoxelIndex
(
y
,
z
);
return
VoxelIndex
(
y
,
z
);
}
}
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
float
*
atomLoca
tions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
void
getNeighbors
(
vector
<
int
>&
neighbors
,
int
blockIndex
,
const
fvec4
&
blockCenter
,
const
fvec4
&
blockWidth
,
const
vector
<
int
>&
sortedAtoms
,
vector
<
char
>&
exclusions
,
float
maxDistance
,
const
vector
<
int
>&
blockAtoms
,
const
vector
<
float
>&
blockAtomX
,
const
vector
<
float
>&
blockAtomY
,
const
vector
<
float
>&
blockAtomZ
,
const
vector
<
float
>&
sortedPosi
tions
,
const
vector
<
VoxelIndex
>&
atomVoxelIndex
)
const
{
neighbors
.
resize
(
0
);
neighbors
.
resize
(
0
);
exclusions
.
resize
(
0
);
exclusions
.
resize
(
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
fvec4
boxSize
(
periodicBoxSize
[
0
],
periodicBoxSize
[
1
],
periodicBoxSize
[
2
],
0
);
...
@@ -233,24 +233,34 @@ public:
...
@@ -233,24 +233,34 @@ public:
float
minx
=
centerPos
[
0
];
float
minx
=
centerPos
[
0
];
float
maxx
=
centerPos
[
0
];
float
maxx
=
centerPos
[
0
];
fvec4
offset
(
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
),
0
);
float
offset
[
3
]
=
{
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
)};
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
+=
4
)
{
const
float
*
atomPos
=
&
atomLocations
[
4
*
blockAtoms
[
k
]];
fvec4
dist2
=
maxDistanceSquared
;
fvec4
posVec
(
atomPos
);
if
(
y
!=
atomVoxelIndex
[
k
].
y
)
{
fvec4
delta1
=
offset
-
posVec
;
fvec4
dy1
=
offset
[
1
]
-
fvec4
(
&
blockAtomY
[
k
]);
fvec4
delta2
=
delta1
+
fvec4
(
0
,
voxelSizeY
,
voxelSizeZ
,
0
);
fvec4
dy2
=
dy1
+
voxelSizeY
;
if
(
usePeriodic
)
{
if
(
usePeriodic
)
{
delta1
-=
round
(
delta1
*
invBoxSize
)
*
boxSize
;
dy1
-=
round
(
dy1
*
invBoxSize
[
1
])
*
boxSize
[
1
];
delta2
-=
round
(
delta2
*
invBoxSize
)
*
boxSize
;
dy2
-=
round
(
dy2
*
invBoxSize
[
1
])
*
boxSize
[
1
];
}
fvec4
dy
=
min
(
abs
(
dy1
),
abs
(
dy2
));
dist2
-=
dy
*
dy
;
}
if
(
z
!=
atomVoxelIndex
[
k
].
z
)
{
fvec4
dz1
=
offset
[
2
]
-
fvec4
(
&
blockAtomZ
[
k
]);
fvec4
dz2
=
dz1
+
voxelSizeZ
;
if
(
usePeriodic
)
{
dz1
-=
round
(
dz1
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dz2
-=
round
(
dz2
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
fvec4
dz
=
min
(
abs
(
dz1
),
abs
(
dz2
));
dist2
-=
dz
*
dz
;
}
}
fvec4
delta
=
min
(
abs
(
delta1
),
abs
(
delta2
));
fvec4
dist
=
sqrt
(
dist2
);
float
dy
=
(
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
int
numToCheck
=
min
(
4
,
(
int
)
(
blockAtoms
.
size
()
-
k
));
float
dz
=
(
z
==
atomVoxelIndex
[
k
].
z
?
0.0
f
:
delta
[
2
]);
for
(
int
m
=
0
;
m
<
numToCheck
;
m
++
)
{
float
dist2
=
maxDistanceSquared
-
dy
*
dy
-
dz
*
dz
;
minx
=
min
(
minx
,
blockAtomX
[
k
+
m
]
-
dist
[
m
]
-
xoffset
);
if
(
dist2
>
0
)
{
maxx
=
max
(
maxx
,
blockAtomX
[
k
+
m
]
+
dist
[
m
]
-
xoffset
);
float
dist
=
sqrtf
(
dist2
);
minx
=
min
(
minx
,
atomPos
[
0
]
-
dist
-
xoffset
);
maxx
=
max
(
maxx
,
atomPos
[
0
]
+
dist
-
xoffset
);
}
}
}
}
if
(
minx
==
maxx
)
if
(
minx
==
maxx
)
...
@@ -290,12 +300,10 @@ public:
...
@@ -290,12 +300,10 @@ public:
if
(
sortedIndex
>=
lastSortedIndex
)
if
(
sortedIndex
>=
lastSortedIndex
)
continue
;
continue
;
fvec4
atomPos
(
atomLocations
+
4
*
sortedAtoms
[
sortedIndex
]);
fvec4
atomPos
(
&
sortedPositions
[
4
*
sortedIndex
]);
fvec4
delta
=
atomPos
-
centerPos
;
fvec4
delta
=
atomPos
-
centerPos
;
if
(
periodicRectangular
)
{
if
(
periodicRectangular
)
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
-=
round
(
delta
*
invBoxSize
)
*
boxSize
;
delta
=
delta
-
base
;
}
else
if
(
needPeriodic
)
{
else
if
(
needPeriodic
)
{
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
...
@@ -310,26 +318,34 @@ public:
...
@@ -310,26 +318,34 @@ public:
// The distance is large enough that there might not be any actual interactions.
// The distance is large enough that there might not be any actual interactions.
// Check individual atom pairs to be sure.
// Check individual atom pairs to be sure.
bool
any
=
false
;
bool
anyInteraction
=
false
;
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
+=
4
)
{
fvec4
pos1
(
&
atomLocations
[
4
*
blockAtoms
[
k
]]);
fvec4
dx
=
fvec4
(
&
blockAtomX
[
k
])
-
atomPos
[
0
];
delta
=
atomPos
-
pos1
;
fvec4
dy
=
fvec4
(
&
blockAtomY
[
k
])
-
atomPos
[
1
];
fvec4
dz
=
fvec4
(
&
blockAtomZ
[
k
])
-
atomPos
[
2
];
if
(
periodicRectangular
)
{
if
(
periodicRectangular
)
{
fvec4
base
=
round
(
delta
*
invBoxSize
)
*
boxSize
;
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
delta
=
delta
-
base
;
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
}
else
if
(
needPeriodic
)
{
else
if
(
needPeriodic
)
{
delta
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
delta
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
delta
-=
periodicBoxVec4
[
0
]
*
floorf
(
delta
[
0
]
*
recipBoxSize
[
0
]
+
0.5
f
);
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
}
f
loat
r2
=
d
ot3
(
delta
,
delta
)
;
f
vec4
r2
=
d
x
*
dx
+
dy
*
dy
+
dz
*
dz
;
if
(
r2
<
maxDistanceSquared
)
{
if
(
any
(
r2
<
maxDistanceSquared
)
)
{
any
=
true
;
any
Interaction
=
true
;
break
;
break
;
}
}
}
}
if
(
!
any
)
if
(
!
any
Interaction
)
continue
;
continue
;
}
}
...
@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -379,6 +395,7 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
blockNeighbors
.
resize
(
numBlocks
);
blockNeighbors
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
blockExclusions
.
resize
(
numBlocks
);
sortedAtoms
.
resize
(
numAtoms
);
sortedAtoms
.
resize
(
numAtoms
);
sortedPositions
.
resize
(
4
*
numAtoms
);
// Record the parameters for the threads.
// Record the parameters for the threads.
...
@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
...
@@ -428,6 +445,8 @@ void CpuNeighborList::computeNeighborList(int numAtoms, const AlignedArray<float
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
atomIndex
=
atomBins
[
i
].
second
;
int
atomIndex
=
atomBins
[
i
].
second
;
sortedAtoms
[
i
]
=
atomIndex
;
sortedAtoms
[
i
]
=
atomIndex
;
fvec4
atomPos
(
&
atomLocations
[
4
*
atomIndex
]);
atomPos
.
store
(
&
sortedPositions
[
4
*
i
]);
voxels
.
insert
(
i
,
&
atomLocations
[
4
*
atomIndex
]);
voxels
.
insert
(
i
,
&
atomLocations
[
4
*
atomIndex
]);
}
}
voxels
.
sortItems
();
voxels
.
sortItems
();
...
@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -489,6 +508,7 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
int
numBlocks
=
blockNeighbors
.
size
();
int
numBlocks
=
blockNeighbors
.
size
();
vector
<
int
>
blockAtoms
;
vector
<
int
>
blockAtoms
;
vector
<
float
>
blockAtomX
(
blockSize
),
blockAtomY
(
blockSize
),
blockAtomZ
(
blockSize
);
vector
<
VoxelIndex
>
atomVoxelIndex
;
vector
<
VoxelIndex
>
atomVoxelIndex
;
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
for
(
int
i
=
threadIndex
;
i
<
numBlocks
;
i
+=
numThreads
)
{
// Find the atoms in this block and compute their bounding box.
// Find the atoms in this block and compute their bounding box.
...
@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
...
@@ -501,14 +521,24 @@ void CpuNeighborList::threadComputeNeighborList(ThreadPool& threads, int threadI
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
blockAtoms
[
j
]
=
sortedAtoms
[
firstIndex
+
j
];
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
atomVoxelIndex
[
j
]
=
voxels
->
getVoxelIndex
(
&
atomLocations
[
4
*
blockAtoms
[
j
]]);
}
}
fvec4
minPos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
]
]
);
fvec4
minPos
(
&
sortedPositions
[
4
*
firstIndex
]);
fvec4
maxPos
=
minPos
;
fvec4
maxPos
=
minPos
;
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
for
(
int
j
=
1
;
j
<
atomsInBlock
;
j
++
)
{
fvec4
pos
(
&
atomLocations
[
4
*
sortedAtoms
[
firstIndex
+
j
]
]);
fvec4
pos
(
&
sortedPositions
[
4
*
(
firstIndex
+
j
)
]);
minPos
=
min
(
minPos
,
pos
);
minPos
=
min
(
minPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
maxPos
=
max
(
maxPos
,
pos
);
}
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
atomLocations
,
atomVoxelIndex
);
for
(
int
j
=
0
;
j
<
atomsInBlock
;
j
++
)
{
blockAtomX
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)];
blockAtomY
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)
+
1
];
blockAtomZ
[
j
]
=
sortedPositions
[
4
*
(
firstIndex
+
j
)
+
2
];
}
for
(
int
j
=
atomsInBlock
;
j
<
blockSize
;
j
++
)
{
blockAtomX
[
j
]
=
1e10
;
blockAtomY
[
j
]
=
1e10
;
blockAtomZ
[
j
]
=
1e10
;
}
voxels
->
getNeighbors
(
blockNeighbors
[
i
],
i
,
(
maxPos
+
minPos
)
*
0.5
f
,
(
maxPos
-
minPos
)
*
0.5
f
,
sortedAtoms
,
blockExclusions
[
i
],
maxDistance
,
blockAtoms
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
sortedPositions
,
atomVoxelIndex
);
// Record the exclusions for this block.
// Record the exclusions for this block.
...
...
platforms/cpu/src/CpuNonbondedForce.cpp
View file @
59bccb15
/* Portions copyright (c) 2006-201
3
Stanford University and Simbios.
/* Portions copyright (c) 2006-201
5
Stanford University and Simbios.
* Contributors: Pande Group
* Contributors: Pande Group
*
*
* Permission is hereby granted, free of charge, to any person obtaining
* Permission is hereby granted, free of charge, to any person obtaining
...
@@ -364,15 +364,15 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
...
@@ -364,15 +364,15 @@ void CpuNonbondedForce::threadComputeDirect(ThreadPool& threads, int threadIndex
float
inverseR
=
1
/
r
;
float
inverseR
=
1
/
r
;
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
chargeProd
=
ONE_4PI_EPS0
*
posq
[
4
*
i
+
3
]
*
posq
[
4
*
j
+
3
];
float
alphaR
=
alphaEwald
*
r
;
float
alphaR
=
alphaEwald
*
r
;
float
erf
c
AlphaR
=
erf
cApprox
(
alphaR
);
float
erfAlphaR
=
erf
(
alphaR
);
if
(
1
-
erf
c
AlphaR
>
1e-6
f
)
{
if
(
erfAlphaR
>
1e-6
f
)
{
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
float
dEdR
=
(
float
)
(
chargeProd
*
inverseR
*
inverseR
*
inverseR
);
dEdR
=
(
float
)
(
dEdR
*
(
1.0
f
-
erf
c
AlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
dEdR
=
(
float
)
(
dEdR
*
(
erfAlphaR
-
TWO_OVER_SQRT_PI
*
alphaR
*
exp
(
-
alphaR
*
alphaR
)));
fvec4
result
=
deltaR
*
dEdR
;
fvec4
result
=
deltaR
*
dEdR
;
(
fvec4
(
forces
+
4
*
i
)
-
result
).
store
(
forces
+
4
*
i
);
(
fvec4
(
forces
+
4
*
i
)
-
result
).
store
(
forces
+
4
*
i
);
(
fvec4
(
forces
+
4
*
j
)
+
result
).
store
(
forces
+
4
*
j
);
(
fvec4
(
forces
+
4
*
j
)
+
result
).
store
(
forces
+
4
*
j
);
if
(
includeEnergy
)
if
(
includeEnergy
)
threadEnergy
[
threadIndex
]
-=
chargeProd
*
inverseR
*
(
1.0
f
-
erf
c
AlphaR
)
;
threadEnergy
[
threadIndex
]
-=
chargeProd
*
inverseR
*
erfAlphaR
;
}
}
}
}
}
}
...
...
platforms/cpu/src/CpuNonbondedForceVec4.cpp
View file @
59bccb15
...
@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() {
...
@@ -44,30 +44,76 @@ CpuNonbondedForce* createCpuNonbondedForceVec4() {
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
CpuNonbondedForceVec4
::
CpuNonbondedForceVec4
()
{
}
}
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -82,7 +128,10 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
ivec4
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -95,8 +144,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
rsqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
energy
,
dEdR
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -108,6 +156,7 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
fvec4
r
=
r2
*
inverseR
;
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
t
=
blend
(
0.0
f
,
(
r
-
switchingDistance
)
*
invSwitchingInterval
,
r
>
switchingDistance
);
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec4
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -162,29 +211,73 @@ void CpuNonbondedForceVec4::calculateBlockIxnImpl(int blockIndex, float* forces,
}
}
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockEwaldIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockEwaldIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
4
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockEwaldIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockEwaldIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockEwaldIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockEwaldIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec4
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomPosq
[
4
];
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec4
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
for
(
int
i
=
0
;
i
<
4
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomX
=
fvec4
(
blockAtomPosq
[
0
][
0
],
blockAtomPosq
[
1
][
0
],
blockAtomPosq
[
2
][
0
],
blockAtomPosq
[
3
][
0
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomY
=
fvec4
(
blockAtomPosq
[
0
][
1
],
blockAtomPosq
[
1
][
1
],
blockAtomPosq
[
2
][
1
],
blockAtomPosq
[
3
][
1
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomZ
=
fvec4
(
blockAtomPosq
[
0
][
2
],
blockAtomPosq
[
1
][
2
],
blockAtomPosq
[
2
][
2
],
blockAtomPosq
[
3
][
2
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomCharge
=
fvec4
(
ONE_4PI_EPS0
)
*
fvec4
(
blockAtomPosq
[
0
][
3
],
blockAtomPosq
[
1
][
3
],
blockAtomPosq
[
2
][
3
],
blockAtomPosq
[
3
][
3
]);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
fvec4
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -199,7 +292,10 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec4
dx
,
dy
,
dz
,
r2
;
fvec4
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
posq
+
4
*
atom
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec4
include
;
ivec4
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -212,8 +308,8 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
// Compute the interactions.
fvec4
r
=
sqrt
(
r2
);
fvec4
inverseR
=
r
sqrt
(
r2
);
fvec4
inverseR
=
fvec4
(
1.0
f
)
/
r
;
fvec4
r
=
r2
*
inverseR
;
fvec4
energy
,
dEdR
;
fvec4
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -272,28 +368,26 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -272,28 +368,26 @@ void CpuNonbondedForceVec4::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForceVec4
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec4
&
x
,
const
fvec4
&
y
,
const
fvec4
&
z
,
fvec4
&
dx
,
fvec4
&
dy
,
fvec4
&
dz
,
fvec4
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
if
(
PERIODIC_TYPE
==
PeriodicTriclinic
)
{
if
(
TRICLINIC
)
{
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
fvec4
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
fvec4
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
fvec4
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
}
else
if
(
PERIODIC_TYPE
==
PeriodicPerInteraction
)
{
else
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
}
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
}
...
...
platforms/cpu/src/CpuNonbondedForceVec8.cpp
View file @
59bccb15
...
@@ -50,7 +50,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
...
@@ -50,7 +50,7 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
*/
*/
bool
isVec8Supported
()
{
bool
isVec8Supported
()
{
// Make sure the CPU supports AVX.
// Make sure the CPU supports AVX.
int
cpuInfo
[
4
];
int
cpuInfo
[
4
];
cpuid
(
cpuInfo
,
0
);
cpuid
(
cpuInfo
,
0
);
if
(
cpuInfo
[
0
]
>=
1
)
{
if
(
cpuInfo
[
0
]
>=
1
)
{
...
@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
...
@@ -76,29 +76,75 @@ CpuNonbondedForce* createCpuNonbondedForceVec8() {
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
CpuNonbondedForceVec8
::
CpuNonbondedForceVec8
()
{
}
}
enum
PeriodicType
{
NoPeriodic
,
PeriodicPerAtom
,
PeriodicPerInteraction
,
PeriodicTriclinic
};
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
8
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -113,7 +159,10 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
ivec8
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -126,8 +175,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
// Compute the interactions.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
rsqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
energy
,
dEdR
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -139,6 +187,7 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
dEdR
=
epsSig6
*
(
12.0
f
*
sig6
-
6.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
energy
=
epsSig6
*
(
sig6
-
1.0
f
);
if
(
useSwitch
)
{
if
(
useSwitch
)
{
fvec8
r
=
r2
*
inverseR
;
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
t
=
(
r
>
switchingDistance
)
&
((
r
-
switchingDistance
)
*
invSwitchingInterval
);
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchValue
=
1
+
t
*
t
*
t
*
(
-
10.0
f
+
t
*
(
15.0
f
-
t
*
6.0
f
));
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
fvec8
switchDeriv
=
t
*
t
*
(
-
30.0
f
+
t
*
(
60.0
f
-
t
*
30.0
f
))
*
invSwitchingInterval
;
...
@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
...
@@ -193,28 +242,72 @@ void CpuNonbondedForceVec8::calculateBlockIxnImpl(int blockIndex, float* forces,
}
}
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxn
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
if
(
triclinic
)
// Determine whether we need to apply periodic boundary conditions.
calculateBlockEwaldIxnImpl
<
true
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
else
PeriodicType
periodicType
;
calculateBlockEwaldIxnImpl
<
false
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
);
fvec4
blockCenter
;
if
(
!
periodic
)
{
periodicType
=
NoPeriodic
;
blockCenter
=
0.0
f
;
}
else
{
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
float
minx
,
maxx
,
miny
,
maxy
,
minz
,
maxz
;
minx
=
maxx
=
posq
[
4
*
blockAtom
[
0
]];
miny
=
maxy
=
posq
[
4
*
blockAtom
[
0
]
+
1
];
minz
=
maxz
=
posq
[
4
*
blockAtom
[
0
]
+
2
];
for
(
int
i
=
1
;
i
<
8
;
i
++
)
{
minx
=
min
(
minx
,
posq
[
4
*
blockAtom
[
i
]]);
maxx
=
max
(
maxx
,
posq
[
4
*
blockAtom
[
i
]]);
miny
=
min
(
miny
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
maxy
=
max
(
maxy
,
posq
[
4
*
blockAtom
[
i
]
+
1
]);
minz
=
min
(
minz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
maxz
=
max
(
maxz
,
posq
[
4
*
blockAtom
[
i
]
+
2
]);
}
blockCenter
=
fvec4
(
0.5
f
*
(
minx
+
maxx
),
0.5
f
*
(
miny
+
maxy
),
0.5
f
*
(
minz
+
maxz
),
0.0
f
);
if
(
!
(
minx
<
cutoffDistance
||
miny
<
cutoffDistance
||
minz
<
cutoffDistance
||
maxx
>
boxSize
[
0
]
-
cutoffDistance
||
maxy
>
boxSize
[
1
]
-
cutoffDistance
||
maxz
>
boxSize
[
2
]
-
cutoffDistance
))
periodicType
=
NoPeriodic
;
else
if
(
triclinic
)
periodicType
=
PeriodicTriclinic
;
else
if
(
0.5
f
*
(
boxSize
[
0
]
-
(
maxx
-
minx
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
1
]
-
(
maxy
-
miny
))
>=
cutoffDistance
&&
0.5
f
*
(
boxSize
[
2
]
-
(
maxz
-
minz
))
>=
cutoffDistance
)
periodicType
=
PeriodicPerAtom
;
else
periodicType
=
PeriodicPerInteraction
;
}
// Call the appropriate version depending on what calculation is required for periodic boundary conditions.
if
(
periodicType
==
NoPeriodic
)
calculateBlockEwaldIxnImpl
<
NoPeriodic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerAtom
)
calculateBlockEwaldIxnImpl
<
PeriodicPerAtom
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicPerInteraction
)
calculateBlockEwaldIxnImpl
<
PeriodicPerInteraction
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
else
if
(
periodicType
==
PeriodicTriclinic
)
calculateBlockEwaldIxnImpl
<
PeriodicTriclinic
>
(
blockIndex
,
forces
,
totalEnergy
,
boxSize
,
invBoxSize
,
blockCenter
);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
{
void
CpuNonbondedForceVec8
::
calculateBlockEwaldIxnImpl
(
int
blockIndex
,
float
*
forces
,
double
*
totalEnergy
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
,
const
fvec4
&
blockCenter
)
{
// Load the positions and parameters of the atoms in the block.
// Load the positions and parameters of the atoms in the block.
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
8
*
blockIndex
];
fvec4
blockAtomPosq
[
8
];
fvec4
blockAtomPosq
[
8
];
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomForceX
(
0.0
f
),
blockAtomForceY
(
0.0
f
),
blockAtomForceZ
(
0.0
f
);
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
fvec8
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
blockAtomPosq
[
i
]
=
fvec4
(
posq
+
4
*
blockAtom
[
i
]);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
blockAtomPosq
[
i
]
-=
floor
((
blockAtomPosq
[
i
]
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
}
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
transpose
(
blockAtomPosq
[
0
],
blockAtomPosq
[
1
],
blockAtomPosq
[
2
],
blockAtomPosq
[
3
],
blockAtomPosq
[
4
],
blockAtomPosq
[
5
],
blockAtomPosq
[
6
],
blockAtomPosq
[
7
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
blockAtomCharge
);
blockAtomCharge
*=
ONE_4PI_EPS0
;
blockAtomCharge
*=
ONE_4PI_EPS0
;
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomSigma
(
atomParameters
[
blockAtom
[
0
]].
first
,
atomParameters
[
blockAtom
[
1
]].
first
,
atomParameters
[
blockAtom
[
2
]].
first
,
atomParameters
[
blockAtom
[
3
]].
first
,
atomParameters
[
blockAtom
[
4
]].
first
,
atomParameters
[
blockAtom
[
5
]].
first
,
atomParameters
[
blockAtom
[
6
]].
first
,
atomParameters
[
blockAtom
[
7
]].
first
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
fvec8
blockAtomEpsilon
(
atomParameters
[
blockAtom
[
0
]].
second
,
atomParameters
[
blockAtom
[
1
]].
second
,
atomParameters
[
blockAtom
[
2
]].
second
,
atomParameters
[
blockAtom
[
3
]].
second
,
atomParameters
[
blockAtom
[
4
]].
second
,
atomParameters
[
blockAtom
[
5
]].
second
,
atomParameters
[
blockAtom
[
6
]].
second
,
atomParameters
[
blockAtom
[
7
]].
second
);
bool
needPeriodic
=
(
periodic
&&
(
any
(
blockAtomX
<
cutoffDistance
)
||
any
(
blockAtomY
<
cutoffDistance
)
||
any
(
blockAtomZ
<
cutoffDistance
)
||
const
bool
needPeriodic
=
(
PERIODIC_TYPE
==
PeriodicPerInteraction
||
PERIODIC_TYPE
==
PeriodicTriclinic
);
any
(
blockAtomX
>
boxSize
[
0
]
-
cutoffDistance
)
||
any
(
blockAtomY
>
boxSize
[
1
]
-
cutoffDistance
)
||
any
(
blockAtomZ
>
boxSize
[
2
]
-
cutoffDistance
)));
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
const
float
invSwitchingInterval
=
1
/
(
cutoffDistance
-
switchingDistance
);
// Loop over neighbors for this block.
// Loop over neighbors for this block.
...
@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -229,7 +322,10 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the distances to the block atoms.
// Compute the distances to the block atoms.
fvec8
dx
,
dy
,
dz
,
r2
;
fvec8
dx
,
dy
,
dz
,
r2
;
getDeltaR
<
TRICLINIC
>
(
&
posq
[
4
*
atom
],
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
fvec4
atomPos
(
posq
+
4
*
atom
);
if
(
PERIODIC_TYPE
==
PeriodicPerAtom
)
atomPos
-=
floor
((
atomPos
-
blockCenter
)
*
invBoxSize
+
0.5
f
)
*
boxSize
;
getDeltaR
<
PERIODIC_TYPE
>
(
atomPos
,
blockAtomX
,
blockAtomY
,
blockAtomZ
,
dx
,
dy
,
dz
,
r2
,
needPeriodic
,
boxSize
,
invBoxSize
);
ivec8
include
;
ivec8
include
;
char
excl
=
exclusions
[
i
];
char
excl
=
exclusions
[
i
];
if
(
excl
==
0
)
if
(
excl
==
0
)
...
@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -242,8 +338,8 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
// Compute the interactions.
// Compute the interactions.
fvec8
r
=
sqrt
(
r2
);
fvec8
inverseR
=
r
sqrt
(
r2
);
fvec8
inverseR
=
fvec8
(
1.0
f
)
/
r
;
fvec8
r
=
r2
*
inverseR
;
fvec8
energy
,
dEdR
;
fvec8
energy
,
dEdR
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
float
atomEpsilon
=
atomParameters
[
atom
].
second
;
if
(
atomEpsilon
!=
0.0
f
)
{
if
(
atomEpsilon
!=
0.0
f
)
{
...
@@ -268,7 +364,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -268,7 +364,7 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
}
}
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
fvec8
chargeProd
=
blockAtomCharge
*
posq
[
4
*
atom
+
3
];
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
+=
chargeProd
*
inverseR
*
ewaldScaleFunction
(
r
);
dEdR
*=
inverseR
*
inverseR
;
dEdR
*=
inverseR
*
inverseR
;
// Accumulate energies.
// Accumulate energies.
...
@@ -302,28 +398,26 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
...
@@ -302,28 +398,26 @@ void CpuNonbondedForceVec8::calculateBlockEwaldIxnImpl(int blockIndex, float* fo
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
(
fvec4
(
forces
+
4
*
blockAtom
[
j
])
+
f
[
j
]).
store
(
forces
+
4
*
blockAtom
[
j
]);
}
}
template
<
bool
TRICLINIC
>
template
<
int
PERIODIC_TYPE
>
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
loat
*
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
void
CpuNonbondedForceVec8
::
getDeltaR
(
const
f
vec4
&
posI
,
const
fvec8
&
x
,
const
fvec8
&
y
,
const
fvec8
&
z
,
fvec8
&
dx
,
fvec8
&
dy
,
fvec8
&
dz
,
fvec8
&
r2
,
bool
periodic
,
const
fvec4
&
boxSize
,
const
fvec4
&
invBoxSize
)
const
{
dx
=
x
-
posI
[
0
];
dx
=
x
-
posI
[
0
];
dy
=
y
-
posI
[
1
];
dy
=
y
-
posI
[
1
];
dz
=
z
-
posI
[
2
];
dz
=
z
-
posI
[
2
];
if
(
periodic
)
{
if
(
PERIODIC_TYPE
==
PeriodicTriclinic
)
{
if
(
TRICLINIC
)
{
fvec8
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
fvec8
scale3
=
floor
(
dz
*
recipBoxSize
[
2
]
+
0.5
f
);
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dx
-=
scale3
*
periodicBoxVectors
[
2
][
0
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dy
-=
scale3
*
periodicBoxVectors
[
2
][
1
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
dz
-=
scale3
*
periodicBoxVectors
[
2
][
2
];
fvec8
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
fvec8
scale2
=
floor
(
dy
*
recipBoxSize
[
1
]
+
0.5
f
);
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dx
-=
scale2
*
periodicBoxVectors
[
1
][
0
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
dy
-=
scale2
*
periodicBoxVectors
[
1
][
1
];
fvec8
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
fvec8
scale1
=
floor
(
dx
*
recipBoxSize
[
0
]
+
0.5
f
);
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
dx
-=
scale1
*
periodicBoxVectors
[
0
][
0
];
}
}
else
if
(
PERIODIC_TYPE
==
PeriodicPerInteraction
)
{
else
{
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dx
-=
round
(
dx
*
invBoxSize
[
0
])
*
boxSize
[
0
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dy
-=
round
(
dy
*
invBoxSize
[
1
])
*
boxSize
[
1
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
dz
-=
round
(
dz
*
invBoxSize
[
2
])
*
boxSize
[
2
];
}
}
}
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
}
}
...
...
platforms/cuda/include/CudaKernels.h
View file @
59bccb15
...
@@ -1282,7 +1282,8 @@ private:
...
@@ -1282,7 +1282,8 @@ private:
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
recordChangedParameters
(
ContextImpl
&
context
);
void
recordChangedParameters
(
ContextImpl
&
context
);
CudaContext
&
cu
;
CudaContext
&
cu
;
double
prevStepSize
;
double
prevStepSize
,
energy
;
float
energyFloat
;
int
numGlobalVariables
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
mutable
bool
localValuesAreCurrent
;
mutable
bool
localValuesAreCurrent
;
...
@@ -1303,7 +1304,7 @@ private:
...
@@ -1303,7 +1304,7 @@ private:
std
::
vector
<
std
::
vector
<
CUfunction
>
>
kernels
;
std
::
vector
<
std
::
vector
<
CUfunction
>
>
kernels
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
void
*>
>
>
kernelArgs
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
void
*>
>
>
kernelArgs
;
std
::
vector
<
void
*>
kineticEnergyArgs
;
std
::
vector
<
void
*>
kineticEnergyArgs
;
CUfunction
sumPotentialEnergyKernel
,
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
CUfunction
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsEnergy
;
std
::
vector
<
bool
>
needsEnergy
;
...
...
platforms/cuda/src/CudaExpressionUtilities.cpp
View file @
59bccb15
...
@@ -463,6 +463,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -463,6 +463,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
case
Operation
::
CEIL
:
case
Operation
::
CEIL
:
out
<<
"ceil("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"ceil("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
SELECT
:
out
<<
"("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
" != 0 ? "
<<
getTempName
(
node
.
getChildren
()[
1
],
temps
)
<<
" : "
<<
getTempName
(
node
.
getChildren
()[
2
],
temps
)
<<
")"
;
break
;
default:
default:
throw
OpenMMException
(
"Internal error: Unknown operation in user-defined expression: "
+
node
.
getOperation
().
getName
());
throw
OpenMMException
(
"Internal error: Unknown operation in user-defined expression: "
+
node
.
getOperation
().
getName
());
}
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
59bccb15
...
@@ -5731,7 +5731,7 @@ string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& vari
...
@@ -5731,7 +5731,7 @@ string CudaIntegrateCustomStepKernel::createGlobalComputation(const string& vari
variables
[
"dt"
]
=
"dt[0].y"
;
variables
[
"dt"
]
=
"dt[0].y"
;
variables
[
"uniform"
]
=
"uniform"
;
variables
[
"uniform"
]
=
"uniform"
;
variables
[
"gaussian"
]
=
"gaussian"
;
variables
[
"gaussian"
]
=
"gaussian"
;
variables
[
energyName
]
=
"energy
[0]
"
;
variables
[
energyName
]
=
"energy"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
parameterNames
.
size
();
i
++
)
...
@@ -5767,7 +5767,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
...
@@ -5767,7 +5767,7 @@ string CudaIntegrateCustomStepKernel::createPerDofComputation(const string& vari
variables
[
"m"
]
=
"mass"
;
variables
[
"m"
]
=
"mass"
;
variables
[
"dt"
]
=
"stepSize"
;
variables
[
"dt"
]
=
"stepSize"
;
if
(
energyName
!=
""
)
if
(
energyName
!=
""
)
variables
[
energyName
]
=
"energy
[0]
"
;
variables
[
energyName
]
=
"energy"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumGlobalVariables
();
i
++
)
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
variables
[
integrator
.
getGlobalVariableName
(
i
)]
=
"globals["
+
cu
.
intToString
(
i
)
+
"]"
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumPerDofVariables
();
i
++
)
...
@@ -6000,7 +6000,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6000,7 +6000,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
NULL
);
args1
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
if
(
cu
.
getUseDoublePrecision
())
args1
.
push_back
(
&
energy
);
else
args1
.
push_back
(
&
energyFloat
);
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
args1
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
kernelArgs
[
step
].
push_back
(
args1
);
kernelArgs
[
step
].
push_back
(
args1
);
...
@@ -6049,7 +6052,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6049,7 +6052,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args
.
push_back
(
&
contextParameterValues
->
getDevicePointer
());
args
.
push_back
(
NULL
);
args
.
push_back
(
NULL
);
args
.
push_back
(
NULL
);
args
.
push_back
(
NULL
);
args
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
if
(
cu
.
getUseDoublePrecision
())
args
.
push_back
(
&
energy
);
else
args
.
push_back
(
&
energyFloat
);
kernelArgs
[
step
].
push_back
(
args
);
kernelArgs
[
step
].
push_back
(
args
);
}
}
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
)
{
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainPositions
)
{
...
@@ -6089,13 +6095,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6089,13 +6095,6 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
CUmodule
randomProgram
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
randomKernel
=
cu
.
getKernel
(
randomProgram
,
"generateRandomNumbers"
);
// Create the kernel for summing the potential energy.
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
cu
.
getEnergyBuffer
().
getSize
());
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
customIntegrator
,
defines
);
sumPotentialEnergyKernel
=
cu
.
getKernel
(
module
,
cu
.
getUseDoublePrecision
()
?
"computeDoubleSum"
:
"computeFloatSum"
);
// Create the kernel for computing kinetic energy.
// Create the kernel for computing kinetic energy.
stringstream
computeKE
;
stringstream
computeKE
;
...
@@ -6117,10 +6116,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6117,10 +6116,11 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
args
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ "
<<
valueName
;
args
<<
", "
<<
buffer
.
getType
()
<<
"* __restrict__ "
<<
valueName
;
}
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
defines
[
"SUM_OUTPUT_INDEX"
]
=
"0"
;
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
defines
[
"SUM_BUFFER_SIZE"
]
=
cu
.
intToString
(
3
*
numAtoms
);
if
(
defines
.
find
(
"LOAD_POS_AS_DELTA"
)
!=
defines
.
end
())
if
(
defines
.
find
(
"LOAD_POS_AS_DELTA"
)
!=
defines
.
end
())
defines
.
erase
(
"LOAD_POS_AS_DELTA"
);
defines
.
erase
(
"LOAD_POS_AS_DELTA"
);
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorPerDof
,
replacements
),
defines
);
CUmodule
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
customIntegratorPerDof
,
replacements
),
defines
);
kineticEnergyKernel
=
cu
.
getKernel
(
module
,
"computePerDof"
);
kineticEnergyKernel
=
cu
.
getKernel
(
module
,
"computePerDof"
);
kineticEnergyArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
...
@@ -6134,7 +6134,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6134,7 +6134,10 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
NULL
);
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
uniformRandoms
->
getDevicePointer
());
kineticEnergyArgs
.
push_back
(
&
potentialEnergy
->
getDevicePointer
());
if
(
cu
.
getUseDoublePrecision
())
kineticEnergyArgs
.
push_back
(
&
energy
);
else
kineticEnergyArgs
.
push_back
(
&
energyFloat
);
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
perDofValues
->
getBuffers
().
size
();
i
++
)
kineticEnergyArgs
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
kineticEnergyArgs
.
push_back
(
&
perDofValues
->
getBuffers
()[
i
].
getMemory
());
keNeedsForce
=
usesVariable
(
keExpression
,
"f"
);
keNeedsForce
=
usesVariable
(
keExpression
,
"f"
);
...
@@ -6240,11 +6243,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -6240,11 +6243,8 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
}
}
else
{
else
{
recordChangedParameters
(
context
);
recordChangedParameters
(
context
);
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
[
i
]);
energy
=
context
.
calcForcesAndEnergy
(
computeForce
,
computeEnergy
,
forceGroup
[
i
]);
if
(
computeEnergy
)
{
energyFloat
=
(
float
)
energy
;
void
*
args
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
potentialEnergy
->
getDevicePointer
()};
cu
.
executeKernel
(
sumPotentialEnergyKernel
,
&
args
[
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
}
}
}
forcesAreValid
=
true
;
forcesAreValid
=
true
;
}
}
...
@@ -6315,11 +6315,8 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
...
@@ -6315,11 +6315,8 @@ double CudaIntegrateCustomStepKernel::computeKineticEnergy(ContextImpl& context,
bool
willNeedEnergy
=
false
;
bool
willNeedEnergy
=
false
;
for
(
int
i
=
0
;
i
<
integrator
.
getNumComputations
();
i
++
)
for
(
int
i
=
0
;
i
<
integrator
.
getNumComputations
();
i
++
)
willNeedEnergy
|=
needsEnergy
[
i
];
willNeedEnergy
|=
needsEnergy
[
i
];
context
.
calcForcesAndEnergy
(
true
,
willNeedEnergy
,
-
1
);
energy
=
context
.
calcForcesAndEnergy
(
true
,
willNeedEnergy
,
-
1
);
if
(
willNeedEnergy
)
{
energyFloat
=
(
float
)
energy
;
void
*
args
[]
=
{
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
potentialEnergy
->
getDevicePointer
()};
cu
.
executeKernel
(
sumPotentialEnergyKernel
,
&
args
[
0
],
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlockSize
);
}
forcesAreValid
=
true
;
forcesAreValid
=
true
;
}
}
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
CUdeviceptr
posCorrection
=
(
cu
.
getUseMixedPrecision
()
?
cu
.
getPosqCorrection
().
getDevicePointer
()
:
0
);
...
...
platforms/cuda/src/kernels/customIntegratorGlobal.cu
View file @
59bccb15
extern
"C"
__global__
void
computeGlobal
(
mixed2
*
__restrict__
dt
,
mixed
*
__restrict__
globals
,
mixed
*
__restrict__
params
,
extern
"C"
__global__
void
computeGlobal
(
mixed2
*
__restrict__
dt
,
mixed
*
__restrict__
globals
,
mixed
*
__restrict__
params
,
float
uniform
,
float
gaussian
,
const
real
*
__restrict__
energy
)
{
float
uniform
,
float
gaussian
,
const
real
energy
)
{
COMPUTE_STEP
COMPUTE_STEP
}
}
platforms/cuda/src/kernels/customIntegratorPerDof.cu
View file @
59bccb15
...
@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
...
@@ -34,7 +34,7 @@ inline __device__ mixed4 convertFromDouble4(double4 a) {
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
extern
"C"
__global__
void
computePerDof
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
const
mixed2
*
__restrict__
dt
,
const
mixed
*
__restrict__
globals
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
const
mixed
*
__restrict__
params
,
mixed
*
__restrict__
sum
,
const
float4
*
__restrict__
gaussianValues
,
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
*
__restrict__
energy
unsigned
int
gaussianBaseIndex
,
const
float4
*
__restrict__
uniformValues
,
const
real
energy
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
stepSize
=
dt
[
0
].
y
;
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
...
...
platforms/cuda/tests/TestCudaCustomIntegrator.cpp
View file @
59bccb15
...
@@ -334,7 +334,7 @@ void testMonteCarlo() {
...
@@ -334,7 +334,7 @@ void testMonteCarlo() {
integrator
.
addComputePerDof
(
"oldx"
,
"x"
);
integrator
.
addComputePerDof
(
"oldx"
,
"x"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*gaussian"
);
integrator
.
addComputePerDof
(
"x"
,
"x+dt*gaussian"
);
integrator
.
addComputeGlobal
(
"accept"
,
"step(exp((oldE-energy)/kT)-uniform)"
);
integrator
.
addComputeGlobal
(
"accept"
,
"step(exp((oldE-energy)/kT)-uniform)"
);
integrator
.
addComputePerDof
(
"x"
,
"
accept*x + (1-
accept
)*
oldx"
);
integrator
.
addComputePerDof
(
"x"
,
"
select(
accept
, x,
oldx
)
"
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
forceField
->
addBond
(
0
,
1
,
2.0
,
10.0
);
forceField
->
addBond
(
0
,
1
,
2.0
,
10.0
);
system
.
addForce
(
forceField
);
system
.
addForce
(
forceField
);
...
...
platforms/cuda/tests/TestCudaLocalEnergyMinimizer.cpp
View file @
59bccb15
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2010-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2010-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -81,7 +81,7 @@ void testLargeSystem() {
...
@@ -81,7 +81,7 @@ void testLargeSystem() {
const
int
numParticles
=
numMolecules
*
2
;
const
int
numParticles
=
numMolecules
*
2
;
const
double
cutoff
=
2.0
;
const
double
cutoff
=
2.0
;
const
double
boxSize
=
4.0
;
const
double
boxSize
=
4.0
;
const
double
tolerance
=
5
;
const
double
tolerance
=
10
;
System
system
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
...
@@ -114,7 +114,7 @@ void testLargeSystem() {
...
@@ -114,7 +114,7 @@ void testLargeSystem() {
State
finalState
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
|
State
::
Positions
);
State
finalState
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
|
State
::
Positions
);
ASSERT
(
finalState
.
getPotentialEnergy
()
<
initialState
.
getPotentialEnergy
());
ASSERT
(
finalState
.
getPotentialEnergy
()
<
initialState
.
getPotentialEnergy
());
// Compute the force magnitude, sub
s
tracting off any component parallel to a constraint, and
// Compute the force magnitude, subtracting off any component parallel to a constraint, and
// check that it satisfies the requested tolerance.
// check that it satisfies the requested tolerance.
double
forceNorm
=
0.0
;
double
forceNorm
=
0.0
;
...
@@ -129,8 +129,8 @@ void testLargeSystem() {
...
@@ -129,8 +129,8 @@ void testLargeSystem() {
f
-=
dir
*
dir
.
dot
(
f
);
f
-=
dir
*
dir
.
dot
(
f
);
forceNorm
+=
f
.
dot
(
f
);
forceNorm
+=
f
.
dot
(
f
);
}
}
forceNorm
=
sqrt
(
forceNorm
/
(
4
*
numMolecules
));
forceNorm
=
sqrt
(
forceNorm
/
(
5
*
numMolecules
));
ASSERT
(
forceNorm
<
3
*
tolerance
);
ASSERT
(
forceNorm
<
2
*
tolerance
);
}
}
void
testVirtualSites
()
{
void
testVirtualSites
()
{
...
@@ -138,7 +138,7 @@ void testVirtualSites() {
...
@@ -138,7 +138,7 @@ void testVirtualSites() {
const
int
numParticles
=
numMolecules
*
3
;
const
int
numParticles
=
numMolecules
*
3
;
const
double
cutoff
=
2.0
;
const
double
cutoff
=
2.0
;
const
double
boxSize
=
4.0
;
const
double
boxSize
=
4.0
;
const
double
tolerance
=
5
;
const
double
tolerance
=
10
;
System
system
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
...
@@ -195,8 +195,8 @@ void testVirtualSites() {
...
@@ -195,8 +195,8 @@ void testVirtualSites() {
ASSERT_EQUAL_VEC
((
finalState
.
getPositions
()[
i
+
1
]
+
finalState
.
getPositions
()[
i
])
*
0.5
,
finalState
.
getPositions
()[
i
+
2
],
1e-5
);
ASSERT_EQUAL_VEC
((
finalState
.
getPositions
()[
i
+
1
]
+
finalState
.
getPositions
()[
i
])
*
0.5
,
finalState
.
getPositions
()[
i
+
2
],
1e-5
);
}
}
forceNorm
=
sqrt
(
forceNorm
/
(
4
*
numMolecules
));
forceNorm
=
sqrt
(
forceNorm
/
(
5
*
numMolecules
));
ASSERT
(
forceNorm
<
3
*
tolerance
);
ASSERT
(
forceNorm
<
2
*
tolerance
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
...
...
platforms/opencl/include/OpenCLFFT3D.h
View file @
59bccb15
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -40,7 +40,7 @@ namespace OpenMM {
...
@@ -40,7 +40,7 @@ namespace OpenMM {
* 15, 207–228 (2000).
* 15, 207–228 (2000).
* <p>
* <p>
* This class places certain restrictions on the allowed dimensions of the grid. First,
* This class places certain restrictions on the allowed dimensions of the grid. First,
* the size of each dimension may have no prime factors other than 2, 3, and
5
. You
* the size of each dimension may have no prime factors other than 2, 3,
5,
and
7
. You
* can call findLegalDimension() to determine the smallest size that satisfies this
* can call findLegalDimension() to determine the smallest size that satisfies this
* requirement and is greater than or equal to a specified minimum size. Second, the size
* requirement and is greater than or equal to a specified minimum size. Second, the size
* of each dimension must be small enough to compute each 1D transform entirely in local
* of each dimension must be small enough to compute each 1D transform entirely in local
...
@@ -61,12 +61,17 @@ public:
...
@@ -61,12 +61,17 @@ public:
* @param xsize the first dimension of the data sets on which FFTs will be performed
* @param xsize the first dimension of the data sets on which FFTs will be performed
* @param ysize the second dimension of the data sets on which FFTs will be performed
* @param ysize the second dimension of the data sets on which FFTs will be performed
* @param zsize the third dimension of the data sets on which FFTs will be performed
* @param zsize the third dimension of the data sets on which FFTs will be performed
* @param realToComplex if true, a real-to-complex transform will be done. Otherwise, it is complex-to-complex.
*/
*/
OpenCLFFT3D
(
OpenCLContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
);
OpenCLFFT3D
(
OpenCLContext
&
context
,
int
xsize
,
int
ysize
,
int
zsize
,
bool
realToComplex
=
false
);
/**
/**
* Perform a Fourier transform. The transform cannot be done in-place: the input and output
* Perform a Fourier transform. The transform cannot be done in-place: the input and output
* arrays must be different. Also, the input array is used as workspace, so its contents
* arrays must be different. Also, the input array is used as workspace, so its contents
* are destroyed.
* are destroyed. This also means that both arrays must be large enough to hold complex values,
* even when performing a real-to-complex transform.
* <p>
* When performing a real-to-complex transform, the output data is of size xsize*ysize*(zsize/2+1)
* and contains only the non-redundant elements.
*
*
* @param in the data to transform, ordered such that in[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
* @param in the data to transform, ordered such that in[x*ysize*zsize + y*zsize + z] contains element (x, y, z)
* @param out on exit, this contains the transformed data
* @param out on exit, this contains the transformed data
...
@@ -75,17 +80,20 @@ public:
...
@@ -75,17 +80,20 @@ public:
void
execFFT
(
OpenCLArray
&
in
,
OpenCLArray
&
out
,
bool
forward
=
true
);
void
execFFT
(
OpenCLArray
&
in
,
OpenCLArray
&
out
,
bool
forward
=
true
);
/**
/**
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* Get the smallest legal size for a dimension of the grid (that is, a size with no prime
* factors other than 2, 3, and
5
).
* factors other than 2, 3,
5,
and
7
).
*
*
* @param minimum the minimum size the return value must be greater than or equal to
* @param minimum the minimum size the return value must be greater than or equal to
*/
*/
static
int
findLegalDimension
(
int
minimum
);
static
int
findLegalDimension
(
int
minimum
);
private:
private:
cl
::
Kernel
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
&
threads
);
cl
::
Kernel
createKernel
(
int
xsize
,
int
ysize
,
int
zsize
,
int
&
threads
,
int
axis
,
bool
forward
,
bool
inputIsReal
);
int
xsize
,
ysize
,
zsize
;
int
xsize
,
ysize
,
zsize
;
int
xthreads
,
ythreads
,
zthreads
;
int
xthreads
,
ythreads
,
zthreads
;
bool
packRealAsComplex
;
OpenCLContext
&
context
;
OpenCLContext
&
context
;
cl
::
Kernel
xkernel
,
ykernel
,
zkernel
;
cl
::
Kernel
xkernel
,
ykernel
,
zkernel
;
cl
::
Kernel
invxkernel
,
invykernel
,
invzkernel
;
cl
::
Kernel
packForwardKernel
,
unpackForwardKernel
,
packBackwardKernel
,
unpackBackwardKernel
;
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
59bccb15
...
@@ -639,6 +639,7 @@ private:
...
@@ -639,6 +639,7 @@ private:
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeFinishSpreadChargeKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeConvolutionKernel
;
cl
::
Kernel
pmeEvalEnergyKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
cl
::
Kernel
pmeInterpolateForceKernel
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
pmeDefines
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
std
::
vector
<
std
::
pair
<
int
,
int
>
>
exceptionAtoms
;
...
@@ -1272,7 +1273,7 @@ private:
...
@@ -1272,7 +1273,7 @@ private:
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
prepareForComputation
(
ContextImpl
&
context
,
CustomIntegrator
&
integrator
,
bool
&
forcesAreValid
);
void
recordChangedParameters
(
ContextImpl
&
context
);
void
recordChangedParameters
(
ContextImpl
&
context
);
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
double
prevStepSize
;
double
prevStepSize
,
energy
;
int
numGlobalVariables
;
int
numGlobalVariables
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
bool
hasInitializedKernels
,
deviceValuesAreCurrent
,
modifiesParameters
,
keNeedsForce
;
mutable
bool
localValuesAreCurrent
;
mutable
bool
localValuesAreCurrent
;
...
@@ -1292,7 +1293,7 @@ private:
...
@@ -1292,7 +1293,7 @@ private:
std
::
vector
<
double
>
contextValuesDouble
;
std
::
vector
<
double
>
contextValuesDouble
;
std
::
vector
<
float
>
contextValues
;
std
::
vector
<
float
>
contextValues
;
std
::
vector
<
std
::
vector
<
cl
::
Kernel
>
>
kernels
;
std
::
vector
<
std
::
vector
<
cl
::
Kernel
>
>
kernels
;
cl
::
Kernel
sumPotentialEnergyKernel
,
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
cl
::
Kernel
randomKernel
,
kineticEnergyKernel
,
sumKineticEnergyKernel
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
CustomIntegrator
::
ComputationType
>
stepType
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsForces
;
std
::
vector
<
bool
>
needsEnergy
;
std
::
vector
<
bool
>
needsEnergy
;
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment