Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3db6b8ee
Commit
3db6b8ee
authored
Feb 07, 2015
by
Jason Swails
Browse files
Merge branch 'master' into gbn-chk
parents
5d8e92ff
3a80b0f1
Changes
121
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
804 additions
and
347 deletions
+804
-347
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+16
-34
platforms/cuda/src/kernels/monteCarloBarostat.cu
platforms/cuda/src/kernels/monteCarloBarostat.cu
+5
-8
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+9
-20
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+46
-41
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
+58
-16
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
+61
-1
platforms/cuda/tests/TestCudaEwald.cpp
platforms/cuda/tests/TestCudaEwald.cpp
+52
-0
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
...orms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
+77
-0
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+91
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+75
-19
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+66
-13
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+149
-86
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+47
-48
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+8
-9
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+7
-6
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+8
-9
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+7
-6
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+8
-8
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+11
-19
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+3
-3
No files found.
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
3db6b8ee
...
...
@@ -69,7 +69,8 @@ typedef struct {
extern
"C"
__global__
void
computeBornSum
(
unsigned
long
long
*
__restrict__
global_bornSum
,
const
real4
*
__restrict__
posq
,
const
float2
*
__restrict__
global_params
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#else
unsigned
int
numTiles
,
#endif
...
...
@@ -104,9 +105,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
j
].
x
-
posq1
.
x
,
localData
[
tbx
+
j
].
y
-
posq1
.
y
,
localData
[
tbx
+
j
].
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
...
...
@@ -151,9 +150,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
...
...
@@ -291,12 +288,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
posq1
.
x
-=
floor
((
posq1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
posq1
.
y
-=
floor
((
posq1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
posq1
.
z
-=
floor
((
posq1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData
[
threadIdx
.
x
],
blockCenterX
)
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
...
...
@@ -342,9 +335,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
int
atom2
=
atomIndices
[
tbx
+
tj
];
...
...
@@ -414,7 +405,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#else
unsigned
int
numTiles
,
#endif
...
...
@@ -451,9 +443,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4
posq2
=
make_real4
(
localData
[
tbx
+
j
].
x
,
localData
[
tbx
+
j
].
y
,
localData
[
tbx
+
j
].
z
,
localData
[
tbx
+
j
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
...
...
@@ -508,9 +498,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4
posq2
=
make_real4
(
localData
[
tbx
+
tj
].
x
,
localData
[
tbx
+
tj
].
y
,
localData
[
tbx
+
tj
].
z
,
localData
[
tbx
+
tj
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
...
...
@@ -656,12 +644,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
posq1
.
x
-=
floor
((
posq1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
posq1
.
y
-=
floor
((
posq1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
posq1
.
z
-=
floor
((
posq1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData
[
threadIdx
.
x
],
blockCenterX
)
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
atomIndices
[
tbx
+
tj
];
...
...
@@ -713,9 +697,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4
posq2
=
make_real4
(
localData
[
tbx
+
tj
].
x
,
localData
[
tbx
+
tj
].
y
,
localData
[
tbx
+
tj
].
z
,
localData
[
tbx
+
tj
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
...
...
platforms/cuda/src/kernels/monteCarloBarostat.cu
View file @
3db6b8ee
...
...
@@ -2,7 +2,8 @@
* Scale the particle positions with each axis independent
*/
extern
"C"
__global__
void
scalePositions
(
float
scaleX
,
float
scaleY
,
float
scaleZ
,
int
numMolecules
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
scalePositions
(
float
scaleX
,
float
scaleY
,
float
scaleZ
,
int
numMolecules
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
*
__restrict__
posq
,
const
int
*
__restrict__
moleculeAtoms
,
const
int
*
__restrict__
moleculeStartIndex
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numMolecules
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
first
=
moleculeStartIndex
[
index
];
...
...
@@ -25,13 +26,9 @@ extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scal
// Move it into the first periodic box.
int
xcell
=
(
int
)
floor
(
center
.
x
*
invPeriodicBoxSize
.
x
);
int
ycell
=
(
int
)
floor
(
center
.
y
*
invPeriodicBoxSize
.
y
);
int
zcell
=
(
int
)
floor
(
center
.
z
*
invPeriodicBoxSize
.
z
);
real3
delta
=
make_real3
(
xcell
*
periodicBoxSize
.
x
,
ycell
*
periodicBoxSize
.
y
,
zcell
*
periodicBoxSize
.
z
);
center
.
x
-=
delta
.
x
;
center
.
y
-=
delta
.
y
;
center
.
z
-=
delta
.
z
;
real3
oldCenter
=
center
;
APPLY_PERIODIC_TO_POS
(
center
)
real3
delta
=
make_real3
(
oldCenter
.
x
-
center
.
x
,
oldCenter
.
y
-
center
.
y
,
oldCenter
.
z
-
center
.
z
);
// Now scale the position of the molecule center.
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
3db6b8ee
...
...
@@ -103,8 +103,9 @@ extern "C" __global__ void computeNonbonded(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
#endif
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
...
...
@@ -155,9 +156,7 @@ extern "C" __global__ void computeNonbonded(
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
...
...
@@ -223,9 +222,7 @@ extern "C" __global__ void computeNonbonded(
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
...
...
@@ -412,17 +409,11 @@ extern "C" __global__ void computeNonbonded(
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
posq1
.
x
-=
floor
((
posq1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
posq1
.
y
-=
floor
((
posq1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
posq1
.
z
-=
floor
((
posq1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
#ifdef ENABLE_SHUFFLE
shflPosq
.
x
-=
floor
((
shflPosq
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
shflPosq
.
y
-=
floor
((
shflPosq
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
shflPosq
.
z
-=
floor
((
shflPosq
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
shflPosq
,
blockCenterX
)
#else
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData
[
threadIdx
.
x
],
blockCenterX
)
#endif
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
...
...
@@ -499,9 +490,7 @@ extern "C" __global__ void computeNonbonded(
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
...
...
platforms/cuda/src/kernels/pme.cu
View file @
3db6b8ee
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// Compute the index of the grid point each atom is associated with.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
)
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
)
;
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
)
)
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
)
)
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
)
)
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
...
@@ -18,7 +18,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
}
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
...
@@ -27,15 +27,16 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real
charge
=
posq
[
atom
].
w
;
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
);
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
...
@@ -78,7 +79,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
real
add
=
charge
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
pos
.
w
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
...
@@ -115,9 +116,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern
"C"
__global__
void
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
...
...
@@ -131,9 +131,9 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
invPeriodicBoxSize
.
x
;
real
mhy
=
m
y
*
invPeriodicBoxSize
.
y
;
real
mhz
=
m
z
*
invPeriodicBoxSize
.
z
;
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
m
x
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
m
x
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
...
...
@@ -151,9 +151,8 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
extern
"C"
__global__
void
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
...
...
@@ -168,9 +167,9 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
invPeriodicBoxSize
.
x
;
real
mhy
=
m
y
*
invPeriodicBoxSize
.
y
;
real
mhz
=
m
z
*
invPeriodicBoxSize
.
z
;
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
m
x
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
m
x
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
...
...
@@ -194,7 +193,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
extern
"C"
__global__
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
...
@@ -206,12 +205,15 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
);
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
...
@@ -266,9 +268,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
x
*
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
y
*
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
z
*
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0x100000000
));
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceX
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceY
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceZ
*
0x100000000
));
}
}
...
...
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
View file @
3db6b8ee
...
...
@@ -55,7 +55,17 @@ const double TOL = 1e-5;
CudaPlatform
platform
;
void
validateAxilrodTeller
(
CustomManyParticleForce
*
force
,
const
vector
<
Vec3
>&
positions
,
const
vector
<
const
int
*>&
expectedSets
,
double
boxSize
)
{
Vec3
computeDelta
(
const
Vec3
&
pos1
,
const
Vec3
&
pos2
,
bool
periodic
,
const
Vec3
*
periodicBoxVectors
)
{
Vec3
diff
=
pos1
-
pos2
;
if
(
periodic
)
{
diff
-=
periodicBoxVectors
[
2
]
*
floor
(
diff
[
2
]
/
periodicBoxVectors
[
2
][
2
]
+
0.5
);
diff
-=
periodicBoxVectors
[
1
]
*
floor
(
diff
[
1
]
/
periodicBoxVectors
[
1
][
1
]
+
0.5
);
diff
-=
periodicBoxVectors
[
0
]
*
floor
(
diff
[
0
]
/
periodicBoxVectors
[
0
][
0
]
+
0.5
);
}
return
diff
;
}
void
validateAxilrodTeller
(
CustomManyParticleForce
*
force
,
const
vector
<
Vec3
>&
positions
,
const
vector
<
const
int
*>&
expectedSets
,
double
boxSize
,
bool
triclinic
)
{
// Create a System and Context.
int
numParticles
=
force
->
getNumParticles
();
...
...
@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
Vec3
boxVectors
[
3
];
if
(
triclinic
)
{
boxVectors
[
0
]
=
Vec3
(
boxSize
,
0
,
0
);
boxVectors
[
1
]
=
Vec3
(
0.2
*
boxSize
,
boxSize
,
0
);
boxVectors
[
2
]
=
Vec3
(
-
0.3
*
boxSize
,
-
0.1
*
boxSize
,
boxSize
);
}
else
{
boxVectors
[
0
]
=
Vec3
(
boxSize
,
0
,
0
);
boxVectors
[
1
]
=
Vec3
(
0
,
boxSize
,
0
);
boxVectors
[
2
]
=
Vec3
(
0
,
0
,
boxSize
);
}
system
.
setDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
addForce
(
force
);
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
...
...
@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
// See if the energy matches the expected value.
double
expectedEnergy
=
0
;
bool
periodic
=
(
nonbondedMethod
==
CustomManyParticleForce
::
CutoffPeriodic
);
for
(
int
i
=
0
;
i
<
(
int
)
expectedSets
.
size
();
i
++
)
{
int
p1
=
expectedSets
[
i
][
0
];
int
p2
=
expectedSets
[
i
][
1
];
int
p3
=
expectedSets
[
i
][
2
];
Vec3
d12
=
positions
[
p2
]
-
positions
[
p1
];
Vec3
d13
=
positions
[
p3
]
-
positions
[
p1
];
Vec3
d23
=
positions
[
p3
]
-
positions
[
p2
];
if
(
nonbondedMethod
==
CustomManyParticleForce
::
CutoffPeriodic
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
d12
[
j
]
-=
floor
(
d12
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
d13
[
j
]
-=
floor
(
d13
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
d23
[
j
]
-=
floor
(
d23
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
}
}
Vec3
d12
=
computeDelta
(
positions
[
p2
],
positions
[
p1
],
periodic
,
boxVectors
);
Vec3
d13
=
computeDelta
(
positions
[
p3
],
positions
[
p1
],
periodic
,
boxVectors
);
Vec3
d23
=
computeDelta
(
positions
[
p3
],
positions
[
p2
],
periodic
,
boxVectors
);
double
r12
=
sqrt
(
d12
.
dot
(
d12
));
double
r13
=
sqrt
(
d13
.
dot
(
d13
));
double
r23
=
sqrt
(
d23
.
dot
(
d23
));
...
...
@@ -210,7 +225,7 @@ void testNoCutoff() {
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
int
sets
[
4
][
3
]
=
{{
0
,
1
,
2
},
{
1
,
2
,
3
},
{
2
,
3
,
0
},
{
3
,
0
,
1
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
void
testCutoff
()
{
...
...
@@ -235,7 +250,7 @@ void testCutoff() {
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
int
sets
[
7
][
3
]
=
{{
0
,
1
,
2
},
{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
2
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
7
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
void
testPeriodic
()
{
...
...
@@ -261,7 +276,33 @@ void testPeriodic() {
double
boxSize
=
2.1
;
int
sets
[
5
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
,
false
);
}
void
testTriclinic
()
{
CustomManyParticleForce
*
force
=
new
CustomManyParticleForce
(
3
,
"C*(1+3*cos(theta1)*cos(theta2)*cos(theta3))/(r12*r13*r23)^3;"
"theta1=angle(p1,p2,p3); theta2=angle(p2,p3,p1); theta3=angle(p3,p1,p2);"
"r12=distance(p1,p2); r13=distance(p1,p3); r23=distance(p2,p3)"
);
force
->
addGlobalParameter
(
"C"
,
1.5
);
force
->
setNonbondedMethod
(
CustomManyParticleForce
::
CutoffPeriodic
);
force
->
setCutoffDistance
(
1.05
);
vector
<
double
>
params
;
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
vector
<
Vec3
>
positions
;
positions
.
push_back
(
Vec3
(
0
,
0
,
0
));
positions
.
push_back
(
Vec3
(
1
,
0
,
0
));
positions
.
push_back
(
Vec3
(
0
,
1.1
,
0.3
));
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
double
boxSize
=
2.1
;
int
sets
[
4
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
,
true
);
}
void
testExclusions
()
{
...
...
@@ -286,7 +327,7 @@ void testExclusions() {
force
->
addExclusion
(
0
,
3
);
int
sets
[
5
][
3
]
=
{{
0
,
1
,
4
},
{
1
,
2
,
3
},
{
1
,
2
,
4
},
{
1
,
3
,
4
},
{
2
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
void
testAllTerms
()
{
...
...
@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
testNoCutoff
();
testCutoff
();
testPeriodic
();
testTriclinic
();
testExclusions
();
testAllTerms
();
testParameters
();
...
...
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
View file @
3db6b8ee
...
...
@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -261,6 +261,65 @@ void testPeriodic() {
ASSERT_EQUAL_TOL
(
1.9
+
1
+
0.9
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testTriclinic
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
Vec3
a
(
3.1
,
0
,
0
);
Vec3
b
(
0.4
,
3.5
,
0
);
Vec3
c
(
-
0.1
,
-
0.5
,
4.0
);
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"r"
);
nonbonded
->
addParticle
(
vector
<
double
>
());
nonbonded
->
addParticle
(
vector
<
double
>
());
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
const
double
cutoff
=
1.5
;
nonbonded
->
setCutoffDistance
(
cutoff
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
iteration
=
0
;
iteration
<
50
;
iteration
++
)
{
// Generate random positions for the two particles.
positions
[
0
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
positions
[
1
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
context
.
setPositions
(
positions
);
// Loop over all possible periodic copies and find the nearest one.
Vec3
delta
;
double
distance2
=
100.0
;
for
(
int
i
=
-
1
;
i
<
2
;
i
++
)
for
(
int
j
=
-
1
;
j
<
2
;
j
++
)
for
(
int
k
=
-
1
;
k
<
2
;
k
++
)
{
Vec3
d
=
positions
[
1
]
-
positions
[
0
]
+
a
*
i
+
b
*
j
+
c
*
k
;
if
(
d
.
dot
(
d
)
<
distance2
)
{
delta
=
d
;
distance2
=
d
.
dot
(
d
);
}
}
double
distance
=
sqrt
(
distance2
);
// See if the force and energy are correct.
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
if
(
distance
>=
cutoff
)
{
ASSERT_EQUAL
(
0.0
,
state
.
getPotentialEnergy
());
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
0
],
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
1
],
0
);
}
else
{
const
Vec3
force
=
delta
/
sqrt
(
delta
.
dot
(
delta
));
ASSERT_EQUAL_TOL
(
distance
,
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_VEC
(
force
,
state
.
getForces
()[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
-
force
,
state
.
getForces
()[
1
],
TOL
);
}
}
}
void
testContinuous1DFunction
()
{
System
system
;
system
.
addParticle
(
1.0
);
...
...
@@ -925,6 +984,7 @@ int main(int argc, char* argv[]) {
testExclusions
();
testCutoff
();
testPeriodic
();
testTriclinic
();
testContinuous1DFunction
();
testContinuous2DFunction
();
testContinuous3DFunction
();
...
...
platforms/cuda/tests/TestCudaEwald.cpp
View file @
3db6b8ee
...
...
@@ -201,6 +201,57 @@ void testEwald2Ions() {
ASSERT_EQUAL_TOL
(
-
217.276
,
state
.
getPotentialEnergy
(),
0.01
/*10*TOL*/
);
}
void
testTriclinic
()
{
// Create a triclinic box containing eight particles.
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2.5
,
0
,
0
),
Vec3
(
0.5
,
3.0
,
0
),
Vec3
(
0.7
,
0.9
,
3.5
));
for
(
int
i
=
0
;
i
<
8
;
i
++
)
system
.
addParticle
(
1.0
);
NonbondedForce
*
force
=
new
NonbondedForce
();
system
.
addForce
(
force
);
force
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
force
->
setCutoffDistance
(
1.0
);
force
->
setPMEParameters
(
3.45891
,
32
,
40
,
48
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
-
1
,
0.440104
,
0.4184
);
// Cl parameters
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
1
,
0.332840
,
0.0115897
);
// Na parameters
vector
<
Vec3
>
positions
(
8
);
positions
[
0
]
=
Vec3
(
1.744
,
2.788
,
3.162
);
positions
[
1
]
=
Vec3
(
1.048
,
0.762
,
2.340
);
positions
[
2
]
=
Vec3
(
2.489
,
1.570
,
2.817
);
positions
[
3
]
=
Vec3
(
1.027
,
1.893
,
3.271
);
positions
[
4
]
=
Vec3
(
0.937
,
0.825
,
0.009
);
positions
[
5
]
=
Vec3
(
2.290
,
1.887
,
3.352
);
positions
[
6
]
=
Vec3
(
1.266
,
1.111
,
2.894
);
positions
[
7
]
=
Vec3
(
0.933
,
1.862
,
3.490
);
// Compute the forces and energy.
VerletIntegrator
integ
(
0.001
);
Context
context
(
system
,
integ
,
platform
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Compare them to values computed by Gromacs.
double
expectedEnergy
=
-
963.370
;
vector
<
Vec3
>
expectedForce
(
8
);
expectedForce
[
0
]
=
Vec3
(
4.25253e+01
,
-
1.23503e+02
,
1.22139e+02
);
expectedForce
[
1
]
=
Vec3
(
9.74752e+01
,
1.68213e+02
,
1.93169e+02
);
expectedForce
[
2
]
=
Vec3
(
-
1.50348e+02
,
1.29165e+02
,
3.70435e+02
);
expectedForce
[
3
]
=
Vec3
(
9.18644e+02
,
-
3.52571e+00
,
-
1.34772e+03
);
expectedForce
[
4
]
=
Vec3
(
-
1.61193e+02
,
9.01528e+01
,
-
7.12904e+01
);
expectedForce
[
5
]
=
Vec3
(
2.82630e+02
,
2.78029e+01
,
-
3.72864e+02
);
expectedForce
[
6
]
=
Vec3
(
-
1.47454e+02
,
-
2.14448e+02
,
-
3.55789e+02
);
expectedForce
[
7
]
=
Vec3
(
-
8.82195e+02
,
-
7.39132e+01
,
1.46202e+03
);
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
ASSERT_EQUAL_VEC
(
expectedForce
[
i
],
state
.
getForces
()[
i
],
1e-4
);
}
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
1e-4
);
}
void
testErrorTolerance
(
NonbondedForce
::
NonbondedMethod
method
)
{
// Create a cloud of random point charges.
...
...
@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
testEwaldPME
(
false
);
testEwaldPME
(
true
);
// testEwald2Ions();
testTriclinic
();
testErrorTolerance
(
NonbondedForce
::
Ewald
);
testErrorTolerance
(
NonbondedForce
::
PME
);
testPMEParameters
();
...
...
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
View file @
3db6b8ee
...
...
@@ -236,6 +236,82 @@ void testRandomSeed() {
}
}
void
testTriclinic
()
{
const
int
numParticles
=
64
;
const
int
frequency
=
10
;
const
int
steps
=
1000
;
const
double
pressure
=
1.5
;
const
double
pressureInMD
=
pressure
*
(
AVOGADRO
*
1e-25
);
// pressure in kJ/mol/nm^3
const
double
temperature
=
300.0
;
const
double
initialVolume
=
numParticles
*
BOLTZ
*
temperature
/
pressureInMD
;
const
double
initialLength
=
std
::
pow
(
initialVolume
,
1.0
/
3.0
);
// Create a gas of noninteracting particles.
System
system
;
Vec3
initialBox
[
3
];
initialBox
[
0
]
=
Vec3
(
initialLength
,
0
,
0
);
initialBox
[
1
]
=
Vec3
(
0.2
*
initialLength
,
initialLength
,
0
);
initialBox
[
2
]
=
Vec3
(
0.1
*
initialLength
,
0.3
*
initialLength
,
initialLength
);
system
.
setDefaultPeriodicBoxVectors
(
initialBox
[
0
],
initialBox
[
1
],
initialBox
[
2
]);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
1.0
);
positions
[
i
]
=
Vec3
(
initialLength
*
genrand_real2
(
sfmt
),
initialLength
*
genrand_real2
(
sfmt
),
initialLength
*
genrand_real2
(
sfmt
));
}
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temperature
,
true
,
true
,
true
,
frequency
);
system
.
addForce
(
barostat
);
// Run a simulation
LangevinIntegrator
integrator
(
temperature
,
0.1
,
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
// Let it equilibrate.
integrator
.
step
(
10000
);
// Now run it for a while and see if the volume is correct.
double
volume
=
0.0
;
for
(
int
j
=
0
;
j
<
steps
;
++
j
)
{
Vec3
box
[
3
];
context
.
getState
(
0
).
getPeriodicBoxVectors
(
box
[
0
],
box
[
1
],
box
[
2
]);
volume
+=
box
[
0
][
0
]
*
box
[
1
][
1
]
*
box
[
2
][
2
];
integrator
.
step
(
frequency
);
}
volume
/=
steps
;
double
expected
=
(
numParticles
+
1
)
*
BOLTZ
*
temperature
/
pressureInMD
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
volume
,
3
/
std
::
sqrt
((
double
)
steps
));
// Make sure the box vectors have been scaled consistently.
State
state
=
context
.
getState
(
State
::
Positions
);
Vec3
box
[
3
];
state
.
getPeriodicBoxVectors
(
box
[
0
],
box
[
1
],
box
[
2
]);
double
xscale
=
box
[
2
][
0
]
/
(
0.1
*
initialLength
);
double
yscale
=
box
[
2
][
1
]
/
(
0.3
*
initialLength
);
double
zscale
=
box
[
2
][
2
]
/
(
1.0
*
initialLength
);
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
ASSERT_EQUAL_VEC
(
Vec3
(
xscale
*
initialBox
[
i
][
0
],
yscale
*
initialBox
[
i
][
1
],
zscale
*
initialBox
[
i
][
2
]),
box
[
i
],
1e-5
);
}
// The barostat should have put all particles inside the first periodic box. One integration step
// has happened since then, so they may have moved slightly outside it.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
Vec3
pos
=
state
.
getPositions
()[
i
];
ASSERT
(
pos
[
2
]
/
box
[
2
][
2
]
>
-
1
&&
pos
[
2
]
/
box
[
2
][
2
]
<
2
);
pos
-=
box
[
2
]
*
floor
(
pos
[
2
]
/
box
[
2
][
2
]);
ASSERT
(
pos
[
1
]
/
box
[
1
][
1
]
>
-
1
&&
pos
[
1
]
/
box
[
1
][
1
]
<
2
);
pos
-=
box
[
1
]
*
floor
(
pos
[
1
]
/
box
[
1
][
1
]);
ASSERT
(
pos
[
0
]
/
box
[
0
][
0
]
>
-
1
&&
pos
[
0
]
/
box
[
0
][
0
]
<
2
);
}
}
/**
* Run a constant pressure simulation on an anisotropic Einstein crystal
* using isotropic and anisotropic barostats. There are a total of 15 simulations:
...
...
@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
testIdealGasAxis
(
1
);
testIdealGasAxis
(
2
);
testRandomSeed
();
testTriclinic
();
//testEinsteinCrystal();
}
catch
(
const
exception
&
e
)
{
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
3db6b8ee
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -355,6 +355,67 @@ void testPeriodic() {
ASSERT_EQUAL_TOL
(
2
*
ONE_4PI_EPS0
*
(
1.0
)
*
(
1.0
+
krf
*
1.0
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
}
void
testTriclinic
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
Vec3
a
(
3.1
,
0
,
0
);
Vec3
b
(
0.4
,
3.5
,
0
);
Vec3
c
(
-
0.1
,
-
0.5
,
4.0
);
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
VerletIntegrator
integrator
(
0.01
);
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
addParticle
(
1.0
,
1
,
0
);
nonbonded
->
addParticle
(
1.0
,
1
,
0
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
const
double
cutoff
=
1.5
;
nonbonded
->
setCutoffDistance
(
cutoff
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
const
double
eps
=
78.3
;
const
double
krf
=
(
1.0
/
(
cutoff
*
cutoff
*
cutoff
))
*
(
eps
-
1.0
)
/
(
2.0
*
eps
+
1.0
);
const
double
crf
=
(
1.0
/
cutoff
)
*
(
3.0
*
eps
)
/
(
2.0
*
eps
+
1.0
);
for
(
int
iteration
=
0
;
iteration
<
50
;
iteration
++
)
{
// Generate random positions for the two particles.
positions
[
0
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
positions
[
1
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
context
.
setPositions
(
positions
);
// Loop over all possible periodic copies and find the nearest one.
Vec3
delta
;
double
distance2
=
100.0
;
for
(
int
i
=
-
1
;
i
<
2
;
i
++
)
for
(
int
j
=
-
1
;
j
<
2
;
j
++
)
for
(
int
k
=
-
1
;
k
<
2
;
k
++
)
{
Vec3
d
=
positions
[
1
]
-
positions
[
0
]
+
a
*
i
+
b
*
j
+
c
*
k
;
if
(
d
.
dot
(
d
)
<
distance2
)
{
delta
=
d
;
distance2
=
d
.
dot
(
d
);
}
}
double
distance
=
sqrt
(
distance2
);
// See if the force and energy are correct.
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
if
(
distance
>=
cutoff
)
{
ASSERT_EQUAL
(
0.0
,
state
.
getPotentialEnergy
());
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
0
],
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
1
],
0
);
}
else
{
const
Vec3
force
=
delta
*
ONE_4PI_EPS0
*
(
-
1.0
/
(
distance
*
distance
*
distance
)
+
2.0
*
krf
);
ASSERT_EQUAL_TOL
(
ONE_4PI_EPS0
*
(
1.0
/
distance
+
krf
*
distance
*
distance
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_VEC
(
force
,
state
.
getForces
()[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
-
force
,
state
.
getForces
()[
1
],
TOL
);
}
}
}
void
testLargeSystem
()
{
const
int
numMolecules
=
600
;
...
...
@@ -862,6 +923,33 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
}
}
void
testReordering
()
{
// Check that reordering of atoms doesn't alter their positions.
const
int
numParticles
=
200
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
6
,
0
,
0
),
Vec3
(
2.1
,
6
,
0
),
Vec3
(
-
1.5
,
-
0.5
,
6
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
0.0
,
0.0
,
0.0
);
positions
.
push_back
(
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
)
*
20
);
}
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL_VEC
(
positions
[
i
],
state
.
getPositions
()[
i
],
1e-6
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
...
...
@@ -872,6 +960,7 @@ int main(int argc, char* argv[]) {
testCutoff
();
testCutoff14
();
testPeriodic
();
testTriclinic
();
testLargeSystem
();
//testBlockInteractions(false);
//testBlockInteractions(true);
...
...
@@ -882,6 +971,7 @@ int main(int argc, char* argv[]) {
testParallelComputation
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
testReordering
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/opencl/include/OpenCLContext.h
View file @
3db6b8ee
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -448,36 +448,65 @@ public:
/**
* Get whether the device being used supports 64 bit atomic operations on global memory.
*/
bool
getSupports64BitGlobalAtomics
()
{
bool
getSupports64BitGlobalAtomics
()
const
{
return
supports64BitGlobalAtomics
;
}
/**
* Get whether the device being used supports double precision math.
*/
bool
getSupportsDoublePrecision
()
{
bool
getSupportsDoublePrecision
()
const
{
return
supportsDoublePrecision
;
}
/**
* Get whether double precision is being used.
*/
bool
getUseDoublePrecision
()
{
bool
getUseDoublePrecision
()
const
{
return
useDoublePrecision
;
}
/**
* Get whether mixed precision is being used.
*/
bool
getUseMixedPrecision
()
{
bool
getUseMixedPrecision
()
const
{
return
useMixedPrecision
;
}
/**
* Get whether the periodic box is triclinic.
*/
bool
getBoxIsTriclinic
()
const
{
return
boxIsTriclinic
;
}
/**
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
*/
std
::
string
doubleToString
(
double
value
);
std
::
string
doubleToString
(
double
value
)
const
;
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
std
::
string
intToString
(
int
value
);
std
::
string
intToString
(
int
value
)
const
;
/**
* Get the vectors defining the periodic box.
*/
void
getPeriodicBoxVectors
(
Vec3
&
a
,
Vec3
&
b
,
Vec3
&
c
)
const
{
a
=
Vec3
(
periodicBoxVecXDouble
.
x
,
periodicBoxVecXDouble
.
y
,
periodicBoxVecXDouble
.
z
);
b
=
Vec3
(
periodicBoxVecYDouble
.
x
,
periodicBoxVecYDouble
.
y
,
periodicBoxVecYDouble
.
z
);
c
=
Vec3
(
periodicBoxVecZDouble
.
x
,
periodicBoxVecZDouble
.
y
,
periodicBoxVecZDouble
.
z
);
}
/**
* Set the vectors defining the periodic box.
*/
void
setPeriodicBoxVectors
(
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
)
{
periodicBoxVecX
=
mm_float4
((
float
)
a
[
0
],
(
float
)
a
[
1
],
(
float
)
a
[
2
],
0.0
f
);
periodicBoxVecY
=
mm_float4
((
float
)
b
[
0
],
(
float
)
b
[
1
],
(
float
)
b
[
2
],
0.0
f
);
periodicBoxVecZ
=
mm_float4
((
float
)
c
[
0
],
(
float
)
c
[
1
],
(
float
)
c
[
2
],
0.0
f
);
periodicBoxVecXDouble
=
mm_double4
(
a
[
0
],
a
[
1
],
a
[
2
],
0.0
);
periodicBoxVecYDouble
=
mm_double4
(
b
[
0
],
b
[
1
],
b
[
2
],
0.0
);
periodicBoxVecZDouble
=
mm_double4
(
c
[
0
],
c
[
1
],
c
[
2
],
0.0
);
periodicBoxSize
=
mm_float4
((
float
)
a
[
0
],
(
float
)
b
[
1
],
(
float
)
c
[
2
],
0.0
f
);
invPeriodicBoxSize
=
mm_float4
(
1.0
f
/
(
float
)
a
[
0
],
1.0
f
/
(
float
)
b
[
1
],
1.0
f
/
(
float
)
c
[
2
],
0.0
f
);
periodicBoxSizeDouble
=
mm_double4
(
a
[
0
],
b
[
1
],
c
[
2
],
0.0
);
invPeriodicBoxSizeDouble
=
mm_double4
(
1.0
/
a
[
0
],
1.0
/
b
[
1
],
1.0
/
c
[
2
],
0.0
);
}
/**
* Get the size of the periodic box.
*/
...
...
@@ -490,15 +519,6 @@ public:
mm_double4
getPeriodicBoxSizeDouble
()
const
{
return
periodicBoxSizeDouble
;
}
/**
* Set the size of the periodic box.
*/
void
setPeriodicBoxSize
(
double
xsize
,
double
ysize
,
double
zsize
)
{
periodicBoxSize
=
mm_float4
((
float
)
xsize
,
(
float
)
ysize
,
(
float
)
zsize
,
0
);
invPeriodicBoxSize
=
mm_float4
((
float
)
(
1.0
/
xsize
),
(
float
)
(
1.0
/
ysize
),
(
float
)
(
1.0
/
zsize
),
0
);
periodicBoxSizeDouble
=
mm_double4
(
xsize
,
ysize
,
zsize
,
0
);
invPeriodicBoxSizeDouble
=
mm_double4
(
1.0
/
xsize
,
1.0
/
ysize
,
1.0
/
zsize
,
0
);
}
/**
* Get the inverse of the size of the periodic box.
*/
...
...
@@ -511,6 +531,42 @@ public:
mm_double4
getInvPeriodicBoxSizeDouble
()
const
{
return
invPeriodicBoxSizeDouble
;
}
/**
* Get the first periodic box vector.
*/
mm_float4
getPeriodicBoxVecX
()
{
return
periodicBoxVecX
;
}
/**
* Get the first periodic box vector.
*/
mm_double4
getPeriodicBoxVecXDouble
()
{
return
periodicBoxVecXDouble
;
}
/**
* Get the second periodic box vector.
*/
mm_float4
getPeriodicBoxVecY
()
{
return
periodicBoxVecY
;
}
/**
* Get the second periodic box vector.
*/
mm_double4
getPeriodicBoxVecYDouble
()
{
return
periodicBoxVecYDouble
;
}
/**
* Get the third periodic box vector.
*/
mm_float4
getPeriodicBoxVecZ
()
{
return
periodicBoxVecZ
;
}
/**
* Get the third periodic box vector.
*/
mm_double4
getPeriodicBoxVecZDouble
()
{
return
periodicBoxVecZDouble
;
}
/**
* Get the OpenCLIntegrationUtilities for this context.
*/
...
...
@@ -628,9 +684,9 @@ private:
int
numThreadBlocks
;
int
numForceBuffers
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
std
::
string
defaultOptimizationOptions
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
cl
::
Context
context
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
3db6b8ee
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"LOG"
]
=
"log"
;
}
// Set defines for applying periodic boundary conditions.
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
boxIsTriclinic
=
(
boxVectors
[
0
][
1
]
!=
0.0
||
boxVectors
[
0
][
2
]
!=
0.0
||
boxVectors
[
1
][
0
]
!=
0.0
||
boxVectors
[
1
][
2
]
!=
0.0
||
boxVectors
[
2
][
0
]
!=
0.0
||
boxVectors
[
2
][
1
]
!=
0.0
);
if
(
boxIsTriclinic
)
{
compilationDefines
[
"APPLY_PERIODIC_TO_DELTA(delta)"
]
=
"{"
"real scale3 = floor(delta.z*invPeriodicBoxSize.z+0.5f);
\\\n
"
"delta.xyz -= scale3*periodicBoxVecZ.xyz;
\\\n
"
"real scale2 = floor(delta.y*invPeriodicBoxSize.y+0.5f);
\\\n
"
"delta.xy -= scale2*periodicBoxVecY.xy;
\\\n
"
"real scale1 = floor(delta.x*invPeriodicBoxSize.x+0.5f);
\\\n
"
"delta.x -= scale1*periodicBoxVecX.x;}"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS(pos)"
]
=
"{"
"real scale3 = floor(pos.z*invPeriodicBoxSize.z);
\\\n
"
"pos.xyz -= scale3*periodicBoxVecZ.xyz;
\\\n
"
"real scale2 = floor(pos.y*invPeriodicBoxSize.y);
\\\n
"
"pos.xy -= scale2*periodicBoxVecY.xy;
\\\n
"
"real scale1 = floor(pos.x*invPeriodicBoxSize.x);
\\\n
"
"pos.x -= scale1*periodicBoxVecX.x;}"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"
]
=
"{"
"real scale3 = floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f);
\\\n
"
"pos.x -= scale3*periodicBoxVecZ.x;
\\\n
"
"pos.y -= scale3*periodicBoxVecZ.y;
\\\n
"
"pos.z -= scale3*periodicBoxVecZ.z;
\\\n
"
"real scale2 = floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f);
\\\n
"
"pos.x -= scale2*periodicBoxVecY.x;
\\\n
"
"pos.y -= scale2*periodicBoxVecY.y;
\\\n
"
"real scale1 = floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f);
\\\n
"
"pos.x -= scale1*periodicBoxVecX.x;}"
;
}
else
{
compilationDefines
[
"APPLY_PERIODIC_TO_DELTA(delta)"
]
=
"delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS(pos)"
]
=
"pos.xyz -= floor(pos.xyz*invPeriodicBoxSize.xyz)*periodicBoxSize.xyz;"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"
]
=
"{"
"pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
\\\n
"
"pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
\\\n
"
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}"
;
}
// Create the work thread used for parallelization when running on multiple devices.
thread
=
new
WorkThread
();
...
...
@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() {
currentQueue
=
defaultQueue
;
}
string
OpenCLContext
::
doubleToString
(
double
value
)
{
string
OpenCLContext
::
doubleToString
(
double
value
)
const
{
stringstream
s
;
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
<<
scientific
<<
value
;
...
...
@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) {
return
s
.
str
();
}
string
OpenCLContext
::
intToString
(
int
value
)
{
string
OpenCLContext
::
intToString
(
int
value
)
const
{
stringstream
s
;
s
<<
value
;
return
s
.
str
();
...
...
@@ -1039,16 +1087,21 @@ void OpenCLContext::reorderAtomsImpl() {
// Move each molecule position into the same box.
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSizeDouble
.
x
);
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSizeDouble
.
y
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSizeDouble
.
z
);
Real
dx
=
xcell
*
periodicBoxSizeDouble
.
x
;
Real
dy
=
ycell
*
periodicBoxSizeDouble
.
y
;
Real
dz
=
zcell
*
periodicBoxSizeDouble
.
z
;
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
y
-=
dy
;
molPos
[
i
].
z
-=
dz
;
Real4
center
=
molPos
[
i
];
int
zcell
=
(
int
)
floor
(
center
.
z
*
invPeriodicBoxSize
.
z
);
center
.
x
-=
zcell
*
periodicBoxVecZ
.
x
;
center
.
y
-=
zcell
*
periodicBoxVecZ
.
y
;
center
.
z
-=
zcell
*
periodicBoxVecZ
.
z
;
int
ycell
=
(
int
)
floor
(
center
.
y
*
invPeriodicBoxSize
.
y
);
center
.
x
-=
ycell
*
periodicBoxVecY
.
x
;
center
.
y
-=
ycell
*
periodicBoxVecY
.
y
;
int
xcell
=
(
int
)
floor
(
center
.
x
*
invPeriodicBoxSize
.
x
);
center
.
x
-=
xcell
*
periodicBoxVecX
.
x
;
if
(
xcell
!=
0
||
ycell
!=
0
||
zcell
!=
0
)
{
Real
dx
=
molPos
[
i
].
x
-
center
.
x
;
Real
dy
=
molPos
[
i
].
y
-
center
.
y
;
Real
dz
=
molPos
[
i
].
z
-
center
.
z
;
molPos
[
i
]
=
center
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
Real4
p
=
oldPosq
[
atom
];
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
3db6b8ee
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
3db6b8ee
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
rebuildNeighborList
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
sortedBlocks
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
rebuildNeighborList
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"sortBoxData"
);
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
blockCenter
->
getDeviceBuffer
());
...
...
@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
6
,
interactingTiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
7
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
sortedBlocks
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
0
,
sortedBlockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
1
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
2
,
exclusionIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
3
,
exclusionRowIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
oldPositions
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
5
,
rebuildNeighborList
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
interactingTiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
sortedBlocks
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
3
,
sortedBlockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
5
,
exclusionIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
6
,
exclusionRowIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
7
,
oldPositions
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
8
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
...
...
@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
}
}
static
void
setPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxSize
());
}
static
void
setInvPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getInvPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getInvPeriodicBoxSize
());
static
void
setPeriodicBoxArgs
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
{
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getInvPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxVecXDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxVecYDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxVecZDouble
());
}
else
{
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getInvPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxVecX
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxVecY
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxVecZ
());
}
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
...
...
@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
setPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
2
);
setPeriodicBoxArgs
(
context
,
findBlockBoundsKernel
,
1
);
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
0
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
1
);
setPeriodicBoxArgs
(
context
,
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
}
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
kernelSource
.
size
()
>
0
)
{
if
(
useCutoff
)
{
setPeriodicBoxSizeArg
(
context
,
forceKernel
,
9
);
setInvPeriodicBoxSizeArg
(
context
,
forceKernel
,
10
);
}
if
(
useCutoff
)
setPeriodicBoxArgs
(
context
,
forceKernel
,
9
);
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
...
...
@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl_uint
>
(
1
1
,
maxTiles
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
6
,
maxTiles
);
forceKernel
.
setArg
<
cl_uint
>
(
1
4
,
maxTiles
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
int
numAtoms
=
context
.
getNumAtoms
();
if
(
context
.
getUseDoublePrecision
())
{
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
...
...
@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
7
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
}
}
...
...
@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
index
+=
5
;
// The periodic box size arguments are set when the kernel is executed.
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
->
getSize
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockCenter
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockBoundingBox
->
getDeviceBuffer
());
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
3db6b8ee
...
...
@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
unsigned
int
numTiles
#
endif
...
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
...
...
@@ -110,7 +111,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
...
...
@@ -266,10 +267,8 @@ __kernel void computeN2Energy(
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
real4
blockCenterX
=
blockCenter[x]
;
posq1.xyz
-=
floor
((
posq1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
local_posq[get_local_id
(
0
)
].x
-=
floor
((
local_posq[get_local_id
(
0
)
].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
local_posq[get_local_id
(
0
)
].y
-=
floor
((
local_posq[get_local_id
(
0
)
].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
local_posq[get_local_id
(
0
)
].z
-=
floor
((
local_posq[get_local_id
(
0
)
].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1,
blockCenterX
)
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[get_local_id
(
0
)
],
blockCenterX
)
SYNC_WARPS
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
; j < TILE_SIZE; j++) {
...
...
@@ -310,7 +309,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
3db6b8ee
...
...
@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
unsigned
int
numTiles
#
endif
...
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
...
...
@@ -126,7 +127,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
...
...
@@ -272,7 +273,7 @@ __kernel void computeN2Energy(
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz
-=
floor
(
(
local_posq[tgx]
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[tgx]
,
blockCenterX
)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real4
force
=
0
;
...
...
@@ -332,7 +333,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
3db6b8ee
...
...
@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
__local
real*
restrict
local_value,
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
unsigned
int
numTiles
#
endif
...
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
...
...
@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
...
...
@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
real4
blockCenterX
=
blockCenter[x]
;
posq1.xyz
-=
floor
((
posq1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
local_posq[get_local_id
(
0
)
].x
-=
floor
((
local_posq[get_local_id
(
0
)
].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
local_posq[get_local_id
(
0
)
].y
-=
floor
((
local_posq[get_local_id
(
0
)
].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
local_posq[get_local_id
(
0
)
].z
-=
floor
((
local_posq[get_local_id
(
0
)
].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1,
blockCenterX
)
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[get_local_id
(
0
)
],
blockCenterX
)
SYNC_WARPS
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
; j < TILE_SIZE; j++) {
...
...
@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
3db6b8ee
...
...
@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
__local
real*
restrict
local_value,
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
unsigned
int
numTiles
#
endif
...
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
...
...
@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
...
...
@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz
-=
floor
(
(
local_posq[tgx]
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[tgx]
,
blockCenterX
)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real
value
=
0
;
...
...
@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
3db6b8ee
...
...
@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) {
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*/
real4
deltaPeriodic
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
real4
deltaPeriodic
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
#
ifdef
USE_PERIODIC
result.x
-=
floor
(
result.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
result.y
-=
floor
(
result.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
result.z
-=
floor
(
result.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_DELTA
(
result
)
#
endif
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
...
...
@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors.
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
real energy = 0;
real4 f1 = (real4) 0;
...
...
@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize
, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
...
...
@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
* Compute forces on acceptors.
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
...
...
@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
real4
d1
=
posBuffer[3*index]
;
real4
d2
=
posBuffer[3*index+1]
;
real4
d3
=
posBuffer[3*index+2]
;
real4
deltaD1A1
=
deltaPeriodic
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize
)
;
real4
deltaD1A1
=
deltaPeriodic
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize
,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
#
ifdef
USE_CUTOFF
if
(
deltaD1A1.w
<
CUTOFF_SQUARED
)
{
#
endif
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
3db6b8ee
...
...
@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*/
inline
real4
delta
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
inline
real4
delta
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
result.x
-=
floor
(
result.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
result.y
-=
floor
(
result.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
result.z
-=
floor
(
result.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_DELTA
(
result
)
#
endif
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
...
...
@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*/
__kernel
void
computeInteraction
(
__global
long*
restrict
forceBuffers,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
#
endif
...
...
@@ -138,16 +136,14 @@ __kernel void computeInteraction(
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
__kernel
void
findBlockBounds
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
posq
,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
numNeighborPairs
)
{
__kernel
void
findBlockBounds
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
numNeighborPairs
)
{
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
NUM_ATOMS
)
{
real4
pos
=
posq[base]
;
#
ifdef
USE_PERIODIC
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS
(
pos
)
#
endif
real4
minPos
=
pos
;
real4
maxPos
=
pos
;
...
...
@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
pos
=
posq[i]
;
#
ifdef
USE_PERIODIC
real4
center
=
0.5f*
(
maxPos+minPos
)
;
pos.x
-=
floor
((
pos.x-center.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
pos.y
-=
floor
((
pos.y-center.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
pos.z
-=
floor
((
pos.z-center.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos,
center
)
#
endif
minPos
=
(
real4
)
(
min
(
minPos.x,pos.x
)
,
min
(
minPos.y,pos.y
)
,
min
(
minPos.z,pos.z
)
,
0
)
;
maxPos
=
(
real4
)
(
max
(
maxPos.x,pos.x
)
,
max
(
maxPos.y,pos.y
)
,
max
(
maxPos.z,pos.z
)
,
0
)
;
...
...
@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
/**
*
Find
a
list
of
neighbors
for
each
atom.
*/
__kernel
void
findNeighbors
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
posq
,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
int2*
restrict
neighborPairs,
__kernel
void
findNeighbors
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
int2*
restrict
neighborPairs,
__global
int*
restrict
numNeighborPairs,
__global
int*
restrict
numNeighborsForAtom,
int
maxNeighborPairs
#
ifdef
USE_EXCLUSIONS
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
...
...
@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
real4
blockSize2
=
blockBoundingBox[block2]
;
real4
blockDelta
=
blockCenter1-blockCenter2
;
#
ifdef
USE_PERIODIC
blockDelta.x
-=
floor
(
blockDelta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
blockDelta.y
-=
floor
(
blockDelta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
blockDelta.z
-=
floor
(
blockDelta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
#
endif
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSize1.x-blockSize2.x
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSize1.y-blockSize2.y
)
;
...
...
@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
//
Decide
whether
to
include
this
atom
pair
in
the
neighbor
list.
real4
atomDelta
=
delta
(
pos1,
pos2,
periodicBoxSize,
invPeriodicBoxSize
)
;
real4
atomDelta
=
delta
(
pos1,
pos2,
periodicBoxSize,
invPeriodicBoxSize
,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
#
ifdef
USE_CENTRAL_PARTICLE
bool
includeAtom
=
(
atom2
!=
atom1
&&
atom2
<
NUM_ATOMS
&&
atomDelta.w
<
CUTOFF_SQUARED
)
;
#
else
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
3db6b8ee
...
...
@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups(
#
else
__global
real4*
restrict
forceBuffers,
#
endif
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData
,
real4
periodicBox
Size
,
real4
invP
eriodicBox
Size
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX
,
real4
periodicBox
VecY
,
real4
p
eriodicBox
VecZ
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
...
...
@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups(
posq2
=
(
real4
)
(
localData[localIndex].x,
localData[localIndex].y,
localData[localIndex].z,
localData[localIndex].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment