Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
83ed602e
Commit
83ed602e
authored
Feb 05, 2015
by
peastman
Browse files
Merge pull request #797 from peastman/triclinic
C++ libraries support triclinic boxes
parents
b51e05a8
050e1262
Changes
119
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
809 additions
and
332 deletions
+809
-332
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+9
-20
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+46
-41
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
+58
-16
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
+61
-1
platforms/cuda/tests/TestCudaEwald.cpp
platforms/cuda/tests/TestCudaEwald.cpp
+52
-0
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
...orms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
+77
-0
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+91
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+75
-19
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+66
-13
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+149
-86
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+47
-48
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+8
-9
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+7
-6
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+8
-9
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+7
-6
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+8
-8
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+11
-19
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+3
-3
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+12
-11
platforms/opencl/src/kernels/gbsaObc.cl
platforms/opencl/src/kernels/gbsaObc.cl
+14
-16
No files found.
platforms/cuda/src/kernels/nonbonded.cu
View file @
83ed602e
...
@@ -103,8 +103,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -103,8 +103,9 @@ extern "C" __global__ void computeNonbonded(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
#endif
#endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
...
@@ -155,9 +156,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -155,9 +156,7 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
@@ -223,9 +222,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -223,9 +222,7 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
@@ -412,17 +409,11 @@ extern "C" __global__ void computeNonbonded(
...
@@ -412,17 +409,11 @@ extern "C" __global__ void computeNonbonded(
// The box is small enough that we can just translate all the atoms into a single periodic
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
real4
blockCenterX
=
blockCenter
[
x
];
posq1
.
x
-=
floor
((
posq1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
posq1
.
y
-=
floor
((
posq1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
posq1
.
z
-=
floor
((
posq1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflPosq
.
x
-=
floor
((
shflPosq
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
shflPosq
,
blockCenterX
)
shflPosq
.
y
-=
floor
((
shflPosq
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
shflPosq
.
z
-=
floor
((
shflPosq
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#else
#else
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData
[
threadIdx
.
x
],
blockCenterX
)
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
...
@@ -499,9 +490,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -499,9 +490,7 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
...
platforms/cuda/src/kernels/pme.cu
View file @
83ed602e
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// Compute the index of the grid point each atom is associated with.
// Compute the index of the grid point each atom is associated with.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
real4
pos
=
posq
[
i
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
z
*
recipBoxVecZ
.
z
)
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
)
)
*
GRID_SIZE_X
;
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
)
)
*
GRID_SIZE_Y
;
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
)
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
)
)
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
pmeAtomGridIndex
[
i
]
=
make_int2
(
i
,
gridIndex
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
gridIndex
.
y
*
GRID_SIZE_Z
+
gridIndex
.
z
);
pmeAtomGridIndex
[
i
]
=
make_int2
(
i
,
gridIndex
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
gridIndex
.
y
*
GRID_SIZE_Z
+
gridIndex
.
z
);
}
}
}
}
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -27,15 +27,16 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -27,15 +27,16 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real
charge
=
posq
[
atom
].
w
;
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
);
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
@@ -78,7 +79,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -78,7 +79,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
int
index
=
ybase
+
zindex
;
real
add
=
charge
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
pos
.
w
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
@@ -115,9 +116,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
...
@@ -115,9 +116,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern
"C"
__global__
void
extern
"C"
__global__
void
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
...
@@ -131,9 +131,9 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
...
@@ -131,9 +131,9 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
invPeriodicBoxSize
.
x
;
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
m
y
*
invPeriodicBoxSize
.
y
;
real
mhy
=
m
x
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
m
z
*
invPeriodicBoxSize
.
z
;
real
mhz
=
m
x
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
...
@@ -151,9 +151,8 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
...
@@ -151,9 +151,8 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
extern
"C"
__global__
void
extern
"C"
__global__
void
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
...
@@ -168,9 +167,9 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
...
@@ -168,9 +167,9 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
invPeriodicBoxSize
.
x
;
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
m
y
*
invPeriodicBoxSize
.
y
;
real
mhy
=
m
x
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
m
z
*
invPeriodicBoxSize
.
z
;
real
mhz
=
m
x
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
by
=
pmeBsplineModuliY
[
ky
];
...
@@ -194,7 +193,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
...
@@ -194,7 +193,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
extern
"C"
__global__
extern
"C"
__global__
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -206,12 +205,15 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -206,12 +205,15 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real3
force
=
make_real3
(
0
);
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
);
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
@@ -266,9 +268,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -266,9 +268,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
}
}
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
x
*
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0x100000000
));
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
y
*
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0x100000000
));
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
z
*
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0x100000000
));
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceX
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceY
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceZ
*
0x100000000
));
}
}
}
}
...
...
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
View file @
83ed602e
...
@@ -55,7 +55,17 @@ const double TOL = 1e-5;
...
@@ -55,7 +55,17 @@ const double TOL = 1e-5;
CudaPlatform
platform
;
CudaPlatform
platform
;
void
validateAxilrodTeller
(
CustomManyParticleForce
*
force
,
const
vector
<
Vec3
>&
positions
,
const
vector
<
const
int
*>&
expectedSets
,
double
boxSize
)
{
Vec3
computeDelta
(
const
Vec3
&
pos1
,
const
Vec3
&
pos2
,
bool
periodic
,
const
Vec3
*
periodicBoxVectors
)
{
Vec3
diff
=
pos1
-
pos2
;
if
(
periodic
)
{
diff
-=
periodicBoxVectors
[
2
]
*
floor
(
diff
[
2
]
/
periodicBoxVectors
[
2
][
2
]
+
0.5
);
diff
-=
periodicBoxVectors
[
1
]
*
floor
(
diff
[
1
]
/
periodicBoxVectors
[
1
][
1
]
+
0.5
);
diff
-=
periodicBoxVectors
[
0
]
*
floor
(
diff
[
0
]
/
periodicBoxVectors
[
0
][
0
]
+
0.5
);
}
return
diff
;
}
void
validateAxilrodTeller
(
CustomManyParticleForce
*
force
,
const
vector
<
Vec3
>&
positions
,
const
vector
<
const
int
*>&
expectedSets
,
double
boxSize
,
bool
triclinic
)
{
// Create a System and Context.
// Create a System and Context.
int
numParticles
=
force
->
getNumParticles
();
int
numParticles
=
force
->
getNumParticles
();
...
@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
...
@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
System
system
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
Vec3
boxVectors
[
3
];
if
(
triclinic
)
{
boxVectors
[
0
]
=
Vec3
(
boxSize
,
0
,
0
);
boxVectors
[
1
]
=
Vec3
(
0.2
*
boxSize
,
boxSize
,
0
);
boxVectors
[
2
]
=
Vec3
(
-
0.3
*
boxSize
,
-
0.1
*
boxSize
,
boxSize
);
}
else
{
boxVectors
[
0
]
=
Vec3
(
boxSize
,
0
,
0
);
boxVectors
[
1
]
=
Vec3
(
0
,
boxSize
,
0
);
boxVectors
[
2
]
=
Vec3
(
0
,
0
,
boxSize
);
}
system
.
setDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
addForce
(
force
);
system
.
addForce
(
force
);
VerletIntegrator
integrator
(
0.001
);
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
Context
context
(
system
,
integrator
,
platform
);
...
@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
...
@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
// See if the energy matches the expected value.
// See if the energy matches the expected value.
double
expectedEnergy
=
0
;
double
expectedEnergy
=
0
;
bool
periodic
=
(
nonbondedMethod
==
CustomManyParticleForce
::
CutoffPeriodic
);
for
(
int
i
=
0
;
i
<
(
int
)
expectedSets
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
expectedSets
.
size
();
i
++
)
{
int
p1
=
expectedSets
[
i
][
0
];
int
p1
=
expectedSets
[
i
][
0
];
int
p2
=
expectedSets
[
i
][
1
];
int
p2
=
expectedSets
[
i
][
1
];
int
p3
=
expectedSets
[
i
][
2
];
int
p3
=
expectedSets
[
i
][
2
];
Vec3
d12
=
positions
[
p2
]
-
positions
[
p1
];
Vec3
d12
=
computeDelta
(
positions
[
p2
],
positions
[
p1
],
periodic
,
boxVectors
);
Vec3
d13
=
positions
[
p3
]
-
positions
[
p1
];
Vec3
d13
=
computeDelta
(
positions
[
p3
],
positions
[
p1
],
periodic
,
boxVectors
);
Vec3
d23
=
positions
[
p3
]
-
positions
[
p2
];
Vec3
d23
=
computeDelta
(
positions
[
p3
],
positions
[
p2
],
periodic
,
boxVectors
);
if
(
nonbondedMethod
==
CustomManyParticleForce
::
CutoffPeriodic
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
d12
[
j
]
-=
floor
(
d12
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
d13
[
j
]
-=
floor
(
d13
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
d23
[
j
]
-=
floor
(
d23
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
}
}
double
r12
=
sqrt
(
d12
.
dot
(
d12
));
double
r12
=
sqrt
(
d12
.
dot
(
d12
));
double
r13
=
sqrt
(
d13
.
dot
(
d13
));
double
r13
=
sqrt
(
d13
.
dot
(
d13
));
double
r23
=
sqrt
(
d23
.
dot
(
d23
));
double
r23
=
sqrt
(
d23
.
dot
(
d23
));
...
@@ -210,7 +225,7 @@ void testNoCutoff() {
...
@@ -210,7 +225,7 @@ void testNoCutoff() {
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
int
sets
[
4
][
3
]
=
{{
0
,
1
,
2
},
{
1
,
2
,
3
},
{
2
,
3
,
0
},
{
3
,
0
,
1
}};
int
sets
[
4
][
3
]
=
{{
0
,
1
,
2
},
{
1
,
2
,
3
},
{
2
,
3
,
0
},
{
3
,
0
,
1
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
}
void
testCutoff
()
{
void
testCutoff
()
{
...
@@ -235,7 +250,7 @@ void testCutoff() {
...
@@ -235,7 +250,7 @@ void testCutoff() {
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
int
sets
[
7
][
3
]
=
{{
0
,
1
,
2
},
{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
2
,
4
},
{
1
,
3
,
4
}};
int
sets
[
7
][
3
]
=
{{
0
,
1
,
2
},
{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
2
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
7
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
7
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
}
void
testPeriodic
()
{
void
testPeriodic
()
{
...
@@ -261,7 +276,33 @@ void testPeriodic() {
...
@@ -261,7 +276,33 @@ void testPeriodic() {
double
boxSize
=
2.1
;
double
boxSize
=
2.1
;
int
sets
[
5
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
int
sets
[
5
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
,
false
);
}
void
testTriclinic
()
{
CustomManyParticleForce
*
force
=
new
CustomManyParticleForce
(
3
,
"C*(1+3*cos(theta1)*cos(theta2)*cos(theta3))/(r12*r13*r23)^3;"
"theta1=angle(p1,p2,p3); theta2=angle(p2,p3,p1); theta3=angle(p3,p1,p2);"
"r12=distance(p1,p2); r13=distance(p1,p3); r23=distance(p2,p3)"
);
force
->
addGlobalParameter
(
"C"
,
1.5
);
force
->
setNonbondedMethod
(
CustomManyParticleForce
::
CutoffPeriodic
);
force
->
setCutoffDistance
(
1.05
);
vector
<
double
>
params
;
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
vector
<
Vec3
>
positions
;
positions
.
push_back
(
Vec3
(
0
,
0
,
0
));
positions
.
push_back
(
Vec3
(
1
,
0
,
0
));
positions
.
push_back
(
Vec3
(
0
,
1.1
,
0.3
));
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
double
boxSize
=
2.1
;
int
sets
[
4
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
,
true
);
}
}
void
testExclusions
()
{
void
testExclusions
()
{
...
@@ -286,7 +327,7 @@ void testExclusions() {
...
@@ -286,7 +327,7 @@ void testExclusions() {
force
->
addExclusion
(
0
,
3
);
force
->
addExclusion
(
0
,
3
);
int
sets
[
5
][
3
]
=
{{
0
,
1
,
4
},
{
1
,
2
,
3
},
{
1
,
2
,
4
},
{
1
,
3
,
4
},
{
2
,
3
,
4
}};
int
sets
[
5
][
3
]
=
{{
0
,
1
,
4
},
{
1
,
2
,
3
},
{
1
,
2
,
4
},
{
1
,
3
,
4
},
{
2
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
}
void
testAllTerms
()
{
void
testAllTerms
()
{
...
@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
...
@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
testNoCutoff
();
testNoCutoff
();
testCutoff
();
testCutoff
();
testPeriodic
();
testPeriodic
();
testTriclinic
();
testExclusions
();
testExclusions
();
testAllTerms
();
testAllTerms
();
testParameters
();
testParameters
();
...
...
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
View file @
83ed602e
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -261,6 +261,65 @@ void testPeriodic() {
...
@@ -261,6 +261,65 @@ void testPeriodic() {
ASSERT_EQUAL_TOL
(
1.9
+
1
+
0.9
,
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_TOL
(
1.9
+
1
+
0.9
,
state
.
getPotentialEnergy
(),
TOL
);
}
}
void
testTriclinic
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
Vec3
a
(
3.1
,
0
,
0
);
Vec3
b
(
0.4
,
3.5
,
0
);
Vec3
c
(
-
0.1
,
-
0.5
,
4.0
);
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"r"
);
nonbonded
->
addParticle
(
vector
<
double
>
());
nonbonded
->
addParticle
(
vector
<
double
>
());
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
const
double
cutoff
=
1.5
;
nonbonded
->
setCutoffDistance
(
cutoff
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
iteration
=
0
;
iteration
<
50
;
iteration
++
)
{
// Generate random positions for the two particles.
positions
[
0
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
positions
[
1
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
context
.
setPositions
(
positions
);
// Loop over all possible periodic copies and find the nearest one.
Vec3
delta
;
double
distance2
=
100.0
;
for
(
int
i
=
-
1
;
i
<
2
;
i
++
)
for
(
int
j
=
-
1
;
j
<
2
;
j
++
)
for
(
int
k
=
-
1
;
k
<
2
;
k
++
)
{
Vec3
d
=
positions
[
1
]
-
positions
[
0
]
+
a
*
i
+
b
*
j
+
c
*
k
;
if
(
d
.
dot
(
d
)
<
distance2
)
{
delta
=
d
;
distance2
=
d
.
dot
(
d
);
}
}
double
distance
=
sqrt
(
distance2
);
// See if the force and energy are correct.
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
if
(
distance
>=
cutoff
)
{
ASSERT_EQUAL
(
0.0
,
state
.
getPotentialEnergy
());
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
0
],
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
1
],
0
);
}
else
{
const
Vec3
force
=
delta
/
sqrt
(
delta
.
dot
(
delta
));
ASSERT_EQUAL_TOL
(
distance
,
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_VEC
(
force
,
state
.
getForces
()[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
-
force
,
state
.
getForces
()[
1
],
TOL
);
}
}
}
void
testContinuous1DFunction
()
{
void
testContinuous1DFunction
()
{
System
system
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
...
@@ -925,6 +984,7 @@ int main(int argc, char* argv[]) {
...
@@ -925,6 +984,7 @@ int main(int argc, char* argv[]) {
testExclusions
();
testExclusions
();
testCutoff
();
testCutoff
();
testPeriodic
();
testPeriodic
();
testTriclinic
();
testContinuous1DFunction
();
testContinuous1DFunction
();
testContinuous2DFunction
();
testContinuous2DFunction
();
testContinuous3DFunction
();
testContinuous3DFunction
();
...
...
platforms/cuda/tests/TestCudaEwald.cpp
View file @
83ed602e
...
@@ -201,6 +201,57 @@ void testEwald2Ions() {
...
@@ -201,6 +201,57 @@ void testEwald2Ions() {
ASSERT_EQUAL_TOL
(
-
217.276
,
state
.
getPotentialEnergy
(),
0.01
/*10*TOL*/
);
ASSERT_EQUAL_TOL
(
-
217.276
,
state
.
getPotentialEnergy
(),
0.01
/*10*TOL*/
);
}
}
void
testTriclinic
()
{
// Create a triclinic box containing eight particles.
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2.5
,
0
,
0
),
Vec3
(
0.5
,
3.0
,
0
),
Vec3
(
0.7
,
0.9
,
3.5
));
for
(
int
i
=
0
;
i
<
8
;
i
++
)
system
.
addParticle
(
1.0
);
NonbondedForce
*
force
=
new
NonbondedForce
();
system
.
addForce
(
force
);
force
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
force
->
setCutoffDistance
(
1.0
);
force
->
setPMEParameters
(
3.45891
,
32
,
40
,
48
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
-
1
,
0.440104
,
0.4184
);
// Cl parameters
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
1
,
0.332840
,
0.0115897
);
// Na parameters
vector
<
Vec3
>
positions
(
8
);
positions
[
0
]
=
Vec3
(
1.744
,
2.788
,
3.162
);
positions
[
1
]
=
Vec3
(
1.048
,
0.762
,
2.340
);
positions
[
2
]
=
Vec3
(
2.489
,
1.570
,
2.817
);
positions
[
3
]
=
Vec3
(
1.027
,
1.893
,
3.271
);
positions
[
4
]
=
Vec3
(
0.937
,
0.825
,
0.009
);
positions
[
5
]
=
Vec3
(
2.290
,
1.887
,
3.352
);
positions
[
6
]
=
Vec3
(
1.266
,
1.111
,
2.894
);
positions
[
7
]
=
Vec3
(
0.933
,
1.862
,
3.490
);
// Compute the forces and energy.
VerletIntegrator
integ
(
0.001
);
Context
context
(
system
,
integ
,
platform
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Compare them to values computed by Gromacs.
double
expectedEnergy
=
-
963.370
;
vector
<
Vec3
>
expectedForce
(
8
);
expectedForce
[
0
]
=
Vec3
(
4.25253e+01
,
-
1.23503e+02
,
1.22139e+02
);
expectedForce
[
1
]
=
Vec3
(
9.74752e+01
,
1.68213e+02
,
1.93169e+02
);
expectedForce
[
2
]
=
Vec3
(
-
1.50348e+02
,
1.29165e+02
,
3.70435e+02
);
expectedForce
[
3
]
=
Vec3
(
9.18644e+02
,
-
3.52571e+00
,
-
1.34772e+03
);
expectedForce
[
4
]
=
Vec3
(
-
1.61193e+02
,
9.01528e+01
,
-
7.12904e+01
);
expectedForce
[
5
]
=
Vec3
(
2.82630e+02
,
2.78029e+01
,
-
3.72864e+02
);
expectedForce
[
6
]
=
Vec3
(
-
1.47454e+02
,
-
2.14448e+02
,
-
3.55789e+02
);
expectedForce
[
7
]
=
Vec3
(
-
8.82195e+02
,
-
7.39132e+01
,
1.46202e+03
);
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
ASSERT_EQUAL_VEC
(
expectedForce
[
i
],
state
.
getForces
()[
i
],
1e-4
);
}
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
1e-4
);
}
void
testErrorTolerance
(
NonbondedForce
::
NonbondedMethod
method
)
{
void
testErrorTolerance
(
NonbondedForce
::
NonbondedMethod
method
)
{
// Create a cloud of random point charges.
// Create a cloud of random point charges.
...
@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
...
@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
testEwaldPME
(
false
);
testEwaldPME
(
false
);
testEwaldPME
(
true
);
testEwaldPME
(
true
);
// testEwald2Ions();
// testEwald2Ions();
testTriclinic
();
testErrorTolerance
(
NonbondedForce
::
Ewald
);
testErrorTolerance
(
NonbondedForce
::
Ewald
);
testErrorTolerance
(
NonbondedForce
::
PME
);
testErrorTolerance
(
NonbondedForce
::
PME
);
testPMEParameters
();
testPMEParameters
();
...
...
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
View file @
83ed602e
...
@@ -236,6 +236,82 @@ void testRandomSeed() {
...
@@ -236,6 +236,82 @@ void testRandomSeed() {
}
}
}
}
void
testTriclinic
()
{
const
int
numParticles
=
64
;
const
int
frequency
=
10
;
const
int
steps
=
1000
;
const
double
pressure
=
1.5
;
const
double
pressureInMD
=
pressure
*
(
AVOGADRO
*
1e-25
);
// pressure in kJ/mol/nm^3
const
double
temperature
=
300.0
;
const
double
initialVolume
=
numParticles
*
BOLTZ
*
temperature
/
pressureInMD
;
const
double
initialLength
=
std
::
pow
(
initialVolume
,
1.0
/
3.0
);
// Create a gas of noninteracting particles.
System
system
;
Vec3
initialBox
[
3
];
initialBox
[
0
]
=
Vec3
(
initialLength
,
0
,
0
);
initialBox
[
1
]
=
Vec3
(
0.2
*
initialLength
,
initialLength
,
0
);
initialBox
[
2
]
=
Vec3
(
0.1
*
initialLength
,
0.3
*
initialLength
,
initialLength
);
system
.
setDefaultPeriodicBoxVectors
(
initialBox
[
0
],
initialBox
[
1
],
initialBox
[
2
]);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
1.0
);
positions
[
i
]
=
Vec3
(
initialLength
*
genrand_real2
(
sfmt
),
initialLength
*
genrand_real2
(
sfmt
),
initialLength
*
genrand_real2
(
sfmt
));
}
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temperature
,
true
,
true
,
true
,
frequency
);
system
.
addForce
(
barostat
);
// Run a simulation
LangevinIntegrator
integrator
(
temperature
,
0.1
,
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
// Let it equilibrate.
integrator
.
step
(
10000
);
// Now run it for a while and see if the volume is correct.
double
volume
=
0.0
;
for
(
int
j
=
0
;
j
<
steps
;
++
j
)
{
Vec3
box
[
3
];
context
.
getState
(
0
).
getPeriodicBoxVectors
(
box
[
0
],
box
[
1
],
box
[
2
]);
volume
+=
box
[
0
][
0
]
*
box
[
1
][
1
]
*
box
[
2
][
2
];
integrator
.
step
(
frequency
);
}
volume
/=
steps
;
double
expected
=
(
numParticles
+
1
)
*
BOLTZ
*
temperature
/
pressureInMD
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
volume
,
3
/
std
::
sqrt
((
double
)
steps
));
// Make sure the box vectors have been scaled consistently.
State
state
=
context
.
getState
(
State
::
Positions
);
Vec3
box
[
3
];
state
.
getPeriodicBoxVectors
(
box
[
0
],
box
[
1
],
box
[
2
]);
double
xscale
=
box
[
2
][
0
]
/
(
0.1
*
initialLength
);
double
yscale
=
box
[
2
][
1
]
/
(
0.3
*
initialLength
);
double
zscale
=
box
[
2
][
2
]
/
(
1.0
*
initialLength
);
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
ASSERT_EQUAL_VEC
(
Vec3
(
xscale
*
initialBox
[
i
][
0
],
yscale
*
initialBox
[
i
][
1
],
zscale
*
initialBox
[
i
][
2
]),
box
[
i
],
1e-5
);
}
// The barostat should have put all particles inside the first periodic box. One integration step
// has happened since then, so they may have moved slightly outside it.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
Vec3
pos
=
state
.
getPositions
()[
i
];
ASSERT
(
pos
[
2
]
/
box
[
2
][
2
]
>
-
1
&&
pos
[
2
]
/
box
[
2
][
2
]
<
2
);
pos
-=
box
[
2
]
*
floor
(
pos
[
2
]
/
box
[
2
][
2
]);
ASSERT
(
pos
[
1
]
/
box
[
1
][
1
]
>
-
1
&&
pos
[
1
]
/
box
[
1
][
1
]
<
2
);
pos
-=
box
[
1
]
*
floor
(
pos
[
1
]
/
box
[
1
][
1
]);
ASSERT
(
pos
[
0
]
/
box
[
0
][
0
]
>
-
1
&&
pos
[
0
]
/
box
[
0
][
0
]
<
2
);
}
}
/**
/**
* Run a constant pressure simulation on an anisotropic Einstein crystal
* Run a constant pressure simulation on an anisotropic Einstein crystal
* using isotropic and anisotropic barostats. There are a total of 15 simulations:
* using isotropic and anisotropic barostats. There are a total of 15 simulations:
...
@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
...
@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
testIdealGasAxis
(
1
);
testIdealGasAxis
(
1
);
testIdealGasAxis
(
2
);
testIdealGasAxis
(
2
);
testRandomSeed
();
testRandomSeed
();
testTriclinic
();
//testEinsteinCrystal();
//testEinsteinCrystal();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
83ed602e
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -355,6 +355,67 @@ void testPeriodic() {
...
@@ -355,6 +355,67 @@ void testPeriodic() {
ASSERT_EQUAL_TOL
(
2
*
ONE_4PI_EPS0
*
(
1.0
)
*
(
1.0
+
krf
*
1.0
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_TOL
(
2
*
ONE_4PI_EPS0
*
(
1.0
)
*
(
1.0
+
krf
*
1.0
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
}
}
void
testTriclinic
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
Vec3
a
(
3.1
,
0
,
0
);
Vec3
b
(
0.4
,
3.5
,
0
);
Vec3
c
(
-
0.1
,
-
0.5
,
4.0
);
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
VerletIntegrator
integrator
(
0.01
);
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
addParticle
(
1.0
,
1
,
0
);
nonbonded
->
addParticle
(
1.0
,
1
,
0
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
const
double
cutoff
=
1.5
;
nonbonded
->
setCutoffDistance
(
cutoff
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
const
double
eps
=
78.3
;
const
double
krf
=
(
1.0
/
(
cutoff
*
cutoff
*
cutoff
))
*
(
eps
-
1.0
)
/
(
2.0
*
eps
+
1.0
);
const
double
crf
=
(
1.0
/
cutoff
)
*
(
3.0
*
eps
)
/
(
2.0
*
eps
+
1.0
);
for
(
int
iteration
=
0
;
iteration
<
50
;
iteration
++
)
{
// Generate random positions for the two particles.
positions
[
0
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
positions
[
1
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
context
.
setPositions
(
positions
);
// Loop over all possible periodic copies and find the nearest one.
Vec3
delta
;
double
distance2
=
100.0
;
for
(
int
i
=
-
1
;
i
<
2
;
i
++
)
for
(
int
j
=
-
1
;
j
<
2
;
j
++
)
for
(
int
k
=
-
1
;
k
<
2
;
k
++
)
{
Vec3
d
=
positions
[
1
]
-
positions
[
0
]
+
a
*
i
+
b
*
j
+
c
*
k
;
if
(
d
.
dot
(
d
)
<
distance2
)
{
delta
=
d
;
distance2
=
d
.
dot
(
d
);
}
}
double
distance
=
sqrt
(
distance2
);
// See if the force and energy are correct.
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
if
(
distance
>=
cutoff
)
{
ASSERT_EQUAL
(
0.0
,
state
.
getPotentialEnergy
());
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
0
],
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
1
],
0
);
}
else
{
const
Vec3
force
=
delta
*
ONE_4PI_EPS0
*
(
-
1.0
/
(
distance
*
distance
*
distance
)
+
2.0
*
krf
);
ASSERT_EQUAL_TOL
(
ONE_4PI_EPS0
*
(
1.0
/
distance
+
krf
*
distance
*
distance
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_VEC
(
force
,
state
.
getForces
()[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
-
force
,
state
.
getForces
()[
1
],
TOL
);
}
}
}
void
testLargeSystem
()
{
void
testLargeSystem
()
{
const
int
numMolecules
=
600
;
const
int
numMolecules
=
600
;
...
@@ -862,6 +923,33 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
...
@@ -862,6 +923,33 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
}
}
}
}
void
testReordering
()
{
// Check that reordering of atoms doesn't alter their positions.
const
int
numParticles
=
200
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
6
,
0
,
0
),
Vec3
(
2.1
,
6
,
0
),
Vec3
(
-
1.5
,
-
0.5
,
6
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
0.0
,
0.0
,
0.0
);
positions
.
push_back
(
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
)
*
20
);
}
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL_VEC
(
positions
[
i
],
state
.
getPositions
()[
i
],
1e-6
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
...
@@ -872,6 +960,7 @@ int main(int argc, char* argv[]) {
...
@@ -872,6 +960,7 @@ int main(int argc, char* argv[]) {
testCutoff
();
testCutoff
();
testCutoff14
();
testCutoff14
();
testPeriodic
();
testPeriodic
();
testTriclinic
();
testLargeSystem
();
testLargeSystem
();
//testBlockInteractions(false);
//testBlockInteractions(false);
//testBlockInteractions(true);
//testBlockInteractions(true);
...
@@ -882,6 +971,7 @@ int main(int argc, char* argv[]) {
...
@@ -882,6 +971,7 @@ int main(int argc, char* argv[]) {
testParallelComputation
(
NonbondedForce
::
PME
);
testParallelComputation
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
testReordering
();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/opencl/include/OpenCLContext.h
View file @
83ed602e
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -448,36 +448,65 @@ public:
...
@@ -448,36 +448,65 @@ public:
/**
/**
* Get whether the device being used supports 64 bit atomic operations on global memory.
* Get whether the device being used supports 64 bit atomic operations on global memory.
*/
*/
bool
getSupports64BitGlobalAtomics
()
{
bool
getSupports64BitGlobalAtomics
()
const
{
return
supports64BitGlobalAtomics
;
return
supports64BitGlobalAtomics
;
}
}
/**
/**
* Get whether the device being used supports double precision math.
* Get whether the device being used supports double precision math.
*/
*/
bool
getSupportsDoublePrecision
()
{
bool
getSupportsDoublePrecision
()
const
{
return
supportsDoublePrecision
;
return
supportsDoublePrecision
;
}
}
/**
/**
* Get whether double precision is being used.
* Get whether double precision is being used.
*/
*/
bool
getUseDoublePrecision
()
{
bool
getUseDoublePrecision
()
const
{
return
useDoublePrecision
;
return
useDoublePrecision
;
}
}
/**
/**
* Get whether mixed precision is being used.
* Get whether mixed precision is being used.
*/
*/
bool
getUseMixedPrecision
()
{
bool
getUseMixedPrecision
()
const
{
return
useMixedPrecision
;
return
useMixedPrecision
;
}
}
/**
* Get whether the periodic box is triclinic.
*/
bool
getBoxIsTriclinic
()
const
{
return
boxIsTriclinic
;
}
/**
/**
* Convert a number to a string in a format suitable for including in a kernel.
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
* This takes into account whether the context uses single or double precision.
*/
*/
std
::
string
doubleToString
(
double
value
);
std
::
string
doubleToString
(
double
value
)
const
;
/**
/**
* Convert a number to a string in a format suitable for including in a kernel.
* Convert a number to a string in a format suitable for including in a kernel.
*/
*/
std
::
string
intToString
(
int
value
);
std
::
string
intToString
(
int
value
)
const
;
/**
* Get the vectors defining the periodic box.
*/
void
getPeriodicBoxVectors
(
Vec3
&
a
,
Vec3
&
b
,
Vec3
&
c
)
const
{
a
=
Vec3
(
periodicBoxVecXDouble
.
x
,
periodicBoxVecXDouble
.
y
,
periodicBoxVecXDouble
.
z
);
b
=
Vec3
(
periodicBoxVecYDouble
.
x
,
periodicBoxVecYDouble
.
y
,
periodicBoxVecYDouble
.
z
);
c
=
Vec3
(
periodicBoxVecZDouble
.
x
,
periodicBoxVecZDouble
.
y
,
periodicBoxVecZDouble
.
z
);
}
/**
* Set the vectors defining the periodic box.
*/
void
setPeriodicBoxVectors
(
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
)
{
periodicBoxVecX
=
mm_float4
((
float
)
a
[
0
],
(
float
)
a
[
1
],
(
float
)
a
[
2
],
0.0
f
);
periodicBoxVecY
=
mm_float4
((
float
)
b
[
0
],
(
float
)
b
[
1
],
(
float
)
b
[
2
],
0.0
f
);
periodicBoxVecZ
=
mm_float4
((
float
)
c
[
0
],
(
float
)
c
[
1
],
(
float
)
c
[
2
],
0.0
f
);
periodicBoxVecXDouble
=
mm_double4
(
a
[
0
],
a
[
1
],
a
[
2
],
0.0
);
periodicBoxVecYDouble
=
mm_double4
(
b
[
0
],
b
[
1
],
b
[
2
],
0.0
);
periodicBoxVecZDouble
=
mm_double4
(
c
[
0
],
c
[
1
],
c
[
2
],
0.0
);
periodicBoxSize
=
mm_float4
((
float
)
a
[
0
],
(
float
)
b
[
1
],
(
float
)
c
[
2
],
0.0
f
);
invPeriodicBoxSize
=
mm_float4
(
1.0
f
/
(
float
)
a
[
0
],
1.0
f
/
(
float
)
b
[
1
],
1.0
f
/
(
float
)
c
[
2
],
0.0
f
);
periodicBoxSizeDouble
=
mm_double4
(
a
[
0
],
b
[
1
],
c
[
2
],
0.0
);
invPeriodicBoxSizeDouble
=
mm_double4
(
1.0
/
a
[
0
],
1.0
/
b
[
1
],
1.0
/
c
[
2
],
0.0
);
}
/**
/**
* Get the size of the periodic box.
* Get the size of the periodic box.
*/
*/
...
@@ -490,15 +519,6 @@ public:
...
@@ -490,15 +519,6 @@ public:
mm_double4
getPeriodicBoxSizeDouble
()
const
{
mm_double4
getPeriodicBoxSizeDouble
()
const
{
return
periodicBoxSizeDouble
;
return
periodicBoxSizeDouble
;
}
}
/**
* Set the size of the periodic box.
*/
void
setPeriodicBoxSize
(
double
xsize
,
double
ysize
,
double
zsize
)
{
periodicBoxSize
=
mm_float4
((
float
)
xsize
,
(
float
)
ysize
,
(
float
)
zsize
,
0
);
invPeriodicBoxSize
=
mm_float4
((
float
)
(
1.0
/
xsize
),
(
float
)
(
1.0
/
ysize
),
(
float
)
(
1.0
/
zsize
),
0
);
periodicBoxSizeDouble
=
mm_double4
(
xsize
,
ysize
,
zsize
,
0
);
invPeriodicBoxSizeDouble
=
mm_double4
(
1.0
/
xsize
,
1.0
/
ysize
,
1.0
/
zsize
,
0
);
}
/**
/**
* Get the inverse of the size of the periodic box.
* Get the inverse of the size of the periodic box.
*/
*/
...
@@ -511,6 +531,42 @@ public:
...
@@ -511,6 +531,42 @@ public:
mm_double4
getInvPeriodicBoxSizeDouble
()
const
{
mm_double4
getInvPeriodicBoxSizeDouble
()
const
{
return
invPeriodicBoxSizeDouble
;
return
invPeriodicBoxSizeDouble
;
}
}
/**
* Get the first periodic box vector.
*/
mm_float4
getPeriodicBoxVecX
()
{
return
periodicBoxVecX
;
}
/**
* Get the first periodic box vector.
*/
mm_double4
getPeriodicBoxVecXDouble
()
{
return
periodicBoxVecXDouble
;
}
/**
* Get the second periodic box vector.
*/
mm_float4
getPeriodicBoxVecY
()
{
return
periodicBoxVecY
;
}
/**
* Get the second periodic box vector.
*/
mm_double4
getPeriodicBoxVecYDouble
()
{
return
periodicBoxVecYDouble
;
}
/**
* Get the third periodic box vector.
*/
mm_float4
getPeriodicBoxVecZ
()
{
return
periodicBoxVecZ
;
}
/**
* Get the third periodic box vector.
*/
mm_double4
getPeriodicBoxVecZDouble
()
{
return
periodicBoxVecZDouble
;
}
/**
/**
* Get the OpenCLIntegrationUtilities for this context.
* Get the OpenCLIntegrationUtilities for this context.
*/
*/
...
@@ -628,9 +684,9 @@ private:
...
@@ -628,9 +684,9 @@ private:
int
numThreadBlocks
;
int
numThreadBlocks
;
int
numForceBuffers
;
int
numForceBuffers
;
int
simdWidth
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
std
::
string
defaultOptimizationOptions
;
std
::
string
defaultOptimizationOptions
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
cl
::
Context
context
;
cl
::
Context
context
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
83ed602e
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"LOG"
]
=
"log"
;
compilationDefines
[
"LOG"
]
=
"log"
;
}
}
// Set defines for applying periodic boundary conditions.
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
boxIsTriclinic
=
(
boxVectors
[
0
][
1
]
!=
0.0
||
boxVectors
[
0
][
2
]
!=
0.0
||
boxVectors
[
1
][
0
]
!=
0.0
||
boxVectors
[
1
][
2
]
!=
0.0
||
boxVectors
[
2
][
0
]
!=
0.0
||
boxVectors
[
2
][
1
]
!=
0.0
);
if
(
boxIsTriclinic
)
{
compilationDefines
[
"APPLY_PERIODIC_TO_DELTA(delta)"
]
=
"{"
"real scale3 = floor(delta.z*invPeriodicBoxSize.z+0.5f);
\\\n
"
"delta.xyz -= scale3*periodicBoxVecZ.xyz;
\\\n
"
"real scale2 = floor(delta.y*invPeriodicBoxSize.y+0.5f);
\\\n
"
"delta.xy -= scale2*periodicBoxVecY.xy;
\\\n
"
"real scale1 = floor(delta.x*invPeriodicBoxSize.x+0.5f);
\\\n
"
"delta.x -= scale1*periodicBoxVecX.x;}"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS(pos)"
]
=
"{"
"real scale3 = floor(pos.z*invPeriodicBoxSize.z);
\\\n
"
"pos.xyz -= scale3*periodicBoxVecZ.xyz;
\\\n
"
"real scale2 = floor(pos.y*invPeriodicBoxSize.y);
\\\n
"
"pos.xy -= scale2*periodicBoxVecY.xy;
\\\n
"
"real scale1 = floor(pos.x*invPeriodicBoxSize.x);
\\\n
"
"pos.x -= scale1*periodicBoxVecX.x;}"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"
]
=
"{"
"real scale3 = floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f);
\\\n
"
"pos.x -= scale3*periodicBoxVecZ.x;
\\\n
"
"pos.y -= scale3*periodicBoxVecZ.y;
\\\n
"
"pos.z -= scale3*periodicBoxVecZ.z;
\\\n
"
"real scale2 = floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f);
\\\n
"
"pos.x -= scale2*periodicBoxVecY.x;
\\\n
"
"pos.y -= scale2*periodicBoxVecY.y;
\\\n
"
"real scale1 = floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f);
\\\n
"
"pos.x -= scale1*periodicBoxVecX.x;}"
;
}
else
{
compilationDefines
[
"APPLY_PERIODIC_TO_DELTA(delta)"
]
=
"delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS(pos)"
]
=
"pos.xyz -= floor(pos.xyz*invPeriodicBoxSize.xyz)*periodicBoxSize.xyz;"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"
]
=
"{"
"pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
\\\n
"
"pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
\\\n
"
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}"
;
}
// Create the work thread used for parallelization when running on multiple devices.
// Create the work thread used for parallelization when running on multiple devices.
thread
=
new
WorkThread
();
thread
=
new
WorkThread
();
...
@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() {
...
@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() {
currentQueue
=
defaultQueue
;
currentQueue
=
defaultQueue
;
}
}
string
OpenCLContext
::
doubleToString
(
double
value
)
{
string
OpenCLContext
::
doubleToString
(
double
value
)
const
{
stringstream
s
;
stringstream
s
;
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
<<
scientific
<<
value
;
s
<<
scientific
<<
value
;
...
@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) {
...
@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) {
return
s
.
str
();
return
s
.
str
();
}
}
string
OpenCLContext
::
intToString
(
int
value
)
{
string
OpenCLContext
::
intToString
(
int
value
)
const
{
stringstream
s
;
stringstream
s
;
s
<<
value
;
s
<<
value
;
return
s
.
str
();
return
s
.
str
();
...
@@ -1039,16 +1087,21 @@ void OpenCLContext::reorderAtomsImpl() {
...
@@ -1039,16 +1087,21 @@ void OpenCLContext::reorderAtomsImpl() {
// Move each molecule position into the same box.
// Move each molecule position into the same box.
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSizeDouble
.
x
);
Real4
center
=
molPos
[
i
];
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSizeDouble
.
y
);
int
zcell
=
(
int
)
floor
(
center
.
z
*
invPeriodicBoxSize
.
z
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSizeDouble
.
z
);
center
.
x
-=
zcell
*
periodicBoxVecZ
.
x
;
Real
dx
=
xcell
*
periodicBoxSizeDouble
.
x
;
center
.
y
-=
zcell
*
periodicBoxVecZ
.
y
;
Real
dy
=
ycell
*
periodicBoxSizeDouble
.
y
;
center
.
z
-=
zcell
*
periodicBoxVecZ
.
z
;
Real
dz
=
zcell
*
periodicBoxSizeDouble
.
z
;
int
ycell
=
(
int
)
floor
(
center
.
y
*
invPeriodicBoxSize
.
y
);
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
center
.
x
-=
ycell
*
periodicBoxVecY
.
x
;
molPos
[
i
].
x
-=
dx
;
center
.
y
-=
ycell
*
periodicBoxVecY
.
y
;
molPos
[
i
].
y
-=
dy
;
int
xcell
=
(
int
)
floor
(
center
.
x
*
invPeriodicBoxSize
.
x
);
molPos
[
i
].
z
-=
dz
;
center
.
x
-=
xcell
*
periodicBoxVecX
.
x
;
if
(
xcell
!=
0
||
ycell
!=
0
||
zcell
!=
0
)
{
Real
dx
=
molPos
[
i
].
x
-
center
.
x
;
Real
dy
=
molPos
[
i
].
y
-
center
.
y
;
Real
dz
=
molPos
[
i
].
z
-
center
.
z
;
molPos
[
i
]
=
center
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
Real4
p
=
oldPosq
[
atom
];
Real4
p
=
oldPosq
[
atom
];
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
83ed602e
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
83ed602e
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
findBlockBoundsKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
rebuildNeighborList
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
rebuildNeighborList
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
sortedBlocks
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"sortBoxData"
);
sortBoxDataKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"sortBoxData"
);
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
blockCenter
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
blockCenter
->
getDeviceBuffer
());
...
@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
6
,
interactingTiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
interactingTiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
7
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
sortedBlocks
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
sortedBlocks
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
0
,
sortedBlockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
3
,
sortedBlockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
1
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
2
,
exclusionIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
5
,
exclusionIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
3
,
exclusionRowIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
6
,
exclusionRowIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
oldPositions
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
7
,
oldPositions
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
5
,
rebuildNeighborList
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
8
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
if
(
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
// The device can't handle this block size, so reduce it.
...
@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
}
}
}
}
static
void
setPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
static
void
setPeriodicBoxArgs
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
if
(
cl
.
getUseDoublePrecision
())
{
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getInvPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxSize
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxVecXDouble
());
}
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxVecYDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxVecZDouble
());
static
void
setInvPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
}
if
(
cl
.
getUseDoublePrecision
())
else
{
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getInvPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxSize
());
else
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getInvPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getInvPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxVecX
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxVecY
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxVecZ
());
}
}
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
...
@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
// Compute the neighbor list.
setPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
1
);
setPeriodicBoxArgs
(
context
,
findBlockBoundsKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
2
);
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
0
);
setPeriodicBoxArgs
(
context
,
findInteractingBlocksKernel
,
0
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
1
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
kernelSource
.
size
()
>
0
)
{
if
(
kernelSource
.
size
()
>
0
)
{
if
(
useCutoff
)
{
if
(
useCutoff
)
setPeriodicBoxSizeArg
(
context
,
forceKernel
,
9
);
setPeriodicBoxArgs
(
context
,
forceKernel
,
9
);
setInvPeriodicBoxSizeArg
(
context
,
forceKernel
,
10
);
}
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
...
@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
...
@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl_uint
>
(
1
1
,
maxTiles
);
forceKernel
.
setArg
<
cl_uint
>
(
1
4
,
maxTiles
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
interactingAtoms
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
6
,
maxTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
int
numAtoms
=
context
.
getNumAtoms
();
int
numAtoms
=
context
.
getNumAtoms
();
if
(
context
.
getUseDoublePrecision
())
{
if
(
context
.
getUseDoublePrecision
())
{
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
...
@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
...
@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
7
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
}
}
}
}
...
@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
if
(
useCutoff
)
{
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
index
+=
5
;
// The periodic box size arguments are set when the kernel is executed.
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
->
getSize
());
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
->
getSize
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockCenter
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockCenter
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockBoundingBox
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockBoundingBox
->
getDeviceBuffer
());
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
83ed602e
...
@@ -20,8 +20,9 @@ __kernel void computeN2Energy(
...
@@ -20,8 +20,9 @@ __kernel void computeN2Energy(
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -110,7 +111,7 @@ __kernel void computeN2Energy(
...
@@ -110,7 +111,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2];
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -266,10 +267,8 @@ __kernel void computeN2Energy(
...
@@ -266,10 +267,8 @@ __kernel void computeN2Energy(
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
posq1.xyz
-=
floor
((
posq1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1,
blockCenterX
)
local_posq[get_local_id
(
0
)
].x
-=
floor
((
local_posq[get_local_id
(
0
)
].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[get_local_id
(
0
)
],
blockCenterX
)
local_posq[get_local_id
(
0
)
].y
-=
floor
((
local_posq[get_local_id
(
0
)
].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
local_posq[get_local_id
(
0
)
].z
-=
floor
((
local_posq[get_local_id
(
0
)
].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
SYNC_WARPS
;
SYNC_WARPS
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
; j < TILE_SIZE; j++) {
for
(
j
=
0
; j < TILE_SIZE; j++) {
...
@@ -310,7 +309,7 @@ __kernel void computeN2Energy(
...
@@ -310,7 +309,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
83ed602e
...
@@ -20,8 +20,9 @@ __kernel void computeN2Energy(
...
@@ -20,8 +20,9 @@ __kernel void computeN2Energy(
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
real4*
restrict
posq,
__local
real4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -126,7 +127,7 @@ __kernel void computeN2Energy(
...
@@ -126,7 +127,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -272,7 +273,7 @@ __kernel void computeN2Energy(
...
@@ -272,7 +273,7 @@ __kernel void computeN2Energy(
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz
-=
floor
(
(
local_posq[tgx]
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[tgx]
,
blockCenterX
)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real4
force
=
0
;
real4
force
=
0
;
...
@@ -332,7 +333,7 @@ __kernel void computeN2Energy(
...
@@ -332,7 +333,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
83ed602e
...
@@ -14,8 +14,9 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -14,8 +14,9 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#
endif
#
endif
__local
real*
restrict
local_value,
__local
real*
restrict
local_value,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
posq1.xyz
-=
floor
((
posq1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1,
blockCenterX
)
local_posq[get_local_id
(
0
)
].x
-=
floor
((
local_posq[get_local_id
(
0
)
].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[get_local_id
(
0
)
],
blockCenterX
)
local_posq[get_local_id
(
0
)
].y
-=
floor
((
local_posq[get_local_id
(
0
)
].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
local_posq[get_local_id
(
0
)
].z
-=
floor
((
local_posq[get_local_id
(
0
)
].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
SYNC_WARPS
;
SYNC_WARPS
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
; j < TILE_SIZE; j++) {
for
(
j
=
0
; j < TILE_SIZE; j++) {
...
@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
83ed602e
...
@@ -14,8 +14,9 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -14,8 +14,9 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
#
endif
#
endif
__local
real*
restrict
local_value,
__local
real*
restrict
local_value,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j];
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz
-=
floor
(
(
local_posq[tgx]
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[tgx]
,
blockCenterX
)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real
value
=
0
;
real
value
=
0
;
...
@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
83ed602e
...
@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) {
...
@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) {
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
real4
deltaPeriodic
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
real4
deltaPeriodic
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
result.x
-=
floor
(
result.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_DELTA
(
result
)
result.y
-=
floor
(
result.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
result.z
-=
floor
(
result.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
...
@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors.
* Compute forces on donors.
*/
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
real energy = 0;
real energy = 0;
real4 f1 = (real4) 0;
real4 f1 = (real4) 0;
...
@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real4 a1 = posBuffer[3*index];
real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1];
real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2];
real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize
, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
#endif
...
@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
* Compute forces on acceptors.
* Compute forces on acceptors.
*/
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
real4 f1 = (real4) 0;
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
real4 f2 = (real4) 0;
...
@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
...
@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
real4
d1
=
posBuffer[3*index]
;
real4
d1
=
posBuffer[3*index]
;
real4
d2
=
posBuffer[3*index+1]
;
real4
d2
=
posBuffer[3*index+1]
;
real4
d3
=
posBuffer[3*index+2]
;
real4
d3
=
posBuffer[3*index+2]
;
real4
deltaD1A1
=
deltaPeriodic
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize
)
;
real4
deltaD1A1
=
deltaPeriodic
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize
,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
deltaD1A1.w
<
CUTOFF_SQUARED
)
{
if
(
deltaD1A1.w
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
83ed602e
...
@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe
...
@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
inline
real4
delta
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
inline
real4
delta
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0.0f
)
;
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
result.x
-=
floor
(
result.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_DELTA
(
result
)
result.y
-=
floor
(
result.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
result.z
-=
floor
(
result.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
...
@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
...
@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*/
*/
__kernel
void
computeInteraction
(
__kernel
void
computeInteraction
(
__global
long*
restrict
forceBuffers,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
long*
restrict
forceBuffers,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
#
endif
#
endif
...
@@ -138,16 +136,14 @@ __kernel void computeInteraction(
...
@@ -138,16 +136,14 @@ __kernel void computeInteraction(
/**
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
*/
__kernel
void
findBlockBounds
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
posq
,
__kernel
void
findBlockBounds
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
numNeighborPairs
)
{
__global
const
real4*
restrict
posq,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
numNeighborPairs
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
NUM_ATOMS
)
{
while
(
base
<
NUM_ATOMS
)
{
real4
pos
=
posq[base]
;
real4
pos
=
posq[base]
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS
(
pos
)
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
#
endif
#
endif
real4
minPos
=
pos
;
real4
minPos
=
pos
;
real4
maxPos
=
pos
;
real4
maxPos
=
pos
;
...
@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
...
@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
pos
=
posq[i]
;
pos
=
posq[i]
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
real4
center
=
0.5f*
(
maxPos+minPos
)
;
real4
center
=
0.5f*
(
maxPos+minPos
)
;
pos.x
-=
floor
((
pos.x-center.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos,
center
)
pos.y
-=
floor
((
pos.y-center.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
pos.z
-=
floor
((
pos.z-center.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
minPos
=
(
real4
)
(
min
(
minPos.x,pos.x
)
,
min
(
minPos.y,pos.y
)
,
min
(
minPos.z,pos.z
)
,
0
)
;
minPos
=
(
real4
)
(
min
(
minPos.x,pos.x
)
,
min
(
minPos.y,pos.y
)
,
min
(
minPos.z,pos.z
)
,
0
)
;
maxPos
=
(
real4
)
(
max
(
maxPos.x,pos.x
)
,
max
(
maxPos.y,pos.y
)
,
max
(
maxPos.z,pos.z
)
,
0
)
;
maxPos
=
(
real4
)
(
max
(
maxPos.x,pos.x
)
,
max
(
maxPos.y,pos.y
)
,
max
(
maxPos.z,pos.z
)
,
0
)
;
...
@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
...
@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
/**
/**
*
Find
a
list
of
neighbors
for
each
atom.
*
Find
a
list
of
neighbors
for
each
atom.
*/
*/
__kernel
void
findNeighbors
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
posq
,
__kernel
void
findNeighbors
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
int2*
restrict
neighborPairs,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
int2*
restrict
neighborPairs,
__global
int*
restrict
numNeighborPairs,
__global
int*
restrict
numNeighborsForAtom,
int
maxNeighborPairs
__global
int*
restrict
numNeighborPairs,
__global
int*
restrict
numNeighborsForAtom,
int
maxNeighborPairs
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
...
@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
...
@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
real4
blockSize2
=
blockBoundingBox[block2]
;
real4
blockSize2
=
blockBoundingBox[block2]
;
real4
blockDelta
=
blockCenter1-blockCenter2
;
real4
blockDelta
=
blockCenter1-blockCenter2
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
blockDelta.x
-=
floor
(
blockDelta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
blockDelta.y
-=
floor
(
blockDelta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
blockDelta.z
-=
floor
(
blockDelta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSize1.x-blockSize2.x
)
;
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSize1.x-blockSize2.x
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSize1.y-blockSize2.y
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSize1.y-blockSize2.y
)
;
...
@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
...
@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
//
Decide
whether
to
include
this
atom
pair
in
the
neighbor
list.
//
Decide
whether
to
include
this
atom
pair
in
the
neighbor
list.
real4
atomDelta
=
delta
(
pos1,
pos2,
periodicBoxSize,
invPeriodicBoxSize
)
;
real4
atomDelta
=
delta
(
pos1,
pos2,
periodicBoxSize,
invPeriodicBoxSize
,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
#
ifdef
USE_CENTRAL_PARTICLE
#
ifdef
USE_CENTRAL_PARTICLE
bool
includeAtom
=
(
atom2
!=
atom1
&&
atom2
<
NUM_ATOMS
&&
atomDelta.w
<
CUTOFF_SQUARED
)
;
bool
includeAtom
=
(
atom2
!=
atom1
&&
atom2
<
NUM_ATOMS
&&
atomDelta.w
<
CUTOFF_SQUARED
)
;
#
else
#
else
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
83ed602e
...
@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups(
...
@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
const
int4*
restrict
groupData
,
real4
periodicBox
Size
,
real4
invP
eriodicBox
Size
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX
,
real4
periodicBox
VecY
,
real4
p
eriodicBox
VecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
...
@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups(
...
@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups(
posq2
=
(
real4
)
(
localData[localIndex].x,
localData[localIndex].y,
localData[localIndex].z,
localData[localIndex].q
)
;
posq2
=
(
real4
)
(
localData[localIndex].x,
localData[localIndex].y,
localData[localIndex].z,
localData[localIndex].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
83ed602e
...
@@ -5,15 +5,15 @@
...
@@ -5,15 +5,15 @@
/**
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
posq
,
__kernel
void
findBlockBounds
(
int
numAtoms,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
rebuildNeighborList,
__global
const
real4*
restrict
posq,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
rebuildNeighborList,
__global
real2*
restrict
sortedBlocks
)
{
__global
real2*
restrict
sortedBlocks
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
numAtoms
)
{
while
(
base
<
numAtoms
)
{
real4
pos
=
posq[base]
;
real4
pos
=
posq[base]
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
pos.xyz
-=
floor
(
pos.xyz*invPeriodicBoxSize.xyz
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS
(
pos
)
#
endif
#
endif
real4
minPos
=
pos
;
real4
minPos
=
pos
;
real4
maxPos
=
pos
;
real4
maxPos
=
pos
;
...
@@ -22,7 +22,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
...
@@ -22,7 +22,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
pos
=
posq[i]
;
pos
=
posq[i]
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
real4
center
=
0.5f*
(
maxPos+minPos
)
;
real4
center
=
0.5f*
(
maxPos+minPos
)
;
pos.xyz
-=
floor
((
pos.xyz-center.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos,
center
)
#
endif
#
endif
minPos
=
min
(
minPos,
pos
)
;
minPos
=
min
(
minPos,
pos
)
;
maxPos
=
max
(
maxPos,
pos
)
;
maxPos
=
max
(
maxPos,
pos
)
;
...
@@ -65,9 +65,10 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
...
@@ -65,9 +65,10 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
}
}
}
}
__kernel
void
findBlocksWithInteractions
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
unsigned
int*
restrict
interactionCount,
__kernel
void
findBlocksWithInteractions
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
__global
int*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
interactingTiles,
__global
unsigned
int*
restrict
interactingAtoms,
unsigned
int
numBlocks,
__global
real2*
restrict
sortedBlocks,
__global
const
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
sortedBlockBoundingBox,
__global
const
real4*
restrict
posq,
unsigned
int
maxTiles,
unsigned
int
startBlockIndex,
unsigned
int
numBlocks,
__global
real2*
restrict
sortedBlocks,
__global
const
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
sortedBlockBoundingBox,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
real4*
restrict
oldPositions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
real4*
restrict
oldPositions,
__global
const
int*
restrict
rebuildNeighborList
)
{
__global
const
int*
restrict
rebuildNeighborList
)
{
...
@@ -108,7 +109,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
...
@@ -108,7 +109,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
//
The
box
is
small
enough
that
we
can
just
translate
all
the
atoms
into
a
single
periodic
//
The
box
is
small
enough
that
we
can
just
translate
all
the
atoms
into
a
single
periodic
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
pos1.xyz
-=
floor
((
pos1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos1,
blockCenterX
)
}
}
#
endif
#
endif
posBuffer[get_local_id
(
0
)
]
=
pos1
;
posBuffer[get_local_id
(
0
)
]
=
pos1
;
...
@@ -136,7 +137,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
...
@@ -136,7 +137,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
real4
blockSizeY
=
(
block2
<
NUM_BLOCKS
?
sortedBlockBoundingBox[block2]
:
(
real4
)
(
0
))
;
real4
blockSizeY
=
(
block2
<
NUM_BLOCKS
?
sortedBlockBoundingBox[block2]
:
(
real4
)
(
0
))
;
real4
blockDelta
=
blockCenterX-blockCenterY
;
real4
blockDelta
=
blockCenterX-blockCenterY
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
blockDelta.xyz
-=
floor
(
blockDelta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
#
endif
#
endif
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSizeX.x-blockSizeY.x
)
;
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSizeX.x-blockSizeY.x
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSizeX.y-blockSizeY.y
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSizeX.y-blockSizeY.y
)
;
...
@@ -166,7 +167,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
...
@@ -166,7 +167,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
real3
pos2
=
posq[atom2].xyz
;
real3
pos2
=
posq[atom2].xyz
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
if
(
singlePeriodicCopy
)
if
(
singlePeriodicCopy
)
pos2.xyz
-=
floor
((
pos2.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos2,
blockCenterX
)
#
endif
#
endif
bool
interacts
=
false
;
bool
interacts
=
false
;
if
(
atom2
<
NUM_ATOMS
)
{
if
(
atom2
<
NUM_ATOMS
)
{
...
@@ -174,7 +175,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
...
@@ -174,7 +175,7 @@ __kernel void findBlocksWithInteractions(real4 periodicBoxSize, real4 invPeriodi
if
(
!singlePeriodicCopy
)
{
if
(
!singlePeriodicCopy
)
{
for
(
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
int
j
=
0
; j < TILE_SIZE; j++) {
real3
delta
=
pos2-posBuffer[warpStart+j]
;
real3
delta
=
pos2-posBuffer[warpStart+j]
;
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
interacts
|= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
interacts
|= (delta.x*delta.x+delta.y*delta.y+delta.z*delta.z < PADDED_CUTOFF_SQUARED);
}
}
}
}
...
...
platforms/opencl/src/kernels/gbsaObc.cl
View file @
83ed602e
...
@@ -22,7 +22,8 @@ __kernel void computeBornSum(
...
@@ -22,7 +22,8 @@ __kernel void computeBornSum(
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
#
else
#
else
unsigned
int
numTiles,
unsigned
int
numTiles,
#
endif
#
endif
...
@@ -58,7 +59,7 @@ __kernel void computeBornSum(
...
@@ -58,7 +59,7 @@ __kernel void computeBornSum(
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
real4
delta
=
(
real4
)
(
localData[tbx+j].x-posq1.x,
localData[tbx+j].y-posq1.y,
localData[tbx+j].z-posq1.z,
0
)
;
real4
delta
=
(
real4
)
(
localData[tbx+j].x-posq1.x,
localData[tbx+j].y-posq1.y,
localData[tbx+j].z-posq1.z,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -105,7 +106,7 @@ __kernel void computeBornSum(
...
@@ -105,7 +106,7 @@ __kernel void computeBornSum(
for
(
j
=
0
; j < TILE_SIZE; j++) {
for
(
j
=
0
; j < TILE_SIZE; j++) {
real4
delta
=
(
real4
)
(
localData[tbx+tj].x-posq1.x,
localData[tbx+tj].y-posq1.y,
localData[tbx+tj].z-posq1.z,
0
)
;
real4
delta
=
(
real4
)
(
localData[tbx+tj].x-posq1.x,
localData[tbx+tj].y-posq1.y,
localData[tbx+tj].z-posq1.z,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -256,10 +257,8 @@ __kernel void computeBornSum(
...
@@ -256,10 +257,8 @@ __kernel void computeBornSum(
// box, then skip having to apply periodic boundary conditions later.
// box, then skip having to apply periodic boundary conditions later.
real4 blockCenterX = blockCenter[x];
real4 blockCenterX = blockCenter[x];
posq1.xyz -= floor((posq1.xyz-blockCenterX.xyz)*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
localData[get_local_id(0)].x -= floor((localData[get_local_id(0)].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[get_local_id(0)], blockCenterX)
localData[get_local_id(0)].y -= floor((localData[get_local_id(0)].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[get_local_id(0)].z -= floor((localData[get_local_id(0)].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
SYNC_WARPS;
SYNC_WARPS;
unsigned int tj = tgx;
unsigned int tj = tgx;
for (j = 0; j < TILE_SIZE; j++) {
for (j = 0; j < TILE_SIZE; j++) {
...
@@ -307,7 +306,7 @@ __kernel void computeBornSum(
...
@@ -307,7 +306,7 @@ __kernel void computeBornSum(
for (j = 0; j < TILE_SIZE; j++) {
for (j = 0; j < TILE_SIZE; j++) {
real4 delta = (real4) (localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z, 0);
real4 delta = (real4) (localData[tbx+tj].x-posq1.x, localData[tbx+tj].y-posq1.y, localData[tbx+tj].z-posq1.z, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[tbx+tj];
int atom2 = atomIndices[tbx+tj];
...
@@ -391,7 +390,8 @@ __kernel void computeGBSAForce1(
...
@@ -391,7 +390,8 @@ __kernel void computeGBSAForce1(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
#else
unsigned int numTiles,
unsigned int numTiles,
#endif
#endif
...
@@ -430,7 +430,7 @@ __kernel void computeGBSAForce1(
...
@@ -430,7 +430,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
real4 posq2 = (real4) (localData[tbx+j].x, localData[tbx+j].y, localData[tbx+j].z, localData[tbx+j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -485,7 +485,7 @@ __kernel void computeGBSAForce1(
...
@@ -485,7 +485,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real4 posq2 = (real4) (localData[tbx+tj].x, localData[tbx+tj].y, localData[tbx+tj].z, localData[tbx+tj].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -645,10 +645,8 @@ __kernel void computeGBSAForce1(
...
@@ -645,10 +645,8 @@ __kernel void computeGBSAForce1(
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
posq1.xyz
-=
floor
((
posq1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1,
blockCenterX
)
localData[get_local_id
(
0
)
].x
-=
floor
((
localData[get_local_id
(
0
)
].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData[get_local_id
(
0
)
],
blockCenterX
)
localData[get_local_id
(
0
)
].y
-=
floor
((
localData[get_local_id
(
0
)
].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
localData[get_local_id
(
0
)
].z
-=
floor
((
localData[get_local_id
(
0
)
].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
SYNC_WARPS
;
SYNC_WARPS
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
; j < TILE_SIZE; j++) {
for
(
j
=
0
; j < TILE_SIZE; j++) {
...
@@ -700,7 +698,7 @@ __kernel void computeGBSAForce1(
...
@@ -700,7 +698,7 @@ __kernel void computeGBSAForce1(
real4
posq2
=
(
real4
)
(
localData[tbx+tj].x,
localData[tbx+tj].y,
localData[tbx+tj].z,
localData[tbx+tj].q
)
;
real4
posq2
=
(
real4
)
(
localData[tbx+tj].x,
localData[tbx+tj].y,
localData[tbx+tj].z,
localData[tbx+tj].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment