Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3db6b8ee
Commit
3db6b8ee
authored
Feb 07, 2015
by
Jason Swails
Browse files
Merge branch 'master' into gbn-chk
parents
5d8e92ff
3a80b0f1
Changes
121
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
804 additions
and
347 deletions
+804
-347
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+16
-34
platforms/cuda/src/kernels/monteCarloBarostat.cu
platforms/cuda/src/kernels/monteCarloBarostat.cu
+5
-8
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+9
-20
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+46
-41
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
+58
-16
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
+61
-1
platforms/cuda/tests/TestCudaEwald.cpp
platforms/cuda/tests/TestCudaEwald.cpp
+52
-0
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
...orms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
+77
-0
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+91
-1
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+75
-19
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+66
-13
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+149
-86
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+47
-48
platforms/opencl/src/kernels/customGBEnergyN2.cl
platforms/opencl/src/kernels/customGBEnergyN2.cl
+8
-9
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+7
-6
platforms/opencl/src/kernels/customGBValueN2.cl
platforms/opencl/src/kernels/customGBValueN2.cl
+8
-9
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+7
-6
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+8
-8
platforms/opencl/src/kernels/customManyParticle.cl
platforms/opencl/src/kernels/customManyParticle.cl
+11
-19
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+3
-3
No files found.
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
3db6b8ee
...
@@ -69,7 +69,8 @@ typedef struct {
...
@@ -69,7 +69,8 @@ typedef struct {
extern
"C"
__global__
void
computeBornSum
(
unsigned
long
long
*
__restrict__
global_bornSum
,
const
real4
*
__restrict__
posq
,
const
float2
*
__restrict__
global_params
,
extern
"C"
__global__
void
computeBornSum
(
unsigned
long
long
*
__restrict__
global_bornSum
,
const
real4
*
__restrict__
posq
,
const
float2
*
__restrict__
global_params
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#else
#else
unsigned
int
numTiles
,
unsigned
int
numTiles
,
#endif
#endif
...
@@ -104,9 +105,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -104,9 +105,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
j
].
x
-
posq1
.
x
,
localData
[
tbx
+
j
].
y
-
posq1
.
y
,
localData
[
tbx
+
j
].
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
localData
[
tbx
+
j
].
x
-
posq1
.
x
,
localData
[
tbx
+
j
].
y
-
posq1
.
y
,
localData
[
tbx
+
j
].
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -151,9 +150,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -151,9 +150,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -291,12 +288,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -291,12 +288,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
// box, then skip having to apply periodic boundary conditions later.
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
real4
blockCenterX
=
blockCenter
[
x
];
posq1
.
x
-=
floor
((
posq1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
posq1
.
y
-=
floor
((
posq1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData
[
threadIdx
.
x
],
blockCenterX
)
posq1
.
z
-=
floor
((
posq1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
...
@@ -342,9 +335,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -342,9 +335,7 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
localData
[
tbx
+
tj
].
x
-
posq1
.
x
,
localData
[
tbx
+
tj
].
y
-
posq1
.
y
,
localData
[
tbx
+
tj
].
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
int
atom2
=
atomIndices
[
tbx
+
tj
];
int
atom2
=
atomIndices
[
tbx
+
tj
];
...
@@ -414,7 +405,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -414,7 +405,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
real
*
__restrict__
global_bornRadii
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
,
#else
#else
unsigned
int
numTiles
,
unsigned
int
numTiles
,
#endif
#endif
...
@@ -451,9 +443,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -451,9 +443,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4
posq2
=
make_real4
(
localData
[
tbx
+
j
].
x
,
localData
[
tbx
+
j
].
y
,
localData
[
tbx
+
j
].
z
,
localData
[
tbx
+
j
].
q
);
real4
posq2
=
make_real4
(
localData
[
tbx
+
j
].
x
,
localData
[
tbx
+
j
].
y
,
localData
[
tbx
+
j
].
z
,
localData
[
tbx
+
j
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -508,9 +498,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -508,9 +498,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4
posq2
=
make_real4
(
localData
[
tbx
+
tj
].
x
,
localData
[
tbx
+
tj
].
y
,
localData
[
tbx
+
tj
].
z
,
localData
[
tbx
+
tj
].
q
);
real4
posq2
=
make_real4
(
localData
[
tbx
+
tj
].
x
,
localData
[
tbx
+
tj
].
y
,
localData
[
tbx
+
tj
].
z
,
localData
[
tbx
+
tj
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -656,12 +644,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -656,12 +644,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
// box, then skip having to apply periodic boundary conditions later.
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
real4
blockCenterX
=
blockCenter
[
x
];
posq1
.
x
-=
floor
((
posq1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
posq1
.
y
-=
floor
((
posq1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData
[
threadIdx
.
x
],
blockCenterX
)
posq1
.
z
-=
floor
((
posq1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
atomIndices
[
tbx
+
tj
];
int
atom2
=
atomIndices
[
tbx
+
tj
];
...
@@ -713,9 +697,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -713,9 +697,7 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
real4
posq2
=
make_real4
(
localData
[
tbx
+
tj
].
x
,
localData
[
tbx
+
tj
].
y
,
localData
[
tbx
+
tj
].
z
,
localData
[
tbx
+
tj
].
q
);
real4
posq2
=
make_real4
(
localData
[
tbx
+
tj
].
x
,
localData
[
tbx
+
tj
].
y
,
localData
[
tbx
+
tj
].
z
,
localData
[
tbx
+
tj
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
...
platforms/cuda/src/kernels/monteCarloBarostat.cu
View file @
3db6b8ee
...
@@ -2,7 +2,8 @@
...
@@ -2,7 +2,8 @@
* Scale the particle positions with each axis independent
* Scale the particle positions with each axis independent
*/
*/
extern
"C"
__global__
void
scalePositions
(
float
scaleX
,
float
scaleY
,
float
scaleZ
,
int
numMolecules
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
scalePositions
(
float
scaleX
,
float
scaleY
,
float
scaleZ
,
int
numMolecules
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
*
__restrict__
posq
,
const
int
*
__restrict__
moleculeAtoms
,
const
int
*
__restrict__
moleculeStartIndex
)
{
const
int
*
__restrict__
moleculeAtoms
,
const
int
*
__restrict__
moleculeStartIndex
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numMolecules
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numMolecules
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
first
=
moleculeStartIndex
[
index
];
int
first
=
moleculeStartIndex
[
index
];
...
@@ -25,13 +26,9 @@ extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scal
...
@@ -25,13 +26,9 @@ extern "C" __global__ void scalePositions(float scaleX, float scaleY, float scal
// Move it into the first periodic box.
// Move it into the first periodic box.
int
xcell
=
(
int
)
floor
(
center
.
x
*
invPeriodicBoxSize
.
x
);
real3
oldCenter
=
center
;
int
ycell
=
(
int
)
floor
(
center
.
y
*
invPeriodicBoxSize
.
y
);
APPLY_PERIODIC_TO_POS
(
center
)
int
zcell
=
(
int
)
floor
(
center
.
z
*
invPeriodicBoxSize
.
z
);
real3
delta
=
make_real3
(
oldCenter
.
x
-
center
.
x
,
oldCenter
.
y
-
center
.
y
,
oldCenter
.
z
-
center
.
z
);
real3
delta
=
make_real3
(
xcell
*
periodicBoxSize
.
x
,
ycell
*
periodicBoxSize
.
y
,
zcell
*
periodicBoxSize
.
z
);
center
.
x
-=
delta
.
x
;
center
.
y
-=
delta
.
y
;
center
.
z
-=
delta
.
z
;
// Now scale the position of the molecule center.
// Now scale the position of the molecule center.
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
3db6b8ee
...
@@ -103,8 +103,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -103,8 +103,9 @@ extern "C" __global__ void computeNonbonded(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
tileflags
*
__restrict__
exclusions
,
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
const
ushort2
*
__restrict__
exclusionTiles
,
unsigned
int
startTileIndex
,
unsigned
int
numTileIndices
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
,
const
int
*
__restrict__
tiles
,
const
unsigned
int
*
__restrict__
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockSize
,
const
unsigned
int
*
__restrict__
interactingAtoms
#endif
#endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
...
@@ -155,9 +156,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -155,9 +156,7 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
@@ -223,9 +222,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -223,9 +222,7 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
@@ -412,17 +409,11 @@ extern "C" __global__ void computeNonbonded(
...
@@ -412,17 +409,11 @@ extern "C" __global__ void computeNonbonded(
// The box is small enough that we can just translate all the atoms into a single periodic
// The box is small enough that we can just translate all the atoms into a single periodic
// box, then skip having to apply periodic boundary conditions later.
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
real4
blockCenterX
=
blockCenter
[
x
];
posq1
.
x
-=
floor
((
posq1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
posq1
.
y
-=
floor
((
posq1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
posq1
.
z
-=
floor
((
posq1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
shflPosq
.
x
-=
floor
((
shflPosq
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
shflPosq
,
blockCenterX
)
shflPosq
.
y
-=
floor
((
shflPosq
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
shflPosq
.
z
-=
floor
((
shflPosq
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#else
#else
localData
[
threadIdx
.
x
].
x
-=
floor
((
localData
[
threadIdx
.
x
].
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData
[
threadIdx
.
x
],
blockCenterX
)
localData
[
threadIdx
.
x
].
y
-=
floor
((
localData
[
threadIdx
.
x
].
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
z
-=
floor
((
localData
[
threadIdx
.
x
].
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
...
@@ -499,9 +490,7 @@ extern "C" __global__ void computeNonbonded(
...
@@ -499,9 +490,7 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
...
platforms/cuda/src/kernels/pme.cu
View file @
3db6b8ee
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// Compute the index of the grid point each atom is associated with.
// Compute the index of the grid point each atom is associated with.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
real4
pos
=
posq
[
i
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
z
*
recipBoxVecZ
.
z
)
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
)
)
*
GRID_SIZE_X
;
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
)
)
*
GRID_SIZE_Y
;
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
)
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
)
)
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
@@ -18,7 +18,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
...
@@ -18,7 +18,7 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
}
}
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -27,15 +27,16 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -27,15 +27,16 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real
charge
=
posq
[
atom
].
w
;
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
);
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
@@ -78,7 +79,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -78,7 +79,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
int
index
=
ybase
+
zindex
;
int
index
=
ybase
+
zindex
;
real
add
=
charge
*
dx
*
dy
*
data
[
iz
].
z
;
real
add
=
pos
.
w
*
dx
*
dy
*
data
[
iz
].
z
;
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
...
@@ -115,9 +116,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
...
@@ -115,9 +116,8 @@ extern "C" __global__ void finishSpreadCharge(long long* __restrict__ originalPm
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
// convolutes on the halfcomplex_pmeGrid, which is of size NX*NY*(NZ/2+1) as F(Q) is conjugate symmetric
extern
"C"
__global__
void
extern
"C"
__global__
void
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
reciprocalConvolution
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
(
GRID_SIZE_Z
/
2
+
1
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
...
@@ -131,9 +131,9 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
...
@@ -131,9 +131,9 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
invPeriodicBoxSize
.
x
;
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
m
y
*
invPeriodicBoxSize
.
y
;
real
mhy
=
m
x
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
m
z
*
invPeriodicBoxSize
.
z
;
real
mhz
=
m
x
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
real
bz
=
pmeBsplineModuliZ
[
kz
];
...
@@ -151,9 +151,8 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
...
@@ -151,9 +151,8 @@ reciprocalConvolution(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict_
extern
"C"
__global__
void
extern
"C"
__global__
void
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
gridEvaluateEnergy
(
real2
*
__restrict__
halfcomplex_pmeGrid
,
real
*
__restrict__
energyBuffer
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliX
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
const
real
*
__restrict__
pmeBsplineModuliY
,
const
real
*
__restrict__
pmeBsplineModuliZ
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
)
{
// R2C stores into a half complex matrix where the last dimension is cut by half
// R2C stores into a half complex matrix where the last dimension is cut by half
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
unsigned
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
const
real
recipScaleFactor
=
RECIP
(
M_PI
*
periodicBoxSize
.
x
*
periodicBoxSize
.
y
*
periodicBoxSize
.
z
);
...
@@ -168,9 +167,9 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
...
@@ -168,9 +167,9 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
mx
=
(
kx
<
(
GRID_SIZE_X
+
1
)
/
2
)
?
kx
:
(
kx
-
GRID_SIZE_X
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
my
=
(
ky
<
(
GRID_SIZE_Y
+
1
)
/
2
)
?
ky
:
(
ky
-
GRID_SIZE_Y
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
int
mz
=
(
kz
<
(
GRID_SIZE_Z
+
1
)
/
2
)
?
kz
:
(
kz
-
GRID_SIZE_Z
);
real
mhx
=
mx
*
invPeriodicBoxSize
.
x
;
real
mhx
=
mx
*
recipBoxVecX
.
x
;
real
mhy
=
m
y
*
invPeriodicBoxSize
.
y
;
real
mhy
=
m
x
*
recipBoxVecY
.
x
+
my
*
recipBoxVecY
.
y
;
real
mhz
=
m
z
*
invPeriodicBoxSize
.
z
;
real
mhz
=
m
x
*
recipBoxVecZ
.
x
+
my
*
recipBoxVecZ
.
y
+
mz
*
recipBoxVecZ
.
z
;
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
m2
=
mhx
*
mhx
+
mhy
*
mhy
+
mhz
*
mhz
;
real
bx
=
pmeBsplineModuliX
[
kx
];
real
bx
=
pmeBsplineModuliX
[
kx
];
real
by
=
pmeBsplineModuliY
[
ky
];
real
by
=
pmeBsplineModuliY
[
ky
];
...
@@ -194,7 +193,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
...
@@ -194,7 +193,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, real* __restrict__ e
extern
"C"
__global__
extern
"C"
__global__
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real
4
invPeriodicBoxSize
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real
3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -206,12 +205,15 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -206,12 +205,15 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real3
force
=
make_real3
(
0
);
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
periodicBoxSize
.
x
;
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
pos
.
y
-=
floor
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
periodicBoxSize
.
y
;
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
periodicBoxSize
.
z
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
((
pos
.
x
*
invPeriodicBoxSize
.
x
)
*
GRID_SIZE_X
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
(
pos
.
y
*
invPeriodicBoxSize
.
y
)
*
GRID_SIZE_Y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
(
pos
.
z
*
invPeriodicBoxSize
.
z
)
*
GRID_SIZE_Z
);
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
int3
gridIndex
=
make_int3
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
...
@@ -266,9 +268,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -266,9 +268,12 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
}
}
}
}
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
real
q
=
pos
.
w
*
EPSILON_FACTOR
;
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
x
*
GRID_SIZE_X
*
invPeriodicBoxSize
.
x
*
0x100000000
));
real
forceX
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecX
.
x
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
y
*
GRID_SIZE_Y
*
invPeriodicBoxSize
.
y
*
0x100000000
));
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
-
q
*
force
.
z
*
GRID_SIZE_Z
*
invPeriodicBoxSize
.
z
*
0x100000000
));
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
forceBuffers
[
atom
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceX
*
0x100000000
));
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceY
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
forceZ
*
0x100000000
));
}
}
}
}
...
...
platforms/cuda/tests/TestCudaCustomManyParticleForce.cpp
View file @
3db6b8ee
...
@@ -55,7 +55,17 @@ const double TOL = 1e-5;
...
@@ -55,7 +55,17 @@ const double TOL = 1e-5;
CudaPlatform
platform
;
CudaPlatform
platform
;
void
validateAxilrodTeller
(
CustomManyParticleForce
*
force
,
const
vector
<
Vec3
>&
positions
,
const
vector
<
const
int
*>&
expectedSets
,
double
boxSize
)
{
Vec3
computeDelta
(
const
Vec3
&
pos1
,
const
Vec3
&
pos2
,
bool
periodic
,
const
Vec3
*
periodicBoxVectors
)
{
Vec3
diff
=
pos1
-
pos2
;
if
(
periodic
)
{
diff
-=
periodicBoxVectors
[
2
]
*
floor
(
diff
[
2
]
/
periodicBoxVectors
[
2
][
2
]
+
0.5
);
diff
-=
periodicBoxVectors
[
1
]
*
floor
(
diff
[
1
]
/
periodicBoxVectors
[
1
][
1
]
+
0.5
);
diff
-=
periodicBoxVectors
[
0
]
*
floor
(
diff
[
0
]
/
periodicBoxVectors
[
0
][
0
]
+
0.5
);
}
return
diff
;
}
void
validateAxilrodTeller
(
CustomManyParticleForce
*
force
,
const
vector
<
Vec3
>&
positions
,
const
vector
<
const
int
*>&
expectedSets
,
double
boxSize
,
bool
triclinic
)
{
// Create a System and Context.
// Create a System and Context.
int
numParticles
=
force
->
getNumParticles
();
int
numParticles
=
force
->
getNumParticles
();
...
@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
...
@@ -63,7 +73,18 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
System
system
;
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
Vec3
boxVectors
[
3
];
if
(
triclinic
)
{
boxVectors
[
0
]
=
Vec3
(
boxSize
,
0
,
0
);
boxVectors
[
1
]
=
Vec3
(
0.2
*
boxSize
,
boxSize
,
0
);
boxVectors
[
2
]
=
Vec3
(
-
0.3
*
boxSize
,
-
0.1
*
boxSize
,
boxSize
);
}
else
{
boxVectors
[
0
]
=
Vec3
(
boxSize
,
0
,
0
);
boxVectors
[
1
]
=
Vec3
(
0
,
boxSize
,
0
);
boxVectors
[
2
]
=
Vec3
(
0
,
0
,
boxSize
);
}
system
.
setDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
system
.
addForce
(
force
);
system
.
addForce
(
force
);
VerletIntegrator
integrator
(
0.001
);
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
Context
context
(
system
,
integrator
,
platform
);
...
@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
...
@@ -74,20 +95,14 @@ void validateAxilrodTeller(CustomManyParticleForce* force, const vector<Vec3>& p
// See if the energy matches the expected value.
// See if the energy matches the expected value.
double
expectedEnergy
=
0
;
double
expectedEnergy
=
0
;
bool
periodic
=
(
nonbondedMethod
==
CustomManyParticleForce
::
CutoffPeriodic
);
for
(
int
i
=
0
;
i
<
(
int
)
expectedSets
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
expectedSets
.
size
();
i
++
)
{
int
p1
=
expectedSets
[
i
][
0
];
int
p1
=
expectedSets
[
i
][
0
];
int
p2
=
expectedSets
[
i
][
1
];
int
p2
=
expectedSets
[
i
][
1
];
int
p3
=
expectedSets
[
i
][
2
];
int
p3
=
expectedSets
[
i
][
2
];
Vec3
d12
=
positions
[
p2
]
-
positions
[
p1
];
Vec3
d12
=
computeDelta
(
positions
[
p2
],
positions
[
p1
],
periodic
,
boxVectors
);
Vec3
d13
=
positions
[
p3
]
-
positions
[
p1
];
Vec3
d13
=
computeDelta
(
positions
[
p3
],
positions
[
p1
],
periodic
,
boxVectors
);
Vec3
d23
=
positions
[
p3
]
-
positions
[
p2
];
Vec3
d23
=
computeDelta
(
positions
[
p3
],
positions
[
p2
],
periodic
,
boxVectors
);
if
(
nonbondedMethod
==
CustomManyParticleForce
::
CutoffPeriodic
)
{
for
(
int
j
=
0
;
j
<
3
;
j
++
)
{
d12
[
j
]
-=
floor
(
d12
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
d13
[
j
]
-=
floor
(
d13
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
d23
[
j
]
-=
floor
(
d23
[
j
]
/
boxSize
+
0.5
f
)
*
boxSize
;
}
}
double
r12
=
sqrt
(
d12
.
dot
(
d12
));
double
r12
=
sqrt
(
d12
.
dot
(
d12
));
double
r13
=
sqrt
(
d13
.
dot
(
d13
));
double
r13
=
sqrt
(
d13
.
dot
(
d13
));
double
r23
=
sqrt
(
d23
.
dot
(
d23
));
double
r23
=
sqrt
(
d23
.
dot
(
d23
));
...
@@ -210,7 +225,7 @@ void testNoCutoff() {
...
@@ -210,7 +225,7 @@ void testNoCutoff() {
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
int
sets
[
4
][
3
]
=
{{
0
,
1
,
2
},
{
1
,
2
,
3
},
{
2
,
3
,
0
},
{
3
,
0
,
1
}};
int
sets
[
4
][
3
]
=
{{
0
,
1
,
2
},
{
1
,
2
,
3
},
{
2
,
3
,
0
},
{
3
,
0
,
1
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
}
void
testCutoff
()
{
void
testCutoff
()
{
...
@@ -235,7 +250,7 @@ void testCutoff() {
...
@@ -235,7 +250,7 @@ void testCutoff() {
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
int
sets
[
7
][
3
]
=
{{
0
,
1
,
2
},
{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
2
,
4
},
{
1
,
3
,
4
}};
int
sets
[
7
][
3
]
=
{{
0
,
1
,
2
},
{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
2
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
7
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
7
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
}
void
testPeriodic
()
{
void
testPeriodic
()
{
...
@@ -261,7 +276,33 @@ void testPeriodic() {
...
@@ -261,7 +276,33 @@ void testPeriodic() {
double
boxSize
=
2.1
;
double
boxSize
=
2.1
;
int
sets
[
5
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
int
sets
[
5
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
2
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
,
false
);
}
void
testTriclinic
()
{
CustomManyParticleForce
*
force
=
new
CustomManyParticleForce
(
3
,
"C*(1+3*cos(theta1)*cos(theta2)*cos(theta3))/(r12*r13*r23)^3;"
"theta1=angle(p1,p2,p3); theta2=angle(p2,p3,p1); theta3=angle(p3,p1,p2);"
"r12=distance(p1,p2); r13=distance(p1,p3); r23=distance(p2,p3)"
);
force
->
addGlobalParameter
(
"C"
,
1.5
);
force
->
setNonbondedMethod
(
CustomManyParticleForce
::
CutoffPeriodic
);
force
->
setCutoffDistance
(
1.05
);
vector
<
double
>
params
;
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
force
->
addParticle
(
params
);
vector
<
Vec3
>
positions
;
positions
.
push_back
(
Vec3
(
0
,
0
,
0
));
positions
.
push_back
(
Vec3
(
1
,
0
,
0
));
positions
.
push_back
(
Vec3
(
0
,
1.1
,
0.3
));
positions
.
push_back
(
Vec3
(
0.4
,
0
,
-
0.8
));
positions
.
push_back
(
Vec3
(
0.2
,
0.5
,
-
0.1
));
double
boxSize
=
2.1
;
int
sets
[
4
][
3
]
=
{{
0
,
1
,
3
},
{
0
,
1
,
4
},
{
0
,
3
,
4
},
{
1
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
4
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
boxSize
,
true
);
}
}
void
testExclusions
()
{
void
testExclusions
()
{
...
@@ -286,7 +327,7 @@ void testExclusions() {
...
@@ -286,7 +327,7 @@ void testExclusions() {
force
->
addExclusion
(
0
,
3
);
force
->
addExclusion
(
0
,
3
);
int
sets
[
5
][
3
]
=
{{
0
,
1
,
4
},
{
1
,
2
,
3
},
{
1
,
2
,
4
},
{
1
,
3
,
4
},
{
2
,
3
,
4
}};
int
sets
[
5
][
3
]
=
{{
0
,
1
,
4
},
{
1
,
2
,
3
},
{
1
,
2
,
4
},
{
1
,
3
,
4
},
{
2
,
3
,
4
}};
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
vector
<
const
int
*>
expectedSets
(
&
sets
[
0
],
&
sets
[
5
]);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
);
validateAxilrodTeller
(
force
,
positions
,
expectedSets
,
2.0
,
false
);
}
}
void
testAllTerms
()
{
void
testAllTerms
()
{
...
@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
...
@@ -672,6 +713,7 @@ int main(int argc, char* argv[]) {
testNoCutoff
();
testNoCutoff
();
testCutoff
();
testCutoff
();
testPeriodic
();
testPeriodic
();
testTriclinic
();
testExclusions
();
testExclusions
();
testAllTerms
();
testAllTerms
();
testParameters
();
testParameters
();
...
...
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
View file @
3db6b8ee
...
@@ -7,7 +7,7 @@
...
@@ -7,7 +7,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
4
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -261,6 +261,65 @@ void testPeriodic() {
...
@@ -261,6 +261,65 @@ void testPeriodic() {
ASSERT_EQUAL_TOL
(
1.9
+
1
+
0.9
,
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_TOL
(
1.9
+
1
+
0.9
,
state
.
getPotentialEnergy
(),
TOL
);
}
}
void
testTriclinic
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
Vec3
a
(
3.1
,
0
,
0
);
Vec3
b
(
0.4
,
3.5
,
0
);
Vec3
c
(
-
0.1
,
-
0.5
,
4.0
);
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"r"
);
nonbonded
->
addParticle
(
vector
<
double
>
());
nonbonded
->
addParticle
(
vector
<
double
>
());
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
const
double
cutoff
=
1.5
;
nonbonded
->
setCutoffDistance
(
cutoff
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
iteration
=
0
;
iteration
<
50
;
iteration
++
)
{
// Generate random positions for the two particles.
positions
[
0
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
positions
[
1
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
context
.
setPositions
(
positions
);
// Loop over all possible periodic copies and find the nearest one.
Vec3
delta
;
double
distance2
=
100.0
;
for
(
int
i
=
-
1
;
i
<
2
;
i
++
)
for
(
int
j
=
-
1
;
j
<
2
;
j
++
)
for
(
int
k
=
-
1
;
k
<
2
;
k
++
)
{
Vec3
d
=
positions
[
1
]
-
positions
[
0
]
+
a
*
i
+
b
*
j
+
c
*
k
;
if
(
d
.
dot
(
d
)
<
distance2
)
{
delta
=
d
;
distance2
=
d
.
dot
(
d
);
}
}
double
distance
=
sqrt
(
distance2
);
// See if the force and energy are correct.
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
if
(
distance
>=
cutoff
)
{
ASSERT_EQUAL
(
0.0
,
state
.
getPotentialEnergy
());
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
0
],
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
1
],
0
);
}
else
{
const
Vec3
force
=
delta
/
sqrt
(
delta
.
dot
(
delta
));
ASSERT_EQUAL_TOL
(
distance
,
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_VEC
(
force
,
state
.
getForces
()[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
-
force
,
state
.
getForces
()[
1
],
TOL
);
}
}
}
void
testContinuous1DFunction
()
{
void
testContinuous1DFunction
()
{
System
system
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
...
@@ -925,6 +984,7 @@ int main(int argc, char* argv[]) {
...
@@ -925,6 +984,7 @@ int main(int argc, char* argv[]) {
testExclusions
();
testExclusions
();
testCutoff
();
testCutoff
();
testPeriodic
();
testPeriodic
();
testTriclinic
();
testContinuous1DFunction
();
testContinuous1DFunction
();
testContinuous2DFunction
();
testContinuous2DFunction
();
testContinuous3DFunction
();
testContinuous3DFunction
();
...
...
platforms/cuda/tests/TestCudaEwald.cpp
View file @
3db6b8ee
...
@@ -201,6 +201,57 @@ void testEwald2Ions() {
...
@@ -201,6 +201,57 @@ void testEwald2Ions() {
ASSERT_EQUAL_TOL
(
-
217.276
,
state
.
getPotentialEnergy
(),
0.01
/*10*TOL*/
);
ASSERT_EQUAL_TOL
(
-
217.276
,
state
.
getPotentialEnergy
(),
0.01
/*10*TOL*/
);
}
}
void
testTriclinic
()
{
// Create a triclinic box containing eight particles.
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
2.5
,
0
,
0
),
Vec3
(
0.5
,
3.0
,
0
),
Vec3
(
0.7
,
0.9
,
3.5
));
for
(
int
i
=
0
;
i
<
8
;
i
++
)
system
.
addParticle
(
1.0
);
NonbondedForce
*
force
=
new
NonbondedForce
();
system
.
addForce
(
force
);
force
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
force
->
setCutoffDistance
(
1.0
);
force
->
setPMEParameters
(
3.45891
,
32
,
40
,
48
);
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
-
1
,
0.440104
,
0.4184
);
// Cl parameters
for
(
int
i
=
0
;
i
<
4
;
i
++
)
force
->
addParticle
(
1
,
0.332840
,
0.0115897
);
// Na parameters
vector
<
Vec3
>
positions
(
8
);
positions
[
0
]
=
Vec3
(
1.744
,
2.788
,
3.162
);
positions
[
1
]
=
Vec3
(
1.048
,
0.762
,
2.340
);
positions
[
2
]
=
Vec3
(
2.489
,
1.570
,
2.817
);
positions
[
3
]
=
Vec3
(
1.027
,
1.893
,
3.271
);
positions
[
4
]
=
Vec3
(
0.937
,
0.825
,
0.009
);
positions
[
5
]
=
Vec3
(
2.290
,
1.887
,
3.352
);
positions
[
6
]
=
Vec3
(
1.266
,
1.111
,
2.894
);
positions
[
7
]
=
Vec3
(
0.933
,
1.862
,
3.490
);
// Compute the forces and energy.
VerletIntegrator
integ
(
0.001
);
Context
context
(
system
,
integ
,
platform
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
// Compare them to values computed by Gromacs.
double
expectedEnergy
=
-
963.370
;
vector
<
Vec3
>
expectedForce
(
8
);
expectedForce
[
0
]
=
Vec3
(
4.25253e+01
,
-
1.23503e+02
,
1.22139e+02
);
expectedForce
[
1
]
=
Vec3
(
9.74752e+01
,
1.68213e+02
,
1.93169e+02
);
expectedForce
[
2
]
=
Vec3
(
-
1.50348e+02
,
1.29165e+02
,
3.70435e+02
);
expectedForce
[
3
]
=
Vec3
(
9.18644e+02
,
-
3.52571e+00
,
-
1.34772e+03
);
expectedForce
[
4
]
=
Vec3
(
-
1.61193e+02
,
9.01528e+01
,
-
7.12904e+01
);
expectedForce
[
5
]
=
Vec3
(
2.82630e+02
,
2.78029e+01
,
-
3.72864e+02
);
expectedForce
[
6
]
=
Vec3
(
-
1.47454e+02
,
-
2.14448e+02
,
-
3.55789e+02
);
expectedForce
[
7
]
=
Vec3
(
-
8.82195e+02
,
-
7.39132e+01
,
1.46202e+03
);
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
ASSERT_EQUAL_VEC
(
expectedForce
[
i
],
state
.
getForces
()[
i
],
1e-4
);
}
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
1e-4
);
}
void
testErrorTolerance
(
NonbondedForce
::
NonbondedMethod
method
)
{
void
testErrorTolerance
(
NonbondedForce
::
NonbondedMethod
method
)
{
// Create a cloud of random point charges.
// Create a cloud of random point charges.
...
@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
...
@@ -307,6 +358,7 @@ int main(int argc, char* argv[]) {
testEwaldPME
(
false
);
testEwaldPME
(
false
);
testEwaldPME
(
true
);
testEwaldPME
(
true
);
// testEwald2Ions();
// testEwald2Ions();
testTriclinic
();
testErrorTolerance
(
NonbondedForce
::
Ewald
);
testErrorTolerance
(
NonbondedForce
::
Ewald
);
testErrorTolerance
(
NonbondedForce
::
PME
);
testErrorTolerance
(
NonbondedForce
::
PME
);
testPMEParameters
();
testPMEParameters
();
...
...
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
View file @
3db6b8ee
...
@@ -236,6 +236,82 @@ void testRandomSeed() {
...
@@ -236,6 +236,82 @@ void testRandomSeed() {
}
}
}
}
void
testTriclinic
()
{
const
int
numParticles
=
64
;
const
int
frequency
=
10
;
const
int
steps
=
1000
;
const
double
pressure
=
1.5
;
const
double
pressureInMD
=
pressure
*
(
AVOGADRO
*
1e-25
);
// pressure in kJ/mol/nm^3
const
double
temperature
=
300.0
;
const
double
initialVolume
=
numParticles
*
BOLTZ
*
temperature
/
pressureInMD
;
const
double
initialLength
=
std
::
pow
(
initialVolume
,
1.0
/
3.0
);
// Create a gas of noninteracting particles.
System
system
;
Vec3
initialBox
[
3
];
initialBox
[
0
]
=
Vec3
(
initialLength
,
0
,
0
);
initialBox
[
1
]
=
Vec3
(
0.2
*
initialLength
,
initialLength
,
0
);
initialBox
[
2
]
=
Vec3
(
0.1
*
initialLength
,
0.3
*
initialLength
,
initialLength
);
system
.
setDefaultPeriodicBoxVectors
(
initialBox
[
0
],
initialBox
[
1
],
initialBox
[
2
]);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
1.0
);
positions
[
i
]
=
Vec3
(
initialLength
*
genrand_real2
(
sfmt
),
initialLength
*
genrand_real2
(
sfmt
),
initialLength
*
genrand_real2
(
sfmt
));
}
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temperature
,
true
,
true
,
true
,
frequency
);
system
.
addForce
(
barostat
);
// Run a simulation
LangevinIntegrator
integrator
(
temperature
,
0.1
,
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
// Let it equilibrate.
integrator
.
step
(
10000
);
// Now run it for a while and see if the volume is correct.
double
volume
=
0.0
;
for
(
int
j
=
0
;
j
<
steps
;
++
j
)
{
Vec3
box
[
3
];
context
.
getState
(
0
).
getPeriodicBoxVectors
(
box
[
0
],
box
[
1
],
box
[
2
]);
volume
+=
box
[
0
][
0
]
*
box
[
1
][
1
]
*
box
[
2
][
2
];
integrator
.
step
(
frequency
);
}
volume
/=
steps
;
double
expected
=
(
numParticles
+
1
)
*
BOLTZ
*
temperature
/
pressureInMD
;
ASSERT_USUALLY_EQUAL_TOL
(
expected
,
volume
,
3
/
std
::
sqrt
((
double
)
steps
));
// Make sure the box vectors have been scaled consistently.
State
state
=
context
.
getState
(
State
::
Positions
);
Vec3
box
[
3
];
state
.
getPeriodicBoxVectors
(
box
[
0
],
box
[
1
],
box
[
2
]);
double
xscale
=
box
[
2
][
0
]
/
(
0.1
*
initialLength
);
double
yscale
=
box
[
2
][
1
]
/
(
0.3
*
initialLength
);
double
zscale
=
box
[
2
][
2
]
/
(
1.0
*
initialLength
);
for
(
int
i
=
0
;
i
<
3
;
i
++
)
{
ASSERT_EQUAL_VEC
(
Vec3
(
xscale
*
initialBox
[
i
][
0
],
yscale
*
initialBox
[
i
][
1
],
zscale
*
initialBox
[
i
][
2
]),
box
[
i
],
1e-5
);
}
// The barostat should have put all particles inside the first periodic box. One integration step
// has happened since then, so they may have moved slightly outside it.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
Vec3
pos
=
state
.
getPositions
()[
i
];
ASSERT
(
pos
[
2
]
/
box
[
2
][
2
]
>
-
1
&&
pos
[
2
]
/
box
[
2
][
2
]
<
2
);
pos
-=
box
[
2
]
*
floor
(
pos
[
2
]
/
box
[
2
][
2
]);
ASSERT
(
pos
[
1
]
/
box
[
1
][
1
]
>
-
1
&&
pos
[
1
]
/
box
[
1
][
1
]
<
2
);
pos
-=
box
[
1
]
*
floor
(
pos
[
1
]
/
box
[
1
][
1
]);
ASSERT
(
pos
[
0
]
/
box
[
0
][
0
]
>
-
1
&&
pos
[
0
]
/
box
[
0
][
0
]
<
2
);
}
}
/**
/**
* Run a constant pressure simulation on an anisotropic Einstein crystal
* Run a constant pressure simulation on an anisotropic Einstein crystal
* using isotropic and anisotropic barostats. There are a total of 15 simulations:
* using isotropic and anisotropic barostats. There are a total of 15 simulations:
...
@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
...
@@ -389,6 +465,7 @@ int main(int argc, char* argv[]) {
testIdealGasAxis
(
1
);
testIdealGasAxis
(
1
);
testIdealGasAxis
(
2
);
testIdealGasAxis
(
2
);
testRandomSeed
();
testRandomSeed
();
testTriclinic
();
//testEinsteinCrystal();
//testEinsteinCrystal();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
3db6b8ee
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2008-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2008-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -355,6 +355,67 @@ void testPeriodic() {
...
@@ -355,6 +355,67 @@ void testPeriodic() {
ASSERT_EQUAL_TOL
(
2
*
ONE_4PI_EPS0
*
(
1.0
)
*
(
1.0
+
krf
*
1.0
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_TOL
(
2
*
ONE_4PI_EPS0
*
(
1.0
)
*
(
1.0
+
krf
*
1.0
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
}
}
void
testTriclinic
()
{
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
Vec3
a
(
3.1
,
0
,
0
);
Vec3
b
(
0.4
,
3.5
,
0
);
Vec3
c
(
-
0.1
,
-
0.5
,
4.0
);
system
.
setDefaultPeriodicBoxVectors
(
a
,
b
,
c
);
VerletIntegrator
integrator
(
0.01
);
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
addParticle
(
1.0
,
1
,
0
);
nonbonded
->
addParticle
(
1.0
,
1
,
0
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
const
double
cutoff
=
1.5
;
nonbonded
->
setCutoffDistance
(
cutoff
);
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
const
double
eps
=
78.3
;
const
double
krf
=
(
1.0
/
(
cutoff
*
cutoff
*
cutoff
))
*
(
eps
-
1.0
)
/
(
2.0
*
eps
+
1.0
);
const
double
crf
=
(
1.0
/
cutoff
)
*
(
3.0
*
eps
)
/
(
2.0
*
eps
+
1.0
);
for
(
int
iteration
=
0
;
iteration
<
50
;
iteration
++
)
{
// Generate random positions for the two particles.
positions
[
0
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
positions
[
1
]
=
a
*
genrand_real2
(
sfmt
)
+
b
*
genrand_real2
(
sfmt
)
+
c
*
genrand_real2
(
sfmt
);
context
.
setPositions
(
positions
);
// Loop over all possible periodic copies and find the nearest one.
Vec3
delta
;
double
distance2
=
100.0
;
for
(
int
i
=
-
1
;
i
<
2
;
i
++
)
for
(
int
j
=
-
1
;
j
<
2
;
j
++
)
for
(
int
k
=
-
1
;
k
<
2
;
k
++
)
{
Vec3
d
=
positions
[
1
]
-
positions
[
0
]
+
a
*
i
+
b
*
j
+
c
*
k
;
if
(
d
.
dot
(
d
)
<
distance2
)
{
delta
=
d
;
distance2
=
d
.
dot
(
d
);
}
}
double
distance
=
sqrt
(
distance2
);
// See if the force and energy are correct.
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
if
(
distance
>=
cutoff
)
{
ASSERT_EQUAL
(
0.0
,
state
.
getPotentialEnergy
());
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
0
],
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
0
),
state
.
getForces
()[
1
],
0
);
}
else
{
const
Vec3
force
=
delta
*
ONE_4PI_EPS0
*
(
-
1.0
/
(
distance
*
distance
*
distance
)
+
2.0
*
krf
);
ASSERT_EQUAL_TOL
(
ONE_4PI_EPS0
*
(
1.0
/
distance
+
krf
*
distance
*
distance
-
crf
),
state
.
getPotentialEnergy
(),
TOL
);
ASSERT_EQUAL_VEC
(
force
,
state
.
getForces
()[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
-
force
,
state
.
getForces
()[
1
],
TOL
);
}
}
}
void
testLargeSystem
()
{
void
testLargeSystem
()
{
const
int
numMolecules
=
600
;
const
int
numMolecules
=
600
;
...
@@ -862,6 +923,33 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
...
@@ -862,6 +923,33 @@ void testSwitchingFunction(NonbondedForce::NonbondedMethod method) {
}
}
}
}
void
testReordering
()
{
// Check that reordering of atoms doesn't alter their positions.
const
int
numParticles
=
200
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
6
,
0
,
0
),
Vec3
(
2.1
,
6
,
0
),
Vec3
(
-
1.5
,
-
0.5
,
6
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
0.0
,
0.0
,
0.0
);
positions
.
push_back
(
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
)
*
20
);
}
VerletIntegrator
integrator
(
0.001
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
integrator
.
step
(
1
);
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL_VEC
(
positions
[
i
],
state
.
getPositions
()[
i
],
1e-6
);
}
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
...
@@ -872,6 +960,7 @@ int main(int argc, char* argv[]) {
...
@@ -872,6 +960,7 @@ int main(int argc, char* argv[]) {
testCutoff
();
testCutoff
();
testCutoff14
();
testCutoff14
();
testPeriodic
();
testPeriodic
();
testTriclinic
();
testLargeSystem
();
testLargeSystem
();
//testBlockInteractions(false);
//testBlockInteractions(false);
//testBlockInteractions(true);
//testBlockInteractions(true);
...
@@ -882,6 +971,7 @@ int main(int argc, char* argv[]) {
...
@@ -882,6 +971,7 @@ int main(int argc, char* argv[]) {
testParallelComputation
(
NonbondedForce
::
PME
);
testParallelComputation
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
CutoffNonPeriodic
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
testSwitchingFunction
(
NonbondedForce
::
PME
);
testReordering
();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/opencl/include/OpenCLContext.h
View file @
3db6b8ee
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -448,36 +448,65 @@ public:
...
@@ -448,36 +448,65 @@ public:
/**
/**
* Get whether the device being used supports 64 bit atomic operations on global memory.
* Get whether the device being used supports 64 bit atomic operations on global memory.
*/
*/
bool
getSupports64BitGlobalAtomics
()
{
bool
getSupports64BitGlobalAtomics
()
const
{
return
supports64BitGlobalAtomics
;
return
supports64BitGlobalAtomics
;
}
}
/**
/**
* Get whether the device being used supports double precision math.
* Get whether the device being used supports double precision math.
*/
*/
bool
getSupportsDoublePrecision
()
{
bool
getSupportsDoublePrecision
()
const
{
return
supportsDoublePrecision
;
return
supportsDoublePrecision
;
}
}
/**
/**
* Get whether double precision is being used.
* Get whether double precision is being used.
*/
*/
bool
getUseDoublePrecision
()
{
bool
getUseDoublePrecision
()
const
{
return
useDoublePrecision
;
return
useDoublePrecision
;
}
}
/**
/**
* Get whether mixed precision is being used.
* Get whether mixed precision is being used.
*/
*/
bool
getUseMixedPrecision
()
{
bool
getUseMixedPrecision
()
const
{
return
useMixedPrecision
;
return
useMixedPrecision
;
}
}
/**
* Get whether the periodic box is triclinic.
*/
bool
getBoxIsTriclinic
()
const
{
return
boxIsTriclinic
;
}
/**
/**
* Convert a number to a string in a format suitable for including in a kernel.
* Convert a number to a string in a format suitable for including in a kernel.
* This takes into account whether the context uses single or double precision.
* This takes into account whether the context uses single or double precision.
*/
*/
std
::
string
doubleToString
(
double
value
);
std
::
string
doubleToString
(
double
value
)
const
;
/**
/**
* Convert a number to a string in a format suitable for including in a kernel.
* Convert a number to a string in a format suitable for including in a kernel.
*/
*/
std
::
string
intToString
(
int
value
);
std
::
string
intToString
(
int
value
)
const
;
/**
* Get the vectors defining the periodic box.
*/
void
getPeriodicBoxVectors
(
Vec3
&
a
,
Vec3
&
b
,
Vec3
&
c
)
const
{
a
=
Vec3
(
periodicBoxVecXDouble
.
x
,
periodicBoxVecXDouble
.
y
,
periodicBoxVecXDouble
.
z
);
b
=
Vec3
(
periodicBoxVecYDouble
.
x
,
periodicBoxVecYDouble
.
y
,
periodicBoxVecYDouble
.
z
);
c
=
Vec3
(
periodicBoxVecZDouble
.
x
,
periodicBoxVecZDouble
.
y
,
periodicBoxVecZDouble
.
z
);
}
/**
* Set the vectors defining the periodic box.
*/
void
setPeriodicBoxVectors
(
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
)
{
periodicBoxVecX
=
mm_float4
((
float
)
a
[
0
],
(
float
)
a
[
1
],
(
float
)
a
[
2
],
0.0
f
);
periodicBoxVecY
=
mm_float4
((
float
)
b
[
0
],
(
float
)
b
[
1
],
(
float
)
b
[
2
],
0.0
f
);
periodicBoxVecZ
=
mm_float4
((
float
)
c
[
0
],
(
float
)
c
[
1
],
(
float
)
c
[
2
],
0.0
f
);
periodicBoxVecXDouble
=
mm_double4
(
a
[
0
],
a
[
1
],
a
[
2
],
0.0
);
periodicBoxVecYDouble
=
mm_double4
(
b
[
0
],
b
[
1
],
b
[
2
],
0.0
);
periodicBoxVecZDouble
=
mm_double4
(
c
[
0
],
c
[
1
],
c
[
2
],
0.0
);
periodicBoxSize
=
mm_float4
((
float
)
a
[
0
],
(
float
)
b
[
1
],
(
float
)
c
[
2
],
0.0
f
);
invPeriodicBoxSize
=
mm_float4
(
1.0
f
/
(
float
)
a
[
0
],
1.0
f
/
(
float
)
b
[
1
],
1.0
f
/
(
float
)
c
[
2
],
0.0
f
);
periodicBoxSizeDouble
=
mm_double4
(
a
[
0
],
b
[
1
],
c
[
2
],
0.0
);
invPeriodicBoxSizeDouble
=
mm_double4
(
1.0
/
a
[
0
],
1.0
/
b
[
1
],
1.0
/
c
[
2
],
0.0
);
}
/**
/**
* Get the size of the periodic box.
* Get the size of the periodic box.
*/
*/
...
@@ -490,15 +519,6 @@ public:
...
@@ -490,15 +519,6 @@ public:
mm_double4
getPeriodicBoxSizeDouble
()
const
{
mm_double4
getPeriodicBoxSizeDouble
()
const
{
return
periodicBoxSizeDouble
;
return
periodicBoxSizeDouble
;
}
}
/**
* Set the size of the periodic box.
*/
void
setPeriodicBoxSize
(
double
xsize
,
double
ysize
,
double
zsize
)
{
periodicBoxSize
=
mm_float4
((
float
)
xsize
,
(
float
)
ysize
,
(
float
)
zsize
,
0
);
invPeriodicBoxSize
=
mm_float4
((
float
)
(
1.0
/
xsize
),
(
float
)
(
1.0
/
ysize
),
(
float
)
(
1.0
/
zsize
),
0
);
periodicBoxSizeDouble
=
mm_double4
(
xsize
,
ysize
,
zsize
,
0
);
invPeriodicBoxSizeDouble
=
mm_double4
(
1.0
/
xsize
,
1.0
/
ysize
,
1.0
/
zsize
,
0
);
}
/**
/**
* Get the inverse of the size of the periodic box.
* Get the inverse of the size of the periodic box.
*/
*/
...
@@ -511,6 +531,42 @@ public:
...
@@ -511,6 +531,42 @@ public:
mm_double4
getInvPeriodicBoxSizeDouble
()
const
{
mm_double4
getInvPeriodicBoxSizeDouble
()
const
{
return
invPeriodicBoxSizeDouble
;
return
invPeriodicBoxSizeDouble
;
}
}
/**
* Get the first periodic box vector.
*/
mm_float4
getPeriodicBoxVecX
()
{
return
periodicBoxVecX
;
}
/**
* Get the first periodic box vector.
*/
mm_double4
getPeriodicBoxVecXDouble
()
{
return
periodicBoxVecXDouble
;
}
/**
* Get the second periodic box vector.
*/
mm_float4
getPeriodicBoxVecY
()
{
return
periodicBoxVecY
;
}
/**
* Get the second periodic box vector.
*/
mm_double4
getPeriodicBoxVecYDouble
()
{
return
periodicBoxVecYDouble
;
}
/**
* Get the third periodic box vector.
*/
mm_float4
getPeriodicBoxVecZ
()
{
return
periodicBoxVecZ
;
}
/**
* Get the third periodic box vector.
*/
mm_double4
getPeriodicBoxVecZDouble
()
{
return
periodicBoxVecZDouble
;
}
/**
/**
* Get the OpenCLIntegrationUtilities for this context.
* Get the OpenCLIntegrationUtilities for this context.
*/
*/
...
@@ -628,9 +684,9 @@ private:
...
@@ -628,9 +684,9 @@ private:
int
numThreadBlocks
;
int
numThreadBlocks
;
int
numForceBuffers
;
int
numForceBuffers
;
int
simdWidth
;
int
simdWidth
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
;
bool
supports64BitGlobalAtomics
,
supportsDoublePrecision
,
useDoublePrecision
,
useMixedPrecision
,
atomsWereReordered
,
boxIsTriclinic
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
;
mm_float4
periodicBoxSize
,
invPeriodicBoxSize
,
periodicBoxVecX
,
periodicBoxVecY
,
periodicBoxVecZ
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
;
mm_double4
periodicBoxSizeDouble
,
invPeriodicBoxSizeDouble
,
periodicBoxVecXDouble
,
periodicBoxVecYDouble
,
periodicBoxVecZDouble
;
std
::
string
defaultOptimizationOptions
;
std
::
string
defaultOptimizationOptions
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
cl
::
Context
context
;
cl
::
Context
context
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
3db6b8ee
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -336,6 +336,54 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"LOG"
]
=
"log"
;
compilationDefines
[
"LOG"
]
=
"log"
;
}
}
// Set defines for applying periodic boundary conditions.
Vec3
boxVectors
[
3
];
system
.
getDefaultPeriodicBoxVectors
(
boxVectors
[
0
],
boxVectors
[
1
],
boxVectors
[
2
]);
boxIsTriclinic
=
(
boxVectors
[
0
][
1
]
!=
0.0
||
boxVectors
[
0
][
2
]
!=
0.0
||
boxVectors
[
1
][
0
]
!=
0.0
||
boxVectors
[
1
][
2
]
!=
0.0
||
boxVectors
[
2
][
0
]
!=
0.0
||
boxVectors
[
2
][
1
]
!=
0.0
);
if
(
boxIsTriclinic
)
{
compilationDefines
[
"APPLY_PERIODIC_TO_DELTA(delta)"
]
=
"{"
"real scale3 = floor(delta.z*invPeriodicBoxSize.z+0.5f);
\\\n
"
"delta.xyz -= scale3*periodicBoxVecZ.xyz;
\\\n
"
"real scale2 = floor(delta.y*invPeriodicBoxSize.y+0.5f);
\\\n
"
"delta.xy -= scale2*periodicBoxVecY.xy;
\\\n
"
"real scale1 = floor(delta.x*invPeriodicBoxSize.x+0.5f);
\\\n
"
"delta.x -= scale1*periodicBoxVecX.x;}"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS(pos)"
]
=
"{"
"real scale3 = floor(pos.z*invPeriodicBoxSize.z);
\\\n
"
"pos.xyz -= scale3*periodicBoxVecZ.xyz;
\\\n
"
"real scale2 = floor(pos.y*invPeriodicBoxSize.y);
\\\n
"
"pos.xy -= scale2*periodicBoxVecY.xy;
\\\n
"
"real scale1 = floor(pos.x*invPeriodicBoxSize.x);
\\\n
"
"pos.x -= scale1*periodicBoxVecX.x;}"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"
]
=
"{"
"real scale3 = floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f);
\\\n
"
"pos.x -= scale3*periodicBoxVecZ.x;
\\\n
"
"pos.y -= scale3*periodicBoxVecZ.y;
\\\n
"
"pos.z -= scale3*periodicBoxVecZ.z;
\\\n
"
"real scale2 = floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f);
\\\n
"
"pos.x -= scale2*periodicBoxVecY.x;
\\\n
"
"pos.y -= scale2*periodicBoxVecY.y;
\\\n
"
"real scale1 = floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f);
\\\n
"
"pos.x -= scale1*periodicBoxVecX.x;}"
;
}
else
{
compilationDefines
[
"APPLY_PERIODIC_TO_DELTA(delta)"
]
=
"delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS(pos)"
]
=
"pos.xyz -= floor(pos.xyz*invPeriodicBoxSize.xyz)*periodicBoxSize.xyz;"
;
compilationDefines
[
"APPLY_PERIODIC_TO_POS_WITH_CENTER(pos, center)"
]
=
"{"
"pos.x -= floor((pos.x-center.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
\\\n
"
"pos.y -= floor((pos.y-center.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
\\\n
"
"pos.z -= floor((pos.z-center.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;}"
;
}
// Create the work thread used for parallelization when running on multiple devices.
// Create the work thread used for parallelization when running on multiple devices.
thread
=
new
WorkThread
();
thread
=
new
WorkThread
();
...
@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() {
...
@@ -527,7 +575,7 @@ void OpenCLContext::restoreDefaultQueue() {
currentQueue
=
defaultQueue
;
currentQueue
=
defaultQueue
;
}
}
string
OpenCLContext
::
doubleToString
(
double
value
)
{
string
OpenCLContext
::
doubleToString
(
double
value
)
const
{
stringstream
s
;
stringstream
s
;
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
.
precision
(
useDoublePrecision
?
16
:
8
);
s
<<
scientific
<<
value
;
s
<<
scientific
<<
value
;
...
@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) {
...
@@ -536,7 +584,7 @@ string OpenCLContext::doubleToString(double value) {
return
s
.
str
();
return
s
.
str
();
}
}
string
OpenCLContext
::
intToString
(
int
value
)
{
string
OpenCLContext
::
intToString
(
int
value
)
const
{
stringstream
s
;
stringstream
s
;
s
<<
value
;
s
<<
value
;
return
s
.
str
();
return
s
.
str
();
...
@@ -1039,16 +1087,21 @@ void OpenCLContext::reorderAtomsImpl() {
...
@@ -1039,16 +1087,21 @@ void OpenCLContext::reorderAtomsImpl() {
// Move each molecule position into the same box.
// Move each molecule position into the same box.
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSizeDouble
.
x
);
Real4
center
=
molPos
[
i
];
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSizeDouble
.
y
);
int
zcell
=
(
int
)
floor
(
center
.
z
*
invPeriodicBoxSize
.
z
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSizeDouble
.
z
);
center
.
x
-=
zcell
*
periodicBoxVecZ
.
x
;
Real
dx
=
xcell
*
periodicBoxSizeDouble
.
x
;
center
.
y
-=
zcell
*
periodicBoxVecZ
.
y
;
Real
dy
=
ycell
*
periodicBoxSizeDouble
.
y
;
center
.
z
-=
zcell
*
periodicBoxVecZ
.
z
;
Real
dz
=
zcell
*
periodicBoxSizeDouble
.
z
;
int
ycell
=
(
int
)
floor
(
center
.
y
*
invPeriodicBoxSize
.
y
);
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
center
.
x
-=
ycell
*
periodicBoxVecY
.
x
;
molPos
[
i
].
x
-=
dx
;
center
.
y
-=
ycell
*
periodicBoxVecY
.
y
;
molPos
[
i
].
y
-=
dy
;
int
xcell
=
(
int
)
floor
(
center
.
x
*
invPeriodicBoxSize
.
x
);
molPos
[
i
].
z
-=
dz
;
center
.
x
-=
xcell
*
periodicBoxVecX
.
x
;
if
(
xcell
!=
0
||
ycell
!=
0
||
zcell
!=
0
)
{
Real
dx
=
molPos
[
i
].
x
-
center
.
x
;
Real
dy
=
molPos
[
i
].
y
-
center
.
y
;
Real
dz
=
molPos
[
i
].
z
-
center
.
z
;
molPos
[
i
]
=
center
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
Real4
p
=
oldPosq
[
atom
];
Real4
p
=
oldPosq
[
atom
];
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
3db6b8ee
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
3db6b8ee
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -325,11 +325,11 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
cl
::
Program
interactingBlocksProgram
=
context
.
createProgram
(
file
,
defines
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlockBounds"
);
findBlockBoundsKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
findBlockBoundsKernel
.
setArg
<
cl_int
>
(
0
,
context
.
getNumAtoms
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
context
.
getPosq
().
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
blockCenter
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
blockBoundingBox
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
rebuildNeighborList
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
rebuildNeighborList
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
sortedBlocks
->
getDeviceBuffer
());
findBlockBoundsKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"sortBoxData"
);
sortBoxDataKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"sortBoxData"
);
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
sortedBlocks
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
blockCenter
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
blockCenter
->
getDeviceBuffer
());
...
@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -341,20 +341,20 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
6
,
interactingTiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
interactingTiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
7
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
sortedBlocks
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
12
,
sortedBlocks
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
0
,
sortedBlockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
3
,
sortedBlockCenter
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
1
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
sortedBlockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
2
,
exclusionIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
5
,
exclusionIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
3
,
exclusionRowIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
6
,
exclusionRowIndices
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
oldPositions
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
7
,
oldPositions
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
5
,
rebuildNeighborList
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
1
8
,
rebuildNeighborList
->
getDeviceBuffer
());
if
(
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
if
(
findInteractingBlocksKernel
.
getWorkGroupInfo
<
CL_KERNEL_WORK_GROUP_SIZE
>
(
context
.
getDevice
())
<
groupSize
)
{
// The device can't handle this block size, so reduce it.
// The device can't handle this block size, so reduce it.
...
@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -369,18 +369,21 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
}
}
}
}
static
void
setPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
static
void
setPeriodicBoxArgs
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
if
(
cl
.
getUseDoublePrecision
())
if
(
cl
.
getUseDoublePrecision
())
{
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxSizeDouble
());
else
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getInvPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxSize
());
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxVecXDouble
());
}
kernel
.
setArg
<
mm_double4
>
(
index
++
,
cl
.
getPeriodicBoxVecYDouble
());
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getPeriodicBoxVecZDouble
());
static
void
setInvPeriodicBoxSizeArg
(
OpenCLContext
&
cl
,
cl
::
Kernel
&
kernel
,
int
index
)
{
}
if
(
cl
.
getUseDoublePrecision
())
else
{
kernel
.
setArg
<
mm_double4
>
(
index
,
cl
.
getInvPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxSize
());
else
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getInvPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getInvPeriodicBoxSize
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxVecX
());
kernel
.
setArg
<
mm_float4
>
(
index
++
,
cl
.
getPeriodicBoxVecY
());
kernel
.
setArg
<
mm_float4
>
(
index
,
cl
.
getPeriodicBoxVecZ
());
}
}
}
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
void
OpenCLNonbondedUtilities
::
prepareInteractions
()
{
...
@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -397,22 +400,18 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
// Compute the neighbor list.
// Compute the neighbor list.
setPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
1
);
setPeriodicBoxArgs
(
context
,
findBlockBoundsKernel
,
1
);
setInvPeriodicBoxSizeArg
(
context
,
findBlockBoundsKernel
,
2
);
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
findBlockBoundsKernel
,
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
0
);
setPeriodicBoxArgs
(
context
,
findInteractingBlocksKernel
,
0
);
setInvPeriodicBoxSizeArg
(
context
,
findInteractingBlocksKernel
,
1
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
kernelSource
.
size
()
>
0
)
{
if
(
kernelSource
.
size
()
>
0
)
{
if
(
useCutoff
)
{
if
(
useCutoff
)
setPeriodicBoxSizeArg
(
context
,
forceKernel
,
9
);
setPeriodicBoxArgs
(
context
,
forceKernel
,
9
);
setInvPeriodicBoxSizeArg
(
context
,
forceKernel
,
10
);
}
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
forceKernel
,
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
if
(
context
.
getComputeForceCount
()
==
1
)
if
(
context
.
getComputeForceCount
()
==
1
)
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
...
@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
...
@@ -441,11 +440,11 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingTiles
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
interactingAtoms
=
OpenCLArray
::
create
<
cl_int
>
(
context
,
OpenCLContext
::
TileSize
*
maxTiles
,
"interactingAtoms"
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl_uint
>
(
1
1
,
maxTiles
);
forceKernel
.
setArg
<
cl_uint
>
(
1
4
,
maxTiles
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
4
,
interactingAtoms
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactingAtoms
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
6
,
maxTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
maxTiles
);
int
numAtoms
=
context
.
getNumAtoms
();
int
numAtoms
=
context
.
getNumAtoms
();
if
(
context
.
getUseDoublePrecision
())
{
if
(
context
.
getUseDoublePrecision
())
{
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
vector
<
mm_double4
>
oldPositionsVec
(
numAtoms
,
mm_double4
(
1e30
,
1e30
,
1e30
,
0
));
...
@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
...
@@ -473,8 +472,8 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
forceKernel
.
setArg
<
cl_uint
>
(
5
,
startTileIndex
);
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
7
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
8
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
}
}
}
}
...
@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
...
@@ -617,7 +616,7 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
if
(
useCutoff
)
{
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
index
+=
2
;
// The periodic box size arguments are set when the kernel is executed.
index
+=
5
;
// The periodic box size arguments are set when the kernel is executed.
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
->
getSize
());
kernel
.
setArg
<
cl_uint
>
(
index
++
,
interactingTiles
->
getSize
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockCenter
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockCenter
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockBoundingBox
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
blockBoundingBox
->
getDeviceBuffer
());
...
...
platforms/opencl/src/kernels/customGBEnergyN2.cl
View file @
3db6b8ee
...
@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
...
@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -110,7 +111,7 @@ __kernel void computeN2Energy(
...
@@ -110,7 +111,7 @@ __kernel void computeN2Energy(
real4 posq2 = local_posq[atom2];
real4 posq2 = local_posq[atom2];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -266,10 +267,8 @@ __kernel void computeN2Energy(
...
@@ -266,10 +267,8 @@ __kernel void computeN2Energy(
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
posq1.xyz
-=
floor
((
posq1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1,
blockCenterX
)
local_posq[get_local_id
(
0
)
].x
-=
floor
((
local_posq[get_local_id
(
0
)
].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[get_local_id
(
0
)
],
blockCenterX
)
local_posq[get_local_id
(
0
)
].y
-=
floor
((
local_posq[get_local_id
(
0
)
].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
local_posq[get_local_id
(
0
)
].z
-=
floor
((
local_posq[get_local_id
(
0
)
].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
SYNC_WARPS
;
SYNC_WARPS
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
; j < TILE_SIZE; j++) {
for
(
j
=
0
; j < TILE_SIZE; j++) {
...
@@ -310,7 +309,7 @@ __kernel void computeN2Energy(
...
@@ -310,7 +309,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
3db6b8ee
...
@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
...
@@ -21,7 +21,8 @@ __kernel void computeN2Energy(
__global
const
ushort2*
exclusionTiles,
__global
const
ushort2*
exclusionTiles,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
...
@@ -60,7 +61,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -126,7 +127,7 @@ __kernel void computeN2Energy(
...
@@ -126,7 +127,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -272,7 +273,7 @@ __kernel void computeN2Energy(
...
@@ -272,7 +273,7 @@ __kernel void computeN2Energy(
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz
-=
floor
(
(
local_posq[tgx]
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[tgx]
,
blockCenterX
)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real4
force
=
0
;
real4
force
=
0
;
...
@@ -332,7 +333,7 @@ __kernel void computeN2Energy(
...
@@ -332,7 +333,7 @@ __kernel void computeN2Energy(
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBValueN2.cl
View file @
3db6b8ee
...
@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
__local
real*
restrict
local_value,
__local
real*
restrict
local_value,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -100,7 +101,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -239,10 +240,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
//
box,
then
skip
having
to
apply
periodic
boundary
conditions
later.
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
posq1.xyz
-=
floor
((
posq1.xyz-blockCenterX.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1,
blockCenterX
)
local_posq[get_local_id
(
0
)
].x
-=
floor
((
local_posq[get_local_id
(
0
)
].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[get_local_id
(
0
)
],
blockCenterX
)
local_posq[get_local_id
(
0
)
].y
-=
floor
((
local_posq[get_local_id
(
0
)
].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
local_posq[get_local_id
(
0
)
].z
-=
floor
((
local_posq[get_local_id
(
0
)
].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
SYNC_WARPS
;
SYNC_WARPS
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
; j < TILE_SIZE; j++) {
for
(
j
=
0
; j < TILE_SIZE; j++) {
...
@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -278,7 +277,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[atom2]
;
real4
posq2
=
local_posq[atom2]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
3db6b8ee
...
@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -15,7 +15,8 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
__local
real*
restrict
local_value,
__local
real*
restrict
local_value,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -52,7 +53,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -109,7 +110,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4 posq2 = local_posq[j];
real4 posq2 = local_posq[j];
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = dot(delta.xyz, delta.xyz);
real r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -239,7 +240,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++)
local_posq[tgx].xyz
-=
floor
(
(
local_posq[tgx]
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
local_posq[tgx]
,
blockCenterX
)
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real
value
=
0
;
real
value
=
0
;
...
@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
...
@@ -287,7 +288,7 @@ __kernel void computeN2Value(__global const real4* restrict posq, __local real4*
real4
posq2
=
local_posq[j]
;
real4
posq2
=
local_posq[j]
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
3db6b8ee
...
@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) {
...
@@ -11,12 +11,10 @@ real4 delta(real4 vec1, real4 vec2) {
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
real4
deltaPeriodic
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
real4
deltaPeriodic
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
result.x
-=
floor
(
result.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_DELTA
(
result
)
result.y
-=
floor
(
result.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
result.z
-=
floor
(
result.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
...
@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -56,7 +54,8 @@ real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors.
* Compute forces on donors.
*/
*/
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeDonorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict donorBufferIndices, __local real4* posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
real energy = 0;
real energy = 0;
real4 f1 = (real4) 0;
real4 f1 = (real4) 0;
...
@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -102,7 +101,7 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
real4 a1 = posBuffer[3*index];
real4 a1 = posBuffer[3*index];
real4 a2 = posBuffer[3*index+1];
real4 a2 = posBuffer[3*index+1];
real4 a3 = posBuffer[3*index+2];
real4 a3 = posBuffer[3*index+2];
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize);
real4 deltaD1A1 = deltaPeriodic(d1, a1, periodicBoxSize, invPeriodicBoxSize
, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if (deltaD1A1.w < CUTOFF_SQUARED) {
if (deltaD1A1.w < CUTOFF_SQUARED) {
#endif
#endif
...
@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
...
@@ -144,7 +143,8 @@ __kernel void computeDonorForces(__global real4* restrict forceBuffers, __global
* Compute forces on acceptors.
* Compute forces on acceptors.
*/
*/
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq, __global const int4* restrict exclusions,
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize
__global const int4* restrict donorAtoms, __global const int4* restrict acceptorAtoms, __global const int4* restrict acceptorBufferIndices, __local real4* restrict posBuffer, real4 periodicBoxSize, real4 invPeriodicBoxSize,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ
PARAMETER_ARGUMENTS) {
PARAMETER_ARGUMENTS) {
real4 f1 = (real4) 0;
real4 f1 = (real4) 0;
real4 f2 = (real4) 0;
real4 f2 = (real4) 0;
...
@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
...
@@ -189,7 +189,7 @@ __kernel void computeAcceptorForces(__global real4* restrict forceBuffers, __glo
real4
d1
=
posBuffer[3*index]
;
real4
d1
=
posBuffer[3*index]
;
real4
d2
=
posBuffer[3*index+1]
;
real4
d2
=
posBuffer[3*index+1]
;
real4
d3
=
posBuffer[3*index+2]
;
real4
d3
=
posBuffer[3*index+2]
;
real4
deltaD1A1
=
deltaPeriodic
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize
)
;
real4
deltaD1A1
=
deltaPeriodic
(
d1,
a1,
periodicBoxSize,
invPeriodicBoxSize
,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
if
(
deltaD1A1.w
<
CUTOFF_SQUARED
)
{
if
(
deltaD1A1.w
<
CUTOFF_SQUARED
)
{
#
endif
#
endif
...
...
platforms/opencl/src/kernels/customManyParticle.cl
View file @
3db6b8ee
...
@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe
...
@@ -14,12 +14,10 @@ inline void storeForce(int atom, real4 force, __global long* restrict forceBuffe
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
Compute
the
difference
between
two
vectors,
taking
periodic
boundary
conditions
into
account
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*
and
setting
the
fourth
component
to
the
squared
magnitude.
*/
*/
inline
real4
delta
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
)
{
inline
real4
delta
(
real4
vec1,
real4
vec2,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
)
{
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0.0f
)
;
real4
result
=
(
real4
)
(
vec1.x-vec2.x,
vec1.y-vec2.y,
vec1.z-vec2.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
result.x
-=
floor
(
result.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_DELTA
(
result
)
result.y
-=
floor
(
result.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
result.z
-=
floor
(
result.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
result.w
=
result.x*result.x
+
result.y*result.y
+
result.z*result.z
;
return
result
;
return
result
;
...
@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
...
@@ -75,7 +73,7 @@ inline bool isInteractionExcluded(int atom1, int atom2, __global int* restrict e
*/
*/
__kernel
void
computeInteraction
(
__kernel
void
computeInteraction
(
__global
long*
restrict
forceBuffers,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
long*
restrict
forceBuffers,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
real4
periodicBoxSize,
real4
invPeriodicBoxSize
real4
periodicBoxSize,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
,
__global
const
int*
restrict
neighbors,
__global
const
int*
restrict
neighborStartIndex
#
endif
#
endif
...
@@ -138,16 +136,14 @@ __kernel void computeInteraction(
...
@@ -138,16 +136,14 @@ __kernel void computeInteraction(
/**
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
*/
__kernel
void
findBlockBounds
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
posq
,
__kernel
void
findBlockBounds
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
numNeighborPairs
)
{
__global
const
real4*
restrict
posq,
__global
real4*
restrict
blockCenter,
__global
real4*
restrict
blockBoundingBox,
__global
int*
restrict
numNeighborPairs
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
NUM_ATOMS
)
{
while
(
base
<
NUM_ATOMS
)
{
real4
pos
=
posq[base]
;
real4
pos
=
posq[base]
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS
(
pos
)
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
#
endif
#
endif
real4
minPos
=
pos
;
real4
minPos
=
pos
;
real4
maxPos
=
pos
;
real4
maxPos
=
pos
;
...
@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
...
@@ -156,9 +152,7 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
pos
=
posq[i]
;
pos
=
posq[i]
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
real4
center
=
0.5f*
(
maxPos+minPos
)
;
real4
center
=
0.5f*
(
maxPos+minPos
)
;
pos.x
-=
floor
((
pos.x-center.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
pos,
center
)
pos.y
-=
floor
((
pos.y-center.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
pos.z
-=
floor
((
pos.z-center.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
minPos
=
(
real4
)
(
min
(
minPos.x,pos.x
)
,
min
(
minPos.y,pos.y
)
,
min
(
minPos.z,pos.z
)
,
0
)
;
minPos
=
(
real4
)
(
min
(
minPos.x,pos.x
)
,
min
(
minPos.y,pos.y
)
,
min
(
minPos.z,pos.z
)
,
0
)
;
maxPos
=
(
real4
)
(
max
(
maxPos.x,pos.x
)
,
max
(
maxPos.y,pos.y
)
,
max
(
maxPos.z,pos.z
)
,
0
)
;
maxPos
=
(
real4
)
(
max
(
maxPos.x,pos.x
)
,
max
(
maxPos.y,pos.y
)
,
max
(
maxPos.z,pos.z
)
,
0
)
;
...
@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
...
@@ -176,8 +170,8 @@ __kernel void findBlockBounds(real4 periodicBoxSize, real4 invPeriodicBoxSize, _
/**
/**
*
Find
a
list
of
neighbors
for
each
atom.
*
Find
a
list
of
neighbors
for
each
atom.
*/
*/
__kernel
void
findNeighbors
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
real4*
restrict
posq
,
__kernel
void
findNeighbors
(
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ
,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
int2*
restrict
neighborPairs,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
int2*
restrict
neighborPairs,
__global
int*
restrict
numNeighborPairs,
__global
int*
restrict
numNeighborsForAtom,
int
maxNeighborPairs
__global
int*
restrict
numNeighborPairs,
__global
int*
restrict
numNeighborsForAtom,
int
maxNeighborPairs
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
,
__global
int*
restrict
exclusions,
__global
int*
restrict
exclusionStartIndex
...
@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
...
@@ -212,9 +206,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
real4
blockSize2
=
blockBoundingBox[block2]
;
real4
blockSize2
=
blockBoundingBox[block2]
;
real4
blockDelta
=
blockCenter1-blockCenter2
;
real4
blockDelta
=
blockCenter1-blockCenter2
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
blockDelta.x
-=
floor
(
blockDelta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_DELTA
(
blockDelta
)
blockDelta.y
-=
floor
(
blockDelta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
blockDelta.z
-=
floor
(
blockDelta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
#
endif
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSize1.x-blockSize2.x
)
;
blockDelta.x
=
max
((
real
)
0
,
fabs
(
blockDelta.x
)
-blockSize1.x-blockSize2.x
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSize1.y-blockSize2.y
)
;
blockDelta.y
=
max
((
real
)
0
,
fabs
(
blockDelta.y
)
-blockSize1.y-blockSize2.y
)
;
...
@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
...
@@ -243,7 +235,7 @@ __kernel void findNeighbors(real4 periodicBoxSize, real4 invPeriodicBoxSize, __g
//
Decide
whether
to
include
this
atom
pair
in
the
neighbor
list.
//
Decide
whether
to
include
this
atom
pair
in
the
neighbor
list.
real4
atomDelta
=
delta
(
pos1,
pos2,
periodicBoxSize,
invPeriodicBoxSize
)
;
real4
atomDelta
=
delta
(
pos1,
pos2,
periodicBoxSize,
invPeriodicBoxSize
,
periodicBoxVecX,
periodicBoxVecY,
periodicBoxVecZ
)
;
#
ifdef
USE_CENTRAL_PARTICLE
#
ifdef
USE_CENTRAL_PARTICLE
bool
includeAtom
=
(
atom2
!=
atom1
&&
atom2
<
NUM_ATOMS
&&
atomDelta.w
<
CUTOFF_SQUARED
)
;
bool
includeAtom
=
(
atom2
!=
atom1
&&
atom2
<
NUM_ATOMS
&&
atomDelta.w
<
CUTOFF_SQUARED
)
;
#
else
#
else
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
View file @
3db6b8ee
...
@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups(
...
@@ -42,8 +42,8 @@ __kernel void computeInteractionGroups(
#
else
#
else
__global
real4*
restrict
forceBuffers,
__global
real4*
restrict
forceBuffers,
#
endif
#
endif
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
real*
restrict
energyBuffer,
__global
const
real4*
restrict
posq,
__global
const
int4*
restrict
groupData,
__global
const
int4*
restrict
groupData
,
real4
periodicBox
Size
,
real4
invP
eriodicBox
Size
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
real4
periodicBoxVecX
,
real4
periodicBox
VecY
,
real4
p
eriodicBox
VecZ
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
const
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
; // global warpIndex
...
@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups(
...
@@ -82,7 +82,7 @@ __kernel void computeInteractionGroups(
posq2
=
(
real4
)
(
localData[localIndex].x,
localData[localIndex].y,
localData[localIndex].z,
localData[localIndex].q
)
;
posq2
=
(
real4
)
(
localData[localIndex].x,
localData[localIndex].y,
localData[localIndex].z,
localData[localIndex].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment