Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
b21e3182
Commit
b21e3182
authored
Apr 20, 2015
by
Jason Swails
Browse files
Merge branch 'master' into psfinscode
parents
7b30da6e
3946c025
Changes
29
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
322 additions
and
71 deletions
+322
-71
platforms/opencl/src/kernels/fft.cl
platforms/opencl/src/kernels/fft.cl
+33
-2
platforms/opencl/src/kernels/fftR2C.cl
platforms/opencl/src/kernels/fftR2C.cl
+166
-0
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+16
-17
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+68
-28
platforms/opencl/src/kernels/sort.cl
platforms/opencl/src/kernels/sort.cl
+0
-6
platforms/opencl/tests/TestOpenCLFFT.cpp
platforms/opencl/tests/TestOpenCLFFT.cpp
+34
-13
platforms/reference/src/SimTKReference/ObcParameters.cpp
platforms/reference/src/SimTKReference/ObcParameters.cpp
+3
-3
tests/TestVectorize.cpp
tests/TestVectorize.cpp
+2
-1
wrappers/python/src/swig_doxygen/swig_lib/python/extend.i
wrappers/python/src/swig_doxygen/swig_lib/python/extend.i
+0
-1
No files found.
platforms/opencl/src/kernels/fft.cl
View file @
b21e3182
...
@@ -2,26 +2,57 @@ real2 multiplyComplex(real2 c1, real2 c2) {
...
@@ -2,26 +2,57 @@ real2 multiplyComplex(real2 c1, real2 c2) {
return
(
real2
)
(
c1.x*c2.x-c1.y*c2.y,
c1.x*c2.y+c1.y*c2.x
)
;
return
(
real2
)
(
c1.x*c2.x-c1.y*c2.y,
c1.x*c2.y+c1.y*c2.x
)
;
}
}
/**
 * Load a value from the half-complex grid produced by a real-to-complex transform.
 *
 * Only entries with z < ZSIZE/2+1 are read directly from the grid. For any other z the
 * entry at the mirrored position is read instead (x and y are reflected unless they
 * are 0, and z is replaced by ZSIZE-z) and its complex conjugate is returned.
 */
real2 loadComplexValue(__global const real2* restrict in, int x, int y, int z) {
    const int inputZSize = ZSIZE/2+1;
    if (z >= inputZSize) {
        // Outside the stored half: fetch the mirrored element and conjugate it.
        const int mirrorX = (x != 0 ? XSIZE-x : 0);
        const int mirrorY = (y != 0 ? YSIZE-y : 0);
        const real2 mirrored = in[mirrorX*YSIZE*inputZSize+mirrorY*inputZSize+(ZSIZE-z)];
        return (real2) (mirrored.x, -mirrored.y);
    }
    return in[x*YSIZE*inputZSize+y*inputZSize+z];
}
/**
/**
*
Perform
a
1D
FFT
on
each
row
along
one
axis.
*
Perform
a
1D
FFT
on
each
row
along
one
axis.
*/
*/
__kernel
void
execFFT
(
__global
const
real2
*
restrict
in,
__global
real2
*
restrict
out,
int
sign,
__local
real2*
restrict
w,
__kernel
void
execFFT
(
__global
const
INPUT_TYPE
*
restrict
in,
__global
OUTPUT_TYPE
*
restrict
out,
__local
real2*
restrict
w,
__local
real2*
restrict
data0,
__local
real2*
restrict
data1
)
{
__local
real2*
restrict
data0,
__local
real2*
restrict
data1
)
{
for
(
int
i
=
get_local_id
(
0
)
; i < ZSIZE; i += get_local_size(0))
for
(
int
i
=
get_local_id
(
0
)
; i < ZSIZE; i += get_local_size(0))
w[i]
=
(
real2
)
(
cos
(
-
sign
*i*2*M_PI/ZSIZE
)
,
sin
(
-
sign
*i*2*M_PI/ZSIZE
))
;
w[i]
=
(
real2
)
(
cos
(
-
(
SIGN
)
*i*2*
M_PI/ZSIZE
)
,
sin
(
-
(
SIGN
)
*i*2*
M_PI/ZSIZE
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
baseIndex
=
get_group_id
(
0
)
*BLOCKS_PER_GROUP
; baseIndex < XSIZE*YSIZE; baseIndex += get_num_groups(0)*BLOCKS_PER_GROUP) {
for
(
int
baseIndex
=
get_group_id
(
0
)
*BLOCKS_PER_GROUP
; baseIndex < XSIZE*YSIZE; baseIndex += get_num_groups(0)*BLOCKS_PER_GROUP) {
int
index
=
baseIndex+get_local_id
(
0
)
/ZSIZE
;
int
index
=
baseIndex+get_local_id
(
0
)
/ZSIZE
;
int
x
=
index/YSIZE
;
int
x
=
index/YSIZE
;
int
y
=
index-x*YSIZE
;
int
y
=
index-x*YSIZE
;
#
if
OUTPUT_IS_PACKED
if
(
x
<
XSIZE/2+1
)
{
#
endif
#
if
LOOP_REQUIRED
#
if
LOOP_REQUIRED
for
(
int
z
=
get_local_id
(
0
)
; z < ZSIZE; z += get_local_size(0))
for
(
int
z
=
get_local_id
(
0
)
; z < ZSIZE; z += get_local_size(0))
#
if
INPUT_IS_REAL
data0[z]
=
(
real2
)
(
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z],
0
)
;
#
elif
INPUT_IS_PACKED
data0[z]
=
loadComplexValue
(
in,
x,
y,
z
)
;
#
else
data0[z]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z]
;
data0[z]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+z]
;
#
endif
#
else
#
else
if
(
index
<
XSIZE*YSIZE
)
if
(
index
<
XSIZE*YSIZE
)
#
if
INPUT_IS_REAL
data0[get_local_id
(
0
)
]
=
(
real2
)
(
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
%ZSIZE],
0
)
;
#
elif
INPUT_IS_PACKED
data0[get_local_id
(
0
)
]
=
loadComplexValue
(
in,
x,
y,
get_local_id
(
0
)
%ZSIZE
)
;
#
else
data0[get_local_id
(
0
)
]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
%ZSIZE]
;
data0[get_local_id
(
0
)
]
=
in[x*
(
YSIZE*ZSIZE
)
+y*ZSIZE+get_local_id
(
0
)
%ZSIZE]
;
#
endif
#
endif
#
if
OUTPUT_IS_PACKED
}
#
endif
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
COMPUTE_FFT
COMPUTE_FFT
...
...
platforms/opencl/src/kernels/fftR2C.cl
0 → 100644
View file @
b21e3182
/**
 * Combine the two halves of a real grid into a complex grid that is half as large.
 *
 * Every packed element takes two neighbouring real samples along PACKED_AXIS: the one
 * at position 2*n goes into the x component and the one at 2*n+1 into the y component.
 */
__kernel void packForwardData(__global const real* restrict in, __global real2* restrict out) {
    const int numPacked = PACKED_XSIZE*PACKED_YSIZE*PACKED_ZSIZE;
    for (int index = get_global_id(0); index < numPacked; index += get_global_size(0)) {
        // Split the flat packed index into packed-grid coordinates.
        const int x = index/(PACKED_YSIZE*PACKED_ZSIZE);
        const int rest = index-x*(PACKED_YSIZE*PACKED_ZSIZE);
        const int y = rest/PACKED_ZSIZE;
        const int z = rest-y*PACKED_ZSIZE;

        // Positions of the two real samples in the full-size input grid.
#if PACKED_AXIS == 0
        const int evenIndex = 2*x*YSIZE*ZSIZE+y*ZSIZE+z;
        const int oddIndex = (2*x+1)*YSIZE*ZSIZE+y*ZSIZE+z;
#elif PACKED_AXIS == 1
        const int evenIndex = x*YSIZE*ZSIZE+2*y*ZSIZE+z;
        const int oddIndex = x*YSIZE*ZSIZE+(2*y+1)*ZSIZE+z;
#else
        const int evenIndex = x*YSIZE*ZSIZE+y*ZSIZE+2*z;
        const int oddIndex = x*YSIZE*ZSIZE+y*ZSIZE+(2*z+1);
#endif
        out[index] = (real2) (in[evenIndex], in[oddIndex]);
    }
}
/**
 * Split the transformed data back into a full sized, symmetric grid.
 *
 * in  - the packed complex grid (PACKED_XSIZE*PACKED_YSIZE*PACKED_ZSIZE elements)
 * out - the half-complex output grid, whose last dimension has ZSIZE/2+1 entries
 * w   - local scratch space for the phase factors, indexed by the coordinate along
 *       PACKED_AXIS
 */
__kernel void unpackForwardData(__global const real2* restrict in, __global real2* restrict out, __local real2* restrict w) {
    // Fill the phase factor table: w[k] = (sin, cos) of 2*pi*k/N, where N is the
    // full-grid size along the packed axis.

#if PACKED_AXIS == 0
    for (int k = get_local_id(0); k < PACKED_XSIZE; k += get_local_size(0)) {
        w[k] = (real2) (sin(k*2*M_PI/XSIZE), cos(k*2*M_PI/XSIZE));
    }
#elif PACKED_AXIS == 1
    for (int k = get_local_id(0); k < PACKED_YSIZE; k += get_local_size(0)) {
        w[k] = (real2) (sin(k*2*M_PI/YSIZE), cos(k*2*M_PI/YSIZE));
    }
#else
    for (int k = get_local_id(0); k < PACKED_ZSIZE; k += get_local_size(0)) {
        w[k] = (real2) (sin(k*2*M_PI/ZSIZE), cos(k*2*M_PI/ZSIZE));
    }
#endif
    // Every work-item must see the complete table before using it.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Transform the data.

    const int outputZSize = ZSIZE/2+1;
    const int numPacked = PACKED_XSIZE*PACKED_YSIZE*PACKED_ZSIZE;
    for (int index = get_global_id(0); index < numPacked; index += get_global_size(0)) {
        // Split the flat packed index into packed-grid coordinates.
        const int x = index/(PACKED_YSIZE*PACKED_ZSIZE);
        const int rest = index-x*(PACKED_YSIZE*PACKED_ZSIZE);
        const int y = rest/PACKED_ZSIZE;
        const int z = rest-y*PACKED_ZSIZE;

        // Mirrored coordinates inside the packed grid (0 maps to itself).
        const int packedXp = (x != 0 ? PACKED_XSIZE-x : 0);
        const int packedYp = (y != 0 ? PACKED_YSIZE-y : 0);
        const int packedZp = (z != 0 ? PACKED_ZSIZE-z : 0);
        const real2 z1 = in[x*PACKED_YSIZE*PACKED_ZSIZE+y*PACKED_ZSIZE+z];
        const real2 z2 = in[packedXp*PACKED_YSIZE*PACKED_ZSIZE+packedYp*PACKED_ZSIZE+packedZp];
#if PACKED_AXIS == 0
        const real2 wfac = w[x];
#elif PACKED_AXIS == 1
        const real2 wfac = w[y];
#else
        const real2 wfac = w[z];
#endif
        const real2 result = (real2) ((z1.x+z2.x - wfac.x*(z1.x-z2.x) + wfac.y*(z1.y+z2.y))/2,
                                      (z1.y-z2.y - wfac.y*(z1.x-z2.x) - wfac.x*(z1.y+z2.y))/2);
        if (z < outputZSize)
            out[x*YSIZE*outputZSize+y*outputZSize+z] = result;

        // Mirrored coordinates inside the full-size grid (0 maps to itself).
        const int xp = (x != 0 ? XSIZE-x : 0);
        const int yp = (y != 0 ? YSIZE-y : 0);
        const int zp = (z != 0 ? ZSIZE-z : 0);
        if (zp >= outputZSize)
            continue; // The mirrored element lies outside the stored half of the grid.

        // At coordinate 0 along the packed axis, the element at index PACKED_*SIZE on
        // that axis is written instead of the mirrored element.
#if PACKED_AXIS == 0
        const int onEdge = (x == 0);
        const int edgeIndex = PACKED_XSIZE*YSIZE*outputZSize+yp*outputZSize+zp;
#elif PACKED_AXIS == 1
        const int onEdge = (y == 0);
        const int edgeIndex = xp*YSIZE*outputZSize+PACKED_YSIZE*outputZSize+zp;
#else
        const int onEdge = (z == 0);
        const int edgeIndex = xp*YSIZE*outputZSize+yp*outputZSize+PACKED_ZSIZE;
#endif
        if (onEdge)
            out[edgeIndex] = (real2) ((z1.x-z1.y+z2.x-z2.y)/2, (-z1.x-z1.y+z2.x+z2.y)/2);
        else
            out[xp*YSIZE*outputZSize+yp*outputZSize+zp] = (real2) (result.x, -result.y);
    }
}
/**
 * Load a value from the half-complex grid produced by a real-to-complex transform.
 *
 * Only entries with z < ZSIZE/2+1 are read directly from the grid. For any other z the
 * entry at the mirrored position is read instead (x and y are reflected unless they
 * are 0, and z is replaced by ZSIZE-z) and its complex conjugate is returned.
 */
real2 loadComplexValue(__global const real2* restrict in, int x, int y, int z) {
    const int inputZSize = ZSIZE/2+1;
    if (z >= inputZSize) {
        // Outside the stored half: fetch the mirrored element and conjugate it.
        const int mirrorX = (x != 0 ? XSIZE-x : 0);
        const int mirrorY = (y != 0 ? YSIZE-y : 0);
        const real2 mirrored = in[mirrorX*YSIZE*inputZSize+mirrorY*inputZSize+(ZSIZE-z)];
        return (real2) (mirrored.x, -mirrored.y);
    }
    return in[x*YSIZE*inputZSize+y*inputZSize+z];
}
/**
 * Repack the symmetric complex grid into one half as large in preparation for doing an
 * inverse complex-to-real transform.
 *
 * in  - the half-complex grid, read through loadComplexValue()
 * out - the packed complex grid (PACKED_XSIZE*PACKED_YSIZE*PACKED_ZSIZE elements)
 * w   - local scratch space for the phase factors, indexed by the coordinate along
 *       PACKED_AXIS
 */
__kernel void packBackwardData(__global const real2* restrict in, __global real2* restrict out, __local real2* restrict w) {
    // Fill the phase factor table: w[k] = (cos, sin) of 2*pi*k/N, where N is the
    // full-grid size along the packed axis.

#if PACKED_AXIS == 0
    for (int k = get_local_id(0); k < PACKED_XSIZE; k += get_local_size(0)) {
        w[k] = (real2) (cos(k*2*M_PI/XSIZE), sin(k*2*M_PI/XSIZE));
    }
#elif PACKED_AXIS == 1
    for (int k = get_local_id(0); k < PACKED_YSIZE; k += get_local_size(0)) {
        w[k] = (real2) (cos(k*2*M_PI/YSIZE), sin(k*2*M_PI/YSIZE));
    }
#else
    for (int k = get_local_id(0); k < PACKED_ZSIZE; k += get_local_size(0)) {
        w[k] = (real2) (cos(k*2*M_PI/ZSIZE), sin(k*2*M_PI/ZSIZE));
    }
#endif
    // Every work-item must see the complete table before using it.
    barrier(CLK_LOCAL_MEM_FENCE);

    // Transform the data.

    const int numPacked = PACKED_XSIZE*PACKED_YSIZE*PACKED_ZSIZE;
    for (int index = get_global_id(0); index < numPacked; index += get_global_size(0)) {
        // Split the flat packed index into packed-grid coordinates.
        const int x = index/(PACKED_YSIZE*PACKED_ZSIZE);
        const int rest = index-x*(PACKED_YSIZE*PACKED_ZSIZE);
        const int y = rest/PACKED_ZSIZE;
        const int z = rest-y*PACKED_ZSIZE;

        // Mirrored coordinates inside the packed grid (0 maps to itself) ...
        int xp = (x != 0 ? PACKED_XSIZE-x : 0);
        int yp = (y != 0 ? PACKED_YSIZE-y : 0);
        int zp = (z != 0 ? PACKED_ZSIZE-z : 0);

        // ... except along the packed axis, where coordinate 0 is not wrapped back to 0
        // and therefore maps to index PACKED_*SIZE.
#if PACKED_AXIS == 0
        const real2 wfac = w[x];
        xp = PACKED_XSIZE-x;
#elif PACKED_AXIS == 1
        const real2 wfac = w[y];
        yp = PACKED_YSIZE-y;
#else
        const real2 wfac = w[z];
        zp = PACKED_ZSIZE-z;
#endif
        const real2 z1 = loadComplexValue(in, x, y, z);
        const real2 z2 = loadComplexValue(in, xp, yp, zp);

        const real2 even = (real2) ((z1.x+z2.x)/2, (z1.y-z2.y)/2);
        const real2 odd = (real2) ((z1.x-z2.x)/2, (z1.y+z2.y)/2);
        // Complex multiplication of the odd part by the phase factor.
        const real2 rotated = (real2) (odd.x*wfac.x-odd.y*wfac.y, odd.y*wfac.x+odd.x*wfac.y);
        out[x*PACKED_YSIZE*PACKED_ZSIZE+y*PACKED_ZSIZE+z] = (real2) (even.x-rotated.y, even.y+rotated.x);
    }
}
/**
 * Split the data back into a full sized, real grid after an inverse transform.
 *
 * Every packed element is doubled; its x component is stored at position 2*n along
 * PACKED_AXIS of the real grid and its y component at position 2*n+1.
 */
__kernel void unpackBackwardData(__global const real2* restrict in, __global real* restrict out) {
    const int numPacked = PACKED_XSIZE*PACKED_YSIZE*PACKED_ZSIZE;
    for (int index = get_global_id(0); index < numPacked; index += get_global_size(0)) {
        // Split the flat packed index into packed-grid coordinates.
        const int x = index/(PACKED_YSIZE*PACKED_ZSIZE);
        const int rest = index-x*(PACKED_YSIZE*PACKED_ZSIZE);
        const int y = rest/PACKED_ZSIZE;
        const int z = rest-y*PACKED_ZSIZE;

        // Positions of the two real samples in the full-size output grid.
#if PACKED_AXIS == 0
        const int evenIndex = 2*x*YSIZE*ZSIZE+y*ZSIZE+z;
        const int oddIndex = (2*x+1)*YSIZE*ZSIZE+y*ZSIZE+z;
#elif PACKED_AXIS == 1
        const int evenIndex = x*YSIZE*ZSIZE+2*y*ZSIZE+z;
        const int oddIndex = x*YSIZE*ZSIZE+(2*y+1)*ZSIZE+z;
#else
        const int evenIndex = x*YSIZE*ZSIZE+y*ZSIZE+2*z;
        const int oddIndex = x*YSIZE*ZSIZE+y*ZSIZE+(2*z+1);
#endif
        const real2 doubled = 2*in[index];
        out[evenIndex] = doubled.x;
        out[oddIndex] = doubled.y;
    }
}
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
b21e3182
...
@@ -21,7 +21,8 @@ __kernel void computeBornSum(
...
@@ -21,7 +21,8 @@ __kernel void computeBornSum(
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
__global
const
real4*
restrict
posq,
__global
const
float2*
restrict
global_params,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
__global
const
int*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
real4
periodicBoxSize,
real4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
real4
periodicBoxVecX,
real4
periodicBoxVecY,
real4
periodicBoxVecZ,
unsigned
int
maxTiles,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockSize,
__global
const
int*
restrict
interactingAtoms,
#
else
#
else
unsigned
int
numTiles,
unsigned
int
numTiles,
#
endif
#
endif
...
@@ -62,7 +63,7 @@ __kernel void computeBornSum(
...
@@ -62,7 +63,7 @@ __kernel void computeBornSum(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
real
r2
=
dot
(
delta.xyz,
delta.xyz
)
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -111,7 +112,7 @@ __kernel void computeBornSum(
...
@@ -111,7 +112,7 @@ __kernel void computeBornSum(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -253,14 +254,13 @@ __kernel void computeBornSum(
...
@@ -253,14 +254,13 @@ __kernel void computeBornSum(
real4 blockCenterX = blockCenter[x];
real4 blockCenterX = blockCenter[x];
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
localData[tgx].x -= floor((localData[tgx].x-blockCenterX.x)*invPeriodicBoxSize.x+0.5f)*periodicBoxSize.x;
APPLY_PERIODIC_TO_POS_WITH_CENTER(localData[tgx], blockCenterX)
localData[tgx].y -= floor((localData[tgx].y-blockCenterX.y)*invPeriodicBoxSize.y+0.5f)*periodicBoxSize.y;
localData[tgx].z -= floor((localData[tgx].z-blockCenterX.z)*invPeriodicBoxSize.z+0.5f)*periodicBoxSize.z;
}
}
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
unsigned int atom1 = x*TILE_SIZE+tgx;
unsigned int atom1 = x*TILE_SIZE+tgx;
real bornSum = 0;
real bornSum = 0;
real4 posq1 = posq[atom1];
real4 posq1 = posq[atom1];
APPLY_PERIODIC_TO_POS_WITH_CENTER(posq1, blockCenterX)
float2 params1 = global_params[atom1];
float2 params1 = global_params[atom1];
for (unsigned int j = 0; j < TILE_SIZE; j++) {
for (unsigned int j = 0; j < TILE_SIZE; j++) {
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
...
@@ -321,7 +321,7 @@ __kernel void computeBornSum(
...
@@ -321,7 +321,7 @@ __kernel void computeBornSum(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
int atom2 = atomIndices[j];
int atom2 = atomIndices[j];
...
@@ -412,7 +412,8 @@ __kernel void computeGBSAForce1(
...
@@ -412,7 +412,8 @@ __kernel void computeGBSAForce1(
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
__global real* restrict energyBuffer, __global const real4* restrict posq, __global const real* restrict global_bornRadii,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
__global const int* restrict tiles, __global const unsigned int* restrict interactionCount, real4 periodicBoxSize, real4 invPeriodicBoxSize,
unsigned int maxTiles, __global const real4* restrict blockCenter, __global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ, unsigned int maxTiles, __global const real4* restrict blockCenter,
__global const real4* restrict blockSize, __global const int* restrict interactingAtoms,
#else
#else
unsigned int numTiles,
unsigned int numTiles,
#endif
#endif
...
@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1(
...
@@ -452,7 +453,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1(
...
@@ -516,7 +517,7 @@ __kernel void computeGBSAForce1(
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 posq2 = (real4) (localData[j].x, localData[j].y, localData[j].z, localData[j].q);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
APPLY_PERIODIC_TO_DELTA(delta)
#endif
#endif
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1(
...
@@ -669,15 +670,13 @@ __kernel void computeGBSAForce1(
real4
blockCenterX
=
blockCenter[x]
;
real4
blockCenterX
=
blockCenter[x]
;
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
localData[tgx].x
-=
floor
((
localData[tgx].x-blockCenterX.x
)
*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
localData[tgx],
blockCenterX
)
localData[tgx].y
-=
floor
((
localData[tgx].y-blockCenterX.y
)
*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
localData[tgx].z
-=
floor
((
localData[tgx].z-blockCenterX.z
)
*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
}
}
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
for
(
unsigned
int
tgx
=
0
; tgx < TILE_SIZE; tgx++) {
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
unsigned
int
atom1
=
x*TILE_SIZE+tgx
;
real4
force
=
0
;
real4
force
=
0
;
real4
posq1
=
posq[atom1]
;
real4
posq1
=
posq[atom1]
;
posq1.xyz
-=
floor
(
(
posq1
.xyz-
blockCenterX
.xyz
)
*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_POS_WITH_CENTER
(
posq1
,
blockCenterX
)
float
bornRadius1
=
global_bornRadii[atom1]
;
float
bornRadius1
=
global_bornRadii[atom1]
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
...
@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1(
...
@@ -740,7 +739,7 @@ __kernel void computeGBSAForce1(
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
posq2
=
(
real4
)
(
localData[j].x,
localData[j].y,
localData[j].z,
localData[j].q
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
real4
delta
=
(
real4
)
(
posq2.xyz
-
posq1.xyz,
0
)
;
#
ifdef
USE_PERIODIC
#
ifdef
USE_PERIODIC
delta.xyz
-=
floor
(
delta.xyz*invPeriodicBoxSize.xyz+0.5f
)
*periodicBoxSize.xyz
;
APPLY_PERIODIC_TO_DELTA
(
delta
)
#
endif
#
endif
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
real
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
int
atom2
=
atomIndices[j]
;
int
atom2
=
atomIndices[j]
;
...
...
platforms/opencl/src/kernels/pme.cl
View file @
b21e3182
...
@@ -138,35 +138,34 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -138,35 +138,34 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
real
add
=
pos.w*data[ix].x*data[iy].y*data[iz].z
;
real
add
=
pos.w*data[ix].x*data[iy].y*data[iz].z
;
#
ifdef
USE_DOUBLE_PRECISION
#
ifdef
USE_ALTERNATE_MEMORY_ACCESS_PATTERN
atom_add
(
&pmeGrid[2*index],
(
long
)
(
add*0x100000000
))
;
//
On
Nvidia
devices
(
at
least
Maxwell
anyway
)
,
this
split
ordering
produces
much
higher
performance.
Why?
//
I
have
no
idea!
And
of
course
on
AMD
it
produces
slower
performance.
GPUs
are
not
meant
to
be
understood.
atom_add
(
&pmeGrid[index%2
==
0
?
index/2
:
(
index+GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
)
/2],
(
long
)
(
add*0x100000000
))
;
#
else
#
else
atom_add
(
&pmeGrid[index],
(
long
)
(
add*0x100000000
))
;
atom_add
(
&pmeGrid[index],
(
long
)
(
add*0x100000000
))
;
#
endif
#
endif
}
}
}
}
}
}
}
}
}
}
__kernel
void
finishSpreadCharge
(
__global
long*
restrict
pmeGrid
)
{
__kernel
void
finishSpreadCharge
(
__global
long*
restrict
fixedGrid,
__global
real*
restrict
realGrid
)
{
__global
real2*
realGrid
=
(
__global
real2*
)
pmeGrid
;
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
real
scale
=
EPSILON_FACTOR/
(
real
)
0x100000000
;
real
scale
=
EPSILON_FACTOR/
(
real
)
0x100000000
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
#
ifdef
USE_
DOUBLE_PRECISIO
N
#
ifdef
USE_
ALTERNATE_MEMORY_ACCESS_PATTER
N
long
value
=
pmeGrid[2*index
]
;
long
value
=
fixedGrid[index%2
==
0
?
index/2
:
(
index+gridSize
)
/2
]
;
#
else
#
else
long
value
=
pme
Grid[index]
;
long
value
=
fixed
Grid[index]
;
#
endif
#
endif
real2
realValue
=
(
real2
)
((
real
)
(
value*scale
)
,
0
)
;
realGrid[index]
=
(
real
)
(
value*scale
)
;
realGrid[index]
=
realValue
;
}
}
}
}
#
elif
defined
(
DEVICE_IS_CPU
)
#
elif
defined
(
DEVICE_IS_CPU
)
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real
2
*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
__global
real*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
if
(
firstx
==
lastx
)
if
(
firstx
==
lastx
)
...
@@ -230,7 +229,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -230,7 +229,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
int zindex = gridIndex.z+iz;
int zindex = gridIndex.z+iz;
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
zindex -= (zindex >= GRID_SIZE_Z ? GRID_SIZE_Z : 0);
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
int index = xindex*GRID_SIZE_Y*GRID_SIZE_Z + yindex*GRID_SIZE_Z + zindex;
pmeGrid[index]
.x
+=
EPSILON_FACTOR*pos.w*data[ix].x*data[iy].y*data[iz].z
;
pmeGrid[index] += EPSILON_FACTOR*pos.w*data[ix].x*data[iy].y*data[iz].z;
}
}
}
}
}
}
...
@@ -238,7 +237,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -238,7 +237,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
}
}
#else
#else
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__kernel void gridSpreadCharge(__global const real4* restrict posq, __global const int2* restrict pmeAtomGridIndex, __global const int* restrict pmeAtomRange,
__global
real
2
*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta
)
{
__global real* restrict pmeGrid, __global const real4* restrict pmeBsplineTheta) {
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
unsigned int numGridPoints = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
for (int gridIndex = get_global_id(0); gridIndex < numGridPoints; gridIndex += get_global_size(0)) {
// Compute the charge on a grid point.
// Compute the charge on a grid point.
...
@@ -290,22 +289,23 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -290,22 +289,23 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
}
}
}
}
}
}
pmeGrid[gridIndex]
=
(
real2
)
(
result*EPSILON_FACTOR
,
0
)
;
pmeGrid[gridIndex] = result*EPSILON_FACTOR;
}
}
}
}
#endif
#endif
__kernel
void
reciprocalConvolution
(
__global
real2*
restrict
pmeGrid,
__global
real*
restrict
energyBuffer,
__global
const
real*
restrict
pmeBsplineModuliX,
__kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global const real* restrict pmeBsplineModuliX,
__global
const
real*
restrict
pmeBsplineModuliY,
__global
const
real*
restrict
pmeBsplineModuliZ,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
real
recipScaleFactor
)
{
__global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ, real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
// R2C stores into a half complex matrix where the last dimension is cut by half
real
energy
=
0.0f
;
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
int
kx
=
index/
(
GRID_SIZE_Y*GRID_SIZE_Z
)
;
// real indices
int
remainder
=
index-kx*GRID_SIZE_Y*GRID_SIZE_Z
;
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
int
ky
=
remainder/GRID_SIZE_Z
;
int remainder = index-kx*GRID_SIZE_Y*(GRID_SIZE_Z/2+1);
int
kz
=
remainder-ky*GRID_SIZE_Z
;
int ky = remainder/(GRID_SIZE_Z/2+1);
if
(
kx
==
0
&&
ky
==
0
&&
kz
==
0
)
int kz = remainder-ky*(GRID_SIZE_Z/2+1);
continue
;
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
...
@@ -319,13 +319,53 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
...
@@ -319,13 +319,53 @@ __kernel void reciprocalConvolution(__global real2* restrict pmeGrid, __global r
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
real denom = m2*bx*by*bz;
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kx != 0 || ky != 0 || kz != 0) {
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
pmeGrid[index] = (real2) (grid.x*eterm, grid.y*eterm);
}
}
}
__kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global real* restrict energyBuffer,
__global const real* restrict pmeBsplineModuliX, __global const real* restrict pmeBsplineModuliY, __global const real* restrict pmeBsplineModuliZ,
real4 recipBoxVecX, real4 recipBoxVecY, real4 recipBoxVecZ) {
// R2C stores into a half complex matrix where the last dimension is cut by half
const unsigned int gridSize = GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z;
const real recipScaleFactor = (1.0f/M_PI)*recipBoxVecX.x*recipBoxVecY.y*recipBoxVecZ.z;
real energy = 0;
for (int index = get_global_id(0); index < gridSize; index += get_global_size(0)) {
// real indices
int kx = index/(GRID_SIZE_Y*(GRID_SIZE_Z));
int remainder = index-kx*GRID_SIZE_Y*(GRID_SIZE_Z);
int ky = remainder/(GRID_SIZE_Z);
int kz = remainder-ky*(GRID_SIZE_Z);
int mx = (kx < (GRID_SIZE_X+1)/2) ? kx : (kx-GRID_SIZE_X);
int my = (ky < (GRID_SIZE_Y+1)/2) ? ky : (ky-GRID_SIZE_Y);
int mz = (kz < (GRID_SIZE_Z+1)/2) ? kz : (kz-GRID_SIZE_Z);
real mhx = mx*recipBoxVecX.x;
real mhy = mx*recipBoxVecY.x+my*recipBoxVecY.y;
real mhz = mx*recipBoxVecZ.x+my*recipBoxVecZ.y+mz*recipBoxVecZ.z;
real m2 = mhx*mhx+mhy*mhy+mhz*mhz;
real bx = pmeBsplineModuliX[kx];
real by = pmeBsplineModuliY[ky];
real bz = pmeBsplineModuliZ[kz];
real denom = m2*bx*by*bz;
real eterm = recipScaleFactor*EXP(-RECIP_EXP_FACTOR*m2)/denom;
if (kz >= (GRID_SIZE_Z/2+1)) {
kx = ((kx == 0) ? kx : GRID_SIZE_X-kx);
ky = ((ky == 0) ? ky : GRID_SIZE_Y-ky);
kz = GRID_SIZE_Z-kz;
}
int indexInHalfComplexGrid = kz + ky*(GRID_SIZE_Z/2+1)+kx*(GRID_SIZE_Y*(GRID_SIZE_Z/2+1));
real2 grid = pmeGrid[indexInHalfComplexGrid];
if (kx != 0 || ky != 0 |
|
kz
!=
0
)
{
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
}
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
}
}
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real
2
*
restrict
pmeGrid,
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
data[PME_ORDER]
;
...
@@ -385,7 +425,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
...
@@ -385,7 +425,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
int
zindex
=
gridIndex.z+iz
;
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
real
gridvalue
=
pmeGrid[index]
.x
;
real
gridvalue
=
pmeGrid[index]
;
force.x
+=
ddata[ix].x*data[iy].y*data[iz].z*gridvalue
;
force.x
+=
ddata[ix].x*data[iy].y*data[iz].z*gridvalue
;
force.y
+=
data[ix].x*ddata[iy].y*data[iz].z*gridvalue
;
force.y
+=
data[ix].x*ddata[iy].y*data[iz].z*gridvalue
;
force.z
+=
data[ix].x*data[iy].y*ddata[iz].z*gridvalue
;
force.z
+=
data[ix].x*data[iy].y*ddata[iz].z*gridvalue
;
...
...
platforms/opencl/src/kernels/sort.cl
View file @
b21e3182
...
@@ -109,13 +109,7 @@ __kernel void assignElementsToBuckets(__global const DATA_TYPE* restrict data, u
...
@@ -109,13 +109,7 @@ __kernel void assignElementsToBuckets(__global const DATA_TYPE* restrict data, u
float
maxValue
=
(
float
)
(
range[1]
)
;
float
maxValue
=
(
float
)
(
range[1]
)
;
float
bucketWidth
=
(
maxValue-minValue
)
/numBuckets
;
float
bucketWidth
=
(
maxValue-minValue
)
/numBuckets
;
for
(
uint
index
=
get_global_id
(
0
)
; index < length; index += get_global_size(0)) {
for
(
uint
index
=
get_global_id
(
0
)
; index < length; index += get_global_size(0)) {
#
if
defined
(
MAC_AMD_WORKAROUND
)
&&
VALUE_IS_INT2
__global
int*
d
=
(
__global
int*
)
data
;
int2
element
=
(
int2
)
(
d[2*index],
d[2*index+1]
)
;
float
key
=
(
float
)
getValue
(
element
)
;
#
else
float
key
=
(
float
)
getValue
(
data[index]
)
;
float
key
=
(
float
)
getValue
(
data[index]
)
;
#
endif
uint
bucketIndex
=
min
((
uint
)
((
key-minValue
)
/bucketWidth
)
,
numBuckets-1
)
;
uint
bucketIndex
=
min
((
uint
)
((
key-minValue
)
/bucketWidth
)
,
numBuckets-1
)
;
offsetInBucket[index]
=
atom_inc
(
&bucketOffset[bucketIndex]
)
;
offsetInBucket[index]
=
atom_inc
(
&bucketOffset[bucketIndex]
)
;
bucketOfElement[index]
=
bucketIndex
;
bucketOfElement[index]
=
bucketIndex
;
...
...
platforms/opencl/tests/TestOpenCLFFT.cpp
View file @
b21e3182
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011 Stanford University and the Authors.
*
* Portions copyright (c) 2011
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -51,7 +51,7 @@ using namespace std;
...
@@ -51,7 +51,7 @@ using namespace std;
static
OpenCLPlatform
platform
;
static
OpenCLPlatform
platform
;
template
<
class
Real2
>
template
<
class
Real2
>
void
testTransform
()
{
void
testTransform
(
bool
realToComplex
,
int
xsize
,
int
ysize
,
int
zsize
)
{
System
system
;
System
system
;
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
);
OpenCLPlatform
::
PlatformData
platformData
(
system
,
""
,
""
,
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
),
"false"
);
...
@@ -59,7 +59,6 @@ void testTransform() {
...
@@ -59,7 +59,6 @@ void testTransform() {
context
.
initialize
();
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
init_gen_rand
(
0
,
sfmt
);
int
xsize
=
28
,
ysize
=
25
,
zsize
=
30
;
vector
<
Real2
>
original
(
xsize
*
ysize
*
zsize
);
vector
<
Real2
>
original
(
xsize
*
ysize
*
zsize
);
vector
<
t_complex
>
reference
(
original
.
size
());
vector
<
t_complex
>
reference
(
original
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
original
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
original
.
size
();
i
++
)
{
...
@@ -67,10 +66,16 @@ void testTransform() {
...
@@ -67,10 +66,16 @@ void testTransform() {
original
[
i
]
=
value
;
original
[
i
]
=
value
;
reference
[
i
]
=
t_complex
(
value
.
x
,
value
.
y
);
reference
[
i
]
=
t_complex
(
value
.
x
,
value
.
y
);
}
}
for
(
int
i
=
0
;
i
<
(
int
)
reference
.
size
();
i
++
)
{
if
(
realToComplex
)
reference
[
i
]
=
t_complex
(
i
%
2
==
0
?
original
[
i
/
2
].
x
:
original
[
i
/
2
].
y
,
0
);
else
reference
[
i
]
=
t_complex
(
original
[
i
].
x
,
original
[
i
].
y
);
}
OpenCLArray
grid1
(
context
,
original
.
size
(),
sizeof
(
Real2
),
"grid1"
);
OpenCLArray
grid1
(
context
,
original
.
size
(),
sizeof
(
Real2
),
"grid1"
);
OpenCLArray
grid2
(
context
,
original
.
size
(),
sizeof
(
Real2
),
"grid2"
);
OpenCLArray
grid2
(
context
,
original
.
size
(),
sizeof
(
Real2
),
"grid2"
);
grid1
.
upload
(
original
);
grid1
.
upload
(
original
);
OpenCLFFT3D
fft
(
context
,
xsize
,
ysize
,
zsize
);
OpenCLFFT3D
fft
(
context
,
xsize
,
ysize
,
zsize
,
realToComplex
);
// Perform a forward FFT, then verify the result is correct.
// Perform a forward FFT, then verify the result is correct.
...
@@ -80,9 +85,14 @@ void testTransform() {
...
@@ -80,9 +85,14 @@ void testTransform() {
fftpack_t
plan
;
fftpack_t
plan
;
fftpack_init_3d
(
&
plan
,
xsize
,
ysize
,
zsize
);
fftpack_init_3d
(
&
plan
,
xsize
,
ysize
,
zsize
);
fftpack_exec_3d
(
plan
,
FFTPACK_FORWARD
,
&
reference
[
0
],
&
reference
[
0
]);
fftpack_exec_3d
(
plan
,
FFTPACK_FORWARD
,
&
reference
[
0
],
&
reference
[
0
]);
for
(
int
i
=
0
;
i
<
(
int
)
result
.
size
();
++
i
)
{
int
outputZSize
=
(
realToComplex
?
zsize
/
2
+
1
:
zsize
);
ASSERT_EQUAL_TOL
(
reference
[
i
].
re
,
result
[
i
].
x
,
1e-3
);
for
(
int
x
=
0
;
x
<
xsize
;
x
++
)
ASSERT_EQUAL_TOL
(
reference
[
i
].
im
,
result
[
i
].
y
,
1e-3
);
for
(
int
y
=
0
;
y
<
ysize
;
y
++
)
for
(
int
z
=
0
;
z
<
outputZSize
;
z
++
)
{
int
index1
=
x
*
ysize
*
zsize
+
y
*
zsize
+
z
;
int
index2
=
x
*
ysize
*
outputZSize
+
y
*
outputZSize
+
z
;
ASSERT_EQUAL_TOL
(
reference
[
index1
].
re
,
result
[
index2
].
x
,
1e-3
);
ASSERT_EQUAL_TOL
(
reference
[
index1
].
im
,
result
[
index2
].
y
,
1e-3
);
}
}
fftpack_destroy
(
plan
);
fftpack_destroy
(
plan
);
...
@@ -91,7 +101,8 @@ void testTransform() {
...
@@ -91,7 +101,8 @@ void testTransform() {
fft
.
execFFT
(
grid2
,
grid1
,
false
);
fft
.
execFFT
(
grid2
,
grid1
,
false
);
grid1
.
download
(
result
);
grid1
.
download
(
result
);
double
scale
=
1.0
/
(
xsize
*
ysize
*
zsize
);
double
scale
=
1.0
/
(
xsize
*
ysize
*
zsize
);
for
(
int
i
=
0
;
i
<
(
int
)
result
.
size
();
++
i
)
{
int
valuesToCheck
=
(
realToComplex
?
original
.
size
()
/
2
:
original
.
size
());
for
(
int
i
=
0
;
i
<
valuesToCheck
;
++
i
)
{
ASSERT_EQUAL_TOL
(
original
[
i
].
x
,
scale
*
result
[
i
].
x
,
1e-4
);
ASSERT_EQUAL_TOL
(
original
[
i
].
x
,
scale
*
result
[
i
].
x
,
1e-4
);
ASSERT_EQUAL_TOL
(
original
[
i
].
y
,
scale
*
result
[
i
].
y
,
1e-4
);
ASSERT_EQUAL_TOL
(
original
[
i
].
y
,
scale
*
result
[
i
].
y
,
1e-4
);
}
}
...
@@ -101,10 +112,20 @@ int main(int argc, char* argv[]) {
...
@@ -101,10 +112,20 @@ int main(int argc, char* argv[]) {
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"OpenCLPrecision"
,
string
(
argv
[
1
]));
platform
.
setPropertyDefaultValue
(
"OpenCLPrecision"
,
string
(
argv
[
1
]));
if
(
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
)
==
"double"
)
if
(
platform
.
getPropertyDefaultValue
(
"OpenCLPrecision"
)
==
"double"
)
{
testTransform
<
mm_double2
>
();
testTransform
<
mm_double2
>
(
false
,
28
,
25
,
30
);
else
testTransform
<
mm_double2
>
(
true
,
28
,
25
,
25
);
testTransform
<
mm_float2
>
();
testTransform
<
mm_double2
>
(
true
,
25
,
28
,
25
);
testTransform
<
mm_double2
>
(
true
,
25
,
25
,
28
);
testTransform
<
mm_double2
>
(
true
,
21
,
25
,
27
);
}
else
{
testTransform
<
mm_float2
>
(
false
,
28
,
25
,
30
);
testTransform
<
mm_float2
>
(
true
,
28
,
25
,
25
);
testTransform
<
mm_float2
>
(
true
,
25
,
28
,
25
);
testTransform
<
mm_float2
>
(
true
,
25
,
25
,
28
);
testTransform
<
mm_float2
>
(
true
,
21
,
25
,
27
);
}
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/reference/src/SimTKReference/ObcParameters.cpp
View file @
b21e3182
...
@@ -382,9 +382,9 @@ void ObcParameters::setPeriodic(OpenMM::RealVec* vectors) {
...
@@ -382,9 +382,9 @@ void ObcParameters::setPeriodic(OpenMM::RealVec* vectors) {
assert
(
_cutoff
);
assert
(
_cutoff
);
assert
(
boxSize
[
0
][
0
]
>=
2.0
*
_cutoffDistance
);
assert
(
vectors
[
0
][
0
]
>=
2.0
*
_cutoffDistance
);
assert
(
boxSize
[
1
][
1
]
>=
2.0
*
_cutoffDistance
);
assert
(
vectors
[
1
][
1
]
>=
2.0
*
_cutoffDistance
);
assert
(
boxSize
[
2
][
2
]
>=
2.0
*
_cutoffDistance
);
assert
(
vectors
[
2
][
2
]
>=
2.0
*
_cutoffDistance
);
_periodic
=
true
;
_periodic
=
true
;
_periodicBoxVectors
[
0
]
=
vectors
[
0
];
_periodicBoxVectors
[
0
]
=
vectors
[
0
];
...
...
tests/TestVectorize.cpp
View file @
b21e3182
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2014 Stanford University and the Authors.
*
* Portions copyright (c) 2014
-2015
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -148,6 +148,7 @@ void testMathFunctions() {
...
@@ -148,6 +148,7 @@ void testMathFunctions() {
ASSERT_VEC4_EQUAL
(
min
(
f1
,
f2
),
0.4
,
1.2
,
-
1.2
,
-
5.0
);
ASSERT_VEC4_EQUAL
(
min
(
f1
,
f2
),
0.4
,
1.2
,
-
1.2
,
-
5.0
);
ASSERT_VEC4_EQUAL
(
max
(
f1
,
f2
),
1.1
,
1.9
,
1.3
,
-
3.8
);
ASSERT_VEC4_EQUAL
(
max
(
f1
,
f2
),
1.1
,
1.9
,
1.3
,
-
3.8
);
ASSERT_VEC4_EQUAL
(
sqrt
(
fvec4
(
1.5
,
3.1
,
4.0
,
15.0
)),
sqrt
(
1.5
),
sqrt
(
3.1
),
sqrt
(
4.0
),
sqrt
(
15.0
));
ASSERT_VEC4_EQUAL
(
sqrt
(
fvec4
(
1.5
,
3.1
,
4.0
,
15.0
)),
sqrt
(
1.5
),
sqrt
(
3.1
),
sqrt
(
4.0
),
sqrt
(
15.0
));
ASSERT_VEC4_EQUAL
(
rsqrt
(
fvec4
(
1.5
,
3.1
,
4.0
,
15.0
)),
1.0
/
sqrt
(
1.5
),
1.0
/
sqrt
(
3.1
),
1.0
/
sqrt
(
4.0
),
1.0
/
sqrt
(
15.0
));
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
],
dot3
(
f1
,
f2
),
1e-6
);
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
],
dot3
(
f1
,
f2
),
1e-6
);
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
]
+
f1
[
3
]
*
f2
[
3
],
dot4
(
f1
,
f2
),
1e-6
);
ASSERT_EQUAL_TOL
(
f1
[
0
]
*
f2
[
0
]
+
f1
[
1
]
*
f2
[
1
]
+
f1
[
2
]
*
f2
[
2
]
+
f1
[
3
]
*
f2
[
3
],
dot4
(
f1
,
f2
),
1e-6
);
ASSERT
(
any
(
f1
>
0.5
));
ASSERT
(
any
(
f1
>
0.5
));
...
...
wrappers/python/src/swig_doxygen/swig_lib/python/extend.i
View file @
b21e3182
...
@@ -417,7 +417,6 @@ Parameters:
...
@@ -417,7 +417,6 @@ Parameters:
@
staticmethod
@
staticmethod
def
deserialize
(
inputString
)
:
def
deserialize
(
inputString
)
:
"""Reconstruct an object that has been serialized as XML."""
"""Reconstruct an object that has been serialized as XML."""
#
Look
for
the
first
tag
to
figure
out
what
type
of
object
it
is
.
import
re
import
re
match
=
re
.
search
(
"<([^?]\S*)"
,
inputString
)
match
=
re
.
search
(
"<([^?]\S*)"
,
inputString
)
if
match
is
None
:
if
match
is
None
:
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment