Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
59f8cf87
"plugins/vscode:/vscode.git/clone" did not exist on "a741138b1e564f35101bc9e7dd0ee48495bc635c"
Commit
59f8cf87
authored
Oct 17, 2012
by
Peter Eastman
Browse files
Finished implementing double precision for OpenCL
parent
c8dac206
Changes
23
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
53 additions
and
53 deletions
+53
-53
platforms/opencl/src/kernels/pme_cpu.cl
platforms/opencl/src/kernels/pme_cpu.cl
+51
-51
platforms/opencl/src/kernels/removeCM.cl
platforms/opencl/src/kernels/removeCM.cl
+1
-1
platforms/opencl/tests/TestOpenCLAndersenThermostat.cpp
platforms/opencl/tests/TestOpenCLAndersenThermostat.cpp
+1
-1
No files found.
platforms/opencl/src/kernels/pme_cpu.cl
View file @
59f8cf87
__kernel
void
updateBsplines
(
__global
const
float
4*
restrict
posq,
__global
float
4*
restrict
pmeBsplineTheta,
__local
float
4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
float
4
periodicBoxSize,
float
4
invPeriodicBoxSize,
__global
float
4*
restrict
pmeBsplineDTheta
)
{
const
float
4
scale
=
1.0f/
(
PME_ORDER-1
)
;
__kernel
void
updateBsplines
(
__global
const
real
4*
restrict
posq,
__global
real
4*
restrict
pmeBsplineTheta,
__local
real
4*
restrict
bsplinesCache,
__global
int2*
restrict
pmeAtomGridIndex,
real
4
periodicBoxSize,
real
4
invPeriodicBoxSize,
__global
real
4*
restrict
pmeBsplineDTheta
)
{
const
real
4
scale
=
1.0f/
(
PME_ORDER-1
)
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
__local
float
4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
__local
float
4*
ddata
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER
+
get_local_size
(
0
)
*PME_ORDER]
;
__local
real
4*
data
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER]
;
__local
real
4*
ddata
=
&bsplinesCache[get_local_id
(
0
)
*PME_ORDER
+
get_local_size
(
0
)
*PME_ORDER]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
data[j]
=
0
.0f
;
ddata[j]
=
0
.0f
;
data[j]
=
0
;
ddata[j]
=
0
;
}
float
4
pos
=
posq[i]
;
real
4
pos
=
posq[i]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
float
4
t
=
(
float
4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
real
4
t
=
(
real
4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0
.0f
)
;
float
4
dr
=
(
float
4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0
.0f
)
;
data[PME_ORDER-1]
=
0
.0f
;
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0
)
;
real
4
dr
=
(
real
4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0
)
;
data[PME_ORDER-1]
=
0
;
data[1]
=
dr
;
data[0]
=
1.0f-dr
;
for
(
int
j
=
3
; j < PME_ORDER; j++) {
float
div
=
1.0f/
(
j-1.0f
)
;
real
div
=
1.0f/
(
j-1.0f
)
;
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
float
4
)
k
)
*data[j-k-2]
+
(
-dr+
(
float
4
)
(
j-k
))
*data[j-k-1]
)
;
data[j-k-1]
=
div*
((
dr+
(
real
4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real
4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
}
ddata[0]
=
-data[0]
;
...
...
@@ -30,7 +30,7 @@ __kernel void updateBsplines(__global const float4* restrict posq, __global floa
ddata[j]
=
data[j-1]-data[j]
;
data[PME_ORDER-1]
=
scale*dr*data[PME_ORDER-2]
;
for
(
int
j
=
1
; j < (PME_ORDER-1); j++)
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
float
4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
float
4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[PME_ORDER-j-1]
=
scale*
((
dr+
(
real
4
)
j
)
*data[PME_ORDER-j-2]
+
(
-dr+
(
real
4
)
(
PME_ORDER-j
))
*data[PME_ORDER-j-1]
)
;
data[0]
=
scale*
(
-dr+1.0f
)
*data[0]
;
for
(
int
j
=
0
; j < PME_ORDER; j++) {
pmeBsplineTheta[i+j*NUM_ATOMS]
=
data[j]
;
...
...
@@ -42,27 +42,27 @@ __kernel void updateBsplines(__global const float4* restrict posq, __global floa
/**
*
This
kernel
is
not
actually
used
when
running
on
a
CPU.
*/
__kernel
void
findAtomRangeForGrid
(
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
int*
restrict
pmeAtomRange,
__global
const
float
4*
restrict
posq,
float
4
periodicBoxSize,
float
4
invPeriodicBoxSize
)
{
__kernel
void
findAtomRangeForGrid
(
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
int*
restrict
pmeAtomRange,
__global
const
real
4*
restrict
posq,
real
4
periodicBoxSize,
real
4
invPeriodicBoxSize
)
{
}
__kernel
void
gridSpreadCharge
(
__global
const
float
4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
float
2*
restrict
pmeGrid,
__global
const
float
4*
restrict
pmeBsplineTheta,
float
4
periodicBoxSize,
float
4
invPeriodicBoxSize
)
{
__kernel
void
gridSpreadCharge
(
__global
const
real
4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
real
2*
restrict
pmeGrid,
__global
const
real
4*
restrict
pmeBsplineTheta,
real
4
periodicBoxSize,
real
4
invPeriodicBoxSize
)
{
const
int
firstx
=
get_global_id
(
0
)
*GRID_SIZE_X/get_global_size
(
0
)
;
const
int
lastx
=
(
get_global_id
(
0
)
+1
)
*GRID_SIZE_X/get_global_size
(
0
)
;
for
(
int
gridIndex
=
firstx*GRID_SIZE_Y*GRID_SIZE_Z
; gridIndex < lastx*GRID_SIZE_Y*GRID_SIZE_Z; gridIndex++)
pmeGrid[gridIndex]
=
(
float2
)
(
0.0f,
0.0f
)
;
pmeGrid[gridIndex]
=
(
real2
)
0
;
for
(
int
atom
=
0
; atom < NUM_ATOMS; atom++) {
float
4
pos
=
posq[atom]
;
real
4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
float
4
t
=
(
float
4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
real
4
t
=
(
real
4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0
.0f
)
;
float
4
dr
=
(
float
4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0
.0f
)
;
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0
)
;
real
4
dr
=
(
real
4
)
(
t.x-
(
int
)
t.x,
t.y-
(
int
)
t.y,
t.z-
(
int
)
t.z,
0
)
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
float
atomCharge
=
pos.w*EPSILON_FACTOR
;
real
atomCharge
=
pos.w*EPSILON_FACTOR
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
...
...
@@ -82,10 +82,10 @@ __kernel void gridSpreadCharge(__global const float4* restrict posq, __global co
}
}
__kernel
void
reciprocalConvolution
(
__global
float
2*
restrict
pmeGrid,
__global
float
*
restrict
energyBuffer,
__global
const
float
*
restrict
pmeBsplineModuliX,
__global
const
float
*
restrict
pmeBsplineModuliY,
__global
const
float
*
restrict
pmeBsplineModuliZ,
float
4
invPeriodicBoxSize,
float
recipScaleFactor
)
{
__kernel
void
reciprocalConvolution
(
__global
real
2*
restrict
pmeGrid,
__global
real
*
restrict
energyBuffer,
__global
const
real
*
restrict
pmeBsplineModuliX,
__global
const
real
*
restrict
pmeBsplineModuliY,
__global
const
real
*
restrict
pmeBsplineModuliZ,
real
4
invPeriodicBoxSize,
real
recipScaleFactor
)
{
const
unsigned
int
gridSize
=
GRID_SIZE_X*GRID_SIZE_Y*GRID_SIZE_Z
;
float
energy
=
0
.0f
;
real
energy
=
0
;
for
(
int
index
=
get_global_id
(
0
)
; index < gridSize; index += get_global_size(0)) {
int
kx
=
index/
(
GRID_SIZE_Y*GRID_SIZE_Z
)
;
int
remainder
=
index-kx*GRID_SIZE_Y*GRID_SIZE_Z
;
...
...
@@ -96,60 +96,60 @@ __kernel void reciprocalConvolution(__global float2* restrict pmeGrid, __global
int
mx
=
(
kx
<
(
GRID_SIZE_X+1
)
/2
)
?
kx
:
(
kx-GRID_SIZE_X
)
;
int
my
=
(
ky
<
(
GRID_SIZE_Y+1
)
/2
)
?
ky
:
(
ky-GRID_SIZE_Y
)
;
int
mz
=
(
kz
<
(
GRID_SIZE_Z+1
)
/2
)
?
kz
:
(
kz-GRID_SIZE_Z
)
;
float
mhx
=
mx*invPeriodicBoxSize.x
;
float
mhy
=
my*invPeriodicBoxSize.y
;
float
mhz
=
mz*invPeriodicBoxSize.z
;
float
bx
=
pmeBsplineModuliX[kx]
;
float
by
=
pmeBsplineModuliY[ky]
;
float
bz
=
pmeBsplineModuliZ[kz]
;
float
2
grid
=
pmeGrid[index]
;
float
m2
=
mhx*mhx+mhy*mhy+mhz*mhz
;
float
denom
=
m2*bx*by*bz
;
float
eterm
=
recipScaleFactor*EXP
(
-RECIP_EXP_FACTOR*m2
)
/denom
;
pmeGrid[index]
=
(
float
2
)
(
grid.x*eterm,
grid.y*eterm
)
;
real
mhx
=
mx*invPeriodicBoxSize.x
;
real
mhy
=
my*invPeriodicBoxSize.y
;
real
mhz
=
mz*invPeriodicBoxSize.z
;
real
bx
=
pmeBsplineModuliX[kx]
;
real
by
=
pmeBsplineModuliY[ky]
;
real
bz
=
pmeBsplineModuliZ[kz]
;
real
2
grid
=
pmeGrid[index]
;
real
m2
=
mhx*mhx+mhy*mhy+mhz*mhz
;
real
denom
=
m2*bx*by*bz
;
real
eterm
=
recipScaleFactor*EXP
(
-RECIP_EXP_FACTOR*m2
)
/denom
;
pmeGrid[index]
=
(
real
2
)
(
grid.x*eterm,
grid.y*eterm
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
}
__kernel
void
gridInterpolateForce
(
__global
const
float
4*
restrict
posq,
__global
float
4*
restrict
forceBuffers,
__global
const
float
2*
restrict
pmeGrid,
float
4
periodicBoxSize,
float
4
invPeriodicBoxSize,
__global
const
float
4*
restrict
pmeBsplineTheta,
__global
const
float
4*
restrict
pmeBsplineDTheta
)
{
__kernel
void
gridInterpolateForce
(
__global
const
real
4*
restrict
posq,
__global
real
4*
restrict
forceBuffers,
__global
const
real
2*
restrict
pmeGrid,
real
4
periodicBoxSize,
real
4
invPeriodicBoxSize,
__global
const
real
4*
restrict
pmeBsplineTheta,
__global
const
real
4*
restrict
pmeBsplineDTheta
)
{
for
(
int
atom
=
get_global_id
(
0
)
; atom < NUM_ATOMS; atom += get_global_size(0)) {
float
4
force
=
0
.0f
;
float
4
pos
=
posq[atom]
;
real
4
force
=
0
;
real
4
pos
=
posq[atom]
;
pos.x
-=
floor
(
pos.x*invPeriodicBoxSize.x
)
*periodicBoxSize.x
;
pos.y
-=
floor
(
pos.y*invPeriodicBoxSize.y
)
*periodicBoxSize.y
;
pos.z
-=
floor
(
pos.z*invPeriodicBoxSize.z
)
*periodicBoxSize.z
;
float
4
t
=
(
float
4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0
.0f
)
;
real
4
t
=
(
real
4
)
((
pos.x*invPeriodicBoxSize.x
)
*GRID_SIZE_X,
(
pos.y*invPeriodicBoxSize.y
)
*GRID_SIZE_Y,
(
pos.z*invPeriodicBoxSize.z
)
*GRID_SIZE_Z,
0
)
;
int4
gridIndex
=
(
int4
)
(((
int
)
t.x
)
%
GRID_SIZE_X,
((
int
)
t.y
)
%
GRID_SIZE_Y,
((
int
)
t.z
)
%
GRID_SIZE_Z,
0
)
;
for
(
int
ix
=
0
; ix < PME_ORDER; ix++) {
int
xindex
=
gridIndex.x+ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
)
;
float
tx
=
pmeBsplineTheta[atom+ix*NUM_ATOMS].x
;
float
dtx
=
pmeBsplineDTheta[atom+ix*NUM_ATOMS].x
;
real
tx
=
pmeBsplineTheta[atom+ix*NUM_ATOMS].x
;
real
dtx
=
pmeBsplineDTheta[atom+ix*NUM_ATOMS].x
;
for
(
int
iy
=
0
; iy < PME_ORDER; iy++) {
int
yindex
=
gridIndex.y+iy
;
yindex
-=
(
yindex
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
)
;
float
ty
=
pmeBsplineTheta[atom+iy*NUM_ATOMS].y
;
float
dty
=
pmeBsplineDTheta[atom+iy*NUM_ATOMS].y
;
real
ty
=
pmeBsplineTheta[atom+iy*NUM_ATOMS].y
;
real
dty
=
pmeBsplineDTheta[atom+iy*NUM_ATOMS].y
;
for
(
int
iz
=
0
; iz < PME_ORDER; iz++) {
int
zindex
=
gridIndex.z+iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
)
;
float
tz
=
pmeBsplineTheta[atom+iz*NUM_ATOMS].z
;
float
dtz
=
pmeBsplineDTheta[atom+iz*NUM_ATOMS].z
;
real
tz
=
pmeBsplineTheta[atom+iz*NUM_ATOMS].z
;
real
dtz
=
pmeBsplineDTheta[atom+iz*NUM_ATOMS].z
;
int
index
=
xindex*GRID_SIZE_Y*GRID_SIZE_Z
+
yindex*GRID_SIZE_Z
+
zindex
;
float
gridvalue
=
pmeGrid[index].x
;
real
gridvalue
=
pmeGrid[index].x
;
force.x
+=
dtx*ty*tz*gridvalue
;
force.y
+=
tx*dty*tz*gridvalue
;
force.z
+=
tx*ty*dtz*gridvalue
;
}
}
}
float
4
totalForce
=
forceBuffers[atom]
;
float
q
=
pos.w*EPSILON_FACTOR
;
real
4
totalForce
=
forceBuffers[atom]
;
real
q
=
pos.w*EPSILON_FACTOR
;
totalForce.x
-=
q*force.x*GRID_SIZE_X*invPeriodicBoxSize.x
;
totalForce.y
-=
q*force.y*GRID_SIZE_Y*invPeriodicBoxSize.y
;
totalForce.z
-=
q*force.z*GRID_SIZE_Z*invPeriodicBoxSize.z
;
...
...
platforms/opencl/src/kernels/removeCM.cl
View file @
59f8cf87
...
...
@@ -98,7 +98,7 @@ __kernel void removeCenterOfMassMomentum(unsigned int numAtoms, __global mixed4*
temp[thread]
+=
temp[thread+2]
;
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
cm
=
(
temp[0]+temp[1]
)
*INVERSE_TOTAL_MASS
;
cm
=
(
float4
)
(
INVERSE_TOTAL_MASS*
(
temp[0]
.x
+temp[1]
.x
)
,
INVERSE_TOTAL_MASS*
(
temp[0].y+temp[1].y
)
,
INVERSE_TOTAL_MASS*
(
temp[0].z+temp[1].z
)
,
0
)
;
//
Now
remove
the
center
of
mass
velocity
from
each
atom.
...
...
platforms/opencl/tests/TestOpenCLAndersenThermostat.cpp
View file @
59f8cf87
...
...
@@ -84,7 +84,7 @@ void testTemperature() {
}
ke
/=
numSteps
;
double
expected
=
0.5
*
numParticles
*
3
*
BOLTZ
*
temp
;
ASSERT_EQUAL_TOL
(
expected
,
ke
,
6
/
std
::
sqrt
((
double
)
numSteps
));
ASSERT_
USUALLY_
EQUAL_TOL
(
expected
,
ke
,
6
/
std
::
sqrt
((
double
)
numSteps
));
}
void
testConstraints
()
{
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment