Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3a1936fd
Commit
3a1936fd
authored
Dec 11, 2015
by
peastman
Browse files
Merge pull request #1290 from peastman/energyfix
Fixed error in energy computation
parents
4d32047c
86b2af14
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
4 deletions
+14
-4
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-0
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+4
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-0
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+8
-4
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
3a1936fd
...
@@ -1717,6 +1717,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1717,6 +1717,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cuDeviceGetName(deviceName, 100, cu.getDevice());
cuDeviceGetName(deviceName, 100, cu.getDevice());
usePmeStream = (string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
usePmeStream = (string(deviceName) != "GeForce GTX 980"); // Using a separate stream is slower on GTX 980
if (usePmeStream) {
if (usePmeStream) {
pmeDefines["USE_PME_STREAM"] = "1";
cuStreamCreate(&pmeStream, CU_STREAM_NON_BLOCKING);
cuStreamCreate(&pmeStream, CU_STREAM_NON_BLOCKING);
if (useCudaFFT) {
if (useCudaFFT) {
cufftSetStream(fftForward, pmeStream);
cufftSetStream(fftForward, pmeStream);
...
...
platforms/cuda/src/kernels/pme.cu
View file @
3a1936fd
...
@@ -188,7 +188,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -188,7 +188,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
}
}
}
}
#ifdef USE_PME_STREAM
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
#else
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
energy
;
#endif
}
}
extern
"C"
__global__
extern
"C"
__global__
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
3a1936fd
...
@@ -1687,6 +1687,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1687,6 +1687,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeDefines
[
"USE_ALTERNATE_MEMORY_ACCESS_PATTERN"
]
=
"1"
;
pmeDefines
[
"USE_ALTERNATE_MEMORY_ACCESS_PATTERN"
]
=
"1"
;
usePmeQueue
=
isNvidia
;
usePmeQueue
=
isNvidia
;
if
(
usePmeQueue
)
{
if
(
usePmeQueue
)
{
pmeDefines
[
"USE_PME_STREAM"
]
=
"1"
;
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
pmeQueue
=
cl
::
CommandQueue
(
cl
.
getContext
(),
cl
.
getDevice
());
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
int
recipForceGroup
=
force
.
getReciprocalSpaceForceGroup
();
if
(
recipForceGroup
<
0
)
if
(
recipForceGroup
<
0
)
...
...
platforms/opencl/src/kernels/pme.cl
View file @
3a1936fd
...
@@ -84,7 +84,7 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
...
@@ -84,7 +84,7 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real
4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
data[PME_ORDER]
;
//
Process
the
atoms
in
spatially
sorted
order.
This
improves
efficiency
when
writing
//
Process
the
atoms
in
spatially
sorted
order.
This
improves
efficiency
when
writing
...
@@ -118,7 +118,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
...
@@ -118,7 +118,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
data[j-1]
=
div*dr*data[j-2]
;
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
data[0]
=
div*
(
-dr+1.0f
)
*data[0]
;
}
}
data[PME_ORDER-1]
=
scale*dr*data[PME_ORDER-2]
;
data[PME_ORDER-1]
=
scale*dr*data[PME_ORDER-2]
;
for
(
int
j
=
1
; j < (PME_ORDER-1); j++)
for
(
int
j
=
1
; j < (PME_ORDER-1); j++)
...
@@ -362,12 +362,16 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
...
@@ -362,12 +362,16 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
}
}
#
ifdef
USE_PME_STREAM
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
#
else
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
#
endif
}
}
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
const
real
4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
@@ -403,7 +407,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
...
@@ -403,7 +407,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
data[j-1]
=
div*dr*data[j-2]
;
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
data[0]
=
div*
(
-dr+1.0f
)
*data[0]
;
}
}
ddata[0]
=
-data[0]
;
ddata[0]
=
-data[0]
;
for
(
int
j
=
1
; j < PME_ORDER; j++)
for
(
int
j
=
1
; j < PME_ORDER; j++)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment