Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
86b2af14
"platforms/opencl/vscode:/vscode.git/clone" did not exist on "fa8934671bee80d899076258ce0e8e944a2c12c0"
Commit
86b2af14
authored
Dec 11, 2015
by
peastman
Browse files
Fixed error in energy computation
parent
d92e0937
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
14 additions
and
4 deletions
+14
-4
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-0
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+4
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+1
-0
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+8
-4
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
86b2af14
...
...
@@ -1717,6 +1717,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
cuDeviceGetName
(
deviceName
,
100
,
cu
.
getDevice
());
usePmeStream
=
(
string
(
deviceName
)
!=
"GeForce GTX 980"
);
// Using a separate stream is slower on GTX 980
if
(
usePmeStream
)
{
pmeDefines
[
"USE_PME_STREAM"
]
=
"1"
;
cuStreamCreate
(
&
pmeStream
,
CU_STREAM_NON_BLOCKING
);
if
(
useCudaFFT
)
{
cufftSetStream
(
fftForward
,
pmeStream
);
...
...
platforms/cuda/src/kernels/pme.cu
View file @
86b2af14
...
...
@@ -188,7 +188,11 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
}
}
#ifdef USE_PME_STREAM
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
#else
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
0.5
f
*
energy
;
#endif
}
extern
"C"
__global__
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
86b2af14
...
...
@@ -1687,6 +1687,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeDefines["USE_ALTERNATE_MEMORY_ACCESS_PATTERN"] = "1";
usePmeQueue = isNvidia;
if (usePmeQueue) {
pmeDefines["USE_PME_STREAM"] = "1";
pmeQueue = cl::CommandQueue(cl.getContext(), cl.getDevice());
int recipForceGroup = force.getReciprocalSpaceForceGroup();
if (recipForceGroup < 0)
...
...
platforms/opencl/src/kernels/pme.cl
View file @
86b2af14
...
...
@@ -84,7 +84,7 @@ __kernel void recordZIndex(__global int2* restrict pmeAtomGridIndex, __global co
__kernel
void
gridSpreadCharge
(
__global
const
real4*
restrict
posq,
__global
const
int2*
restrict
pmeAtomGridIndex,
__global
const
int*
restrict
pmeAtomRange,
__global
long*
restrict
pmeGrid,
__global
const
real4*
restrict
pmeBsplineTheta,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ
)
{
const
real
4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
//
Process
the
atoms
in
spatially
sorted
order.
This
improves
efficiency
when
writing
...
...
@@ -118,7 +118,7 @@ __kernel void gridSpreadCharge(__global const real4* restrict posq, __global con
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
data[0]
=
div*
(
-dr+1.0f
)
*data[0]
;
}
data[PME_ORDER-1]
=
scale*dr*data[PME_ORDER-2]
;
for
(
int
j
=
1
; j < (PME_ORDER-1); j++)
...
...
@@ -362,12 +362,16 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
#
ifdef
USE_PME_STREAM
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
#
else
energyBuffer[get_global_id
(
0
)
]
+=
0.5f*energy
;
#
endif
}
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
real4
periodicBoxSize,
real4
recipBoxVecX,
real4
recipBoxVecY,
real4
recipBoxVecZ,
__global
int2*
restrict
pmeAtomGridIndex
)
{
const
real
4
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
const
real
scale
=
1/
(
real
)
(
PME_ORDER-1
)
;
real4
data[PME_ORDER]
;
real4
ddata[PME_ORDER]
;
...
...
@@ -403,7 +407,7 @@ __kernel void gridInterpolateForce(__global const real4* restrict posq, __global
data[j-1]
=
div*dr*data[j-2]
;
for
(
int
k
=
1
; k < (j-1); k++)
data[j-k-1]
=
div*
((
dr+
(
real4
)
k
)
*data[j-k-2]
+
(
-dr+
(
real4
)
(
j-k
))
*data[j-k-1]
)
;
data[0]
=
div*
(
-
dr+1.0f
)
*data[0]
;
data[0]
=
div*
(
-dr+1.0f
)
*data[0]
;
}
ddata[0]
=
-data[0]
;
for
(
int
j
=
1
; j < PME_ORDER; j++)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment