Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
5cb6ad03
Commit
5cb6ad03
authored
Nov 05, 2015
by
Peter Eastman
Browse files
Fixed a race condition in energy computation
parent
59b0b53f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
55 additions
and
10 deletions
+55
-10
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+16
-3
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+7
-1
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+3
-1
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+21
-3
platforms/opencl/src/kernels/pme.cl
platforms/opencl/src/kernels/pme.cl
+6
-1
No files found.
platforms/cuda/include/CudaKernels.h
View file @
5cb6ad03
...
@@ -592,7 +592,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
...
@@ -592,7 +592,7 @@ class CudaCalcNonbondedForceKernel : public CalcNonbondedForceKernel {
public:
public:
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
CudaCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
cu
(
cu
),
hasInitializedFFT
(
false
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
directPmeGrid
(
NULL
),
reciprocalPmeGrid
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
CudaCalcNonbondedForceKernel
();
~
CudaCalcNonbondedForceKernel
();
/**
/**
...
@@ -657,6 +657,7 @@ private:
...
@@ -657,6 +657,7 @@ private:
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeBsplineModuliZ
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomRange
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeAtomGridIndex
;
CudaArray
*
pmeEnergyBuffer
;
CudaSort
*
sort
;
CudaSort
*
sort
;
Kernel
cpuPme
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
5cb6ad03
...
@@ -1458,16 +1458,24 @@ private:
...
@@ -1458,16 +1458,24 @@ private:
class
CudaCalcNonbondedForceKernel
::
SyncStreamPostComputation
:
public
CudaContext
::
ForcePostComputation
{
class
CudaCalcNonbondedForceKernel
::
SyncStreamPostComputation
:
public
CudaContext
::
ForcePostComputation
{
public:
public:
SyncStreamPostComputation
(
CudaContext
&
cu
,
CUevent
event
,
int
forceGroup
)
:
cu
(
cu
),
event
(
event
),
forceGroup
(
forceGroup
)
{
SyncStreamPostComputation
(
CudaContext
&
cu
,
CUevent
event
,
CUfunction
addEnergyKernel
,
CudaArray
&
pmeEnergyBuffer
,
int
forceGroup
)
:
cu
(
cu
),
event
(
event
),
addEnergyKernel
(
addEnergyKernel
),
pmeEnergyBuffer
(
pmeEnergyBuffer
),
forceGroup
(
forceGroup
)
{
}
}
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
if
((
groups
&
(
1
<<
forceGroup
))
!=
0
)
if
((
groups
&
(
1
<<
forceGroup
))
!=
0
)
cuStreamWaitEvent
(
cu
.
getCurrentStream
(),
event
,
0
);
cuStreamWaitEvent
(
cu
.
getCurrentStream
(),
event
,
0
);
if
(
includeEnergy
)
{
int
bufferSize
=
pmeEnergyBuffer
.
getSize
();
void
*
args
[]
=
{
&
pmeEnergyBuffer
.
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
bufferSize
};
cu
.
executeKernel
(
addEnergyKernel
,
args
,
bufferSize
);
}
return
0.0
;
return
0.0
;
}
}
private:
private:
CudaContext
&
cu
;
CudaContext
&
cu
;
CUevent
event
;
CUevent
event
;
CUfunction
addEnergyKernel
;
CudaArray
&
pmeEnergyBuffer
;
int
forceGroup
;
int
forceGroup
;
};
};
...
@@ -1493,6 +1501,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
...
@@ -1493,6 +1501,8 @@ CudaCalcNonbondedForceKernel::~CudaCalcNonbondedForceKernel() {
delete
pmeAtomRange
;
delete
pmeAtomRange
;
if
(
pmeAtomGridIndex
!=
NULL
)
if
(
pmeAtomGridIndex
!=
NULL
)
delete
pmeAtomGridIndex
;
delete
pmeAtomGridIndex
;
if
(
pmeEnergyBuffer
!=
NULL
)
delete
pmeEnergyBuffer
;
if
(
sort
!=
NULL
)
if
(
sort
!=
NULL
)
delete
sort
;
delete
sort
;
if
(
fft
!=
NULL
)
if
(
fft
!=
NULL
)
...
@@ -1681,6 +1691,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1681,6 +1691,9 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
int
energyElementSize
=
(
cu
.
getUseDoublePrecision
()
||
cu
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeEnergyBuffer
=
new
CudaArray
(
cu
,
cu
.
getNumThreadBlocks
()
*
CudaContext
::
ThreadBlockSize
,
energyElementSize
,
"pmeEnergyBuffer"
);
cu
.
clearBuffer
(
*
pmeEnergyBuffer
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
int
cufftVersion
;
int
cufftVersion
;
cufftGetVersion
(
&
cufftVersion
);
cufftGetVersion
(
&
cufftVersion
);
...
@@ -1714,7 +1727,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1714,7 +1727,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
if
(
recipForceGroup
<
0
)
if
(
recipForceGroup
<
0
)
recipForceGroup
=
force
.
getForceGroup
();
recipForceGroup
=
force
.
getForceGroup
();
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
cu
,
pmeStream
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPreComputation
(
new
SyncStreamPreComputation
(
cu
,
pmeStream
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
cu
,
pmeSyncEvent
,
recipForceGroup
));
cu
.
addPostComputation
(
new
SyncStreamPostComputation
(
cu
,
pmeSyncEvent
,
cu
.
getKernel
(
module
,
"addEnergy"
),
*
pmeEnergyBuffer
,
recipForceGroup
));
}
}
hasInitializedFFT
=
true
;
hasInitializedFFT
=
true
;
...
@@ -1889,7 +1902,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1889,7 +1902,7 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
}
}
if
(
includeEnergy
)
{
if
(
includeEnergy
)
{
void
*
computeEnergyArgs
[]
=
{
&
reciprocalPmeGrid
->
getDevicePointer
(),
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
void
*
computeEnergyArgs
[]
=
{
&
reciprocalPmeGrid
->
getDevicePointer
(),
usePmeStream
?
&
pmeEnergyBuffer
->
getDevicePointer
()
:
&
cu
.
getEnergyBuffer
().
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
&
pmeBsplineModuliX
->
getDevicePointer
(),
&
pmeBsplineModuliY
->
getDevicePointer
(),
&
pmeBsplineModuliZ
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
getPeriodicBoxSizePointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeEvalEnergyKernel
,
computeEnergyArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeEvalEnergyKernel
,
computeEnergyArgs
,
cu
.
getNumAtoms
());
...
...
platforms/cuda/src/kernels/pme.cu
View file @
5cb6ad03
...
@@ -188,7 +188,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -188,7 +188,7 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
energy
+=
eterm
*
(
grid
.
x
*
grid
.
x
+
grid
.
y
*
grid
.
y
);
}
}
}
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+
=
0.5
f
*
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
0.5
f
*
energy
;
}
}
extern
"C"
__global__
extern
"C"
__global__
...
@@ -286,3 +286,9 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_
...
@@ -286,3 +286,9 @@ void addForces(const real4* __restrict__ forces, unsigned long long* __restrict_
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
z
*
0x100000000
));
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
static_cast
<
unsigned
long
long
>
((
long
long
)
(
f
.
z
*
0x100000000
));
}
}
}
}
/**
 * Add the contents of pmeEnergyBuffer element-wise into energyBuffer.
 *
 * Each thread starts at its flat global index and advances by the total
 * number of launched threads (blockDim.x*gridDim.x), so every index in
 * [0, bufferSize) is handled for any launch configuration.
 *
 * @param pmeEnergyBuffer  source values to be added (read only)
 * @param energyBuffer     destination buffer that is accumulated into
 * @param bufferSize       number of elements to process in both buffers
 */
extern "C" __global__ void addEnergy(const mixed* __restrict__ pmeEnergyBuffer, mixed* __restrict__ energyBuffer, int bufferSize) {
    // Total number of threads in the launch; this is the step between the
    // consecutive elements handled by a single thread.
    const unsigned int stride = blockDim.x*gridDim.x;
    int index = blockIdx.x*blockDim.x+threadIdx.x;
    while (index < bufferSize) {
        energyBuffer[index] += pmeEnergyBuffer[index];
        index += stride;
    }
}
platforms/opencl/include/OpenCLKernels.h
View file @
5cb6ad03
...
@@ -570,7 +570,7 @@ public:
...
@@ -570,7 +570,7 @@ public:
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
hasInitializedKernel
(
false
),
cl
(
cl
),
sigmaEpsilon
(
NULL
),
exceptionParams
(
NULL
),
cosSinSums
(
NULL
),
pmeGrid
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeGrid2
(
NULL
),
pmeBsplineModuliX
(
NULL
),
pmeBsplineModuliY
(
NULL
),
pmeBsplineModuliZ
(
NULL
),
pmeBsplineTheta
(
NULL
),
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
pmeAtomRange
(
NULL
),
pmeAtomGridIndex
(
NULL
),
pmeEnergyBuffer
(
NULL
),
sort
(
NULL
),
fft
(
NULL
),
pmeio
(
NULL
)
{
}
}
~
OpenCLCalcNonbondedForceKernel
();
~
OpenCLCalcNonbondedForceKernel
();
/**
/**
...
@@ -636,12 +636,14 @@ private:
...
@@ -636,12 +636,14 @@ private:
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeBsplineTheta
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomRange
;
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLArray
*
pmeAtomGridIndex
;
OpenCLArray
*
pmeEnergyBuffer
;
OpenCLSort
*
sort
;
OpenCLSort
*
sort
;
cl
::
CommandQueue
pmeQueue
;
cl
::
CommandQueue
pmeQueue
;
cl
::
Event
pmeSyncEvent
;
cl
::
Event
pmeSyncEvent
;
OpenCLFFT3D
*
fft
;
OpenCLFFT3D
*
fft
;
Kernel
cpuPme
;
Kernel
cpuPme
;
PmeIO
*
pmeio
;
PmeIO
*
pmeio
;
SyncQueuePostComputation
*
syncQueue
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldSumsKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
ewaldForcesKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
cl
::
Kernel
pmeGridIndexKernel
;
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
5cb6ad03
...
@@ -1454,7 +1454,14 @@ private:
...
@@ -1454,7 +1454,14 @@ private:
class
OpenCLCalcNonbondedForceKernel
::
SyncQueuePostComputation
:
public
OpenCLContext
::
ForcePostComputation
{
class
OpenCLCalcNonbondedForceKernel
::
SyncQueuePostComputation
:
public
OpenCLContext
::
ForcePostComputation
{
public:
public:
SyncQueuePostComputation
(
OpenCLContext
&
cl
,
cl
::
Event
&
event
,
int
forceGroup
)
:
cl
(
cl
),
event
(
event
),
forceGroup
(
forceGroup
)
{
SyncQueuePostComputation
(
OpenCLContext
&
cl
,
cl
::
Event
&
event
,
OpenCLArray
&
pmeEnergyBuffer
,
int
forceGroup
)
:
cl
(
cl
),
event
(
event
),
pmeEnergyBuffer
(
pmeEnergyBuffer
),
forceGroup
(
forceGroup
)
{
}
void
setKernel
(
cl
::
Kernel
kernel
)
{
addEnergyKernel
=
kernel
;
addEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeEnergyBuffer
.
getDeviceBuffer
());
addEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
addEnergyKernel
.
setArg
<
cl_int
>
(
2
,
pmeEnergyBuffer
.
getSize
());
}
}
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
double
computeForceAndEnergy
(
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
if
((
groups
&
(
1
<<
forceGroup
))
!=
0
)
{
if
((
groups
&
(
1
<<
forceGroup
))
!=
0
)
{
...
@@ -1463,11 +1470,15 @@ public:
...
@@ -1463,11 +1470,15 @@ public:
event
=
cl
::
Event
();
event
=
cl
::
Event
();
cl
.
getQueue
().
enqueueWaitForEvents
(
events
);
cl
.
getQueue
().
enqueueWaitForEvents
(
events
);
}
}
if
(
includeEnergy
)
cl
.
executeKernel
(
addEnergyKernel
,
pmeEnergyBuffer
.
getSize
());
return
0.0
;
return
0.0
;
}
}
private:
private:
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
cl
::
Event
&
event
;
cl
::
Event
&
event
;
cl
::
Kernel
addEnergyKernel
;
OpenCLArray
&
pmeEnergyBuffer
;
int
forceGroup
;
int
forceGroup
;
};
};
...
@@ -1494,6 +1505,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
...
@@ -1494,6 +1505,8 @@ OpenCLCalcNonbondedForceKernel::~OpenCLCalcNonbondedForceKernel() {
delete
pmeAtomRange
;
delete
pmeAtomRange
;
if
(
pmeAtomGridIndex
!=
NULL
)
if
(
pmeAtomGridIndex
!=
NULL
)
delete
pmeAtomGridIndex
;
delete
pmeAtomGridIndex
;
if
(
pmeEnergyBuffer
!=
NULL
)
delete
pmeEnergyBuffer
;
if
(
sort
!=
NULL
)
if
(
sort
!=
NULL
)
delete
sort
;
delete
sort
;
if
(
fft
!=
NULL
)
if
(
fft
!=
NULL
)
...
@@ -1663,6 +1676,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1663,6 +1676,9 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
pmeBsplineTheta
=
new
OpenCLArray
(
cl
,
PmeOrder
*
numParticles
,
4
*
elementSize
,
"pmeBsplineTheta"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
OpenCLArray
::
create
<
cl_int
>
(
cl
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
OpenCLArray
::
create
<
mm_int2
>
(
cl
,
numParticles
,
"pmeAtomGridIndex"
);
int
energyElementSize
=
(
cl
.
getUseDoublePrecision
()
||
cl
.
getUseMixedPrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeEnergyBuffer
=
new
OpenCLArray
(
cl
,
cl
.
getNumThreadBlocks
()
*
OpenCLContext
::
ThreadBlockSize
,
energyElementSize
,
"pmeEnergyBuffer"
);
cl
.
clearBuffer
(
*
pmeEnergyBuffer
);
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
sort
=
new
OpenCLSort
(
cl
,
new
SortTrait
(),
cl
.
getNumAtoms
());
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
true
);
fft
=
new
OpenCLFFT3D
(
cl
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
true
);
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
string
vendor
=
cl
.
getDevice
().
getInfo
<
CL_DEVICE_VENDOR
>
();
...
@@ -1676,7 +1692,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
...
@@ -1676,7 +1692,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
if
(
recipForceGroup
<
0
)
if
(
recipForceGroup
<
0
)
recipForceGroup
=
force
.
getForceGroup
();
recipForceGroup
=
force
.
getForceGroup
();
cl
.
addPreComputation
(
new
SyncQueuePreComputation
(
cl
,
pmeQueue
,
recipForceGroup
));
cl
.
addPreComputation
(
new
SyncQueuePreComputation
(
cl
,
pmeQueue
,
recipForceGroup
));
cl
.
addPostComputation
(
new
SyncQueuePostComputation
(
cl
,
pmeSyncEvent
,
recipForceGroup
));
cl
.
addPostComputation
(
syncQueue
=
new
SyncQueuePostComputation
(
cl
,
pmeSyncEvent
,
*
pmeEnergyBuffer
,
recipForceGroup
));
}
}
// Initialize the b-spline moduli.
// Initialize the b-spline moduli.
...
@@ -1831,7 +1847,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1831,7 +1847,7 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
pmeConvolutionKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid2
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid2
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
usePmeQueue
?
pmeEnergyBuffer
->
getDeviceBuffer
()
:
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliX
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
pmeBsplineModuliX
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
pmeBsplineModuliY
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
pmeEvalEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
pmeBsplineModuliZ
->
getDeviceBuffer
());
...
@@ -1844,6 +1860,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
...
@@ -1844,6 +1860,8 @@ double OpenCLCalcNonbondedForceKernel::execute(ContextImpl& context, bool includ
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid2
->
getDeviceBuffer
());
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
pmeGrid2
->
getDeviceBuffer
());
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeGrid
->
getDeviceBuffer
());
pmeFinishSpreadChargeKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
pmeGrid
->
getDeviceBuffer
());
}
}
if
(
usePmeQueue
)
syncQueue
->
setKernel
(
cl
::
Kernel
(
program
,
"addEnergy"
));
}
}
}
}
if
(
cosSinSums
!=
NULL
&&
includeReciprocal
)
{
if
(
cosSinSums
!=
NULL
&&
includeReciprocal
)
{
...
...
platforms/opencl/src/kernels/pme.cl
View file @
5cb6ad03
...
@@ -362,7 +362,7 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
...
@@ -362,7 +362,7 @@ __kernel void gridEvaluateEnergy(__global real2* restrict pmeGrid, __global mixe
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
energy
+=
eterm*
(
grid.x*grid.x
+
grid.y*grid.y
)
;
}
}
}
}
energyBuffer[get_global_id
(
0
)
]
+
=
0.5f*energy
;
energyBuffer[get_global_id
(
0
)
]
=
0.5f*energy
;
}
}
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
__kernel
void
gridInterpolateForce
(
__global
const
real4*
restrict
posq,
__global
real4*
restrict
forceBuffers,
__global
const
real*
restrict
pmeGrid,
...
@@ -445,3 +445,8 @@ __kernel void addForces(__global const real4* restrict forces, __global real4* r
...
@@ -445,3 +445,8 @@ __kernel void addForces(__global const real4* restrict forces, __global real4* r
for
(
int
atom
=
get_global_id
(
0
)
; atom < NUM_ATOMS; atom += get_global_size(0))
for
(
int
atom
=
get_global_id
(
0
)
; atom < NUM_ATOMS; atom += get_global_size(0))
forceBuffers[atom]
+=
forces[atom]
;
forceBuffers[atom]
+=
forces[atom]
;
}
}
/**
 * Add the contents of pmeEnergyBuffer element-wise into energyBuffer.
 *
 * Each work-item starts at its global id and advances by the global work
 * size, so every index in [0, bufferSize) is handled for any launch size.
 *
 * @param pmeEnergyBuffer  source values to be added (read only)
 * @param energyBuffer     destination buffer that is accumulated into
 * @param bufferSize       number of elements to process in both buffers
 */
__kernel void addEnergy(__global const mixed* restrict pmeEnergyBuffer, __global mixed* restrict energyBuffer, int bufferSize) {
    // Step between the consecutive elements handled by a single work-item.
    const size_t stride = get_global_size(0);
    int index = get_global_id(0);
    while (index < bufferSize) {
        energyBuffer[index] += pmeEnergyBuffer[index];
        index += stride;
    }
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment