Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
711c3a5a
Commit
711c3a5a
authored
Jun 19, 2017
by
Peter Eastman
Browse files
Sum energy on the GPU
parent
d4fcab2a
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
108 additions
and
34 deletions
+108
-34
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+6
-0
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+19
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+2
-15
platforms/cuda/src/kernels/utilities.cu
platforms/cuda/src/kernels/utilities.cu
+19
-0
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+7
-1
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+35
-2
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+2
-15
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+18
-0
No files found.
platforms/cuda/include/CudaContext.h
View file @
711c3a5a
...
@@ -285,6 +285,10 @@ public:
...
@@ -285,6 +285,10 @@ public:
* Clear all buffers that have been registered with addAutoclearBuffer().
* Clear all buffers that have been registered with addAutoclearBuffer().
*/
*/
void
clearAutoclearBuffers
();
void
clearAutoclearBuffers
();
/**
* Sum the buffer containing energy.
*/
double
reduceEnergy
();
/**
/**
* Get the current simulation time.
* Get the current simulation time.
*/
*/
...
@@ -638,6 +642,7 @@ private:
...
@@ -638,6 +642,7 @@ private:
CUfunction
clearFourBuffersKernel
;
CUfunction
clearFourBuffersKernel
;
CUfunction
clearFiveBuffersKernel
;
CUfunction
clearFiveBuffersKernel
;
CUfunction
clearSixBuffersKernel
;
CUfunction
clearSixBuffersKernel
;
CUfunction
reduceEnergyKernel
;
CUfunction
setChargesKernel
;
CUfunction
setChargesKernel
;
std
::
vector
<
CudaForceInfo
*>
forces
;
std
::
vector
<
CudaForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
Molecule
>
molecules
;
...
@@ -649,6 +654,7 @@ private:
...
@@ -649,6 +654,7 @@ private:
CudaArray
*
velm
;
CudaArray
*
velm
;
CudaArray
*
force
;
CudaArray
*
force
;
CudaArray
*
energyBuffer
;
CudaArray
*
energyBuffer
;
CudaArray
*
energySum
;
CudaArray
*
energyParamDerivBuffer
;
CudaArray
*
energyParamDerivBuffer
;
CudaArray
*
atomIndexDevice
;
CudaArray
*
atomIndexDevice
;
CudaArray
*
chargeBuffer
;
CudaArray
*
chargeBuffer
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
711c3a5a
...
@@ -108,7 +108,7 @@ static int executeInWindows(const string &command) {
...
@@ -108,7 +108,7 @@ static int executeInWindows(const string &command) {
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
system
(
system
),
currentStream
(
0
),
const
string
&
tempDir
,
const
std
::
string
&
hostCompiler
,
CudaPlatform
::
PlatformData
&
platformData
,
CudaContext
*
originalContext
)
:
system
(
system
),
currentStream
(
0
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
hasCompilerKernel
(
false
),
isNvccAvailable
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
// Determine what compiler to use.
// Determine what compiler to use.
...
@@ -307,6 +307,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -307,6 +307,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
reduceEnergyKernel
=
getKernel
(
utilities
,
"reduceEnergy"
);
setChargesKernel
=
getKernel
(
utilities
,
"setCharges"
);
setChargesKernel
=
getKernel
(
utilities
,
"setCharges"
);
// Set defines based on the requested precision.
// Set defines based on the requested precision.
...
@@ -420,6 +421,8 @@ CudaContext::~CudaContext() {
...
@@ -420,6 +421,8 @@ CudaContext::~CudaContext() {
delete
force
;
delete
force
;
if
(
energyBuffer
!=
NULL
)
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
if
(
atomIndexDevice
!=
NULL
)
...
@@ -450,16 +453,19 @@ void CudaContext::initialize() {
...
@@ -450,16 +453,19 @@ void CudaContext::initialize() {
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
create
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
=
CudaArray
::
create
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
create
<
double
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
{
else
{
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
=
CudaArray
::
create
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
=
CudaArray
::
create
<
float
>
(
*
this
,
1
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
}
...
@@ -879,6 +885,18 @@ void CudaContext::clearAutoclearBuffers() {
...
@@ -879,6 +885,18 @@ void CudaContext::clearAutoclearBuffers() {
}
}
}
}
double
CudaContext
::
reduceEnergy
()
{
int
bufferSize
=
energyBuffer
->
getSize
();
int
workGroupSize
=
512
;
void
*
args
[]
=
{
&
energyBuffer
->
getDevicePointer
(),
&
energySum
->
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
,
workGroupSize
,
workGroupSize
*
energyBuffer
->
getElementSize
());
energySum
->
download
(
pinnedBuffer
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
return
*
((
double
*
)
pinnedBuffer
);
else
return
*
((
float
*
)
pinnedBuffer
);
}
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
if
(
chargeBuffer
==
NULL
)
chargeBuffer
=
new
CudaArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
chargeBuffer
=
new
CudaArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
711c3a5a
...
@@ -115,21 +115,8 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
...
@@ -115,21 +115,8 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
for (auto computation : cu.getPostComputations())
for (auto computation : cu.getPostComputations())
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cu.getIntegrationUtilities().distributeForcesFromVirtualSites();
cu.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) {
if (includeEnergy)
CudaArray& energyArray = cu.getEnergyBuffer();
sum += cu.reduceEnergy();
if (cu.getUseDoublePrecision() || cu.getUseMixedPrecision()) {
double* energy = (double*) cu.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
else {
float* energy = (float*) cu.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
}
if (!cu.getForcesValid())
if (!cu.getForcesValid())
valid = false;
valid = false;
return sum;
return sum;
...
...
platforms/cuda/src/kernels/utilities.cu
View file @
711c3a5a
...
@@ -73,6 +73,25 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
...
@@ -73,6 +73,25 @@ __global__ void clearSixBuffers(int* __restrict__ buffer1, int size1, int* __res
clearSingleBuffer
(
buffer6
,
size6
);
clearSingleBuffer
(
buffer6
,
size6
);
}
}
/**
* Sum the energy buffer.
*/
__global__
void
reduceEnergy
(
const
mixed
*
__restrict__
energyBuffer
,
mixed
*
__restrict__
result
,
int
bufferSize
,
int
workGroupSize
)
{
extern
__shared__
mixed
tempBuffer
[];
const
unsigned
int
thread
=
threadIdx
.
x
;
mixed
sum
=
0
;
for
(
unsigned
int
index
=
thread
;
index
<
bufferSize
;
index
+=
blockDim
.
x
)
sum
+=
energyBuffer
[
index
];
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
workGroupSize
;
i
*=
2
)
{
__syncthreads
();
if
(
thread
%
(
i
*
2
)
==
0
&&
thread
+
i
<
workGroupSize
)
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
if
(
thread
==
0
)
*
result
=
tempBuffer
[
0
];
}
/**
/**
* Record the atomic charges into the posq array.
* Record the atomic charges into the posq array.
*/
*/
...
...
platforms/opencl/include/OpenCLContext.h
View file @
711c3a5a
...
@@ -364,9 +364,13 @@ public:
...
@@ -364,9 +364,13 @@ public:
*/
*/
void
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
);
void
reduceBuffer
(
OpenCLArray
&
array
,
int
numBuffers
);
/**
/**
* Sum the buffe
s
r containing forces.
* Sum the buffer
s
containing forces.
*/
*/
void
reduceForces
();
void
reduceForces
();
/**
* Sum the buffer containing energy.
*/
double
reduceEnergy
();
/**
/**
* Get the current simulation time.
* Get the current simulation time.
*/
*/
...
@@ -750,6 +754,7 @@ private:
...
@@ -750,6 +754,7 @@ private:
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
clearSixBuffersKernel
;
cl
::
Kernel
reduceReal4Kernel
;
cl
::
Kernel
reduceReal4Kernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
reduceForcesKernel
;
cl
::
Kernel
reduceEnergyKernel
;
cl
::
Kernel
setChargesKernel
;
cl
::
Kernel
setChargesKernel
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
OpenCLForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
Molecule
>
molecules
;
...
@@ -764,6 +769,7 @@ private:
...
@@ -764,6 +769,7 @@ private:
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
forceBuffers
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
longForceBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energyBuffer
;
OpenCLArray
*
energySum
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
energyParamDerivBuffer
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
atomIndexDevice
;
OpenCLArray
*
chargeBuffer
;
OpenCLArray
*
chargeBuffer
;
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
711c3a5a
...
@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
...
@@ -69,7 +69,7 @@ static void CL_CALLBACK errorCallback(const char* errinfo, const void* private_i
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
OpenCLContext
::
OpenCLContext
(
const
System
&
system
,
int
platformIndex
,
int
deviceIndex
,
const
string
&
precision
,
OpenCLPlatform
::
PlatformData
&
platformData
,
OpenCLContext
*
originalContext
)
:
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
system
(
system
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
stepsSinceReorder
(
99999
),
atomsWereReordered
(
false
),
posq
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
posqCorrection
(
NULL
),
velm
(
NULL
),
forceBuffers
(
NULL
),
longForceBuffer
(
NULL
),
energyBuffer
(
NULL
),
energySum
(
NULL
),
energyParamDerivBuffer
(
NULL
),
atomIndexDevice
(
NULL
),
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
chargeBuffer
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
bonded
(
NULL
),
nonbonded
(
NULL
),
thread
(
NULL
)
{
if
(
precision
==
"single"
)
{
if
(
precision
==
"single"
)
{
useDoublePrecision
=
false
;
useDoublePrecision
=
false
;
...
@@ -315,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -315,6 +315,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
reduceReal4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceReal4Buffer"
);
reduceReal4Kernel
=
cl
::
Kernel
(
utilities
,
"reduceReal4Buffer"
);
if
(
supports64BitGlobalAtomics
)
if
(
supports64BitGlobalAtomics
)
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
reduceForcesKernel
=
cl
::
Kernel
(
utilities
,
"reduceForces"
);
reduceEnergyKernel
=
cl
::
Kernel
(
utilities
,
"reduceEnergy"
);
setChargesKernel
=
cl
::
Kernel
(
utilities
,
"setCharges"
);
setChargesKernel
=
cl
::
Kernel
(
utilities
,
"setCharges"
);
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
// Decide whether native_sqrt(), native_rsqrt(), and native_recip() are sufficiently accurate to use.
...
@@ -442,6 +443,8 @@ OpenCLContext::~OpenCLContext() {
...
@@ -442,6 +443,8 @@ OpenCLContext::~OpenCLContext() {
delete
longForceBuffer
;
delete
longForceBuffer
;
if
(
energyBuffer
!=
NULL
)
if
(
energyBuffer
!=
NULL
)
delete
energyBuffer
;
delete
energyBuffer
;
if
(
energySum
!=
NULL
)
delete
energySum
;
if
(
energyParamDerivBuffer
!=
NULL
)
if
(
energyParamDerivBuffer
!=
NULL
)
delete
energyParamDerivBuffer
;
delete
energyParamDerivBuffer
;
if
(
atomIndexDevice
!=
NULL
)
if
(
atomIndexDevice
!=
NULL
)
...
@@ -471,11 +474,19 @@ void OpenCLContext::initialize() {
...
@@ -471,11 +474,19 @@ void OpenCLContext::initialize() {
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
=
OpenCLArray
::
create
<
mm_double4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
}
else
{
else
if
(
useMixedPrecision
)
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
create
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
}
else
{
forceBuffers
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
=
OpenCLArray
::
create
<
mm_float4
>
(
*
this
,
&
forceBuffers
->
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
=
OpenCLArray
::
create
<
cl_float
>
(
*
this
,
1
,
"energySum"
);
}
}
if
(
supports64BitGlobalAtomics
)
{
if
(
supports64BitGlobalAtomics
)
{
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
longForceBuffer
=
OpenCLArray
::
create
<
cl_long
>
(
*
this
,
3
*
paddedNumAtoms
,
"longForceBuffer"
);
...
@@ -756,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
...
@@ -756,6 +767,28 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
executeKernel
(
reduceReal4Kernel
,
bufferSize
,
128
);
executeKernel
(
reduceReal4Kernel
,
bufferSize
,
128
);
}
}
double
OpenCLContext
::
reduceEnergy
()
{
int
workGroupSize
=
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
();
if
(
workGroupSize
>
512
)
workGroupSize
=
512
;
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
energyBuffer
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
energySum
->
getDeviceBuffer
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
->
getSize
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
->
getElementSize
(),
NULL
);
executeKernel
(
reduceEnergyKernel
,
workGroupSize
,
workGroupSize
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
double
energy
;
energySum
->
download
(
&
energy
);
return
energy
;
}
else
{
float
energy
;
energySum
->
download
(
&
energy
);
return
energy
;
}
}
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
if
(
chargeBuffer
==
NULL
)
if
(
chargeBuffer
==
NULL
)
chargeBuffer
=
new
OpenCLArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
chargeBuffer
=
new
OpenCLArray
(
*
this
,
numAtoms
,
useDoublePrecision
?
sizeof
(
double
)
:
sizeof
(
float
),
"chargeBuffer"
);
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
711c3a5a
...
@@ -139,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
...
@@ -139,21 +139,8 @@ double OpenCLCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context,
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
sum += computation->computeForceAndEnergy(includeForces, includeEnergy, groups);
cl.reduceForces();
cl.reduceForces();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
cl.getIntegrationUtilities().distributeForcesFromVirtualSites();
if (includeEnergy) {
if (includeEnergy)
OpenCLArray& energyArray = cl.getEnergyBuffer();
sum += cl.reduceEnergy();
if (cl.getUseDoublePrecision() || cl.getUseMixedPrecision()) {
double* energy = (double*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
else {
float* energy = (float*) cl.getPinnedBuffer();
energyArray.download(energy);
for (int i = 0; i < energyArray.getSize(); i++)
sum += energy[i];
}
}
if (!cl.getForcesValid())
if (!cl.getForcesValid())
valid = false;
valid = false;
return sum;
return sum;
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
711c3a5a
...
@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
...
@@ -97,6 +97,24 @@ __kernel void reduceForces(__global const long* restrict longBuffer, __global re
}
}
#
endif
#
endif
/**
*
Sum
the
energy
buffer.
*/
__kernel
void
reduceEnergy
(
__global
const
mixed*
restrict
energyBuffer,
__global
mixed*
restrict
result,
int
bufferSize,
int
workGroupSize,
__local
mixed*
tempBuffer
)
{
const
unsigned
int
thread
=
get_local_id
(
0
)
;
mixed
sum
=
0
;
for
(
unsigned
int
index
=
thread
; index < bufferSize; index += get_local_size(0))
sum
+=
energyBuffer[index]
;
tempBuffer[thread]
=
sum
;
for
(
int
i
=
1
; i < workGroupSize; i *= 2) {
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
thread%
(
i*2
)
==
0
&&
thread+i
<
workGroupSize
)
tempBuffer[thread]
+=
tempBuffer[thread+i]
;
}
if
(
thread
==
0
)
*result
=
tempBuffer[0]
;
}
/**
/**
*
This
is
called
to
determine
the
accuracy
of
various
native
functions.
*
This
is
called
to
determine
the
accuracy
of
various
native
functions.
*/
*/
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment