Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2975f44b
"platforms/vscode:/vscode.git/clone" did not exist on "729c09907584fde040dc58c860181d25b133b68d"
Unverified
Commit
2975f44b
authored
Jan 31, 2023
by
Peter Eastman
Committed by
GitHub
Jan 31, 2023
Browse files
Optimized reducing energy (#3902)
parent
e0c80069
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
34 additions
and
24 deletions
+34
-24
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+17
-9
platforms/cuda/src/kernels/utilities.cu
platforms/cuda/src/kernels/utilities.cu
+2
-2
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+13
-11
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+2
-2
No files found.
platforms/cuda/src/CudaContext.cpp
View file @
2975f44b
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-202
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-202
3
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -435,21 +435,23 @@ void CudaContext::initialize() {
...
@@ -435,21 +435,23 @@ void CudaContext::initialize() {
ContextSelector
selector
(
*
this
);
ContextSelector
selector
(
*
this
);
string
errorMessage
=
"Error initializing Context"
;
string
errorMessage
=
"Error initializing Context"
;
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
numEnergyBuffers
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
multiprocessors
;
CHECK_RESULT2
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
),
"Error checking GPU properties"
);
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
energyBuffer
.
initialize
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
.
initialize
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
.
initialize
<
double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initialize
<
double
>
(
*
this
,
multiprocessors
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
energyBuffer
.
initialize
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
.
initialize
<
double
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
.
initialize
<
double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initialize
<
double
>
(
*
this
,
multiprocessors
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
}
else
{
else
{
energyBuffer
.
initialize
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energyBuffer
.
initialize
<
float
>
(
*
this
,
numEnergyBuffers
,
"energyBuffer"
);
energySum
.
initialize
<
float
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initialize
<
float
>
(
*
this
,
multiprocessors
,
"energySum"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numEnergyBuffers
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
}
...
@@ -820,12 +822,18 @@ double CudaContext::reduceEnergy() {
...
@@ -820,12 +822,18 @@ double CudaContext::reduceEnergy() {
int
bufferSize
=
energyBuffer
.
getSize
();
int
bufferSize
=
energyBuffer
.
getSize
();
int
workGroupSize
=
512
;
int
workGroupSize
=
512
;
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
energySum
.
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
void
*
args
[]
=
{
&
energyBuffer
.
getDevicePointer
(),
&
energySum
.
getDevicePointer
(),
&
bufferSize
,
&
workGroupSize
};
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
,
workGroupSize
,
workGroupSize
*
energyBuffer
.
getElementSize
());
executeKernel
(
reduceEnergyKernel
,
args
,
workGroupSize
*
energySum
.
getSize
()
,
workGroupSize
,
workGroupSize
*
energyBuffer
.
getElementSize
());
energySum
.
download
(
pinnedBuffer
);
energySum
.
download
(
pinnedBuffer
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
double
result
=
0
;
return
*
((
double
*
)
pinnedBuffer
);
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
else
for
(
int
i
=
0
;
i
<
energySum
.
getSize
();
i
++
)
return
*
((
float
*
)
pinnedBuffer
);
result
+=
((
double
*
)
pinnedBuffer
)[
i
];
}
else
{
for
(
int
i
=
0
;
i
<
energySum
.
getSize
();
i
++
)
result
+=
((
float
*
)
pinnedBuffer
)[
i
];
}
return
result
;
}
}
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
CudaContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
...
...
platforms/cuda/src/kernels/utilities.cu
View file @
2975f44b
...
@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
...
@@ -80,7 +80,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
extern
__shared__
mixed
tempBuffer
[];
extern
__shared__
mixed
tempBuffer
[];
const
unsigned
int
thread
=
threadIdx
.
x
;
const
unsigned
int
thread
=
threadIdx
.
x
;
mixed
sum
=
0
;
mixed
sum
=
0
;
for
(
unsigned
int
index
=
thread
;
index
<
bufferSize
;
index
+=
blockDim
.
x
)
for
(
unsigned
int
index
=
blockDim
.
x
*
blockIdx
.
x
+
thread
Idx
.
x
;
index
<
bufferSize
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
sum
+=
energyBuffer
[
index
];
sum
+=
energyBuffer
[
index
];
tempBuffer
[
thread
]
=
sum
;
tempBuffer
[
thread
]
=
sum
;
for
(
int
i
=
1
;
i
<
workGroupSize
;
i
*=
2
)
{
for
(
int
i
=
1
;
i
<
workGroupSize
;
i
*=
2
)
{
...
@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
...
@@ -89,7 +89,7 @@ __global__ void reduceEnergy(const mixed* __restrict__ energyBuffer, mixed* __re
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
tempBuffer
[
thread
]
+=
tempBuffer
[
thread
+
i
];
}
}
if
(
thread
==
0
)
if
(
thread
==
0
)
*
result
=
tempBuffer
[
0
];
result
[
blockIdx
.
x
]
=
tempBuffer
[
0
];
}
}
/**
/**
...
...
platforms/opencl/src/OpenCLContext.cpp
View file @
2975f44b
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-202
0
Stanford University and the Authors. *
* Portions copyright (c) 2009-202
3
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -497,23 +497,24 @@ void OpenCLContext::initialize() {
...
@@ -497,23 +497,24 @@ void OpenCLContext::initialize() {
bonded
->
initialize
(
system
);
bonded
->
initialize
(
system
);
numForceBuffers
=
std
::
max
(
numForceBuffers
,
(
int
)
platformData
.
contexts
.
size
());
numForceBuffers
=
std
::
max
(
numForceBuffers
,
(
int
)
platformData
.
contexts
.
size
());
int
energyBufferSize
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
energyBufferSize
=
max
(
numThreadBlocks
*
ThreadBlockSize
,
nonbonded
->
getNumEnergyBuffers
());
int
numComputeUnits
=
device
.
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
forceBuffers
.
initialize
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
.
initialize
<
mm_double4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
.
initialize
<
mm_double4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
.
initialize
<
mm_double4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
.
initialize
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
.
initialize
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
.
initialize
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initialize
<
cl_double
>
(
*
this
,
numComputeUnits
,
"energySum"
);
}
}
else
if
(
useMixedPrecision
)
{
else
if
(
useMixedPrecision
)
{
forceBuffers
.
initialize
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
.
initialize
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
.
initialize
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
.
initialize
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
.
initialize
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
.
initialize
<
cl_double
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
.
initialize
<
cl_double
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initialize
<
cl_double
>
(
*
this
,
numComputeUnits
,
"energySum"
);
}
}
else
{
else
{
forceBuffers
.
initialize
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
forceBuffers
.
initialize
<
mm_float4
>
(
*
this
,
paddedNumAtoms
*
numForceBuffers
,
"forceBuffers"
);
force
.
initialize
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
force
.
initialize
<
mm_float4
>
(
*
this
,
&
forceBuffers
.
getDeviceBuffer
(),
paddedNumAtoms
,
"force"
);
energyBuffer
.
initialize
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energyBuffer
.
initialize
<
cl_float
>
(
*
this
,
energyBufferSize
,
"energyBuffer"
);
energySum
.
initialize
<
cl_float
>
(
*
this
,
1
,
"energySum"
);
energySum
.
initialize
<
cl_float
>
(
*
this
,
numComputeUnits
,
"energySum"
);
}
}
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
.
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
longForceBuffer
.
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
.
getDeviceBuffer
());
reduceForcesKernel
.
setArg
<
cl
::
Buffer
>
(
1
,
forceBuffers
.
getDeviceBuffer
());
...
@@ -798,17 +799,18 @@ double OpenCLContext::reduceEnergy() {
...
@@ -798,17 +799,18 @@ double OpenCLContext::reduceEnergy() {
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
.
getSize
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
2
,
energyBuffer
.
getSize
());
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
<
cl_int
>
(
3
,
workGroupSize
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
.
getElementSize
(),
NULL
);
reduceEnergyKernel
.
setArg
(
4
,
workGroupSize
*
energyBuffer
.
getElementSize
(),
NULL
);
executeKernel
(
reduceEnergyKernel
,
workGroupSize
,
workGroupSize
);
executeKernel
(
reduceEnergyKernel
,
workGroupSize
*
energySum
.
getSize
(),
workGroupSize
);
energySum
.
download
(
pinnedMemory
);
double
result
=
0
;
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
if
(
getUseDoublePrecision
()
||
getUseMixedPrecision
())
{
double
energy
;
for
(
int
i
=
0
;
i
<
energySum
.
getSize
();
i
++
)
energySum
.
download
(
&
energy
);
result
+=
((
double
*
)
pinnedMemory
)[
i
];
return
energy
;
}
}
else
{
else
{
float
energy
;
for
(
int
i
=
0
;
i
<
energySum
.
getSize
();
i
++
)
energySum
.
download
(
&
energy
);
result
+=
((
float
*
)
pinnedMemory
)[
i
];
return
energy
;
}
}
return
result
;
}
}
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
void
OpenCLContext
::
setCharges
(
const
vector
<
double
>&
charges
)
{
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
2975f44b
...
@@ -108,7 +108,7 @@ __kernel void reduceForces(__global long* restrict longBuffer, __global real4* r
...
@@ -108,7 +108,7 @@ __kernel void reduceForces(__global long* restrict longBuffer, __global real4* r
__kernel
void
reduceEnergy
(
__global
const
mixed*
restrict
energyBuffer,
__global
mixed*
restrict
result,
int
bufferSize,
int
workGroupSize,
__local
mixed*
tempBuffer
)
{
__kernel
void
reduceEnergy
(
__global
const
mixed*
restrict
energyBuffer,
__global
mixed*
restrict
result,
int
bufferSize,
int
workGroupSize,
__local
mixed*
tempBuffer
)
{
const
unsigned
int
thread
=
get_local_id
(
0
)
;
const
unsigned
int
thread
=
get_local_id
(
0
)
;
mixed
sum
=
0
;
mixed
sum
=
0
;
for
(
unsigned
int
index
=
thread
; index < bufferSize; index += get_lo
c
al_size(0))
for
(
unsigned
int
index
=
get_global_id
(
0
)
; index < bufferSize; index += get_
g
lo
b
al_size(0))
sum
+=
energyBuffer[index]
;
sum
+=
energyBuffer[index]
;
tempBuffer[thread]
=
sum
;
tempBuffer[thread]
=
sum
;
for
(
int
i
=
1
; i < workGroupSize; i *= 2) {
for
(
int
i
=
1
; i < workGroupSize; i *= 2) {
...
@@ -117,7 +117,7 @@ __kernel void reduceEnergy(__global const mixed* restrict energyBuffer, __global
...
@@ -117,7 +117,7 @@ __kernel void reduceEnergy(__global const mixed* restrict energyBuffer, __global
tempBuffer[thread]
+=
tempBuffer[thread+i]
;
tempBuffer[thread]
+=
tempBuffer[thread+i]
;
}
}
if
(
thread
==
0
)
if
(
thread
==
0
)
*
result
=
tempBuffer[0]
;
result
[get_group_id
(
0
)
]
=
tempBuffer[0]
;
}
}
/**
/**
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment