Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
62f7c44c
Commit
62f7c44c
authored
Mar 06, 2015
by
peastman
Browse files
Load balancing between GPUs forces the neighbor list to be rebuilt
parent
f816d961
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
32 additions
and
24 deletions
+32
-24
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+1
-1
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+5
-2
platforms/cuda/src/kernels/findInteractingBlocks.cu
platforms/cuda/src/kernels/findInteractingBlocks.cu
+2
-2
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+3
-0
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+19
-17
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+2
-2
No files found.
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
62f7c44c
...
@@ -278,7 +278,7 @@ private:
...
@@ -278,7 +278,7 @@ private:
std
::
string
kernelSource
;
std
::
string
kernelSource
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
double
cutoff
;
double
cutoff
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
numForceThreadBlocks
,
forceThreadBlockSize
,
nonbondedForceGroup
,
numAtoms
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
numForceThreadBlocks
,
forceThreadBlockSize
,
nonbondedForceGroup
,
numAtoms
;
};
};
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
62f7c44c
...
@@ -65,7 +65,7 @@ private:
...
@@ -65,7 +65,7 @@ private:
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
nonbondedForceGroup
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
nonbondedForceGroup
(
0
)
,
forceRebuildNeighborList
(
true
)
{
// Decide how many thread blocks to use.
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
string
errorMessage
=
"Error initializing nonbonded utilities"
;
...
@@ -322,6 +322,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -322,6 +322,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortBoxDataArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
findInteractingBlocksKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
...
@@ -363,6 +364,7 @@ void CudaNonbondedUtilities::prepareInteractions() {
...
@@ -363,6 +364,7 @@ void CudaNonbondedUtilities::prepareInteractions() {
blockSorter
->
sort
(
*
sortedBlocks
);
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
}
}
void
CudaNonbondedUtilities
::
computeInteractions
()
{
void
CudaNonbondedUtilities
::
computeInteractions
()
{
...
@@ -419,8 +421,9 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
...
@@ -419,8 +421,9 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
startBlockIndex
=
(
int
)
(
startFraction
*
numAtomBlocks
);
startBlockIndex
=
(
int
)
(
startFraction
*
numAtomBlocks
);
numBlocks
=
(
int
)
(
endFraction
*
numAtomBlocks
)
-
startBlockIndex
;
numBlocks
=
(
int
)
(
endFraction
*
numAtomBlocks
)
-
startBlockIndex
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
startTileIndex
=
(
int
)
(
startFraction
*
totalTiles
);
;
startTileIndex
=
(
int
)
(
startFraction
*
totalTiles
);
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
forceRebuildNeighborList
=
true
;
}
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
...
...
platforms/cuda/src/kernels/findInteractingBlocks.cu
View file @
62f7c44c
...
@@ -43,7 +43,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
...
@@ -43,7 +43,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
extern
"C"
__global__
void
sortBoxData
(
const
real2
*
__restrict__
sortedBlock
,
const
real4
*
__restrict__
blockCenter
,
extern
"C"
__global__
void
sortBoxData
(
const
real2
*
__restrict__
sortedBlock
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockBoundingBox
,
real4
*
__restrict__
sortedBlockCenter
,
const
real4
*
__restrict__
blockBoundingBox
,
real4
*
__restrict__
sortedBlockCenter
,
real4
*
__restrict__
sortedBlockBoundingBox
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
oldPositions
,
real4
*
__restrict__
sortedBlockBoundingBox
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
oldPositions
,
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
rebuildNeighborList
)
{
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
rebuildNeighborList
,
bool
forceRebuild
)
{
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_BLOCKS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_BLOCKS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
(
int
)
sortedBlock
[
i
].
y
;
int
index
=
(
int
)
sortedBlock
[
i
].
y
;
sortedBlockCenter
[
i
]
=
blockCenter
[
index
];
sortedBlockCenter
[
i
]
=
blockCenter
[
index
];
...
@@ -52,7 +52,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
...
@@ -52,7 +52,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
// Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.
// Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.
bool
rebuild
=
f
alse
;
bool
rebuild
=
f
orceRebuild
;
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
delta
=
oldPositions
[
i
]
-
posq
[
i
];
real4
delta
=
oldPositions
[
i
]
-
posq
[
i
];
if
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
>
0.25
f
*
PADDING
*
PADDING
)
if
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
>
0.25
f
*
PADDING
*
PADDING
)
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
62f7c44c
...
@@ -340,6 +340,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -340,6 +340,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
oldPositions
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
oldPositions
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
true
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
...
@@ -406,6 +407,7 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
...
@@ -406,6 +407,7 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
findInteractingBlocksKernel
,
0
);
setPeriodicBoxArgs
(
context
,
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
false
);
}
}
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
...
@@ -474,6 +476,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
...
@@ -474,6 +476,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
true
);
}
}
}
}
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
62f7c44c
...
@@ -177,23 +177,25 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
...
@@ -177,23 +177,25 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
// Balance work between the contexts by transferring a little nonbonded work from the context that
// Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first.
// finished last to the one that finished first.
int
firstIndex
=
0
,
lastIndex
=
0
;
if
(
cl
.
getComputeForceCount
()
<
200
)
{
for
(
int
i
=
0
;
i
<
(
int
)
completionTimes
.
size
();
i
++
)
{
int
firstIndex
=
0
,
lastIndex
=
0
;
if
(
completionTimes
[
i
]
<
completionTimes
[
firstIndex
])
for
(
int
i
=
0
;
i
<
(
int
)
completionTimes
.
size
();
i
++
)
{
firstIndex
=
i
;
if
(
completionTimes
[
i
]
<
completionTimes
[
firstIndex
])
if
(
completionTimes
[
i
]
>
completionTimes
[
lastIndex
])
firstIndex
=
i
;
lastIndex
=
i
;
if
(
completionTimes
[
i
]
>
completionTimes
[
lastIndex
])
}
lastIndex
=
i
;
double
fractionToTransfer
=
min
(
0.001
,
contextNonbondedFractions
[
lastIndex
]);
}
contextNonbondedFractions
[
firstIndex
]
+=
fractionToTransfer
;
double
fractionToTransfer
=
min
(
0.001
,
contextNonbondedFractions
[
lastIndex
]);
contextNonbondedFractions
[
lastIndex
]
-=
fractionToTransfer
;
contextNonbondedFractions
[
firstIndex
]
+=
fractionToTransfer
;
double
startFraction
=
0.0
;
contextNonbondedFractions
[
lastIndex
]
-=
fractionToTransfer
;
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
();
i
++
)
{
double
startFraction
=
0.0
;
double
endFraction
=
startFraction
+
contextNonbondedFractions
[
i
];
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
();
i
++
)
{
if
(
i
==
contextNonbondedFractions
.
size
()
-
1
)
double
endFraction
=
startFraction
+
contextNonbondedFractions
[
i
];
endFraction
=
1.0
;
// Avoid roundoff error
if
(
i
==
contextNonbondedFractions
.
size
()
-
1
)
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
setAtomBlockRange
(
startFraction
,
endFraction
);
endFraction
=
1.0
;
// Avoid roundoff error
startFraction
=
endFraction
;
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
setAtomBlockRange
(
startFraction
,
endFraction
);
startFraction
=
endFraction
;
}
}
}
}
}
return
energy
;
return
energy
;
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
62f7c44c
...
@@ -44,7 +44,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
...
@@ -44,7 +44,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
__kernel
void
sortBoxData
(
__global
const
real2*
restrict
sortedBlock,
__global
const
real4*
restrict
blockCenter,
__kernel
void
sortBoxData
(
__global
const
real2*
restrict
sortedBlock,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
real4*
restrict
sortedBlockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
real4*
restrict
sortedBlockCenter,
__global
real4*
restrict
sortedBlockBoundingBox,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
oldPositions,
__global
real4*
restrict
sortedBlockBoundingBox,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
oldPositions,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
rebuildNeighborList
)
{
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
rebuildNeighborList
,
int
forceRebuild
)
{
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_BLOCKS; i += get_global_size(0)) {
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_BLOCKS; i += get_global_size(0)) {
int
index
=
(
int
)
sortedBlock[i].y
;
int
index
=
(
int
)
sortedBlock[i].y
;
sortedBlockCenter[i]
=
blockCenter[index]
;
sortedBlockCenter[i]
=
blockCenter[index]
;
...
@@ -53,7 +53,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
...
@@ -53,7 +53,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
//
Also
check
whether
any
atom
has
moved
enough
so
that
we
really
need
to
rebuild
the
neighbor
list.
//
Also
check
whether
any
atom
has
moved
enough
so
that
we
really
need
to
rebuild
the
neighbor
list.
bool
rebuild
=
f
alse
;
bool
rebuild
=
f
orceRebuild
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
real4
delta
=
oldPositions[i]-posq[i]
;
real4
delta
=
oldPositions[i]-posq[i]
;
if
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
>
0.25f*PADDING*PADDING
)
if
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
>
0.25f*PADDING*PADDING
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment