Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
62f7c44c
"platforms/common/vscode:/vscode.git/clone" did not exist on "dde4228b337b7494e23ec913f4dc65f16fee6d08"
Commit
62f7c44c
authored
Mar 06, 2015
by
peastman
Browse files
Load balancing between GPUs forces the neighbor list to be rebuilt
parent
f816d961
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
32 additions
and
24 deletions
+32
-24
platforms/cuda/include/CudaNonbondedUtilities.h
platforms/cuda/include/CudaNonbondedUtilities.h
+1
-1
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+5
-2
platforms/cuda/src/kernels/findInteractingBlocks.cu
platforms/cuda/src/kernels/findInteractingBlocks.cu
+2
-2
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+3
-0
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+19
-17
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+2
-2
No files found.
platforms/cuda/include/CudaNonbondedUtilities.h
View file @
62f7c44c
...
...
@@ -278,7 +278,7 @@ private:
std
::
string
kernelSource
;
std
::
map
<
std
::
string
,
std
::
string
>
kernelDefines
;
double
cutoff
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
;
bool
useCutoff
,
usePeriodic
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
int
startTileIndex
,
numTiles
,
startBlockIndex
,
numBlocks
,
maxTiles
,
numForceThreadBlocks
,
forceThreadBlockSize
,
nonbondedForceGroup
,
numAtoms
;
};
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
62f7c44c
...
...
@@ -65,7 +65,7 @@ private:
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
cutoff
(
-
1.0
),
useCutoff
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
nonbondedForceGroup
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
nonbondedForceGroup
(
0
)
,
forceRebuildNeighborList
(
true
)
{
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
...
...
@@ -322,6 +322,7 @@ void CudaNonbondedUtilities::initialize(const System& system) {
sortBoxDataArgs
.
push_back
(
&
oldPositions
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
interactionCount
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
rebuildNeighborList
->
getDevicePointer
());
sortBoxDataArgs
.
push_back
(
&
forceRebuildNeighborList
);
findInteractingBlocksKernel
=
context
.
getKernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksArgs
.
push_back
(
context
.
getPeriodicBoxSizePointer
());
findInteractingBlocksArgs
.
push_back
(
context
.
getInvPeriodicBoxSizePointer
());
...
...
@@ -363,6 +364,7 @@ void CudaNonbondedUtilities::prepareInteractions() {
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
}
void
CudaNonbondedUtilities
::
computeInteractions
()
{
...
...
@@ -419,8 +421,9 @@ void CudaNonbondedUtilities::setAtomBlockRange(double startFraction, double endF
startBlockIndex
=
(
int
)
(
startFraction
*
numAtomBlocks
);
numBlocks
=
(
int
)
(
endFraction
*
numAtomBlocks
)
-
startBlockIndex
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
startTileIndex
=
(
int
)
(
startFraction
*
totalTiles
);
;
startTileIndex
=
(
int
)
(
startFraction
*
totalTiles
);
numTiles
=
(
int
)
(
endFraction
*
totalTiles
)
-
startTileIndex
;
forceRebuildNeighborList
=
true
;
}
CUfunction
CudaNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
vector
<
ParameterInfo
>&
params
,
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
{
...
...
platforms/cuda/src/kernels/findInteractingBlocks.cu
View file @
62f7c44c
...
...
@@ -43,7 +43,7 @@ extern "C" __global__ void findBlockBounds(int numAtoms, real4 periodicBoxSize,
extern
"C"
__global__
void
sortBoxData
(
const
real2
*
__restrict__
sortedBlock
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockBoundingBox
,
real4
*
__restrict__
sortedBlockCenter
,
real4
*
__restrict__
sortedBlockBoundingBox
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
oldPositions
,
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
rebuildNeighborList
)
{
unsigned
int
*
__restrict__
interactionCount
,
int
*
__restrict__
rebuildNeighborList
,
bool
forceRebuild
)
{
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_BLOCKS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
index
=
(
int
)
sortedBlock
[
i
].
y
;
sortedBlockCenter
[
i
]
=
blockCenter
[
index
];
...
...
@@ -52,7 +52,7 @@ extern "C" __global__ void sortBoxData(const real2* __restrict__ sortedBlock, co
// Also check whether any atom has moved enough so that we really need to rebuild the neighbor list.
bool
rebuild
=
f
alse
;
bool
rebuild
=
f
orceRebuild
;
for
(
int
i
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
delta
=
oldPositions
[
i
]
-
posq
[
i
];
if
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
>
0.25
f
*
PADDING
*
PADDING
)
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
62f7c44c
...
...
@@ -340,6 +340,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
oldPositions
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionCount
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
rebuildNeighborList
->
getDeviceBuffer
());
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
true
);
findInteractingBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findBlocksWithInteractions"
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
...
...
@@ -406,6 +407,7 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
context
.
executeKernel
(
sortBoxDataKernel
,
context
.
getNumAtoms
());
setPeriodicBoxArgs
(
context
,
findInteractingBlocksKernel
,
0
);
context
.
executeKernel
(
findInteractingBlocksKernel
,
context
.
getNumAtoms
(),
interactingBlocksThreadBlockSize
);
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
false
);
}
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
...
...
@@ -474,6 +476,7 @@ void OpenCLNonbondedUtilities::setAtomBlockRange(double startFraction, double en
forceKernel
.
setArg
<
cl_uint
>
(
6
,
numTiles
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startBlockIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
numBlocks
);
sortBoxDataKernel
.
setArg
<
cl_int
>
(
9
,
true
);
}
}
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
62f7c44c
...
...
@@ -177,23 +177,25 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
// Balance work between the contexts by transferring a little nonbonded work from the context that
// finished last to the one that finished first.
int
firstIndex
=
0
,
lastIndex
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
completionTimes
.
size
();
i
++
)
{
if
(
completionTimes
[
i
]
<
completionTimes
[
firstIndex
])
firstIndex
=
i
;
if
(
completionTimes
[
i
]
>
completionTimes
[
lastIndex
])
lastIndex
=
i
;
}
double
fractionToTransfer
=
min
(
0.001
,
contextNonbondedFractions
[
lastIndex
]);
contextNonbondedFractions
[
firstIndex
]
+=
fractionToTransfer
;
contextNonbondedFractions
[
lastIndex
]
-=
fractionToTransfer
;
double
startFraction
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
();
i
++
)
{
double
endFraction
=
startFraction
+
contextNonbondedFractions
[
i
];
if
(
i
==
contextNonbondedFractions
.
size
()
-
1
)
endFraction
=
1.0
;
// Avoid roundoff error
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
setAtomBlockRange
(
startFraction
,
endFraction
);
startFraction
=
endFraction
;
if
(
cl
.
getComputeForceCount
()
<
200
)
{
int
firstIndex
=
0
,
lastIndex
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
completionTimes
.
size
();
i
++
)
{
if
(
completionTimes
[
i
]
<
completionTimes
[
firstIndex
])
firstIndex
=
i
;
if
(
completionTimes
[
i
]
>
completionTimes
[
lastIndex
])
lastIndex
=
i
;
}
double
fractionToTransfer
=
min
(
0.001
,
contextNonbondedFractions
[
lastIndex
]);
contextNonbondedFractions
[
firstIndex
]
+=
fractionToTransfer
;
contextNonbondedFractions
[
lastIndex
]
-=
fractionToTransfer
;
double
startFraction
=
0.0
;
for
(
int
i
=
0
;
i
<
(
int
)
contextNonbondedFractions
.
size
();
i
++
)
{
double
endFraction
=
startFraction
+
contextNonbondedFractions
[
i
];
if
(
i
==
contextNonbondedFractions
.
size
()
-
1
)
endFraction
=
1.0
;
// Avoid roundoff error
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
setAtomBlockRange
(
startFraction
,
endFraction
);
startFraction
=
endFraction
;
}
}
}
return
energy
;
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
62f7c44c
...
...
@@ -44,7 +44,7 @@ __kernel void findBlockBounds(int numAtoms, real4 periodicBoxSize, real4 invPeri
__kernel
void
sortBoxData
(
__global
const
real2*
restrict
sortedBlock,
__global
const
real4*
restrict
blockCenter,
__global
const
real4*
restrict
blockBoundingBox,
__global
real4*
restrict
sortedBlockCenter,
__global
real4*
restrict
sortedBlockBoundingBox,
__global
const
real4*
restrict
posq,
__global
const
real4*
restrict
oldPositions,
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
rebuildNeighborList
)
{
__global
unsigned
int*
restrict
interactionCount,
__global
int*
restrict
rebuildNeighborList
,
int
forceRebuild
)
{
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_BLOCKS; i += get_global_size(0)) {
int
index
=
(
int
)
sortedBlock[i].y
;
sortedBlockCenter[i]
=
blockCenter[index]
;
...
...
@@ -53,7 +53,7 @@ __kernel void sortBoxData(__global const real2* restrict sortedBlock, __global c
//
Also
check
whether
any
atom
has
moved
enough
so
that
we
really
need
to
rebuild
the
neighbor
list.
bool
rebuild
=
f
alse
;
bool
rebuild
=
f
orceRebuild
;
for
(
int
i
=
get_global_id
(
0
)
; i < NUM_ATOMS; i += get_global_size(0)) {
real4
delta
=
oldPositions[i]-posq[i]
;
if
(
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
>
0.25f*PADDING*PADDING
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment