Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
dd352ee5
"platforms/opencl/tests/TestOpenCLNonbondedForce.cpp" did not exist on "644cc275ab5a549a82cce4d49680da729c2051e6"
Commit
dd352ee5
authored
Apr 26, 2011
by
Peter Eastman
Browse files
Added dynamic load balancing between GPUs
parent
6e3526b4
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
106 additions
and
35 deletions
+106
-35
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+22
-9
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+4
-0
platforms/opencl/src/OpenCLParallelKernels.cpp
platforms/opencl/src/OpenCLParallelKernels.cpp
+54
-7
platforms/opencl/src/OpenCLParallelKernels.h
platforms/opencl/src/OpenCLParallelKernels.h
+2
-0
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+4
-3
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
+5
-4
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+5
-4
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+5
-4
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+5
-4
No files found.
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
dd352ee5
...
...
@@ -233,8 +233,6 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
if
(
useCutoff
)
{
map
<
string
,
string
>
defines
;
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"START_TILE_INDEX"
]
=
OpenCLExpressionUtilities
::
intToString
(
startTileIndex
);
defines
[
"END_TILE_INDEX"
]
=
OpenCLExpressionUtilities
::
intToString
(
startTileIndex
+
numTiles
);
if
(
forceBufferPerAtomBlock
)
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
if
(
usePeriodic
)
...
...
@@ -256,6 +254,8 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionFlags
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
interactingTiles
->
getSize
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
10
,
startTileIndex
);
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
11
,
startTileIndex
+
numTiles
);
if
(
context
.
getSIMDWidth
()
==
32
&&
!
deviceIsCpu
)
{
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
...
...
@@ -302,8 +302,8 @@ void OpenCLNonbondedUtilities::prepareInteractions() {
void
OpenCLNonbondedUtilities
::
computeInteractions
()
{
if
(
cutoff
!=
-
1.0
)
{
if
(
useCutoff
)
{
forceKernel
.
setArg
<
mm_float4
>
(
1
0
,
context
.
getPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
1
1
,
context
.
getInvPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
1
2
,
context
.
getPeriodicBoxSize
());
forceKernel
.
setArg
<
mm_float4
>
(
1
3
,
context
.
getInvPeriodicBoxSize
());
}
context
.
executeKernel
(
forceKernel
,
(
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
)
*
OpenCLContext
::
TileSize
,
deviceIsCpu
?
1
:
-
1
);
}
...
...
@@ -325,14 +325,14 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
newSize
=
numTiles
;
delete
interactingTiles
;
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
newSize
,
"interactingTiles"
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl_uint
>
(
1
2
,
newSize
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
10
,
interactingTiles
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl_uint
>
(
1
4
,
newSize
);
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl_uint
>
(
9
,
newSize
);
if
(
context
.
getSIMDWidth
()
==
32
||
deviceIsCpu
)
{
delete
interactionFlags
;
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
deviceIsCpu
?
2
*
newSize
:
newSize
,
"interactionFlags"
);
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
3
,
interactionFlags
->
getDeviceBuffer
());
forceKernel
.
setArg
<
cl
::
Buffer
>
(
1
5
,
interactionFlags
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionFlags
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
interactingTiles
->
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
interactionFlags
->
getDeviceBuffer
());
...
...
@@ -340,6 +340,19 @@ void OpenCLNonbondedUtilities::updateNeighborListSize() {
}
}
/**
 * Change the range of tiles this context is responsible for.
 *
 * The new range is always stored in the object.  When the System has nonbonded
 * interactions, the range is also written to the already-created kernels as the
 * half-open interval [startTileIndex, startTileIndex+numTiles).
 *
 * @param startTileIndex  index of the first tile to process
 * @param numTiles        number of tiles to process
 */
void OpenCLNonbondedUtilities::setTileRange(int startTileIndex, int numTiles) {
    this->numTiles = numTiles;
    this->startTileIndex = startTileIndex;

    // A cutoff of -1.0 marks a System with no nonbonded interactions; in that
    // case only the stored range is updated and no kernel is touched.
    const bool hasNonbondedInteractions = !(cutoff == -1.0);
    if (hasNonbondedInteractions) {
        const int endTileIndex = startTileIndex+numTiles;

        // Arguments 8 and 9 of the force kernel are the start and end tile indices.
        forceKernel.setArg<cl_uint>(8, startTileIndex);
        forceKernel.setArg<cl_uint>(9, endTileIndex);

        // The neighbor list kernel is only updated when a cutoff is in use; it
        // takes the same pair of values as arguments 10 and 11.
        if (useCutoff) {
            findInteractingBlocksKernel.setArg<cl_uint>(10, startTileIndex);
            findInteractingBlocksKernel.setArg<cl_uint>(11, endTileIndex);
        }
    }
}
cl
::
Kernel
OpenCLNonbondedUtilities
::
createInteractionKernel
(
const
string
&
source
,
const
vector
<
ParameterInfo
>&
params
,
const
vector
<
ParameterInfo
>&
arguments
,
bool
useExclusions
,
bool
isSymmetric
)
const
{
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
...
...
@@ -447,8 +460,6 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
defines
[
"NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
context
.
getNumAtomBlocks
());
defines
[
"START_TILE_INDEX"
]
=
OpenCLExpressionUtilities
::
intToString
(
startTileIndex
);
defines
[
"END_TILE_INDEX"
]
=
OpenCLExpressionUtilities
::
intToString
(
startTileIndex
+
numTiles
);
string
file
;
if
(
deviceIsCpu
)
file
=
OpenCLKernelSources
::
nonbonded_cpu
;
...
...
@@ -470,6 +481,8 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
exclusionRowIndices
->
getDeviceBuffer
());
kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
*
localDataSize
:
OpenCLContext
::
ThreadBlockSize
*
localDataSize
),
NULL
);
kernel
.
setArg
(
index
++
,
4
*
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
kernel
.
setArg
<
cl_uint
>
(
index
++
,
startTileIndex
);
kernel
.
setArg
<
cl_uint
>
(
index
++
,
startTileIndex
+
numTiles
);
if
(
useCutoff
)
{
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactingTiles
->
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionCount
->
getDeviceBuffer
());
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
dd352ee5
...
...
@@ -190,6 +190,10 @@ public:
int
getNumTiles
()
const
{
return
numTiles
;
}
/**
* Set the range of tiles that should be processed by this context.
*/
void
setTileRange
(
int
startTileIndex
,
int
numTiles
);
/**
* Create a Kernel for evaluating a nonbonded interaction. Cutoffs and periodic boundary conditions
* are assumed to be the same as those for the default interaction Kernel, since this kernel will use
...
...
platforms/opencl/src/OpenCLParallelKernels.cpp
View file @
dd352ee5
...
...
@@ -29,6 +29,28 @@
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Get the current clock time, measured in microseconds.
 *
 * The result is taken from the system wall clock and is intended for comparing
 * time stamps taken close together (for example, the completion times of the
 * different contexts), not as an absolute date.  Where long is only 32 bits wide
 * (e.g. with MSVC) the microsecond count does not fit and is truncated, so
 * values wrap around periodically.
 *
 * @return the current time in microseconds
 */
#ifdef _MSC_VER
    #include <Windows.h>
    static long getTime() {
        FILETIME ft;
        GetSystemTimeAsFileTime(&ft); // 100-nanoseconds since 1-1-1601
        ULARGE_INTEGER result;
        result.LowPart = ft.dwLowDateTime;
        result.HighPart = ft.dwHighDateTime;
        // ULARGE_INTEGER is a union and has no arithmetic operators: the combined
        // 64-bit value must be read through QuadPart.  Dividing by 10 converts
        // 100 ns units to microseconds.
        return static_cast<long>(result.QuadPart/10);
    }
#else
    #include <sys/time.h>
    static long getTime() {
        struct timeval tod;
        gettimeofday(&tod, 0);
        // Do the arithmetic in 64 bits so that the multiplication cannot overflow
        // a 32-bit long; the result is narrowed only once, on return.
        const long long microseconds = static_cast<long long>(tod.tv_sec)*1000000LL+tod.tv_usec;
        return static_cast<long>(microseconds);
    }
#endif
class
OpenCLParallelCalcForcesAndEnergyKernel
::
BeginComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
BeginComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
...
...
@@ -52,8 +74,8 @@ private:
class
OpenCLParallelCalcForcesAndEnergyKernel
::
FinishComputationTask
:
public
OpenCLContext
::
WorkTask
{
public:
FinishComputationTask
(
ContextImpl
&
context
,
OpenCLContext
&
cl
,
OpenCLCalcForcesAndEnergyKernel
&
kernel
,
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
{
bool
includeForce
,
bool
includeEnergy
,
double
&
energy
,
long
&
completionTime
)
:
context
(
context
),
cl
(
cl
),
kernel
(
kernel
),
includeForce
(
includeForce
),
includeEnergy
(
includeEnergy
),
energy
(
energy
)
,
completionTime
(
completionTime
)
{
}
void
execute
()
{
// Execute the kernel, then download forces.
...
...
@@ -61,7 +83,7 @@ public:
energy
+=
kernel
.
finishComputation
(
context
,
includeForce
,
includeEnergy
);
if
(
includeForce
)
cl
.
getForce
().
download
();
mm_float4
f
=
cl
.
getForce
()[
0
]
;
completionTime
=
getTime
()
;
}
private:
ContextImpl
&
context
;
...
...
@@ -69,10 +91,11 @@ private:
OpenCLCalcForcesAndEnergyKernel
&
kernel
;
bool
includeForce
,
includeEnergy
;
double
&
energy
;
long
&
completionTime
;
};
OpenCLParallelCalcForcesAndEnergyKernel
::
OpenCLParallelCalcForcesAndEnergyKernel
(
string
name
,
const
Platform
&
platform
,
OpenCLPlatform
::
PlatformData
&
data
)
:
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
{
CalcForcesAndEnergyKernel
(
name
,
platform
),
data
(
data
)
,
completionTimes
(
data
.
contexts
.
size
()),
contextTiles
(
data
.
contexts
.
size
())
{
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
kernels
.
push_back
(
Kernel
(
new
OpenCLCalcForcesAndEnergyKernel
(
name
,
platform
,
*
data
.
contexts
[
i
])));
}
...
...
@@ -98,7 +121,7 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
OpenCLContext
&
cl
=
*
data
.
contexts
[
i
];
OpenCLContext
::
WorkThread
&
thread
=
cl
.
getWorkThread
();
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cl
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
data
.
contextEnergy
[
i
]));
thread
.
addTask
(
new
FinishComputationTask
(
context
,
cl
,
getKernel
(
i
),
includeForce
,
includeEnergy
,
data
.
contextEnergy
[
i
]
,
completionTimes
[
i
]
));
}
data
.
syncContexts
();
double
energy
=
0.0
;
...
...
@@ -107,8 +130,6 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
if
(
includeForce
)
{
// Sum the forces from all devices.
for
(
int
i
=
0
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
data
.
contexts
[
i
]
->
getForce
().
download
();
OpenCLArray
<
mm_float4
>&
forces
=
data
.
contexts
[
0
]
->
getForce
();
for
(
int
i
=
1
;
i
<
(
int
)
data
.
contexts
.
size
();
i
++
)
{
OpenCLArray
<
mm_float4
>&
contextForces
=
data
.
contexts
[
i
]
->
getForce
();
...
...
@@ -121,6 +142,32 @@ double OpenCLParallelCalcForcesAndEnergyKernel::finishComputation(ContextImpl& c
}
}
forces
.
upload
();
// Balance work between the contexts by transferring a few nonbonded tiles from the context that
// finished last to the one that finished first.
int
firstIndex
=
0
,
lastIndex
=
0
;
int
totalTiles
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
completionTimes
.
size
();
i
++
)
{
if
(
completionTimes
[
i
]
<
completionTimes
[
firstIndex
])
firstIndex
=
i
;
if
(
completionTimes
[
i
]
>
completionTimes
[
lastIndex
])
lastIndex
=
i
;
contextTiles
[
i
]
=
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
getNumTiles
();
totalTiles
+=
contextTiles
[
i
];
}
int
tilesToTransfer
=
totalTiles
/
1000
;
if
(
tilesToTransfer
<
1
)
tilesToTransfer
=
1
;
if
(
tilesToTransfer
>
contextTiles
[
lastIndex
])
tilesToTransfer
=
contextTiles
[
lastIndex
];
contextTiles
[
firstIndex
]
+=
tilesToTransfer
;
contextTiles
[
lastIndex
]
-=
tilesToTransfer
;
int
startIndex
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
contextTiles
.
size
();
i
++
)
{
data
.
contexts
[
i
]
->
getNonbondedUtilities
().
setTileRange
(
startIndex
,
contextTiles
[
i
]);
startIndex
+=
contextTiles
[
i
];
}
}
return
energy
;
}
...
...
platforms/opencl/src/OpenCLParallelKernels.h
View file @
dd352ee5
...
...
@@ -76,6 +76,8 @@ private:
class
FinishComputationTask
;
OpenCLPlatform
::
PlatformData
&
data
;
std
::
vector
<
Kernel
>
kernels
;
std
::
vector
<
long
>
completionTimes
;
std
::
vector
<
int
>
contextTiles
;
};
/**
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
dd352ee5
...
...
@@ -159,7 +159,8 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
*/
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles,
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles) {
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) {
__local ushort2 buffer[BUFFER_SIZE];
__local int valid[BUFFER_SIZE];
__local short sum[BUFFER_SIZE];
...
...
@@ -172,11 +173,11 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
for (int i = 0; i < BUFFER_GROUPS; ++i)
valid[i*GROUP_SIZE+get_local_id(0)] = false;
barrier(CLK_LOCAL_MEM_FENCE);
for (int baseIndex =
START_TILE_INDEX
+get_group_id(0)*get_local_size(0); baseIndex <
END_TILE_INDEX
; baseIndex += get_global_size(0)) {
for (int baseIndex =
startTileIndex
+get_group_id(0)*get_local_size(0); baseIndex <
endTileIndex
; baseIndex += get_global_size(0)) {
// Identify the pair of blocks to compare.
int index = baseIndex+get_local_id(0);
if (index <
END_TILE_INDEX
) {
if (index <
endTileIndex
) {
unsigned int y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*index));
unsigned int x = (index-y*NUM_BLOCKS+y*(y+1)/2);
if (x < y |
|
x
>=
NUM_BLOCKS
)
{
//
Occasionally
happens
due
to
roundoff
error.
...
...
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
View file @
dd352ee5
...
...
@@ -123,12 +123,13 @@ void storeInteractionData(ushort2* buffer, int numValid, __global unsigned int*
*/
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles,
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles) {
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) {
ushort2 buffer[BUFFER_SIZE];
int valuesInBuffer = 0;
const int numTiles =
END_TILE_INDEX-START_TILE_INDEX
;
unsigned int start =
START_TILE_INDEX
+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end =
START_TILE_INDEX
+(get_group_id(0)+1)*numTiles/get_num_groups(0);
const int numTiles =
endTileIndex-startTileIndex
;
unsigned int start =
startTileIndex
+get_group_id(0)*numTiles/get_num_groups(0);
unsigned int end =
startTileIndex
+(get_group_id(0)+1)*numTiles/get_num_groups(0);
for (int index = start; index < end; index++) {
// Identify the pair of blocks to compare.
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
dd352ee5
...
...
@@ -13,6 +13,7 @@ typedef struct {
__kernel
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
unsigned
int
startTileIndex,
unsigned
int
endTileIndex,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
#
else
...
...
@@ -21,11 +22,11 @@ __kernel void computeNonbonded(__global float4* forceBuffers, __global float* en
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
START_TILE_INDEX
+get_group_id
(
0
)
*
(
END_TILE_INDEX-START_TILE_INDEX
)
/get_num_groups
(
0
)
:
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
))
;
unsigned
int
end
=
(
numTiles
>
maxTiles
?
START_TILE_INDEX
+
(
get_group_id
(
0
)
+1
)
*
(
END_TILE_INDEX-START_TILE_INDEX
)
/get_num_groups
(
0
)
:
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
))
;
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
startTileIndex
+get_group_id
(
0
)
*
(
endTileIndex-startTileIndex
)
/get_num_groups
(
0
)
:
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
))
;
unsigned
int
end
=
(
numTiles
>
maxTiles
?
startTileIndex
+
(
get_group_id
(
0
)
+1
)
*
(
endTileIndex-startTileIndex
)
/get_num_groups
(
0
)
:
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
))
;
#
else
unsigned
int
pos
=
START_TILE_INDEX
+get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
START_TILE_INDEX
+
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
startTileIndex
+get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
startTileIndex
+
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
#
endif
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
dd352ee5
...
...
@@ -14,6 +14,7 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
unsigned
int
startTileIndex,
unsigned
int
endTileIndex,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
#
else
...
...
@@ -22,11 +23,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
START_TILE_INDEX
+get_group_id
(
0
)
*
(
END_TILE_INDEX-START_TILE_INDEX
)
/get_num_groups
(
0
)
:
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
))
;
unsigned
int
end
=
(
numTiles
>
maxTiles
?
START_TILE_INDEX
+
(
get_group_id
(
0
)
+1
)
*
(
END_TILE_INDEX-START_TILE_INDEX
)
/get_num_groups
(
0
)
:
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
))
;
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
startTileIndex
+get_group_id
(
0
)
*
(
endTileIndex-startTileIndex
)
/get_num_groups
(
0
)
:
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
))
;
unsigned
int
end
=
(
numTiles
>
maxTiles
?
startTileIndex
+
(
get_group_id
(
0
)
+1
)
*
(
endTileIndex-startTileIndex
)
/get_num_groups
(
0
)
:
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
))
;
#
else
unsigned
int
pos
=
START_TILE_INDEX
+get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
START_TILE_INDEX
+
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
pos
=
startTileIndex
+get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
startTileIndex
+
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
#
endif
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
dd352ee5
...
...
@@ -14,6 +14,7 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float*
tempBuffer,
unsigned
int
startTileIndex,
unsigned
int
endTileIndex,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
#
else
...
...
@@ -24,11 +25,11 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
START_TILE_INDEX+warp*
(
END_TILE_INDEX-START_TILE_INDEX
)
/totalWarps
:
warp*numTiles/totalWarps
)
;
unsigned
int
end
=
(
numTiles
>
maxTiles
?
START_TILE_INDEX+
(
warp+1
)
*
(
END_TILE_INDEX-START_TILE_INDEX
)
/totalWarps
:
(
warp+1
)
*numTiles/totalWarps
)
;
unsigned
int
pos
=
(
numTiles
>
maxTiles
?
startTileIndex+warp*
(
endTileIndex-startTileIndex
)
/totalWarps
:
warp*numTiles/totalWarps
)
;
unsigned
int
end
=
(
numTiles
>
maxTiles
?
startTileIndex+
(
warp+1
)
*
(
endTileIndex-startTileIndex
)
/totalWarps
:
(
warp+1
)
*numTiles/totalWarps
)
;
#
else
unsigned
int
pos
=
START_TILE_INDEX
+warp*numTiles/totalWarps
;
unsigned
int
end
=
START_TILE_INDEX
+
(
warp+1
)
*numTiles/totalWarps
;
unsigned
int
pos
=
startTileIndex
+warp*numTiles/totalWarps
;
unsigned
int
end
=
startTileIndex
+
(
warp+1
)
*numTiles/totalWarps
;
#
endif
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment