Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a8fe9cea
Commit
a8fe9cea
authored
Aug 20, 2010
by
Peter Eastman
Browse files
Continuing changes to reduce memory use for large systems
parent
b0d2357c
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
155 additions
and
88 deletions
+155
-88
platforms/opencl/src/OpenCLContext.h
platforms/opencl/src/OpenCLContext.h
+7
-0
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+2
-1
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+2
-2
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+5
-5
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+5
-5
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+5
-5
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+5
-5
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+91
-31
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+10
-10
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+10
-10
platforms/opencl/src/kernels/nonbonded_default.cl
platforms/opencl/src/kernels/nonbonded_default.cl
+5
-5
platforms/opencl/src/kernels/nonbonded_nvidia.cl
platforms/opencl/src/kernels/nonbonded_nvidia.cl
+5
-5
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
+3
-4
No files found.
platforms/opencl/src/OpenCLContext.h
View file @
a8fe9cea
...
@@ -84,6 +84,13 @@ struct mm_float16 {
...
@@ -84,6 +84,13 @@ struct mm_float16 {
s8
(
s8
),
s9
(
s9
),
s10
(
s10
),
s11
(
s11
),
s12
(
s12
),
s13
(
s13
),
s14
(
s14
),
s15
(
15
)
{
s8
(
s8
),
s9
(
s9
),
s10
(
s10
),
s11
(
s11
),
s12
(
s12
),
s13
(
s13
),
s14
(
s14
),
s15
(
15
)
{
}
}
};
};
struct
mm_ushort2
{
cl_ushort
x
,
y
;
mm_ushort2
()
{
}
mm_ushort2
(
cl_ushort
x
,
cl_ushort
y
)
:
x
(
x
),
y
(
y
)
{
}
};
struct
mm_int2
{
struct
mm_int2
{
cl_int
x
,
y
;
cl_int
x
,
y
;
mm_int2
()
{
mm_int2
()
{
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
a8fe9cea
...
@@ -200,7 +200,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -200,7 +200,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
// Create data structures for the neighbor list.
// Create data structures for the neighbor list.
if
(
useCutoff
)
{
if
(
useCutoff
)
{
interactingTiles
=
new
OpenCLArray
<
cl_uint
>
(
context
,
numTiles
,
"interactingTiles"
);
interactingTiles
=
new
OpenCLArray
<
mm_ushort2
>
(
context
,
numTiles
,
"interactingTiles"
);
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
numTiles
,
"interactionFlags"
);
interactionFlags
=
new
OpenCLArray
<
cl_uint
>
(
context
,
numTiles
,
"interactionFlags"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
interactionCount
=
new
OpenCLArray
<
cl_uint
>
(
context
,
1
,
"interactionCount"
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockCenter
=
new
OpenCLArray
<
mm_float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
...
@@ -230,6 +230,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
...
@@ -230,6 +230,7 @@ void OpenCLNonbondedUtilities::initialize(const System& system) {
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
blockBoundingBox
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
5
,
interactionCount
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
6
,
interactingTiles
->
getDeviceBuffer
());
findInteractingBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
=
cl
::
Kernel
(
interactingBlocksProgram
,
"findInteractionsWithinBlocks"
);
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl_float
>
(
0
,
(
cl_float
)
(
cutoff
*
cutoff
));
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
findInteractionsWithinBlocksKernel
.
setArg
<
cl
::
Buffer
>
(
3
,
context
.
getPosq
().
getDeviceBuffer
());
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
a8fe9cea
...
@@ -148,7 +148,7 @@ public:
...
@@ -148,7 +148,7 @@ public:
/**
/**
* Get the array containing tiles with interactions.
* Get the array containing tiles with interactions.
*/
*/
OpenCLArray
<
cl_uint
>&
getInteractingTiles
()
{
OpenCLArray
<
mm_ushort2
>&
getInteractingTiles
()
{
return
*
interactingTiles
;
return
*
interactingTiles
;
}
}
/**
/**
...
@@ -197,7 +197,7 @@ private:
...
@@ -197,7 +197,7 @@ private:
OpenCLArray
<
cl_uint
>*
exclusions
;
OpenCLArray
<
cl_uint
>*
exclusions
;
OpenCLArray
<
cl_uint
>*
exclusionIndices
;
OpenCLArray
<
cl_uint
>*
exclusionIndices
;
OpenCLArray
<
cl_uint
>*
exclusionRowIndices
;
OpenCLArray
<
cl_uint
>*
exclusionRowIndices
;
OpenCLArray
<
cl_uint
>*
interactingTiles
;
OpenCLArray
<
mm_ushort2
>*
interactingTiles
;
OpenCLArray
<
cl_uint
>*
interactionFlags
;
OpenCLArray
<
cl_uint
>*
interactionFlags
;
OpenCLArray
<
cl_uint
>*
interactionCount
;
OpenCLArray
<
cl_uint
>*
interactionCount
;
OpenCLArray
<
mm_float4
>*
blockCenter
;
OpenCLArray
<
mm_float4
>*
blockCenter
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
a8fe9cea
...
@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempForceBuffer,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempForceBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -30,9 +30,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -30,9 +30,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -207,8 +207,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -207,8 +207,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
}
}
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
a8fe9cea
...
@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -11,7 +11,7 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -31,9 +31,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -31,9 +31,9 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -195,8 +195,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -195,8 +195,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz
;
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
a8fe9cea
...
@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -27,9 +27,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -27,9 +27,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -196,8 +196,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -196,8 +196,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
global_value[offset1]
+=
value+tempBuffer[get_local_id
(
0
)
+TILE_SIZE]
;
global_value[offset1]
+=
value+tempBuffer[get_local_id
(
0
)
+TILE_SIZE]
;
global_value[offset2]
+=
local_value[get_local_id
(
0
)
]+local_value[get_local_id
(
0
)
+TILE_SIZE]
;
global_value[offset2]
+=
local_value[get_local_id
(
0
)
]+local_value[get_local_id
(
0
)
+TILE_SIZE]
;
}
}
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
}
}
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
a8fe9cea
...
@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -9,7 +9,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -29,9 +29,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -29,9 +29,9 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -233,8 +233,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -233,8 +233,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
#
endif
#
endif
global_value[offset1]
+=
value
;
global_value[offset1]
+=
value
;
global_value[offset2]
+=
local_value[get_local_id
(
0
)
]
;
global_value[offset2]
+=
local_value[get_local_id
(
0
)
]
;
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
}
}
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
a8fe9cea
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_byte_addressable_store
:
enable
#
define
TILE_SIZE
32
#
define
TILE_SIZE
32
#
define
GROUP_SIZE
64
#
define
GROUP_SIZE
64
#
define
BUFFER_GROUPS
4
#
define
BUFFER_GROUPS
4
...
@@ -44,31 +45,34 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
...
@@ -44,31 +45,34 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
*
This
is
called
by
findBlocksWithInteractions
()
.
It
compacts
the
list
of
blocks
and
writes
them
*
This
is
called
by
findBlocksWithInteractions
()
.
It
compacts
the
list
of
blocks
and
writes
them
*
to
global
memory.
*
to
global
memory.
*/
*/
void
storeInteractionData
(
__local
short2*
buffer,
__local
bool*
valid,
__local
int*
sum,
__local
int*
sum2,
__local
short2*
temp,
__local
int*
baseIndex,
void
storeInteractionData
(
__local
ushort2*
buffer,
__local
int*
valid,
__local
short*
sum,
__local
ushort2*
temp,
__local
int*
baseIndex,
__global
unsigned
int*
interactionCount,
__global
unsigned
int*
interactingTiles
)
{
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox
)
{
//
The
buffer
is
full,
so
we
need
to
compact
it
and
write
out
results.
Start
by
doing
a
parallel
prefix
sum.
//
The
buffer
is
full,
so
we
need
to
compact
it
and
write
out
results.
Start
by
doing
a
parallel
prefix
sum.
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum
[i]
=
(
valid[i]
?
1
:
0
)
;
temp
[i]
.x
=
(
valid[i]
?
1
:
0
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
whichBuffer
=
0
;
int
whichBuffer
=
0
;
for
(
int
offset
=
1
; offset < BUFFER_SIZE; offset *= 2) {
for
(
int
offset
=
1
; offset < BUFFER_SIZE; offset *= 2) {
if
(
whichBuffer
==
0
)
if
(
whichBuffer
==
0
)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum2
[i]
=
(
i
<
offset
?
sum
[i]
:
sum[i]+sum
[i-offset]
)
;
temp
[i]
.y
=
(
i
<
offset
?
temp
[i]
.x
:
temp[i].x+temp
[i-offset]
.x
)
;
else
else
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum
[i]
=
(
i
<
offset
?
sum2
[i]
:
sum2[i]+sum2
[i-offset]
)
;
temp
[i]
.x
=
(
i
<
offset
?
temp
[i]
.y
:
temp[i].y+temp
[i-offset]
.y
)
;
whichBuffer
=
1-whichBuffer
;
whichBuffer
=
1-whichBuffer
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
if
(
whichBuffer
==
1
)
{
if
(
whichBuffer
==
0
)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i]
=
sum2[i]
;
sum[i]
=
temp[i].x
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
else
}
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
sum[i]
=
temp[i].y
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compact
the
buffer
and
store
it
to
global
memory
.
//
Compact
the
buffer.
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
if
(
valid[i]
)
{
if
(
valid[i]
)
{
...
@@ -77,14 +81,74 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
...
@@ -77,14 +81,74 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
numValid
=
sum[BUFFER_SIZE-1]
;
int
numValid
=
sum[BUFFER_SIZE-1]
;
if
(
get_local_id
(
0
)
==
0
)
*baseIndex
=
atom_add
(
interactionCount,
numValid
)
;
//
Filter
the
list
of
tiles
by
comparing
the
distance
from
each
atom
to
the
other
bounding
box.
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
int
tile
;
int
index
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
int
group
=
get_local_id
(
0
)
/TILE_SIZE
;
__local
int*
flag
=
sum
;
int
lasty
=
-1
;
float4
center,
boxSize,
pos
;
for
(
tile
=
0
; tile < numValid; tile++) {
int
x
=
temp[tile].x
;
int
y
=
temp[tile].y
;
if
(
x
==
y
)
{
tile++
;
continue
;
}
if
(
index
==
0
)
flag[group]
=
true
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Load
an
atom
position
and
the
bounding
box
the
other
block.
if
(
group
==
0
)
{
center
=
blockCenter[x]
;
boxSize
=
blockBoundingBox[x]
;
if
(
y
!=
lasty
)
pos
=
posq[y*TILE_SIZE+index]
;
}
else
{
if
(
y
!=
lasty
)
{
center
=
blockCenter[y]
;
boxSize
=
blockBoundingBox[y]
;
}
pos
=
posq[x*TILE_SIZE+index]
;
}
lasty
=
y
;
//
Find
the
distance
of
the
atom
from
the
bounding
box.
float4
delta
=
pos-center
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*invPeriodicBoxSize.x+0.5f
)
*periodicBoxSize.x
;
delta.y
-=
floor
(
delta.y*invPeriodicBoxSize.y+0.5f
)
*periodicBoxSize.y
;
delta.z
-=
floor
(
delta.z*invPeriodicBoxSize.z+0.5f
)
*periodicBoxSize.z
;
#
endif
delta
=
max
((
float4
)
0.0f,
fabs
(
delta
)
-boxSize
)
;
if
(
delta.x*delta.x+delta.y*delta.y+delta.z*delta.z
<
cutoffSquared
)
flag[group]
=
false
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
flag[0]
||
flag[1]
)
{
//
This
tile
contains
no
interactions.
numValid--
;
if
(
get_local_id
(
0
)
==
0
)
temp[tile]
=
temp[numValid]
;
}
else
tile++
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
//
Store
it
to
global
memory.
//
Store
it
to
global
memory.
if
(
get_local_id
(
0
)
==
0
)
*baseIndex
=
atom_add
(
interactionCount,
numValid
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
for
(
int
i
=
get_local_id
(
0
)
; i < numValid; i += GROUP_SIZE)
for
(
int
i
=
get_local_id
(
0
)
; i < numValid; i += GROUP_SIZE)
interactingTiles[*baseIndex+i]
=
(
temp[i]
.x<<17
)
+
(
temp[i].y<<2
)
;
interactingTiles[*baseIndex+i]
=
temp[i]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
...
@@ -93,12 +157,11 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
...
@@ -93,12 +157,11 @@ void storeInteractionData(__local short2* buffer, __local bool* valid, __local i
*
mark
them
as
non-interacting.
*
mark
them
as
non-interacting.
*/
*/
__kernel
void
findBlocksWithInteractions
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
blockCenter,
__kernel
void
findBlocksWithInteractions
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount,
__global
unsigned
int*
interactingTiles
)
{
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
__global
float4*
posq
)
{
__local
short2
buffer[BUFFER_SIZE]
;
__local
ushort2
buffer[BUFFER_SIZE]
;
__local
bool
valid[BUFFER_SIZE]
;
__local
int
valid[BUFFER_SIZE]
;
__local
int
sum[BUFFER_SIZE]
;
__local
short
sum[BUFFER_SIZE]
;
__local
int
sum2[BUFFER_SIZE]
;
__local
ushort2
temp[BUFFER_SIZE]
;
__local
short2
temp[BUFFER_SIZE]
;
__local
int
bufferFull
;
__local
int
bufferFull
;
__local
int
globalIndex
;
__local
int
globalIndex
;
int
valuesInBuffer
=
0
;
int
valuesInBuffer
=
0
;
...
@@ -138,7 +201,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
...
@@ -138,7 +201,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
int
bufferIndex
=
valuesInBuffer*GROUP_SIZE+get_local_id
(
0
)
;
int
bufferIndex
=
valuesInBuffer*GROUP_SIZE+get_local_id
(
0
)
;
valid[bufferIndex]
=
true
;
valid[bufferIndex]
=
true
;
buffer[bufferIndex]
=
(
short2
)
(
x,
y
)
;
buffer[bufferIndex]
=
(
u
short2
)
(
x,
y
)
;
valuesInBuffer++
;
valuesInBuffer++
;
if
(
!bufferFull
&&
valuesInBuffer
==
BUFFER_GROUPS
)
if
(
!bufferFull
&&
valuesInBuffer
==
BUFFER_GROUPS
)
bufferFull
=
true
;
bufferFull
=
true
;
...
@@ -146,21 +209,21 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
...
@@ -146,21 +209,21 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
bufferFull
)
{
if
(
bufferFull
)
{
storeInteractionData
(
buffer,
valid,
sum,
sum2,
temp,
&globalIndex,
interactionCount,
interactingTiles
)
;
storeInteractionData
(
buffer,
valid,
sum,
temp,
&globalIndex,
interactionCount,
interactingTiles
,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox
)
;
valuesInBuffer
=
0
;
valuesInBuffer
=
0
;
if
(
get_local_id
(
0
)
==
0
)
if
(
get_local_id
(
0
)
==
0
)
bufferFull
=
false
;
bufferFull
=
false
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
}
}
storeInteractionData
(
buffer,
valid,
sum,
sum2,
temp,
&globalIndex,
interactionCount,
interactingTiles
)
;
storeInteractionData
(
buffer,
valid,
sum,
temp,
&globalIndex,
interactionCount,
interactingTiles
,
cutoffSquared,
periodicBoxSize,
invPeriodicBoxSize,
posq,
blockCenter,
blockBoundingBox
)
;
}
}
/**
/**
*
Compare
each
atom
in
one
block
to
the
bounding
box
of
another
block,
and
set
*
Compare
each
atom
in
one
block
to
the
bounding
box
of
another
block,
and
set
*
flags
for
which
ones
are
interacting.
*
flags
for
which
ones
are
interacting.
*/
*/
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
u
nsigned
int
*
tiles,
__global
float4*
blockCenter,
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
u
short2
*
tiles,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
unsigned
int*
flags
)
{
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
unsigned
int*
flags
)
{
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
...
@@ -173,13 +236,10 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
...
@@ -173,13 +236,10 @@ __kernel void findInteractionsWithinBlocks(float cutoffSquared, float4 periodicB
float4
apos
;
float4
apos
;
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
unsigned
int
x
=
tiles[pos]
;
ushort2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
bool
hasExclusions
=
(
x
&
0x1
)
;
unsigned
int
y
=
tileIndices.y
;
x
=
(
x
>>
17
)
;
if
(
x
==
y
)
{
if
(
x
==
y
||
hasExclusions
)
{
//
Assume
this
tile
will
be
dense.
if
(
index
==
0
)
if
(
index
==
0
)
interactionFlags[pos]
=
0xFFFFFFFF
;
interactionFlags[pos]
=
0xFFFFFFFF
;
}
}
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
a8fe9cea
...
@@ -17,7 +17,7 @@ typedef struct {
...
@@ -17,7 +17,7 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -32,9 +32,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -32,9 +32,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -181,8 +181,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -181,8 +181,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
global_bornSum[offset1]
+=
bornSum+tempBuffer[get_local_id
(
0
)
+TILE_SIZE]
;
global_bornSum[offset1]
+=
bornSum+tempBuffer[get_local_id
(
0
)
+TILE_SIZE]
;
global_bornSum[offset2]
+=
localData[get_local_id
(
0
)
].bornSum+localData[get_local_id
(
0
)
+TILE_SIZE].bornSum
;
global_bornSum[offset2]
+=
localData[get_local_id
(
0
)
].bornSum+localData[get_local_id
(
0
)
+TILE_SIZE].bornSum
;
}
}
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
}
}
...
@@ -196,7 +196,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -196,7 +196,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -211,9 +211,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -211,9 +211,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -366,8 +366,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -366,8 +366,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
global_bornForce[offset1]
+=
force.w+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].w
;
global_bornForce[offset1]
+=
force.w+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].w
;
global_bornForce[offset2]
+=
sum.w
;
global_bornForce[offset2]
+=
sum.w
;
}
}
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
a8fe9cea
...
@@ -17,7 +17,7 @@ typedef struct {
...
@@ -17,7 +17,7 @@ typedef struct {
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -34,9 +34,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -34,9 +34,9 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -247,8 +247,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -247,8 +247,8 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
#
endif
#
endif
global_bornSum[offset1]
+=
bornSum
;
global_bornSum[offset1]
+=
bornSum
;
global_bornSum[offset2]
+=
localData[get_local_id
(
0
)
].bornSum
;
global_bornSum[offset2]
+=
localData[get_local_id
(
0
)
].bornSum
;
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
}
}
...
@@ -262,7 +262,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -262,7 +262,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
)
{
#
else
#
else
unsigned
int
numTiles
)
{
unsigned
int
numTiles
)
{
#
endif
#
endif
...
@@ -279,9 +279,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -279,9 +279,9 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -489,8 +489,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -489,8 +489,8 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
forceBuffers[offset2]
+=
(
float4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0
)
;
forceBuffers[offset2]
+=
(
float4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0
)
;
global_bornForce[offset1]
+=
force.w
;
global_bornForce[offset1]
+=
force.w
;
global_bornForce[offset2]
+=
localData[get_local_id
(
0
)
].fw
;
global_bornForce[offset2]
+=
localData[get_local_id
(
0
)
].fw
;
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
...
...
platforms/opencl/src/kernels/nonbonded_default.cl
View file @
a8fe9cea
...
@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
...
@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -33,9 +33,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -33,9 +33,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -212,8 +212,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -212,8 +212,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
float4
sum
=
(
float4
)
(
localData[get_local_id
(
0
)
].fx+localData[get_local_id
(
0
)
+TILE_SIZE].fx,
localData[get_local_id
(
0
)
].fy+localData[get_local_id
(
0
)
+TILE_SIZE].fy,
localData[get_local_id
(
0
)
].fz+localData[get_local_id
(
0
)
+TILE_SIZE].fz,
0.0f
)
;
float4
sum
=
(
float4
)
(
localData[get_local_id
(
0
)
].fx+localData[get_local_id
(
0
)
+TILE_SIZE].fx,
localData[get_local_id
(
0
)
].fy+localData[get_local_id
(
0
)
+TILE_SIZE].fy,
localData[get_local_id
(
0
)
].fz+localData[get_local_id
(
0
)
+TILE_SIZE].fz,
0.0f
)
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+sum.xyz
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+sum.xyz
;
}
}
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
...
...
platforms/opencl/src/kernels/nonbonded_nvidia.cl
View file @
a8fe9cea
...
@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
...
@@ -15,7 +15,7 @@ __kernel __attribute__((reqd_work_group_size(WORK_GROUP_SIZE, 1, 1)))
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
void
computeNonbonded
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
u
nsigned
int
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
__global
u
short2
*
tiles,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize
#
else
#
else
unsigned
int
numTiles
unsigned
int
numTiles
#
endif
#
endif
...
@@ -35,9 +35,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -35,9 +35,9 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
while
(
pos
<
end
)
{
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
//
Extract
the
coordinates
of
this
tile
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
u
nsigned
int
x
=
tiles[pos]
;
u
short2
tileIndices
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
;
unsigned
int
x
=
tileIndices.x
;
x
=
(
x>>17
)
;
unsigned
int
y
=
tileIndices.y
;
#
else
#
else
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
y
=
(
unsigned
int
)
floor
(
NUM_BLOCKS+0.5f-sqrt
((
NUM_BLOCKS+0.5f
)
*
(
NUM_BLOCKS+0.5f
)
-2*pos
))
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
unsigned
int
x
=
(
pos-y*NUM_BLOCKS+y*
(
y+1
)
/2
)
;
...
@@ -261,8 +261,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
...
@@ -261,8 +261,8 @@ void computeNonbonded(__global float4* forceBuffers, __global float* energyBuffe
#
endif
#
endif
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset2]
+=
(
float4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
forceBuffers[offset2]
+=
(
float4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData[get_local_id
(
0
)
].fz,
0.0f
)
;
lasty
=
y
;
}
}
lasty
=
y
;
pos++
;
pos++
;
}
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
...
...
platforms/opencl/tests/TestOpenCLNonbondedForce.cpp
View file @
a8fe9cea
...
@@ -517,7 +517,7 @@ void testBlockInteractions(bool periodic) {
...
@@ -517,7 +517,7 @@ void testBlockInteractions(bool periodic) {
// Verify that interactions were identified correctly.
// Verify that interactions were identified correctly.
vector
<
cl_uint
>
interactionCount
;
vector
<
cl_uint
>
interactionCount
;
vector
<
cl_uint
>
interactingTiles
;
vector
<
mm_ushort2
>
interactingTiles
;
vector
<
cl_uint
>
interactionFlags
;
vector
<
cl_uint
>
interactionFlags
;
nb
.
getInteractionCount
().
download
(
interactionCount
);
nb
.
getInteractionCount
().
download
(
interactionCount
);
int
numWithInteractions
=
interactionCount
[
0
];
int
numWithInteractions
=
interactionCount
[
0
];
...
@@ -528,9 +528,8 @@ void testBlockInteractions(bool periodic) {
...
@@ -528,9 +528,8 @@ void testBlockInteractions(bool periodic) {
const
unsigned
int
grid
=
OpenCLContext
::
TileSize
;
const
unsigned
int
grid
=
OpenCLContext
::
TileSize
;
const
unsigned
int
dim
=
clcontext
.
getNumAtomBlocks
();
const
unsigned
int
dim
=
clcontext
.
getNumAtomBlocks
();
for
(
int
i
=
0
;
i
<
numWithInteractions
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numWithInteractions
;
i
++
)
{
unsigned
int
tile
=
interactingTiles
[
i
];
unsigned
int
x
=
interactingTiles
[
i
].
x
;
unsigned
int
x
=
(
tile
>>
17
);
unsigned
int
y
=
interactingTiles
[
i
].
y
;
unsigned
int
y
=
((
tile
>>
2
)
&
0x7fff
);
int
index
=
(
x
>
y
?
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
:
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
);
int
index
=
(
x
>
y
?
x
+
y
*
dim
-
y
*
(
y
+
1
)
/
2
:
y
+
x
*
dim
-
x
*
(
x
+
1
)
/
2
);
hasInteractions
[
index
]
=
true
;
hasInteractions
[
index
]
=
true
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment