Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
985da46e
Commit
985da46e
authored
Dec 19, 2009
by
Peter Eastman
Browse files
Created non-Nvidia-specific kernels for CustomGBForce (not yet fully debugged)
parent
d837b440
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
382 additions
and
3 deletions
+382
-3
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+7
-3
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+192
-0
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+2
-0
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+181
-0
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
985da46e
...
...
@@ -1494,7 +1494,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
n2EnergySource
.
str
();
stringstream
extraArgs
,
loadLocal1
,
loadLocal2
,
load1
,
load2
,
recordDeriv
,
storeDerivs1
,
storeDerivs2
;
stringstream
extraArgs
,
loadLocal1
,
loadLocal2
,
load1
,
load2
,
recordDeriv
,
storeDerivs1
,
storeDerivs2
,
declareTemps
,
setTemps
;
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", __constant float* globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
...
...
@@ -1523,8 +1523,10 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
load1
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_1 = 0;
\n
"
;
load2
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_2 = 0;
\n
"
;
recordDeriv
<<
"local_deriv"
<<
index
<<
"[atom2] += deriv"
<<
index
<<
"_2;
\n
"
;
storeDerivs1
<<
"derivBuffers"
<<
index
<<
"[offset1] += deriv"
<<
index
<<
"_1;
\n
"
;
storeDerivs2
<<
"derivBuffers"
<<
index
<<
"[offset2] += local_deriv"
<<
index
<<
"[get_local_id(0)];
\n
"
;
storeDerivs1
<<
"STORE_DERIVATIVE_1("
<<
index
<<
")"
;
storeDerivs2
<<
"STORE_DERIVATIVE_2("
<<
index
<<
")"
;
declareTemps
<<
"__local "
<<
buffer
.
getType
()
<<
" tempDerivBuffer"
<<
index
<<
"[64];
\n
"
;
setTemps
<<
"tempDerivBuffer"
<<
index
<<
"[get_local_id(0)] = deriv"
<<
index
<<
"_1;
\n
"
;
}
replacements
[
"PARAMETER_ARGUMENTS"
]
=
extraArgs
.
str
()
+
tableArgs
.
str
();
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_1"
]
=
loadLocal1
.
str
();
...
...
@@ -1534,6 +1536,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
replacements
[
"RECORD_DERIVATIVE_2"
]
=
recordDeriv
.
str
();
replacements
[
"STORE_DERIVATIVES_1"
]
=
storeDerivs1
.
str
();
replacements
[
"STORE_DERIVATIVES_2"
]
=
storeDerivs2
.
str
();
replacements
[
"DECLARE_TEMP_BUFFERS"
]
=
declareTemps
.
str
();
replacements
[
"SET_TEMP_BUFFERS"
]
=
setTemps
.
str
();
map
<
string
,
string
>
defines
;
if
(
cl
.
getNonbondedUtilities
().
getForceBufferPerAtomBlock
())
defines
[
"USE_OUTPUT_BUFFER_PER_BLOCK"
]
=
"1"
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
0 → 100644
View file @
985da46e
#
define
TILE_SIZE
32
#
define
STORE_DERIVATIVE_1
(
INDEX
)
derivBuffers##INDEX[offset1]
+=
deriv##INDEX##_1+tempDerivBuffer##INDEX[get_local_id
(
0
)
+TILE_SIZE]
;
#
define
STORE_DERIVATIVE_2
(
INDEX
)
derivBuffers##INDEX[offset2]
+=
local_deriv##INDEX[get_local_id
(
0
)
]+local_deriv##INDEX[get_local_id
(
0
)
+TILE_SIZE]
;
/**
*
Compute
a
force
based
on
pair
interactions.
*/
__kernel
void
computeN2Energy
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__local
float4*
local_force,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__local
float4*
tempForceBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
#
else
unsigned
int
numTiles
#
endif
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
DECLARE_TEMP_BUFFERS
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
unsigned
int
x
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
*TILE_SIZE
;
bool
hasExclusions
=
(
x
&
0x1
)
;
x
=
(
x>>17
)
*TILE_SIZE
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
forceBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
unsigned
int
atom1
=
x
+
tgx
;
float4
force
=
0.0f
;
float4
posq1
=
posq[atom1]
;
LOAD_ATOM1_PARAMETERS
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
>>
baseLocalAtom
;
#
endif
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+j
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
sqrt
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
float
dEdR
=
0.0f
;
float
tempEnergy
=
0.0f
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
)
{
COMPUTE_INTERACTION
dEdR
/=
-r
;
}
energy
+=
0.5f*tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
#
ifdef
USE_CUTOFF
}
#
endif
#
ifdef
USE_EXCLUSIONS
excl
>>=
1
;
#
endif
}
//
Sum
the
forces
and
write
results.
if
(
get_local_id
(
0
)
>=
TILE_SIZE
)
{
tempForceBuffer[get_local_id
(
0
)
]
=
force
;
SET_TEMP_BUFFERS
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
TILE_SIZE
)
{
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
x/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset1
=
x
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
+=
force.xyz+tempForceBuffer[get_local_id
(
0
)
+TILE_SIZE].xyz
;
STORE_DERIVATIVES_1
}
}
else
{
//
This
is
an
off-diagonal
tile.
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local_force[get_local_id
(
0
)
]
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compute
the
full
set
of
interactions
in
this
tile.
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
yi
=
y/TILE_SIZE
;
unsigned
int
tile
=
xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*
(
yi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TILE_SIZE
-
tgx
))
;
excl
>>=
baseLocalAtom
;
#
endif
unsigned
int
tj
=
tgx%
(
TILE_SIZE/2
)
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+tj
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
sqrt
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
float
dEdR
=
0.0f
;
float
tempEnergy
=
0.0f
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_INTERACTION
dEdR
/=
-r
;
}
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
atom2
=
baseLocalAtom+tj
;
local_force[baseLocalAtom+tj+forceBufferOffset].xyz
+=
delta.xyz
;
RECORD_DERIVATIVE_2
#
ifdef
USE_CUTOFF
}
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifdef
USE_EXCLUSIONS
excl
>>=
1
;
#
endif
tj
=
(
tj+1
)
%
(
TILE_SIZE/2
)
;
}
//
Sum
the
forces
and
write
results.
if
(
get_local_id
(
0
)
>=
TILE_SIZE
)
{
tempForceBuffer[get_local_id
(
0
)
]
=
force
;
SET_TEMP_BUFFERS
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
TILE_SIZE
)
{
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset1
=
x
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
+=
force.xyz+tempForceBuffer[get_local_id
(
0
)
+TILE_SIZE].xyz
;
forceBuffers[offset2].xyz
+=
local_force[get_local_id
(
0
)
].xyz+local_force[get_local_id
(
0
)
+TILE_SIZE].xyz
;
STORE_DERIVATIVES_1
STORE_DERIVATIVES_2
}
lasty
=
y
;
}
pos++
;
}
energyBuffer[get_global_id
(
0
)
]
+=
energy
;
}
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
985da46e
// Number of atoms along each edge of a tile.
#
define
TILE_SIZE
32
// Adds deriv<INDEX>_1 to derivBuffers<INDEX>[offset1].  `offset1` and the pasted identifiers
// must be in scope at the point of expansion.  The macro body already ends with ';'.
#
define
STORE_DERIVATIVE_1
(
INDEX
)
derivBuffers##INDEX[offset1]
+=
deriv##INDEX##_1
;
// Adds this work-item's local_deriv<INDEX> entry to derivBuffers<INDEX>[offset2].  `offset2`
// and the pasted identifiers must be in scope at the point of expansion.
#
define
STORE_DERIVATIVE_2
(
INDEX
)
derivBuffers##INDEX[offset2]
+=
local_deriv##INDEX[get_local_id
(
0
)
]
;
/**
*
Compute
a
force
based
on
pair
interactions.
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
0 → 100644
View file @
985da46e
#
define
TILE_SIZE
32
/**
*
Compute
a
value
based
on
pair
interactions.
*/
__kernel
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
#
else
unsigned
int
numTiles
#
endif
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
pos
=
get_group_id
(
0
)
*numTiles/get_num_groups
(
0
)
;
unsigned
int
end
=
(
get_group_id
(
0
)
+1
)
*numTiles/get_num_groups
(
0
)
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
unsigned
int
x
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
*TILE_SIZE
;
bool
hasExclusions
=
(
x
&
0x1
)
;
x
=
(
x>>17
)
*TILE_SIZE
;
unsigned
int
baseLocalAtom
=
(
get_local_id
(
0
)
<
TILE_SIZE
?
0
:
TILE_SIZE/2
)
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
valueBufferOffset
=
(
tgx
<
TILE_SIZE/2
?
0
:
TILE_SIZE
)
;
unsigned
int
atom1
=
x
+
tgx
;
float
value
=
0.0f
;
float4
posq1
=
posq[atom1]
;
LOAD_ATOM1_PARAMETERS
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
>>
baseLocalAtom
;
#
endif
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+j
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
sqrt
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+j
;
float
tempValue1
=
0.0f
;
float
tempValue2
=
0.0f
;
#
ifdef
USE_EXCLUSIONS
if
(
!isExcluded
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
)
{
#
else
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
)
{
#
endif
COMPUTE_VALUE
}
value
+=
tempValue1
;
#
ifdef
USE_CUTOFF
}
#
endif
#
ifdef
USE_EXCLUSIONS
excl
>>=
1
;
#
endif
}
//
Sum
the
values
and
write
results.
if
(
get_local_id
(
0
)
>=
TILE_SIZE
)
tempBuffer[get_local_id
(
0
)
]
=
value
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
TILE_SIZE
)
{
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset
=
x
+
tgx
+
(
x/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset
=
x
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
global_value[offset]
+=
value+tempBuffer[get_local_id
(
0
)
+TILE_SIZE]
;
}
}
else
{
//
This
is
an
off-diagonal
tile.
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local_value[get_local_id
(
0
)
]
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compute
the
full
set
of
interactions
in
this
tile.
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
yi
=
y/TILE_SIZE
;
unsigned
int
tile
=
xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*
(
yi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TILE_SIZE
-
tgx
))
;
excl
>>=
baseLocalAtom
;
#
endif
unsigned
int
tj
=
tgx%
(
TILE_SIZE/2
)
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
baseLocalAtom+tj
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
#
ifdef
USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
#
endif
float
r
=
sqrt
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+baseLocalAtom+tj
;
float
tempValue1
=
0.0f
;
float
tempValue2
=
0.0f
;
#
ifdef
USE_EXCLUSIONS
if
(
!isExcluded
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
#
else
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
#
endif
COMPUTE_VALUE
}
value
+=
tempValue1
;
local_value[baseLocalAtom+tj+valueBufferOffset]
+=
tempValue2
;
#
ifdef
USE_CUTOFF
}
#
endif
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifdef
USE_EXCLUSIONS
excl
>>=
1
;
#
endif
tj
=
(
tj+1
)
%
(
TILE_SIZE/2
)
;
}
//
Sum
the
values
and
write
results.
if
(
get_local_id
(
0
)
>=
TILE_SIZE
)
tempBuffer[get_local_id
(
0
)
]
=
value
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
TILE_SIZE
)
{
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset1
=
x
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
global_value[offset1]
+=
value+tempBuffer[get_local_id
(
0
)
+TILE_SIZE]
;
global_value[offset2]
+=
local_value[get_local_id
(
0
)
]+local_value[get_local_id
(
0
)
+TILE_SIZE]
;
}
lasty
=
y
;
}
pos++
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment