Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
51fc3bd2
Commit
51fc3bd2
authored
Dec 05, 2009
by
Peter Eastman
Browse files
Beginnings of OpenCL implementation of CustomGBForce
parent
7ded64ba
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
735 additions
and
57 deletions
+735
-57
platforms/opencl/src/OpenCLExpressionUtilities.cpp
platforms/opencl/src/OpenCLExpressionUtilities.cpp
+27
-0
platforms/opencl/src/OpenCLExpressionUtilities.h
platforms/opencl/src/OpenCLExpressionUtilities.h
+31
-0
platforms/opencl/src/OpenCLKernelFactory.cpp
platforms/opencl/src/OpenCLKernelFactory.cpp
+2
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+338
-53
platforms/opencl/src/OpenCLKernels.h
platforms/opencl/src/OpenCLKernels.h
+44
-0
platforms/opencl/src/OpenCLParameterSet.cpp
platforms/opencl/src/OpenCLParameterSet.cpp
+62
-4
platforms/opencl/src/OpenCLParameterSet.h
platforms/opencl/src/OpenCLParameterSet.h
+6
-0
platforms/opencl/src/OpenCLPlatform.cpp
platforms/opencl/src/OpenCLPlatform.cpp
+1
-0
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+203
-0
platforms/opencl/src/kernels/customGBValueReduction.cl
platforms/opencl/src/kernels/customGBValueReduction.cl
+21
-0
No files found.
platforms/opencl/src/OpenCLExpressionUtilities.cpp
View file @
51fc3bd2
...
...
@@ -318,3 +318,30 @@ void OpenCLExpressionUtilities::findRelatedPowers(const ExpressionTreeNode& node
for
(
int
i
=
0
;
i
<
(
int
)
searchNode
.
getChildren
().
size
();
i
++
)
findRelatedPowers
(
node
,
searchNode
.
getChildren
()[
i
],
powers
);
}
vector
<
mm_float4
>
OpenCLExpressionUtilities
::
computeFunctionCoefficients
(
const
vector
<
double
>&
values
,
bool
interpolating
)
{
// First create a padded set of function values.
vector
<
double
>
padded
(
values
.
size
()
+
2
);
padded
[
0
]
=
2
*
values
[
0
]
-
values
[
1
];
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
();
i
++
)
padded
[
i
+
1
]
=
values
[
i
];
padded
[
padded
.
size
()
-
1
]
=
2
*
values
[
values
.
size
()
-
1
]
-
values
[
values
.
size
()
-
2
];
// Now compute the spline coefficients.
vector
<
mm_float4
>
f
(
values
.
size
()
-
1
);
for
(
int
i
=
0
;
i
<
(
int
)
values
.
size
()
-
1
;
i
++
)
{
if
(
interpolating
)
f
[
i
]
=
(
mm_float4
)
{(
cl_float
)
padded
[
i
+
1
],
(
cl_float
)
(
0.5
*
(
-
padded
[
i
]
+
padded
[
i
+
2
])),
(
cl_float
)
(
0.5
*
(
2.0
*
padded
[
i
]
-
5.0
*
padded
[
i
+
1
]
+
4.0
*
padded
[
i
+
2
]
-
padded
[
i
+
3
])),
(
cl_float
)
(
0.5
*
(
-
padded
[
i
]
+
3.0
*
padded
[
i
+
1
]
-
3.0
*
padded
[
i
+
2
]
+
padded
[
i
+
3
]))};
else
f
[
i
]
=
(
mm_float4
)
{(
cl_float
)
((
padded
[
i
]
+
4.0
*
padded
[
i
+
1
]
+
padded
[
i
+
2
])
/
6.0
),
(
cl_float
)
((
-
3.0
*
padded
[
i
]
+
3.0
*
padded
[
i
+
2
])
/
6.0
),
(
cl_float
)
((
3.0
*
padded
[
i
]
-
6.0
*
padded
[
i
+
1
]
+
3.0
*
padded
[
i
+
2
])
/
6.0
),
(
cl_float
)
((
-
padded
[
i
]
+
3.0
*
padded
[
i
+
1
]
-
3.0
*
padded
[
i
+
2
]
+
padded
[
i
+
3
])
/
6.0
)};
}
return
f
;
}
platforms/opencl/src/OpenCLExpressionUtilities.h
View file @
51fc3bd2
...
...
@@ -27,6 +27,8 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "OpenCLContext.h"
#include "lepton/CustomFunction.h"
#include "lepton/ExpressionTreeNode.h"
#include "lepton/ParsedExpression.h"
#include <map>
...
...
@@ -54,6 +56,15 @@ public:
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
);
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
*
* @param values the tabulated values of the function
* @param interpolating true if an interpolating spline should be used, false if an approximating spline should be used
* @return the spline coefficients
*/
static
std
::
vector
<
mm_float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
bool
interpolating
);
class
FunctionPlaceholder
;
private:
static
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
...
...
@@ -66,6 +77,26 @@ private:
std
::
map
<
int
,
const
Lepton
::
ExpressionTreeNode
*>&
powers
);
};
/**
* This class serves as a placeholder for custom functions in expressions.
*/
class
OpenCLExpressionUtilities
::
FunctionPlaceholder
:
public
Lepton
::
CustomFunction
{
public:
int
getNumArguments
()
const
{
return
1
;
}
double
evaluate
(
const
double
*
arguments
)
const
{
return
0.0
;
}
double
evaluateDerivative
(
const
double
*
arguments
,
const
int
*
derivOrder
)
const
{
return
0.0
;
}
CustomFunction
*
clone
()
const
{
return
new
FunctionPlaceholder
();
}
};
}
// namespace OpenMM
#endif
/*OPENMM_OPENCLEXPRESSIONUTILITIES_H_*/
platforms/opencl/src/OpenCLKernelFactory.cpp
View file @
51fc3bd2
...
...
@@ -54,6 +54,8 @@ KernelImpl* OpenCLKernelFactory::createKernelImpl(std::string name, const Platfo
return
new
OpenCLCalcCustomNonbondedForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcGBSAOBCForceKernel
::
Name
())
return
new
OpenCLCalcGBSAOBCForceKernel
(
name
,
platform
,
cl
);
if
(
name
==
CalcCustomGBForceKernel
::
Name
())
return
new
OpenCLCalcCustomGBForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
CalcCustomExternalForceKernel
::
Name
())
return
new
OpenCLCalcCustomExternalForceKernel
(
name
,
platform
,
cl
,
context
.
getSystem
());
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
51fc3bd2
This diff is collapsed.
Click to expand it.
platforms/opencl/src/OpenCLKernels.h
View file @
51fc3bd2
...
...
@@ -470,6 +470,50 @@ private:
cl
::
Kernel
reduceBornForceKernel
;
};
/**
* This kernel is invoked by CustomGBForce to calculate the forces acting on the system.
*/
class
OpenCLCalcCustomGBForceKernel
:
public
CalcCustomGBForceKernel
{
public:
OpenCLCalcCustomGBForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
System
&
system
)
:
CalcCustomGBForceKernel
(
name
,
platform
),
hasInitializedKernels
(
false
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
valueBuffers
(
NULL
),
tabulatedFunctionParams
(
NULL
),
system
(
system
)
{
}
~
OpenCLCalcCustomGBForceKernel
();
/**
* Initialize the kernel.
*
* @param system the System this kernel will be applied to
* @param force the CustomGBForce this kernel will be used for
*/
void
initialize
(
const
System
&
system
,
const
CustomGBForce
&
force
);
/**
* Execute the kernel to calculate the forces.
*
* @param context the context in which to execute this kernel
*/
void
executeForces
(
ContextImpl
&
context
);
/**
* Execute the kernel to calculate the energy.
*
* @param context the context in which to execute this kernel
* @return the potential energy due to the CustomGBForce
*/
double
executeEnergy
(
ContextImpl
&
context
);
private:
bool
hasInitializedKernels
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
computedValues
;
OpenCLArray
<
cl_float
>*
globals
;
OpenCLArray
<
cl_float
>*
valueBuffers
;
OpenCLArray
<
mm_float4
>*
tabulatedFunctionParams
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
<
mm_float4
>*>
tabulatedFunctions
;
System
&
system
;
cl
::
Kernel
pairValueKernel
,
reduceValueKernel
,
pairForceKernel
,
particleForceKernel
;
};
/**
* This kernel is invoked by CustomExternalForce to calculate the forces acting on the system and the energy of the system.
*/
...
...
platforms/opencl/src/OpenCLParameterSet.cpp
View file @
51fc3bd2
...
...
@@ -70,21 +70,79 @@ OpenCLParameterSet::~OpenCLParameterSet() {
delete
&
buffers
[
i
].
getBuffer
();
}
void
OpenCLParameterSet
::
setParameterValues
(
const
vector
<
vector
<
cl_float
>
>&
values
)
{
void
OpenCLParameterSet
::
getParameterValues
(
vector
<
vector
<
cl_float
>
>&
values
)
const
{
values
.
resize
(
numObjects
);
for
(
int
i
=
0
;
i
<
numObjects
;
i
++
)
values
[
i
].
resize
(
numParameters
);
try
{
int
base
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getType
()
==
"float4"
)
{
vector
<
mm_float4
>
data
(
numObjects
);
context
.
getQueue
().
enqueueReadBuffer
(
buffers
[
i
].
getBuffer
(),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
].
x
;
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
if
(
base
+
2
<
numParameters
)
values
[
j
][
base
+
2
]
=
data
[
j
].
z
;
if
(
base
+
3
<
numParameters
)
values
[
j
][
base
+
3
]
=
data
[
j
].
w
;
}
base
+=
4
;
}
else
if
(
buffers
[
i
].
getType
()
==
"float2"
)
{
vector
<
mm_float2
>
data
(
numObjects
);
context
.
getQueue
().
enqueueReadBuffer
(
buffers
[
i
].
getBuffer
(),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
values
[
j
][
base
]
=
data
[
j
].
x
;
if
(
base
+
1
<
numParameters
)
values
[
j
][
base
+
1
]
=
data
[
j
].
y
;
}
base
+=
2
;
}
else
if
(
buffers
[
i
].
getType
()
==
"float"
)
{
vector
<
cl_float
>
data
(
numObjects
);
context
.
getQueue
().
enqueueReadBuffer
(
buffers
[
i
].
getBuffer
(),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
data
[
j
]
=
(
mm_float4
)
{
values
[
j
][
base
],
values
[
j
][
base
+
1
],
values
[
j
][
base
+
2
],
values
[
j
][
base
+
3
]};
values
[
j
][
base
]
=
data
[
j
];
}
else
throw
OpenMMException
(
"Internal error: Unknown buffer type in OpenCLParameterSet"
);
}
}
catch
(
cl
::
Error
err
)
{
stringstream
str
;
str
<<
"Error downloading parameter set "
<<
name
<<
": "
<<
err
.
what
()
<<
" ("
<<
err
.
err
()
<<
")"
;
throw
OpenMMException
(
str
.
str
());
}
}
void
OpenCLParameterSet
::
setParameterValues
(
const
vector
<
vector
<
cl_float
>
>&
values
)
{
try
{
int
base
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getType
()
==
"float4"
)
{
vector
<
mm_float4
>
data
(
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
].
x
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
if
(
base
+
2
<
numParameters
)
data
[
j
].
z
=
values
[
j
][
base
+
2
];
if
(
base
+
3
<
numParameters
)
data
[
j
].
w
=
values
[
j
][
base
+
3
];
}
context
.
getQueue
().
enqueueWriteBuffer
(
buffers
[
i
].
getBuffer
(),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
base
+=
4
;
}
else
if
(
buffers
[
i
].
getType
()
==
"float2"
)
{
vector
<
mm_float2
>
data
(
numObjects
);
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
data
[
j
]
=
(
mm_float2
)
{
values
[
j
][
base
],
values
[
j
][
base
+
1
]};
for
(
int
j
=
0
;
j
<
numObjects
;
j
++
)
{
data
[
j
].
x
=
values
[
j
][
base
];
if
(
base
+
1
<
numParameters
)
data
[
j
].
y
=
values
[
j
][
base
+
1
];
}
context
.
getQueue
().
enqueueWriteBuffer
(
buffers
[
i
].
getBuffer
(),
CL_TRUE
,
0
,
numObjects
*
buffers
[
i
].
getSize
(),
&
data
[
0
]);
base
+=
2
;
}
...
...
platforms/opencl/src/OpenCLParameterSet.h
View file @
51fc3bd2
...
...
@@ -64,6 +64,12 @@ public:
int
getNumObjects
()
const
{
return
numObjects
;
}
/**
* Get the values of all parameters.
*
* @param values on exit, values[i][j] contains the value of parameter j for object i
*/
void
getParameterValues
(
std
::
vector
<
std
::
vector
<
cl_float
>
>&
values
)
const
;
/**
* Set the values of all parameters.
*
...
...
platforms/opencl/src/OpenCLPlatform.cpp
View file @
51fc3bd2
...
...
@@ -54,6 +54,7 @@ OpenCLPlatform::OpenCLPlatform() {
registerKernelFactory
(
CalcNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomNonbondedForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcGBSAOBCForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomGBForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcCustomExternalForceKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateVerletStepKernel
::
Name
(),
factory
);
registerKernelFactory
(
IntegrateLangevinStepKernel
::
Name
(),
factory
);
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
0 → 100644
View file @
51fc3bd2
#
define
TILE_SIZE
32
/**
*
Compute
a
value
based
on
pair
interactions.
*/
__kernel
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
#
else
unsigned
int
numTiles
#
endif
PARAMETER_ARGUMENTS
)
{
#
ifdef
USE_CUTOFF
unsigned
int
numTiles
=
interactionCount[0]
;
#
endif
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
pos
=
warp*numTiles/totalWarps
;
unsigned
int
end
=
(
warp+1
)
*numTiles/totalWarps
;
float
energy
=
0.0f
;
unsigned
int
lasty
=
0xFFFFFFFF
;
while
(
pos
<
end
)
{
//
Extract
the
coordinates
of
this
tile
unsigned
int
x
=
tiles[pos]
;
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
*TILE_SIZE
;
bool
hasExclusions
=
(
x
&
0x1
)
;
x
=
(
x>>17
)
*TILE_SIZE
;
unsigned
int
tgx
=
get_local_id
(
0
)
&
(
TILE_SIZE-1
)
;
unsigned
int
tbx
=
get_local_id
(
0
)
-
tgx
;
unsigned
int
atom1
=
x
+
tgx
;
float
value
=
0.0f
;
float4
posq1
=
posq[atom1]
;
LOAD_ATOM1_PARAMETERS
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndices[tile]+tgx]
;
#
endif
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
float
tempValue1
=
0.0f
;
float
tempValue2
=
0.0f
;
#
ifdef
USE_EXCLUSIONS
if
(
!isExcluded
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
)
{
#
else
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
)
{
#
endif
COMPUTE_VALUE
}
value
+=
tempValue1
;
#
ifdef
USE_EXCLUSIONS
excl
>>=
1
;
#
endif
}
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset
=
x
+
tgx
+
(
x/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset
=
x
+
tgx
+
warp*PADDED_NUM_ATOMS
;
#
endif
global_value[offset]
+=
value
;
}
else
{
//
This
is
an
off-diagonal
tile.
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
local_value[get_local_id
(
0
)
]
=
0.0f
;
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
!hasExclusions
&&
flags
!=
0xFFFFFFFF
)
{
if
(
flags
==
0
)
{
//
No
interactions
in
this
tile.
}
else
{
//
Compute
only
a
subset
of
the
interactions
in
this
tile.
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
if
((
flags&
(
1<<j
))
!=
0
)
{
int
atom2
=
tbx+j
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+j
;
float
tempValue1
=
0.0f
;
float
tempValue2
=
0.0f
;
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
COMPUTE_VALUE
}
value
+=
tempValue1
;
tempBuffer[get_local_id
(
0
)
]
=
tempValue2
;
//
Sum
the
forces
on
atom2.
if
(
tgx
%
2
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+1]
;
if
(
tgx
%
4
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+2]
;
if
(
tgx
%
8
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+4]
;
if
(
tgx
%
16
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+8]
;
if
(
tgx
==
0
)
local_value[tbx+j]
+=
tempBuffer[get_local_id
(
0
)
]
+
tempBuffer[get_local_id
(
0
)
+16]
;
}
}
}
}
else
#
endif
{
//
Compute
the
full
set
of
interactions
in
this
tile.
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
yi
=
y/TILE_SIZE
;
unsigned
int
tile
=
xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*
(
yi+1
)
/2
;
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
(
hasExclusions
?
exclusions[exclusionIndices[tile]+tgx]
:
0xFFFFFFFF
)
;
excl
=
(
excl
>>
tgx
)
|
(
excl
<<
(
TILE_SIZE
-
tgx
))
;
#
endif
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
#
ifdef
USE_EXCLUSIONS
bool
isExcluded
=
!
(
excl
&
0x1
)
;
#
endif
int
atom2
=
tbx+tj
;
float4
posq2
=
local_posq[atom2]
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x/PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y/PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
delta.z
-=
floor
(
delta.z/PERIODIC_BOX_SIZE_Z+0.5f
)
*PERIODIC_BOX_SIZE_Z
;
#
endif
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
r
=
sqrt
(
r2
)
;
LOAD_ATOM2_PARAMETERS
atom2
=
y+tj
;
float
tempValue1
=
0.0f
;
float
tempValue2
=
0.0f
;
#
ifdef
USE_EXCLUSIONS
if
(
!isExcluded
&&
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
#
else
if
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
)
{
#
endif
COMPUTE_VALUE
}
value
+=
tempValue1
;
local_value[tbx+tj]
+=
tempValue2
;
#
ifdef
USE_EXCLUSIONS
excl
>>=
1
;
#
endif
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
}
}
//
Write
results
#
ifdef
USE_OUTPUT_BUFFER_PER_BLOCK
unsigned
int
offset1
=
x
+
tgx
+
(
y/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
(
x/TILE_SIZE
)
*PADDED_NUM_ATOMS
;
#
else
unsigned
int
offset1
=
x
+
tgx
+
warp*PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
+
tgx
+
warp*PADDED_NUM_ATOMS
;
#
endif
global_value[offset1]
+=
value
;
global_value[offset2]
+=
local_value[get_local_id
(
0
)
]
;
lasty
=
y
;
}
pos++
;
}
}
platforms/opencl/src/kernels/customGBValueReduction.cl
0 → 100644
View file @
51fc3bd2
/**
*
Reduce
a
pairwise
computed
value,
and
compute
per-particle
values.
*/
__kernel
void
reduceGBValue
(
int
bufferSize,
int
numBuffers,
__global
float*
valueBuffers
PARAMETER_ARGUMENTS
)
{
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
//
Reduce
the
pairwise
value
int
totalSize
=
bufferSize*numBuffers
;
float
sum
=
valueBuffers[index]
;
for
(
int
i
=
index+bufferSize
; i < totalSize; i += bufferSize)
sum
+=
valueBuffers[i]
;
//
Now
calculate
other
values
COMPUTE_VALUES
index
+=
get_global_size
(
0
)
;
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment