Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
69e75377
"vscode:/vscode.git/clone" did not exist on "54fa65ed0d443449b45a80bcddaba4894ffd9c41"
Commit
69e75377
authored
Oct 07, 2011
by
Peter Eastman
Browse files
Added "const" and "restrict" to lots of kernel arguments to let the compiler do more optimizations
parent
bf8b9f30
Changes
38
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
93 additions
and
92 deletions
+93
-92
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+3
-3
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+21
-21
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+4
-4
platforms/opencl/src/kernels/andersenThermostat.cl
platforms/opencl/src/kernels/andersenThermostat.cl
+2
-2
platforms/opencl/src/kernels/brownian.cl
platforms/opencl/src/kernels/brownian.cl
+3
-3
platforms/opencl/src/kernels/ccma.cl
platforms/opencl/src/kernels/ccma.cl
+7
-7
platforms/opencl/src/kernels/compact.cl
platforms/opencl/src/kernels/compact.cl
+4
-4
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+4
-4
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+4
-4
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+6
-6
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
+1
-1
platforms/opencl/src/kernels/customGBGradientChainRule.cl
platforms/opencl/src/kernels/customGBGradientChainRule.cl
+1
-1
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+4
-4
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+4
-4
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+6
-6
platforms/opencl/src/kernels/customHbondForce.cl
platforms/opencl/src/kernels/customHbondForce.cl
+4
-4
platforms/opencl/src/kernels/ewald.cl
platforms/opencl/src/kernels/ewald.cl
+2
-2
platforms/opencl/src/kernels/fft.cl
platforms/opencl/src/kernels/fft.cl
+2
-1
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+7
-7
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
+4
-4
No files found.
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
69e75377
...
...
@@ -154,12 +154,12 @@ void OpenCLBondedUtilities::initialize(const System& system) {
const
vector
<
int
>&
set
=
*
iter
;
int
setSize
=
set
.
size
();
stringstream
s
;
s
<<
"__kernel void computeBondedForces(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq"
;
s
<<
"__kernel void computeBondedForces(__global float4*
restrict
forceBuffers, __global float*
restrict
energyBuffer, __global
const
float4*
restrict
posq"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
OpenCLExpressionUtilities
::
intToString
(
indexWidth
[
force
]));
s
<<
", __global "
<<
indexType
<<
"* atomIndices"
<<
i
;
s
<<
", __global "
<<
indexType
<<
"* bufferIndices"
<<
i
;
s
<<
", __global
const
"
<<
indexType
<<
"*
restrict
atomIndices"
<<
i
;
s
<<
", __global
const
"
<<
indexType
<<
"*
restrict
bufferIndices"
<<
i
;
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
69e75377
...
...
@@ -1739,7 +1739,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
tabulatedFunctions
.
push_back
(
new
OpenCLArray
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
tableArgs
<<
", __global float4* "
<<
arrayName
;
tableArgs
<<
", __global
const
float4*
restrict
"
<<
arrayName
;
}
if
(
force
.
getNumFunctions
()
>
0
)
{
tabulatedFunctionParams
=
new
OpenCLArray
<
mm_float4
>
(
cl
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
,
false
,
CL_MEM_READ_ONLY
);
...
...
@@ -1837,7 +1837,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
paramName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"*
restrict
local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal2
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = global_"
<<
paramName
<<
"[j];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"1 = global_"
<<
paramName
<<
"[atom1];
\n
"
;
...
...
@@ -1884,12 +1884,12 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* "
<<
paramName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
"
<<
paramName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
valueName
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"*
restrict
global_"
<<
valueName
;
reductionSource
<<
buffer
.
getType
()
<<
" local_"
<<
valueName
<<
";
\n
"
;
}
reductionSource
<<
"local_values"
<<
computedValues
->
getParameterSuffix
(
0
)
<<
" = sum;
\n
"
;
...
...
@@ -1977,7 +1977,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
paramName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"*
restrict
local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal2
<<
"local_"
<<
paramName
<<
"[localAtomIndex] = global_"
<<
paramName
<<
"[j];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"1 = global_"
<<
paramName
<<
"[atom1];
\n
"
;
...
...
@@ -1986,17 +1986,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
valueName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
valueName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
global_"
<<
valueName
<<
", __local "
<<
buffer
.
getType
()
<<
"*
restrict
local_"
<<
valueName
;
loadLocal1
<<
"local_"
<<
valueName
<<
"[localAtomIndex] = "
<<
valueName
<<
"1;
\n
"
;
loadLocal2
<<
"local_"
<<
valueName
<<
"[localAtomIndex] = global_"
<<
valueName
<<
"[j];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
valueName
<<
"1 = global_"
<<
valueName
<<
"[atom1];
\n
"
;
load2
<<
buffer
.
getType
()
<<
" "
<<
valueName
<<
"2 = local_"
<<
valueName
<<
"[atom2];
\n
"
;
}
if
(
useLong
)
{
extraArgs
<<
", __global long* derivBuffers"
;
extraArgs
<<
", __global long*
restrict
derivBuffers"
;
for
(
int
i
=
0
;
i
<
force
.
getNumComputedValues
();
i
++
)
{
string
index
=
intToString
(
i
+
1
);
extraArgs
<<
", __local float* local_deriv"
<<
index
;
extraArgs
<<
", __local float*
restrict
local_deriv"
<<
index
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
declare1
<<
"float deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
load2
<<
"float deriv"
<<
index
<<
"_2 = 0.0f;
\n
"
;
...
...
@@ -2011,7 +2011,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* derivBuffers"
<<
index
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_deriv"
<<
index
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"*
restrict
derivBuffers"
<<
index
<<
", __local "
<<
buffer
.
getType
()
<<
"*
restrict
local_deriv"
<<
index
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[localAtomIndex] = 0.0f;
\n
"
;
declare1
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
load2
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_2 = 0.0f;
\n
"
;
...
...
@@ -2068,21 +2068,21 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* "
<<
paramName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
"
<<
paramName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* "
<<
valueName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
"
<<
valueName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* derivBuffers"
<<
index
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"*
restrict
derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
if
(
useLong
)
{
extraArgs
<<
", __global
long*
derivBuffersIn"
;
extraArgs
<<
", __global
const long* restrict
derivBuffersIn"
;
for
(
int
i
=
0
;
i
<
energyDerivs
->
getNumParameters
();
++
i
)
reduce
<<
"derivBuffers"
<<
energyDerivs
->
getParameterSuffix
(
i
,
"[index]"
)
<<
" = (1.0f/0xFFFFFFFF)*derivBuffersIn[index+PADDED_NUM_ATOMS*"
<<
intToString
(
i
)
<<
"];
\n
"
;
...
...
@@ -2147,17 +2147,17 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* "
<<
paramName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
"
<<
paramName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* "
<<
valueName
;
extraArgs
<<
", __global
const
"
<<
buffer
.
getType
()
<<
"*
restrict
"
<<
valueName
;
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* derivBuffers"
<<
index
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"*
restrict
derivBuffers"
<<
index
;
compute
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
" = derivBuffers"
<<
index
<<
"[index];
\n
"
;
}
map
<
string
,
string
>
variables
;
...
...
@@ -2866,12 +2866,12 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
vector
<
mm_float4
>
f
=
OpenCLExpressionUtilities
::
computeFunctionCoefficients
(
values
,
min
,
max
);
tabulatedFunctions
.
push_back
(
new
OpenCLArray
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tableArgs
<<
", __global float4* "
<<
arrayName
;
tableArgs
<<
", __global
const
float4*
restrict
"
<<
arrayName
;
}
if
(
force
.
getNumFunctions
()
>
0
)
{
tabulatedFunctionParams
=
new
OpenCLArray
<
mm_float4
>
(
cl
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
,
false
,
CL_MEM_READ_ONLY
);
tabulatedFunctionParams
->
upload
(
tabulatedFunctionParamsVec
);
tableArgs
<<
", __global float4* functionParams"
;
tableArgs
<<
", __global
const
float4*
restrict
functionParams"
;
}
// Record information about parameters.
...
...
@@ -2973,15 +2973,15 @@ void OpenCLCalcCustomHbondForceKernel::initialize(const System& system, const Cu
// Next it needs to load parameters from global memory.
if
(
force
.
getNumGlobalParameters
()
>
0
)
extraArgs
<<
", __global
float*
globals"
;
extraArgs
<<
", __global
const float* restrict
globals"
;
for
(
int
i
=
0
;
i
<
(
int
)
donorParams
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
donorParams
->
getBuffers
()[
i
];
extraArgs
<<
", __global "
+
buffer
.
getType
()
+
"* donor"
+
buffer
.
getName
();
extraArgs
<<
", __global
const
"
+
buffer
.
getType
()
+
"*
restrict
donor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" donorParams"
+
intToString
(
i
+
1
)
+
" = donor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
for
(
int
i
=
0
;
i
<
(
int
)
acceptorParams
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
acceptorParams
->
getBuffers
()[
i
];
extraArgs
<<
", __global "
+
buffer
.
getType
()
+
"* acceptor"
+
buffer
.
getName
();
extraArgs
<<
", __global
const
"
+
buffer
.
getType
()
+
"*
restrict
acceptor"
+
buffer
.
getName
();
addDonorAndAcceptorCode
(
computeDonor
,
computeAcceptor
,
buffer
.
getType
()
+
" acceptorParams"
+
intToString
(
i
+
1
)
+
" = acceptor"
+
buffer
.
getName
()
+
"[index];
\n
"
);
}
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
69e75377
...
...
@@ -399,9 +399,9 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
replacements
[
"ATOM_PARAMETER_DATA"
]
=
localData
.
str
();
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
args
<<
", __global "
;
args
<<
", __global
const
"
;
args
<<
params
[
i
].
getType
();
args
<<
"* global_"
;
args
<<
"*
restrict
global_"
;
args
<<
params
[
i
].
getName
();
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
{
...
...
@@ -411,11 +411,11 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
}
else
{
if
((
arguments
[
i
].
getMemory
().
getInfo
<
CL_MEM_FLAGS
>
()
&
CL_MEM_READ_ONLY
)
==
0
)
args
<<
", __global "
;
args
<<
", __global
const
"
;
else
args
<<
", __constant "
;
args
<<
arguments
[
i
].
getType
();
args
<<
"* "
;
args
<<
"*
restrict
"
;
args
<<
arguments
[
i
].
getName
();
}
}
...
...
platforms/opencl/src/kernels/andersenThermostat.cl
View file @
69e75377
...
...
@@ -2,8 +2,8 @@
*
Apply
the
Andersen
thermostat
to
adjust
particle
velocities.
*/
__kernel
void
applyAndersenThermostat
(
float
collisionFrequency,
float
kT,
__global
float4*
velm,
__global
float2*
stepSize,
__global
float4*
random,
unsigned
int
randomIndex,
__global
int*
atomGroups
)
{
__kernel
void
applyAndersenThermostat
(
float
collisionFrequency,
float
kT,
__global
float4*
velm,
__global
const
float2*
restrict
stepSize,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex,
__global
const
int*
restrict
atomGroups
)
{
float
collisionProbability
=
1.0f-exp
(
-collisionFrequency*stepSize[0].y
)
;
float
randomRange
=
erf
(
collisionProbability/sqrt
(
2.0f
))
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
...
...
platforms/opencl/src/kernels/brownian.cl
View file @
69e75377
...
...
@@ -2,8 +2,8 @@
*
Perform
the
first
step
of
Brownian
integration.
*/
__kernel
void
integrateBrownianPart1
(
float
tauDeltaT,
float
noiseAmplitude,
__global
float4*
force,
__global
float4*
posDelta,
__global
float4*
velm,
__global
float4*
random,
unsigned
int
randomIndex
)
{
__kernel
void
integrateBrownianPart1
(
float
tauDeltaT,
float
noiseAmplitude,
__global
const
float4*
restrict
force,
__global
float4*
restrict
posDelta,
__global
const
float4*
restrict
velm,
__global
const
float4*
restrict
random,
unsigned
int
randomIndex
)
{
randomIndex
+=
get_global_id
(
0
)
;
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
float
invMass
=
velm[index].w
;
...
...
@@ -16,7 +16,7 @@ __kernel void integrateBrownianPart1(float tauDeltaT, float noiseAmplitude, __gl
*
Perform
the
second
step
of
Brownian
integration.
*/
__kernel
void
integrateBrownianPart2
(
float
oneOverDeltaT,
__global
float4*
posq,
__global
float4*
velm,
__global
float4*
posDelta
)
{
__kernel
void
integrateBrownianPart2
(
float
oneOverDeltaT,
__global
float4*
posq,
__global
float4*
velm,
__global
const
float4*
restrict
posDelta
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_ATOMS; index += get_global_size(0)) {
float4
delta
=
posDelta[index]
;
velm[index].xyz
=
oneOverDeltaT*delta.xyz
;
...
...
platforms/opencl/src/kernels/ccma.cl
View file @
69e75377
/**
*
Compute
the
direction
each
constraint
is
pointing
in.
This
is
called
once
at
the
beginning
of
constraint
evaluation.
*/
__kernel
void
computeConstraintDirections
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions
)
{
__kernel
void
computeConstraintDirections
(
__global
const
int2*
restrict
constraintAtoms,
__global
float4*
restrict
constraintDistance,
__global
const
float4*
restrict
atomPositions
)
{
for
(
int
index
=
get_global_id
(
0
)
; index < NUM_CONSTRAINTS; index += get_global_size(0)) {
//
Compute
the
direction
for
this
constraint.
...
...
@@ -19,8 +19,8 @@ __kernel void computeConstraintDirections(__global int2* constraintAtoms, __glob
/**
*
Compute
the
force
applied
by
each
constraint.
*/
__kernel
void
computeConstraintForce
(
__global
int2*
constraintAtoms,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__global
float*
reducedMass,
__global
float*
delta1,
__global
int*
converged,
float
tol,
int
iteration
)
{
__kernel
void
computeConstraintForce
(
__global
const
int2*
restrict
constraintAtoms,
__global
const
float4*
restrict
constraintDistance,
__global
const
float4*
restrict
atomPositions,
__global
const
float*
restrict
reducedMass,
__global
float*
restrict
delta1,
__global
int*
restrict
converged,
float
tol,
int
iteration
)
{
__local
int
groupConverged
;
if
(
converged[1-iteration%2]
)
{
if
(
get_global_id
(
0
)
==
0
)
...
...
@@ -58,8 +58,8 @@ __kernel void computeConstraintForce(__global int2* constraintAtoms, __global fl
/**
*
Multiply
the
vector
of
constraint
forces
by
the
constraint
matrix.
*/
__kernel
void
multiplyByConstraintMatrix
(
__global
float*
delta1,
__global
float*
delta2,
__global
int*
constraintMatrixColumn,
__global
float*
constraintMatrixValue,
__global
int*
converged,
int
iteration
)
{
__kernel
void
multiplyByConstraintMatrix
(
__global
const
float*
restrict
delta1,
__global
float*
restrict
delta2,
__global
const
int*
restrict
constraintMatrixColumn,
__global
const
float*
restrict
constraintMatrixValue,
__global
const
int*
restrict
converged,
int
iteration
)
{
if
(
converged[iteration%2]
)
return
; // The constraint iteration has already converged.
...
...
@@ -81,8 +81,8 @@ __kernel void multiplyByConstraintMatrix(__global float* delta1, __global float*
/**
*
Update
the
atom
positions
based
on
constraint
forces.
*/
__kernel
void
updateAtomPositions
(
__global
int*
numAtomConstraints,
__global
int*
atomConstraints,
__global
float4*
constraintDistance,
__global
float4*
atomPositions,
__global
float4*
velm,
__global
float*
delta1,
__global
float*
delta2,
__global
int*
converged,
int
iteration
)
{
__kernel
void
updateAtomPositions
(
__global
const
int*
restrict
numAtomConstraints,
__global
const
int*
restrict
atomConstraints,
__global
const
float4*
restrict
constraintDistance,
__global
float4*
restrict
atomPositions,
__global
const
float4*
restrict
velm,
__global
const
float*
restrict
delta1,
__global
const
float*
restrict
delta2,
__global
int*
restrict
converged,
int
iteration
)
{
if
(
get_global_id
(
0
)
==
0
)
converged[1-iteration%2]
=
1
;
if
(
converged[iteration%2]
)
...
...
platforms/opencl/src/kernels/compact.cl
View file @
69e75377
...
...
@@ -56,7 +56,7 @@ unsigned int sumReduce128(__local unsigned int* arr) {
return
arr[0]
;
}
__kernel
void
countElts
(
__global
unsigned
int*
dgBlockCounts,
__global
unsigned
int*
dgValid,
const
unsigned
int
len,
__local
unsigned
int*
dsCount
)
{
__kernel
void
countElts
(
__global
unsigned
int*
restrict
dgBlockCounts,
__global
const
unsigned
int*
restrict
dgValid,
const
unsigned
int
len,
__local
unsigned
int*
restrict
dsCount
)
{
dsCount[get_local_id
(
0
)
]
=
0
;
unsigned
int
ub
;
const
unsigned
int
eltsPerBlock
=
len/get_num_groups
(
0
)
+
((
len
%
get_num_groups
(
0
))
?
1
:
0
)
;
...
...
@@ -110,9 +110,9 @@ int compactSIMDPrefixSum(__local const unsigned int* dsData, __local const unsig
return
numValid
;
}
__kernel
void
moveValidElementsStaged
(
__global
unsigned
int*
dgData,
__global
unsigned
int*
dgCompact,
__global
unsigned
int*
dgValid,
__global
unsigned
int*
dgBlockCounts,
unsigned
int
len,
__global
unsigned
int*
dNumValidElements,
__local
unsigned
int*
inBlock,
__local
unsigned
int*
validBlock,
__local
unsigned
int*
compactBlock
)
{
__kernel
void
moveValidElementsStaged
(
__global
const
unsigned
int*
restrict
dgData,
__global
unsigned
int*
restrict
dgCompact,
__global
const
unsigned
int*
restrict
dgValid,
__global
const
unsigned
int*
restrict
dgBlockCounts,
unsigned
int
len,
__global
unsigned
int*
restrict
dNumValidElements,
__local
unsigned
int*
restrict
inBlock,
__local
unsigned
int*
restrict
validBlock,
__local
unsigned
int*
restrict
compactBlock
)
{
__local
unsigned
int
dsLocalIndex[256]
;
int
blockOutOffset=0
;
//
Sum
up
the
blockCounts
before
us
to
find
our
offset
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
View file @
69e75377
...
...
@@ -6,11 +6,11 @@
*
Compute
a
force
based
on
pair
interactions.
*/
__kernel
void
computeN2Energy
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__local
float4*
local_force,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
__kernel
void
computeN2Energy
(
__global
float4*
restrict
forceBuffers,
__global
float*
restrict
energyBuffer,
__local
float4*
restrict
local_force,
__global
const
float4*
restrict
posq,
__local
float4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__local
float4*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
69e75377
...
...
@@ -7,11 +7,11 @@
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeN2Energy
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__local
float4*
local_force,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempForceBuffer,
void
computeN2Energy
(
__global
float4*
restrict
forceBuffers,
__global
float*
restrict
energyBuffer,
__local
float4*
restrict
local_force,
__global
const
float4*
restrict
posq,
__local
float4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__local
float4*
restrict
tempForceBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
69e75377
...
...
@@ -14,15 +14,15 @@
*/
__kernel
void
computeN2Energy
(
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
forceBuffers,
__global
long*
restrict
forceBuffers,
#
else
__global
float4*
forceBuffers,
__global
float4*
restrict
forceBuffers,
#
endif
__global
float*
energyBuffer,
__local
float4*
local_force,
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__local
float4*
tempBuffer,
__global
float*
restrict
energyBuffer,
__local
float4*
restrict
local_force,
__global
const
float4*
restrict
posq,
__local
float4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__local
float4*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/customGBEnergyPerParticle.cl
View file @
69e75377
...
...
@@ -8,7 +8,7 @@
*
Reduce
the
derivatives
computed
in
the
N^2
energy
kernel,
and
compute
all
per-particle
energy
terms.
*/
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq
__kernel
void
computePerParticleEnergy
(
int
bufferSize,
int
numBuffers,
__global
float4*
restrict
forceBuffers,
__global
float*
restrict
energyBuffer,
__global
const
float4*
restrict
posq
PARAMETER_ARGUMENTS
)
{
float
energy
=
0.0f
;
unsigned
int
index
=
get_global_id
(
0
)
;
...
...
platforms/opencl/src/kernels/customGBGradientChainRule.cl
View file @
69e75377
...
...
@@ -2,7 +2,7 @@
*
Compute
chain
rule
terms
for
computed
values
that
depend
explicitly
on
particle
coordinates.
*/
__kernel
void
computeGradientChainRuleTerms
(
__global
float4*
forceBuffers,
__global
float4*
posq
__kernel
void
computeGradientChainRuleTerms
(
__global
float4*
restrict
forceBuffers,
__global
const
float4*
restrict
posq
PARAMETER_ARGUMENTS
)
{
unsigned
int
index
=
get_global_id
(
0
)
;
while
(
index
<
NUM_ATOMS
)
{
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
View file @
69e75377
...
...
@@ -4,11 +4,11 @@
*
Compute
a
value
based
on
pair
interactions.
*/
__kernel
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
__kernel
void
computeN2Value
(
__global
const
float4*
restrict
posq,
__local
float4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
float*
restrict
global_value,
__local
float*
restrict
local_value,
__local
float*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
69e75377
...
...
@@ -5,11 +5,11 @@
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__global
float*
global_value,
__local
float*
local_value,
__local
float*
tempBuffer,
void
computeN2Value
(
__global
const
float4*
restrict
posq,
__local
float4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
__global
float*
restrict
global_value,
__local
float*
restrict
local_value,
__local
float*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
69e75377
...
...
@@ -7,16 +7,16 @@
/**
*
Compute
a
value
based
on
pair
interactions.
*/
__kernel
void
computeN2Value
(
__global
float4*
posq,
__local
float4*
local_posq,
__global
unsigned
int*
exclusions,
__global
unsigned
int*
exclusionIndices,
__global
unsigned
int*
exclusionRowIndices,
__kernel
void
computeN2Value
(
__global
const
float4*
restrict
posq,
__local
float4*
restrict
local_posq,
__global
const
unsigned
int*
restrict
exclusions,
__global
const
unsigned
int*
restrict
exclusionIndices,
__global
const
unsigned
int*
restrict
exclusionRowIndices,
#
ifdef
SUPPORTS_64_BIT_ATOMICS
__global
long*
global_value,
__global
long*
restrict
global_value,
#
else
__global
float*
global_value,
__global
float*
restrict
global_value,
#
endif
__local
float*
local_value,
__local
float*
tempBuffer,
__local
float*
restrict
local_value,
__local
float*
restrict
tempBuffer,
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
__global
const
ushort2*
restrict
tiles,
__global
const
unsigned
int*
restrict
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
const
unsigned
int*
restrict
interactionFlags
#
else
unsigned
int
numTiles
#
endif
...
...
platforms/opencl/src/kernels/customHbondForce.cl
View file @
69e75377
...
...
@@ -55,8 +55,8 @@ float4 computeCross(float4 vec1, float4 vec2) {
/**
* Compute forces on donors.
*/
__kernel void computeDonorForces(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global
int4*
exclusions,
__global
int4* donorAtoms, __global int4*
acceptorAtoms, __global
int4*
donorBufferIndices, __local float4* posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize
__kernel void computeDonorForces(__global float4*
restrict
forceBuffers, __global float*
restrict
energyBuffer, __global
const
float4*
restrict
posq, __global
const int4* restrict
exclusions,
__global
const int4* restrict donorAtoms, __global const int4* restrict
acceptorAtoms, __global
const int4* restrict
donorBufferIndices, __local float4* posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize
PARAMETER_ARGUMENTS) {
float energy = 0.0f;
float4 f1 = (float4) 0;
...
...
@@ -141,8 +141,8 @@ __kernel void computeDonorForces(__global float4* forceBuffers, __global float*
/**
* Compute forces on acceptors.
*/
__kernel void computeAcceptorForces(__global float4* forceBuffers, __global float* energyBuffer, __global float4* posq, __global
int4*
exclusions,
__global
int4* donorAtoms, __global int4*
acceptorAtoms, __global
int4*
acceptorBufferIndices, __local float4* posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize
__kernel void computeAcceptorForces(__global float4*
restrict
forceBuffers, __global float*
restrict
energyBuffer, __global
const
float4*
restrict
posq, __global
const int4* restrict
exclusions,
__global
const int4* restrict donorAtoms, __global const int4* restrict
acceptorAtoms, __global
const int4* restrict
acceptorBufferIndices, __local float4*
restrict
posBuffer, float4 periodicBoxSize, float4 invPeriodicBoxSize
PARAMETER_ARGUMENTS) {
float4 f1 = (float4) 0;
float4 f2 = (float4) 0;
...
...
platforms/opencl/src/kernels/ewald.cl
View file @
69e75377
...
...
@@ -7,7 +7,7 @@ float2 multofFloat2(float2 a, float2 b) {
*
Precompute
the
cosine
and
sine
sums
which
appear
in
each
force
term.
*/
__kernel
void
calculateEwaldCosSinSums
(
__global
float*
energyBuffer,
__global
float4*
posq,
__global
float2*
cosSinSum,
float4
reciprocalPeriodicBoxSize,
float
reciprocalCoefficient
)
{
__kernel
void
calculateEwaldCosSinSums
(
__global
float*
restrict
energyBuffer,
__global
const
float4*
restrict
posq,
__global
float2*
restrict
cosSinSum,
float4
reciprocalPeriodicBoxSize,
float
reciprocalCoefficient
)
{
const
unsigned
int
ksizex
=
2*KMAX_X-1
;
const
unsigned
int
ksizey
=
2*KMAX_Y-1
;
const
unsigned
int
ksizez
=
2*KMAX_Z-1
;
...
...
@@ -58,7 +58,7 @@ __kernel void calculateEwaldCosSinSums(__global float* energyBuffer, __global fl
*
previous
routine.
*/
__kernel
void
calculateEwaldForces
(
__global
float4*
forceBuffers,
__global
float4*
posq,
__global
float2*
cosSinSum,
float4
reciprocalPeriodicBoxSize,
float
reciprocalCoefficient
)
{
__kernel
void
calculateEwaldForces
(
__global
float4*
restrict
forceBuffers,
__global
const
float4*
restrict
posq,
__global
const
float2*
restrict
cosSinSum,
float4
reciprocalPeriodicBoxSize,
float
reciprocalCoefficient
)
{
unsigned
int
atom
=
get_global_id
(
0
)
;
while
(
atom
<
NUM_ATOMS
)
{
float4
force
=
forceBuffers[atom]
;
...
...
platforms/opencl/src/kernels/fft.cl
View file @
69e75377
...
...
@@ -6,7 +6,8 @@ float2 multiplyComplex(float2 c1, float2 c2) {
*
Perform
a
1D
FFT
on
each
row
along
one
axis.
*/
__kernel
void
execFFT
(
__global
float2*
in,
__global
float2*
out,
float
sign,
__local
float2*
w,
__local
float2*
data0,
__local
float2*
data1
)
{
__kernel
void
execFFT
(
__global
const
float2*
restrict
in,
__global
float2*
restrict
out,
float
sign,
__local
float2*
restrict
w,
__local
float2*
restrict
data0,
__local
float2*
restrict
data1
)
{
for
(
int
i
=
get_local_id
(
0
)
; i < ZSIZE; i += get_local_size(0))
w[i]
=
(
float2
)
(
cos
(
-sign*i*2*M_PI/ZSIZE
)
,
sin
(
-sign*i*2*M_PI/ZSIZE
))
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
69e75377
...
...
@@ -8,7 +8,7 @@
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount
)
{
__kernel
void
findBlockBounds
(
int
numAtoms,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
const
float4*
restrict
posq,
__global
float4*
restrict
blockCenter,
__global
float4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionCount
)
{
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
numAtoms
)
{
...
...
@@ -47,7 +47,7 @@ __kernel void findBlockBounds(int numAtoms, float4 periodicBoxSize, float4 invPe
*/
void
storeInteractionData
(
__local
ushort2*
buffer,
__local
int*
valid,
__local
short*
sum,
__local
ushort2*
temp,
__local
int*
baseIndex,
__global
unsigned
int*
interactionCount,
__global
ushort2*
interactingTiles,
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
unsigned
int
maxTiles
)
{
float4
invPeriodicBoxSize,
__global
const
float4*
posq,
__global
const
float4*
blockCenter,
__global
const
float4*
blockBoundingBox,
unsigned
int
maxTiles
)
{
//
The
buffer
is
full,
so
we
need
to
compact
it
and
write
out
results.
Start
by
doing
a
parallel
prefix
sum.
for
(
int
i
=
get_local_id
(
0
)
; i < BUFFER_SIZE; i += GROUP_SIZE)
...
...
@@ -144,9 +144,9 @@ void storeInteractionData(__local ushort2* buffer, __local int* valid, __local s
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting.
*/
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles,
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles, unsigned int startTileIndex,
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global
const
float4*
restrict
blockCenter,
__global
const
float4*
restrict
blockBoundingBox, __global unsigned int*
restrict
interactionCount, __global ushort2*
restrict
interactingTiles,
__global unsigned int*
restrict
interactionFlags, __global
const
float4*
restrict
posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) {
__local ushort2 buffer[BUFFER_SIZE];
__local int valid[BUFFER_SIZE];
...
...
@@ -220,8 +220,8 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
*
Compare
each
atom
in
one
block
to
the
bounding
box
of
another
block,
and
set
*
flags
for
which
ones
are
interacting.
*/
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
ushort2*
tiles,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount,
__local
unsigned
int*
flags,
unsigned
int
maxTiles
)
{
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
const
float4*
restrict
posq,
__global
const
ushort2*
restrict
tiles,
__global
const
float4*
restrict
blockCenter,
__global
const
float4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionFlags,
__global
const
unsigned
int*
restrict
interactionCount,
__local
unsigned
int*
restrict
flags,
unsigned
int
maxTiles
)
{
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
numTiles
=
interactionCount[0]
;
...
...
platforms/opencl/src/kernels/findInteractingBlocks_cpu.cl
View file @
69e75377
...
...
@@ -8,7 +8,7 @@
/**
*
Find
a
bounding
box
for
the
atoms
in
each
block.
*/
__kernel
void
findBlockBounds
(
int
numAtoms,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
float4*
posq,
__global
float4*
blockCenter,
__global
float4*
blockBoundingBox,
__global
unsigned
int*
interactionCount
)
{
__kernel
void
findBlockBounds
(
int
numAtoms,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
const
float4*
restrict
posq,
__global
float4*
restrict
blockCenter,
__global
float4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionCount
)
{
int
index
=
get_global_id
(
0
)
;
int
base
=
index*TILE_SIZE
;
while
(
base
<
numAtoms
)
{
...
...
@@ -121,9 +121,9 @@ void storeInteractionData(ushort2* buffer, int numValid, __global unsigned int*
* Compare the bounding boxes for each pair of blocks. If they are sufficiently far apart,
* mark them as non-interacting.
*/
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global float4* blockCenter,
__global float4* blockBoundingBox, __global unsigned int* interactionCount, __global ushort2* interactingTiles,
__global unsigned int* interactionFlags, __global float4* posq, unsigned int maxTiles, unsigned int startTileIndex,
__kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBoxSize, float4 invPeriodicBoxSize, __global
const
float4*
restrict
blockCenter,
__global
const
float4*
restrict
blockBoundingBox, __global unsigned int*
restrict
interactionCount, __global ushort2*
restrict
interactingTiles,
__global unsigned int*
restrict
interactionFlags, __global
const
float4*
restrict
posq, unsigned int maxTiles, unsigned int startTileIndex,
unsigned int endTileIndex) {
ushort2 buffer[BUFFER_SIZE];
int valuesInBuffer = 0;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment