Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a0651b5b
Commit
a0651b5b
authored
Apr 29, 2010
by
Peter Eastman
Browse files
Eliminated more local memory bank conflicts
parent
7b3a8266
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
232 additions
and
132 deletions
+232
-132
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+39
-42
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
+35
-18
platforms/opencl/src/OpenCLNonbondedUtilities.h
platforms/opencl/src/OpenCLNonbondedUtilities.h
+21
-8
platforms/opencl/src/OpenCLParameterSet.cpp
platforms/opencl/src/OpenCLParameterSet.cpp
+3
-3
platforms/opencl/src/kernels/gbsaObc_default.cl
platforms/opencl/src/kernels/gbsaObc_default.cl
+63
-27
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
+71
-34
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
a0651b5b
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-200
9
Stanford University and the Authors. *
* Portions copyright (c) 2008-20
1
0 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -1216,7 +1216,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
for
(
int
i
=
0
;
i
<
tableSize
;
++
i
)
erfcVector
[
i
]
=
(
float
)
erfc
(
i
*
(
alpha
*
force
.
getCutoffDistance
())
/
(
tableSize
-
1
));
erfcTable
->
upload
(
erfcVector
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"erfcTable"
,
"float"
,
sizeof
(
cl_float
),
erfcTable
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"erfcTable"
,
"float"
,
1
,
sizeof
(
cl_float
),
erfcTable
->
getDeviceBuffer
()));
}
// Add the interaction to the default nonbonded kernel.
...
...
@@ -1224,7 +1224,7 @@ void OpenCLCalcNonbondedForceKernel::initialize(const System& system, const Nonb
string
source
=
cl
.
replaceStrings
(
OpenCLKernelSources
::
coulombLennardJones
,
defines
);
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
);
if
(
hasLJ
)
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"sigmaEpsilon"
,
"float
2
"
,
sizeof
(
cl_float2
),
sigmaEpsilon
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"sigmaEpsilon"
,
"float"
,
2
,
sizeof
(
cl_float2
),
sigmaEpsilon
->
getDeviceBuffer
()));
// Initialize the exceptions.
...
...
@@ -1431,12 +1431,12 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
vector
<
mm_float4
>
f
=
OpenCLExpressionUtilities
::
computeFunctionCoefficients
(
values
,
interpolating
);
tabulatedFunctions
.
push_back
(
new
OpenCLArray
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float
4
"
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
}
if
(
force
.
getNumFunctions
()
>
0
)
{
tabulatedFunctionParams
=
new
OpenCLArray
<
mm_float4
>
(
cl
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
,
false
,
CL_MEM_READ_ONLY
);
tabulatedFunctionParams
->
upload
(
tabulatedFunctionParamsVec
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"functionParams"
,
"float
4
"
,
sizeof
(
cl_float4
),
tabulatedFunctionParams
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"functionParams"
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctionParams
->
getDeviceBuffer
()));
}
// Record information for the expressions.
...
...
@@ -1479,11 +1479,11 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
);
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
intToString
(
i
+
1
),
buffer
.
get
Type
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
intToString
(
i
+
1
),
buffer
.
get
ComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
sizeof
(
cl_float
),
globals
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
cl_float
),
globals
->
getDeviceBuffer
()));
}
cl
.
addForce
(
new
OpenCLCustomNonbondedForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
}
...
...
@@ -1559,8 +1559,8 @@ void OpenCLCalcGBSAOBCForceKernel::initialize(const System& system, const GBSAOB
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
GBSAOBCForce
::
CutoffNonPeriodic
);
string
source
=
OpenCLKernelSources
::
gbsaObc2
;
nb
.
addInteraction
(
useCutoff
,
usePeriodic
,
false
,
force
.
getCutoffDistance
(),
vector
<
vector
<
int
>
>
(),
source
);
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"obcParams"
,
"float
2
"
,
sizeof
(
cl_float2
),
params
->
getDeviceBuffer
()));;
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"bornForce"
,
"float"
,
sizeof
(
cl_float
),
bornForce
->
getDeviceBuffer
()));;
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"obcParams"
,
"float"
,
2
,
sizeof
(
cl_float2
),
params
->
getDeviceBuffer
()));;
nb
.
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
"bornForce"
,
"float"
,
1
,
sizeof
(
cl_float
),
bornForce
->
getDeviceBuffer
()));;
cl
.
addForce
(
new
OpenCLGBSAOBCForceInfo
(
nb
.
getNumForceBuffers
(),
force
));
}
...
...
@@ -1589,42 +1589,39 @@ void OpenCLCalcGBSAOBCForceKernel::executeForces(ContextImpl& context) {
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
string
file
=
(
cl
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
gbsaObc_nvidia
:
OpenCLKernelSources
::
gbsaObc_default
);
cl
::
Program
program
=
cl
.
createProgram
(
file
,
defines
);
int
index
=
0
;
computeBornSumKernel
=
cl
::
Kernel
(
program
,
"computeBornSum"
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
0
,
bornSum
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
1
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getPosq
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
3
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
4
,
params
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
5
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float2
),
NULL
);
computeBornSumKernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornSum
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
params
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
13
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
8
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
9
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
}
else
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
7
,
nb
.
getTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl_uint
>
(
8
,
nb
.
getTiles
().
getSize
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getTiles
().
getSize
());
}
force1Kernel
=
cl
::
Kernel
(
program
,
"computeGBSAForce1"
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
0
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
1
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
2
,
cl
.
getPosq
().
getDeviceBuffer
());
force1Kernel
.
setArg
(
3
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
force1Kernel
.
setArg
(
4
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
5
,
bornRadii
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
6
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
7
,
bornForce
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
8
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
9
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
mm_float4
),
NULL
);
index
=
0
;
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornRadii
->
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornForce
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
13
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
mm_float4
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
10
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
11
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
12
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
}
else
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
10
,
nb
.
getTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl_uint
>
(
11
,
nb
.
getTiles
().
getSize
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl_uint
>
(
index
++
,
nb
.
getTiles
().
getSize
());
}
program
=
cl
.
createProgram
(
OpenCLKernelSources
::
gbsaObcReductions
,
defines
);
reduceBornSumKernel
=
cl
::
Kernel
(
program
,
"reduceBornSum"
);
...
...
@@ -1773,13 +1770,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
vector
<
mm_float4
>
f
=
OpenCLExpressionUtilities
::
computeFunctionCoefficients
(
values
,
interpolating
);
tabulatedFunctions
.
push_back
(
new
OpenCLArray
<
mm_float4
>
(
cl
,
values
.
size
()
-
1
,
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float
4
"
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDeviceBuffer
()));
tableArgs
<<
", __global float4* "
<<
arrayName
;
}
if
(
force
.
getNumFunctions
()
>
0
)
{
tabulatedFunctionParams
=
new
OpenCLArray
<
mm_float4
>
(
cl
,
tabulatedFunctionParamsVec
.
size
(),
"tabulatedFunctionParameters"
,
false
,
CL_MEM_READ_ONLY
);
tabulatedFunctionParams
->
upload
(
tabulatedFunctionParamsVec
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"functionParams"
,
"float
4
"
,
sizeof
(
cl_float4
),
tabulatedFunctionParams
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"functionParams"
,
"float"
,
4
,
sizeof
(
cl_float4
),
tabulatedFunctionParams
->
getDeviceBuffer
()));
tableArgs
<<
", __constant float4* "
<<
prefix
<<
"functionParams"
;
}
...
...
@@ -2162,21 +2159,21 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"params"
+
intToString
(
i
+
1
);
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
get
Type
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
get
ComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"values"
+
intToString
(
i
+
1
);
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
get
Type
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
get
ComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
paramName
=
prefix
+
"dEdV"
+
intToString
(
i
+
1
);
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
get
Type
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
paramName
,
buffer
.
get
ComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
sizeof
(
cl_float
),
globals
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
cl_float
),
globals
->
getDeviceBuffer
()));
}
}
cl
.
addForce
(
new
OpenCLCustomGBForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.cpp
View file @
a0651b5b
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2010
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -283,9 +283,15 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
source
;
int
localDataSize
=
7
*
sizeof
(
cl_float
);
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
stringstream
localData
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
localData
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
";
\n
"
;
if
(
params
[
i
].
getNumComponents
()
==
1
)
localData
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
";
\n
"
;
else
{
for
(
int
j
=
0
;
j
<
params
[
i
].
getNumComponents
();
++
j
)
localData
<<
params
[
i
].
getComponentType
()
<<
" "
<<
params
[
i
].
getName
()
<<
"_"
<<
suffixes
[
j
]
<<
";
\n
"
;
}
localDataSize
+=
params
[
i
].
getSize
();
}
if
((
localDataSize
/
4
)
%
2
==
0
)
{
...
...
@@ -318,20 +324,25 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
stringstream
loadLocal1
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
loadLocal1
<<
"localData[get_local_id(0)]."
;
loadLocal1
<<
params
[
i
].
getName
();
loadLocal1
<<
" = "
;
loadLocal1
<<
params
[
i
].
getName
();
loadLocal1
<<
"1;
\n
"
;
if
(
params
[
i
].
getNumComponents
()
==
1
)
{
loadLocal1
<<
"localData[get_local_id(0)]."
<<
params
[
i
].
getName
()
<<
" = "
<<
params
[
i
].
getName
()
<<
"1;
\n
"
;
}
else
{
for
(
int
j
=
0
;
j
<
params
[
i
].
getNumComponents
();
++
j
)
loadLocal1
<<
"localData[get_local_id(0)]."
<<
params
[
i
].
getName
()
<<
"_"
<<
suffixes
[
j
]
<<
" = "
<<
params
[
i
].
getName
()
<<
"1."
<<
suffixes
[
j
]
<<
";
\n
"
;
}
}
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_1"
]
=
loadLocal1
.
str
();
stringstream
loadLocal2
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
loadLocal2
<<
"localData[get_local_id(0)]."
;
loadLocal2
<<
params
[
i
].
getName
();
loadLocal2
<<
" = global_"
;
loadLocal2
<<
params
[
i
].
getName
();
loadLocal2
<<
"[j];
\n
"
;
if
(
params
[
i
].
getNumComponents
()
==
1
)
{
loadLocal2
<<
"localData[get_local_id(0)]."
<<
params
[
i
].
getName
()
<<
" = global_"
<<
params
[
i
].
getName
()
<<
"[j];
\n
"
;
}
else
{
loadLocal2
<<
params
[
i
].
getType
()
<<
" temp_"
<<
params
[
i
].
getName
()
<<
" = global_"
<<
params
[
i
].
getName
()
<<
"[j];
\n
"
;
for
(
int
j
=
0
;
j
<
params
[
i
].
getNumComponents
();
++
j
)
loadLocal2
<<
"localData[get_local_id(0)]."
<<
params
[
i
].
getName
()
<<
"_"
<<
suffixes
[
j
]
<<
" = temp_"
<<
params
[
i
].
getName
()
<<
"."
<<
suffixes
[
j
]
<<
";
\n
"
;
}
}
replacements
[
"LOAD_LOCAL_PARAMETERS_FROM_GLOBAL"
]
=
loadLocal2
.
str
();
stringstream
load1
;
...
...
@@ -346,12 +357,18 @@ cl::Kernel OpenCLNonbondedUtilities::createInteractionKernel(const string& sourc
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
stringstream
load2j
;
for
(
int
i
=
0
;
i
<
(
int
)
params
.
size
();
i
++
)
{
load2j
<<
params
[
i
].
getType
();
load2j
<<
" "
;
load2j
<<
params
[
i
].
getName
();
load2j
<<
"2 = localData[atom2]."
;
load2j
<<
params
[
i
].
getName
();
load2j
<<
";
\n
"
;
if
(
params
[
i
].
getNumComponents
()
==
1
)
{
load2j
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
"2 = localData[atom2]."
<<
params
[
i
].
getName
()
<<
";
\n
"
;
}
else
{
load2j
<<
params
[
i
].
getType
()
<<
" "
<<
params
[
i
].
getName
()
<<
"2 = ("
<<
params
[
i
].
getType
()
<<
") ("
;
for
(
int
j
=
0
;
j
<
params
[
i
].
getNumComponents
();
++
j
)
{
if
(
j
>
0
)
load2j
<<
", "
;
load2j
<<
"localData[atom2]."
<<
params
[
i
].
getName
()
<<
"_"
<<
suffixes
[
j
];
}
load2j
<<
");
\n
"
;
}
}
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2j
.
str
();
map
<
string
,
string
>
defines
;
...
...
platforms/opencl/src/OpenCLNonbondedUtilities.h
View file @
a0651b5b
...
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors.
*
* Portions copyright (c) 2009
-2010
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -29,6 +29,7 @@
#include "OpenCLContext.h"
#include "openmm/System.h"
#include "OpenCLExpressionUtilities.h"
#include <string>
#include <vector>
...
...
@@ -229,19 +230,30 @@ public:
* Create a ParameterInfo object.
*
* @param name the name of the parameter
* @param type the data type of the parameter
* @param type the data type of the parameter's components
* @param numComponents the number of components in the parameter
* @param size the size of the parameter in bytes
* @param memory the memory containing the parameter values
*/
ParameterInfo
(
const
std
::
string
&
name
,
const
std
::
string
&
type
,
int
size
,
cl
::
Memory
&
memory
)
:
name
(
name
),
type
(
type
),
size
(
size
),
memory
(
&
memory
)
{
ParameterInfo(const std::string& name, const std::string& componentType, int numComponents, int size, cl::Memory& memory) :
        // Initializers are written in member declaration order (name, componentType, type, size,
        // numComponents, memory). C++ always runs them in declaration order, so listing them any
        // other way is misleading and triggers -Wreorder.
        name(name),
        componentType(componentType),
        // A one-component parameter uses the component type as-is (e.g. "float"); otherwise the
        // component count is appended to form the vector type name (e.g. "float" and 4 -> "float4").
        // The constructor arguments (not the members) are read here, so the result does not depend
        // on initialization order.
        type(numComponents == 1 ? componentType : componentType+OpenCLExpressionUtilities::intToString(numComponents)),
        size(size),
        numComponents(numComponents),
        // Only the address is stored; the caller's cl::Memory object is not copied.
        memory(&memory) {
}
/**
 * Get the name of the parameter, as passed to the constructor.
 */
const
std
::
string
&
getName
()
const
{
return
name
;
}
/**
 * Get the data type of the parameter's components, as passed to the constructor.
 */
const
std
::
string
&
getComponentType
()
const
{
return
componentType
;
}
/**
 * Get the data type of the parameter.  The constructor sets this to the component type
 * when there is one component, and to the component type followed by the number of
 * components otherwise.
 */
const
std
::
string
&
getType
()
const
{
return
type
;
}
/**
 * Get the number of components in the parameter.
 */
int
getNumComponents
()
const
{
return
numComponents
;
}
/**
 * Get the size of the parameter in bytes.
 */
int
getSize
()
const
{
return
size
;
}
...
...
@@ -250,8 +262,9 @@ public:
}
private:
std
::
string
name
;
std
::
string
componentType
;
std
::
string
type
;
int
size
;
int
size
,
numComponents
;
cl
::
Memory
*
memory
;
};
...
...
platforms/opencl/src/OpenCLParameterSet.cpp
View file @
a0651b5b
...
...
@@ -41,21 +41,21 @@ OpenCLParameterSet::OpenCLParameterSet(OpenCLContext& context, int numParameters
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
sizeof
(
mm_float4
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float
4
"
,
sizeof
(
mm_float4
),
*
buf
));
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
4
,
sizeof
(
mm_float4
),
*
buf
));
params
-=
4
;
}
if
(
params
>
1
)
{
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
sizeof
(
mm_float2
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float
2
"
,
sizeof
(
mm_float2
),
*
buf
));
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
2
,
sizeof
(
mm_float2
),
*
buf
));
params
-=
2
;
}
if
(
params
>
0
)
{
cl
::
Buffer
*
buf
=
new
cl
::
Buffer
(
context
.
getContext
(),
CL_MEM_READ_WRITE
,
numObjects
*
sizeof
(
cl_float
));
std
::
stringstream
name
;
name
<<
"param"
<<
(
++
bufferCount
);
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
sizeof
(
cl_float
),
*
buf
));
buffers
.
push_back
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
name
.
str
(),
"float"
,
1
,
sizeof
(
cl_float
),
*
buf
));
}
}
catch
(
cl
::
Error
err
)
{
...
...
platforms/opencl/src/kernels/gbsaObc_default.cl
View file @
a0651b5b
#
define
TILE_SIZE
32
/**
 * Per-atom record that the GBSA/OBC kernels keep in __local memory (the kernels
 * take it as "__local AtomData* localData").
 *
 * Every field is a separate scalar float, 13 in total; the order of the fields
 * defines the memory layout and must not be changed.
 * NOTE(review): the host code appears to size the matching kernel argument as
 * ThreadBlockSize*13*sizeof(cl_float), so adding or removing a field presumably
 * requires updating that allocation as well -- confirm against the host side.
 */
typedef struct {
    /* Position and charge, filled from the components of a posq float4 (x, y, z, w). */
    float x;
    float y;
    float z;
    float q;

    /* Four force components; the kernels reset them to 0.0f before processing a tile. */
    float fx;
    float fy;
    float fz;
    float fw;

    /* OBC parameters, filled from the x and y components of a params float2. */
    float radius;
    float scaledRadius;

    /* Running Born sum contribution accumulated for this atom within a tile. */
    float bornSum;

    /* Born radius, filled from the global Born radii array. */
    float bornRadius;

    /* Born force slot -- presumably the per-atom Born force accumulator; TODO confirm. */
    float bornForce;
} AtomData;
/**
*
Compute
the
Born
sum.
*/
__kernel
void
computeBornSum
(
__global
float*
global_bornSum,
__local
float*
local_bornSum,
__global
float4*
posq,
__local
float4*
local_posq,
__global
float2*
global_params,
__local
float2*
local_params,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
__kernel
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
)
{
#
else
...
...
@@ -34,13 +43,17 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_params[get_local_id
(
0
)
]
=
params1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
localData[get_local_id
(
0
)
].radius
=
params1.x
;
localData[get_local_id
(
0
)
].scaledRadius
=
params1.y
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
float4
delta
=
(
float4
)
(
local
_posq[baseLocalAtom+j].xyz
-
posq1.
xy
z,
0.0f
)
;
float4
delta
=
(
float4
)
(
local
Data[baseLocalAtom+j].x-posq1.x,
localData[baseLocalAtom+j].y-posq1.y,
localData[baseLocalAtom+j].z-
posq1.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
...
@@ -54,7 +67,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float2
params2
=
local_params
[baseLocalAtom+j]
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+j].radius,
localData
[baseLocalAtom+j]
.scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
...
...
@@ -89,10 +102,16 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
local_params[get_local_id
(
0
)
]
=
global_params[j]
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
float2
tempParams
=
global_params[j]
;
localData[get_local_id
(
0
)
].radius
=
tempParams.x
;
localData[get_local_id
(
0
)
].scaledRadius
=
tempParams.y
;
}
local
_bornSum
[get_local_id
(
0
)
]
=
0.0f
;
local
Data
[get_local_id
(
0
)
]
.bornSum
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compute
the
full
set
of
interactions
in
this
tile.
...
...
@@ -102,7 +121,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
unsigned
int
tile
=
xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*
(
yi+1
)
/2
;
unsigned
int
tj
=
tgx%
(
TILE_SIZE/2
)
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
float4
delta
=
(
float4
)
(
local
_posq[baseLocalAtom+tj].xyz
-
posq1.
xy
z,
0.0f
)
;
float4
delta
=
(
float4
)
(
local
Data[baseLocalAtom+tj].x-posq1.x,
localData[baseLocalAtom+tj].y-posq1.y,
localData[baseLocalAtom+tj].z-
posq1.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
...
@@ -116,7 +135,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float2
params2
=
local_params
[baseLocalAtom+tj]
;
float2
params2
=
(
float2
)
(
localData[baseLocalAtom+tj].radius,
localData
[baseLocalAtom+tj]
.scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
...
...
@@ -140,7 +159,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params2.x
<
params1.x-r
)
term
+=
2.0f*
(
1.0f/params2.x-l_ij
)
;
local
_bornSum
[baseLocalAtom+tj+forceBufferOffset]
+=
term
;
local
Data
[baseLocalAtom+tj+forceBufferOffset]
.bornSum
+=
term
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
@@ -161,7 +180,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
global_bornSum[offset1]
+=
bornSum+tempBuffer[get_local_id
(
0
)
+TILE_SIZE]
;
global_bornSum[offset2]
+=
local
_bornSum
[get_local_id
(
0
)
]
+local_bornSum
[get_local_id
(
0
)
+TILE_SIZE]
;
global_bornSum[offset2]
+=
local
Data
[get_local_id
(
0
)
]
.bornSum+localData
[get_local_id
(
0
)
+TILE_SIZE]
.bornSum
;
}
lasty
=
y
;
}
...
...
@@ -174,8 +193,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
*/
__kernel
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__
local
float4*
local_posq,
__local
float4*
local_force,
__global
float*
global_bornRadii,
__local
float*
loc
al_bornRadii,
__global
float*
global_bornForce,
__local
flo
at*
local
_bornForce
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
float4*
posq,
__
global
float*
glob
al_bornRadii,
__global
float*
global_bornForce,
__local
AtomD
at
a
*
local
Data
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
)
{
#
else
...
...
@@ -204,14 +223,17 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_bornRadii[get_local_id
(
0
)
]
=
bornRadius1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
localData[get_local_id
(
0
)
].bornRadius
=
bornRadius1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+j
<
NUM_ATOMS
)
{
float4
posq2
=
local_posq[baseLocalAtom+j]
;
float4
posq2
=
(
float4
)
(
localData[baseLocalAtom+j].x,
localData[baseLocalAtom+j].y,
localData[baseLocalAtom+j].z,
localData[baseLocalAtom+j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -221,7 +243,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
bornRadius2
=
local
_bornRadii
[baseLocalAtom+j]
;
float
bornRadius2
=
local
Data
[baseLocalAtom+j]
.bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
...
...
@@ -264,10 +286,17 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
local_bornRadii[get_local_id
(
0
)
]
=
global_bornRadii[j]
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
localData[get_local_id
(
0
)
].bornRadius
=
global_bornRadii[j]
;
}
local_force[get_local_id
(
0
)
]
=
0.0f
;
localData[get_local_id
(
0
)
].fx
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
Compute
the
full
set
of
interactions
in
this
tile.
...
...
@@ -278,7 +307,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
unsigned
int
tj
=
tgx%
(
TILE_SIZE/2
)
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE/2; j++) {
if
(
atom1
<
NUM_ATOMS
&&
y+baseLocalAtom+tj
<
NUM_ATOMS
)
{
float4
posq2
=
local_posq
[baseLocalAtom+tj]
;
float4
posq2
=
(
float4
)
(
localData[baseLocalAtom+tj].x,
localData[baseLocalAtom+tj].y,
localData[baseLocalAtom+tj].z,
localData
[baseLocalAtom+tj]
.q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -288,7 +317,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
bornRadius2
=
local
_bornRadii
[baseLocalAtom+tj]
;
float
bornRadius2
=
local
Data
[baseLocalAtom+tj]
.bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
...
...
@@ -308,7 +337,10 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
local_force[baseLocalAtom+tj+forceBufferOffset]
+=
(
float4
)
(
delta.xyz,
dGpol_dalpha2_ij*bornRadius1
)
;
localData[baseLocalAtom+tj+forceBufferOffset].fx
+=
delta.x
;
localData[baseLocalAtom+tj+forceBufferOffset].fy
+=
delta.y
;
localData[baseLocalAtom+tj+forceBufferOffset].fz
+=
delta.z
;
localData[baseLocalAtom+tj+forceBufferOffset].fw
+=
dGpol_dalpha2_ij*bornRadius1
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
tj
=
(
tj+1
)
%
(
TILE_SIZE/2
)
;
...
...
@@ -328,9 +360,13 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
unsigned
int
offset2
=
y
+
tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
=
forceBuffers[offset1].xyz+force.xyz+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].xyz
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+local_force[get_local_id
(
0
)
].xyz+local_force[get_local_id
(
0
)
+TILE_SIZE].xyz
;
float4
sum
=
(
float4
)
(
localData[get_local_id
(
0
)
].fx+localData[get_local_id
(
0
)
+TILE_SIZE].fx,
localData[get_local_id
(
0
)
].fy+localData[get_local_id
(
0
)
+TILE_SIZE].fy,
localData[get_local_id
(
0
)
].fz+localData[get_local_id
(
0
)
+TILE_SIZE].fz,
localData[get_local_id
(
0
)
].fw+localData[get_local_id
(
0
)
+TILE_SIZE].fw
)
;
forceBuffers[offset2].xyz
=
forceBuffers[offset2].xyz+sum.xyz
;
global_bornForce[offset1]
+=
force.w+tempBuffer[get_local_id
(
0
)
+TILE_SIZE].w
;
global_bornForce[offset2]
+=
local_force[get_local_id
(
0
)
].w+local_force[get_local_id
(
0
)
+TILE_SIZE]
.w
;
global_bornForce[offset2]
+=
sum
.w
;
}
lasty
=
y
;
}
...
...
platforms/opencl/src/kernels/gbsaObc_nvidia.cl
View file @
a0651b5b
#
define
TILE_SIZE
32
/**
 * Per-atom record that the kernels in this file keep in __local memory
 * (passed in as the "localData" argument).  It gathers into one struct the
 * values that were previously held in separate local arrays (local_posq,
 * local_params, local_bornSum, local_bornRadii, local_force).
 *
 * The struct is 13 consecutive floats.  Field order is part of the layout,
 * so do not reorder the members.
 */
typedef struct {
    /* Atom position and charge, copied component-wise from a posq entry. */
    float x;
    float y;
    float z;
    float q;            /* filled from posq.w */

    /* Accumulators written by computeGBSAForce1: fx/fy/fz receive the
       delta*dEdR components, fw receives dGpol_dalpha2_ij*bornRadius1. */
    float fx;
    float fy;
    float fz;
    float fw;

    /* Per-atom parameters read by computeBornSum (global_params .x and .y). */
    float radius;
    float scaledRadius;

    /* Running Born sum accumulated by computeBornSum. */
    float bornSum;

    /* Born radius loaded from global_bornRadii in computeGBSAForce1. */
    float bornRadius;

    /* NOTE(review): not referenced in the kernel fragments visible here;
       presumably reserved for Born-force accumulation -- confirm against
       the remaining kernels. */
    float bornForce;
} AtomData;
/**
*
Compute
the
Born
sum.
*/
__kernel
void
computeBornSum
(
__global
float*
global_bornSum,
__local
float*
local_bornSum,
__global
float4*
posq,
__local
float4*
local_posq,
__global
float2*
global_params,
__local
float2*
local_params,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
__kernel
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
)
{
#
else
...
...
@@ -35,12 +44,16 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_params[get_local_id
(
0
)
]
=
params1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
localData[get_local_id
(
0
)
].radius
=
params1.x
;
localData[get_local_id
(
0
)
].scaledRadius
=
params1.y
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
float4
delta
=
(
float4
)
(
local
_posq[tbx+j].xyz
-
posq1.
xy
z,
0.0f
)
;
float4
delta
=
(
float4
)
(
local
Data[tbx+j].x-posq1.x,
localData[tbx+j].y-posq1.y,
localData[tbx+j].z-
posq1.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
...
@@ -54,7 +67,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float2
params2
=
local_params[tbx+j]
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
((
j
!=
tgx
)
&&
(
params1.x
<
rScaledRadiusJ
))
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
...
...
@@ -83,10 +96,16 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
local_params[get_local_id
(
0
)
]
=
global_params[j]
;
}
local_bornSum[get_local_id
(
0
)
]
=
0.0f
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
float2
tempParams
=
global_params[j]
;
localData[get_local_id
(
0
)
].radius
=
tempParams.x
;
localData[get_local_id
(
0
)
].scaledRadius
=
tempParams.y
;
}
localData[get_local_id
(
0
)
].bornSum
=
0.0f
;
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
flags
!=
0xFFFFFFFF
)
{
...
...
@@ -98,7 +117,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
if
((
flags&
(
1<<j
))
!=
0
)
{
float4
delta
=
(
float4
)
(
local
_posq[tbx+j].xyz
-
posq1.
xy
z,
0.0f
)
;
float4
delta
=
(
float4
)
(
local
Data[tbx+j].x-posq1.x,
localData[tbx+j].y-posq1.y,
localData[tbx+j].z-
posq1.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
...
@@ -113,7 +132,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float2
params2
=
local_params[tbx+j]
;
float2
params2
=
(
float2
)
(
localData[tbx+j].radius,
localData[tbx+j].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
...
...
@@ -152,7 +171,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
if
(
tgx
%
16
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+8]
;
if
(
tgx
==
0
)
local
_bornSum[tbx+j]
+=
tempBuffer[get_local_id
(
0
)
]
+
tempBuffer[get_local_id
(
0
)
+16]
;
local
Data[tbx+j].bornSum
+=
tempBuffer[get_local_id
(
0
)
]
+
tempBuffer[get_local_id
(
0
)
+16]
;
}
}
}
...
...
@@ -167,7 +186,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
unsigned
int
tile
=
xi+yi*PADDED_NUM_ATOMS/TILE_SIZE-yi*
(
yi+1
)
/2
;
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
float4
delta
=
(
float4
)
(
local
_posq[tbx+tj].xyz
-
posq1.
xy
z,
0.0f
)
;
float4
delta
=
(
float4
)
(
local
Data[tbx+tj].x-posq1.x,
localData[tbx+tj].y-posq1.y,
localData[tbx+tj].z-
posq1.z,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
delta.y
-=
floor
(
delta.y*INV_PERIODIC_BOX_SIZE_Y+0.5f
)
*PERIODIC_BOX_SIZE_Y
;
...
...
@@ -181,7 +200,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
#
endif
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float2
params2
=
local_params[tbx+tj]
;
float2
params2
=
(
float2
)
(
localData[tbx+tj].radius,
localData[tbx+tj].scaledRadius
)
;
float
rScaledRadiusJ
=
r+params2.y
;
if
(
params1.x
<
rScaledRadiusJ
)
{
float
l_ij
=
1.0f/max
(
params1.x,
fabs
(
r-params2.y
))
;
...
...
@@ -205,7 +224,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
(
0.25f*params1.y*params1.y*invR
)
*
(
l_ij2-u_ij2
)
;
if
(
params2.x
<
params1.x-r
)
term
+=
2.0f*
(
1.0f/params2.x-l_ij
)
;
local
_bornSum[tbx+tj]
+=
term
;
local
Data[tbx+tj].bornSum
+=
term
;
}
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
...
...
@@ -222,7 +241,7 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
unsigned
int
offset2
=
y
+
tgx
+
warp*PADDED_NUM_ATOMS
;
#
endif
global_bornSum[offset1]
+=
bornSum
;
global_bornSum[offset2]
+=
local
_bornSum
[get_local_id
(
0
)
]
;
global_bornSum[offset2]
+=
local
Data
[get_local_id
(
0
)
]
.bornSum
;
lasty
=
y
;
}
pos++
;
...
...
@@ -234,8 +253,8 @@ __kernel void computeBornSum(__global float* global_bornSum, __local float* loca
*/
__kernel
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__
local
float4*
local_posq,
__local
float4*
local_force,
__global
float*
global_bornRadii,
__local
float*
loc
al_bornRadii,
__global
float*
global_bornForce,
__local
flo
at*
local
_bornForce
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
__global
float4*
posq,
__
global
float*
glob
al_bornRadii,
__global
float*
global_bornForce,
__local
AtomD
at
a
*
local
Data
,
__local
float4*
tempBuffer,
__global
unsigned
int*
tiles,
#
ifdef
USE_CUTOFF
__global
unsigned
int*
interactionFlags,
__global
unsigned
int*
interactionCount
)
{
#
else
...
...
@@ -265,13 +284,16 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
local_bornRadii[get_local_id
(
0
)
]
=
bornRadius1
;
localData[get_local_id
(
0
)
].x
=
posq1.x
;
localData[get_local_id
(
0
)
].y
=
posq1.y
;
localData[get_local_id
(
0
)
].z
=
posq1.z
;
localData[get_local_id
(
0
)
].q
=
posq1.w
;
localData[get_local_id
(
0
)
].bornRadius
=
bornRadius1
;
unsigned
int
xi
=
x/TILE_SIZE
;
unsigned
int
tile
=
xi+xi*PADDED_NUM_ATOMS/TILE_SIZE-xi*
(
xi+1
)
/2
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
if
(
atom1
<
NUM_ATOMS
&&
y+j
<
NUM_ATOMS
)
{
float4
posq2
=
local_posq[tbx+j]
;
float4
posq2
=
(
float4
)
(
localData[tbx+j].x,
localData[tbx+j].y,
localData[tbx+j].z,
localData[tbx+j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -281,7 +303,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
bornRadius2
=
local
_bornRadii[tbx+j]
;
float
bornRadius2
=
local
Data[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
...
...
@@ -318,10 +340,17 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
local_bornRadii[get_local_id
(
0
)
]
=
global_bornRadii[j]
;
}
local_force[get_local_id
(
0
)
]
=
0.0f
;
float4
tempPosq
=
posq[j]
;
localData[get_local_id
(
0
)
].x
=
tempPosq.x
;
localData[get_local_id
(
0
)
].y
=
tempPosq.y
;
localData[get_local_id
(
0
)
].z
=
tempPosq.z
;
localData[get_local_id
(
0
)
].q
=
tempPosq.w
;
localData[get_local_id
(
0
)
].bornRadius
=
global_bornRadii[j]
;
}
localData[get_local_id
(
0
)
].fx
=
0.0f
;
localData[get_local_id
(
0
)
].fy
=
0.0f
;
localData[get_local_id
(
0
)
].fz
=
0.0f
;
localData[get_local_id
(
0
)
].fw
=
0.0f
;
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
interactionFlags[pos]
;
if
(
flags
!=
0xFFFFFFFF
)
{
...
...
@@ -333,7 +362,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
if
((
flags&
(
1<<j
))
!=
0
)
{
float4
posq2
=
local_posq[tbx+j]
;
float4
posq2
=
(
float4
)
(
localData[tbx+j].x,
localData[tbx+j].y,
localData[tbx+j].z,
localData[tbx+j].q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -343,7 +372,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
bornRadius2
=
local
_bornRadii[tbx+j]
;
float
bornRadius2
=
local
Data[tbx+j].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
...
...
@@ -377,8 +406,13 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+4]
;
if
(
tgx
%
16
==
0
)
tempBuffer[get_local_id
(
0
)
]
+=
tempBuffer[get_local_id
(
0
)
+8]
;
if
(
tgx
==
0
)
local_force[tbx+j]
+=
tempBuffer[get_local_id
(
0
)
]
+
tempBuffer[get_local_id
(
0
)
+16]
;
if
(
tgx
==
0
)
{
float4
sum
=
tempBuffer[get_local_id
(
0
)
]
+
tempBuffer[get_local_id
(
0
)
+16]
;
localData[tbx+j].fx
+=
sum.x
;
localData[tbx+j].fy
+=
sum.y
;
localData[tbx+j].fz
+=
sum.z
;
localData[tbx+j].fw
+=
sum.w
;
}
}
}
}
...
...
@@ -394,7 +428,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
; j < TILE_SIZE; j++) {
if
(
atom1
<
NUM_ATOMS
&&
y+tj
<
NUM_ATOMS
)
{
float4
posq2
=
local_posq
[tbx+tj]
;
float4
posq2
=
(
float4
)
(
localData[tbx+tj].x,
localData[tbx+tj].y,
localData[tbx+tj].z,
localData
[tbx+tj]
.q
)
;
float4
delta
=
(
float4
)
(
posq2.xyz
-
posq1.xyz,
0.0f
)
;
#
ifdef
USE_PERIODIC
delta.x
-=
floor
(
delta.x*INV_PERIODIC_BOX_SIZE_X+0.5f
)
*PERIODIC_BOX_SIZE_X
;
...
...
@@ -404,7 +438,7 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
float
r2
=
delta.x*delta.x
+
delta.y*delta.y
+
delta.z*delta.z
;
float
invR
=
native_rsqrt
(
r2
)
;
float
r
=
native_recip
(
invR
)
;
float
bornRadius2
=
local
_bornRadii[tbx+tj]
;
float
bornRadius2
=
local
Data[tbx+tj].bornRadius
;
float
alpha2_ij
=
bornRadius1*bornRadius2
;
float
D_ij
=
r2/
(
4.0f*alpha2_ij
)
;
float
expTerm
=
exp
(
-D_ij
)
;
...
...
@@ -424,7 +458,10 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
energy
+=
tempEnergy
;
delta.xyz
*=
dEdR
;
force.xyz
-=
delta.xyz
;
local_force[tbx+tj]
+=
(
float4
)
(
delta.xyz,
dGpol_dalpha2_ij*bornRadius1
)
;
localData[tbx+tj].fx
+=
delta.x
;
localData[tbx+tj].fy
+=
delta.y
;
localData[tbx+tj].fz
+=
delta.z
;
localData[tbx+tj].fw
+=
dGpol_dalpha2_ij*bornRadius1
;
}
tj
=
(
tj
+
1
)
&
(
TILE_SIZE
-
1
)
;
}
...
...
@@ -439,9 +476,9 @@ __kernel void computeGBSAForce1(__global float4* forceBuffers, __global float* e
unsigned
int
offset2
=
y
+
tgx
+
warp*PADDED_NUM_ATOMS
;
#
endif
forceBuffers[offset1].xyz
+=
force.xyz
;
forceBuffers[offset2]
.xyz
+=
local_force
[get_local_id
(
0
)
].
xyz
;
forceBuffers[offset2]
+=
(
float4
)
(
localData[get_local_id
(
0
)
].fx,
localData[get_local_id
(
0
)
].fy,
localData
[get_local_id
(
0
)
].
fz,
0
)
;
global_bornForce[offset1]
+=
force.w
;
global_bornForce[offset2]
+=
local
_force
[get_local_id
(
0
)
].w
;
global_bornForce[offset2]
+=
local
Data
[get_local_id
(
0
)
].
f
w
;
lasty
=
y
;
}
pos++
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment