Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
562cfb39
Commit
562cfb39
authored
Nov 17, 2010
by
Peter Eastman
Browse files
Created CustomGBForce kernels optimized for CPU. Fixed bugs in CPU GBSA kernels.
parent
cbfff447
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
560 additions
and
51 deletions
+560
-51
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+37
-23
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
+251
-0
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
+5
-3
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
+5
-3
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
+237
-0
platforms/opencl/src/kernels/customGBValueN2_default.cl
platforms/opencl/src/kernels/customGBValueN2_default.cl
+3
-1
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
+3
-1
platforms/opencl/src/kernels/gbsaObc_cpu.cl
platforms/opencl/src/kernels/gbsaObc_cpu.cl
+19
-20
No files found.
platforms/opencl/src/OpenCLKernels.cpp
View file @
562cfb39
...
@@ -1715,7 +1715,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1715,7 +1715,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
params
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
params
->
getDeviceBuffer
());
computeBornSumKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
13
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
13
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
computeBornSumKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
1
:
OpenCLContext
::
ThreadBlockSize
)
*
sizeof
(
cl_float
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
computeBornSumKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
...
@@ -1734,7 +1734,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1734,7 +1734,7 @@ double OpenCLCalcGBSAOBCForceKernel::execute(ContextImpl& context, bool includeF
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornRadii
->
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornRadii
->
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornForce
->
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bornForce
->
getDeviceBuffer
());
force1Kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
13
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
13
*
sizeof
(
cl_float
),
NULL
);
force1Kernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
mm_float4
),
NULL
);
force1Kernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
1
:
OpenCLContext
::
ThreadBlockSize
)
*
sizeof
(
mm_float4
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
force1Kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
...
@@ -1954,6 +1954,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -1954,6 +1954,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
CustomGBForce
::
NoCutoff
);
bool
useCutoff
=
(
force
.
getNonbondedMethod
()
!=
CustomGBForce
::
NoCutoff
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
CustomGBForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomGBForce
::
CutoffNonPeriodic
);
bool
usePeriodic
=
(
force
.
getNonbondedMethod
()
!=
CustomGBForce
::
NoCutoff
&&
force
.
getNonbondedMethod
()
!=
CustomGBForce
::
CutoffNonPeriodic
);
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
{
{
// Create the N2 value kernel.
// Create the N2 value kernel.
...
@@ -1987,8 +1988,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -1987,8 +1988,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
paramName
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[
get_
local
_id(0)
] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[local
AtomIndex
] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal2
<<
"local_"
<<
paramName
<<
"[
get_
local
_id(0)
] = global_"
<<
paramName
<<
"[j];
\n
"
;
loadLocal2
<<
"local_"
<<
paramName
<<
"[local
AtomIndex
] = global_"
<<
paramName
<<
"[j];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"1 = global_"
<<
paramName
<<
"[atom1];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"1 = global_"
<<
paramName
<<
"[atom1];
\n
"
;
load2
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"2 = local_"
<<
paramName
<<
"[atom2];
\n
"
;
load2
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"2 = local_"
<<
paramName
<<
"[atom2];
\n
"
;
}
}
...
@@ -2010,7 +2011,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2010,7 +2011,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
string
file
=
(
cl
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
customGBValueN2_nvidia
:
OpenCLKernelSources
::
customGBValueN2_default
);
// Pick the source of the N2 value kernel for this device: the CPU variant when
// running on a CPU device, the "nvidia" variant when the SIMD width is 32, and
// the default variant otherwise.
// The last branch previously evaluated the default source as a bare expression
// without assigning it, which left `file` empty for every device that is
// neither a CPU nor SIMD-width 32.
string file;
if (deviceIsCpu)
    file = OpenCLKernelSources::customGBValueN2_cpu;
else if (cl.getSIMDWidth() == 32)
    file = OpenCLKernelSources::customGBValueN2_nvidia;
else
    file = OpenCLKernelSources::customGBValueN2_default;
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
file
,
replacements
),
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
file
,
replacements
),
defines
);
pairValueKernel
=
cl
::
Kernel
(
program
,
"computeN2Value"
);
pairValueKernel
=
cl
::
Kernel
(
program
,
"computeN2Value"
);
}
}
...
@@ -2106,8 +2113,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2106,8 +2113,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
string
paramName
=
"params"
+
intToString
(
i
+
1
);
string
paramName
=
"params"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
paramName
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
paramName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
paramName
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[
get_
local
_id(0)
] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal1
<<
"local_"
<<
paramName
<<
"[local
AtomIndex
] = "
<<
paramName
<<
"1;
\n
"
;
loadLocal2
<<
"local_"
<<
paramName
<<
"[
get_
local
_id(0)
] = global_"
<<
paramName
<<
"[j];
\n
"
;
loadLocal2
<<
"local_"
<<
paramName
<<
"[local
AtomIndex
] = global_"
<<
paramName
<<
"[j];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"1 = global_"
<<
paramName
<<
"[atom1];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"1 = global_"
<<
paramName
<<
"[atom1];
\n
"
;
load2
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"2 = local_"
<<
paramName
<<
"[atom2];
\n
"
;
load2
<<
buffer
.
getType
()
<<
" "
<<
paramName
<<
"2 = local_"
<<
paramName
<<
"[atom2];
\n
"
;
}
}
...
@@ -2115,8 +2122,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2115,8 +2122,8 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
string
valueName
=
"values"
+
intToString
(
i
+
1
);
string
valueName
=
"values"
+
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
valueName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
valueName
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* global_"
<<
valueName
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_"
<<
valueName
;
loadLocal1
<<
"local_"
<<
valueName
<<
"[
get_
local
_id(0)
] = "
<<
valueName
<<
"1;
\n
"
;
loadLocal1
<<
"local_"
<<
valueName
<<
"[local
AtomIndex
] = "
<<
valueName
<<
"1;
\n
"
;
loadLocal2
<<
"local_"
<<
valueName
<<
"[
get_
local
_id(0)
] = global_"
<<
valueName
<<
"[j];
\n
"
;
loadLocal2
<<
"local_"
<<
valueName
<<
"[local
AtomIndex
] = global_"
<<
valueName
<<
"[j];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
valueName
<<
"1 = global_"
<<
valueName
<<
"[atom1];
\n
"
;
load1
<<
buffer
.
getType
()
<<
" "
<<
valueName
<<
"1 = global_"
<<
valueName
<<
"[atom1];
\n
"
;
load2
<<
buffer
.
getType
()
<<
" "
<<
valueName
<<
"2 = local_"
<<
valueName
<<
"[atom2];
\n
"
;
load2
<<
buffer
.
getType
()
<<
" "
<<
valueName
<<
"2 = local_"
<<
valueName
<<
"[atom2];
\n
"
;
}
}
...
@@ -2124,7 +2131,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2124,7 +2131,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
string
index
=
intToString
(
i
+
1
);
string
index
=
intToString
(
i
+
1
);
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* derivBuffers"
<<
index
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_deriv"
<<
index
;
extraArgs
<<
", __global "
<<
buffer
.
getType
()
<<
"* derivBuffers"
<<
index
<<
", __local "
<<
buffer
.
getType
()
<<
"* local_deriv"
<<
index
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[
get_
local
_id(0)
] = 0.0f;
\n
"
;
clearLocal
<<
"local_deriv"
<<
index
<<
"[local
AtomIndex
] = 0.0f;
\n
"
;
load1
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
load1
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_1 = 0.0f;
\n
"
;
load2
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_2 = 0.0f;
\n
"
;
load2
<<
buffer
.
getType
()
<<
" deriv"
<<
index
<<
"_2 = 0.0f;
\n
"
;
recordDeriv
<<
"local_deriv"
<<
index
<<
"[atom2] += deriv"
<<
index
<<
"_2;
\n
"
;
recordDeriv
<<
"local_deriv"
<<
index
<<
"[atom2] += deriv"
<<
index
<<
"_2;
\n
"
;
...
@@ -2157,7 +2164,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2157,7 +2164,13 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"NUM_ATOMS"
]
=
intToString
(
cl
.
getNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
defines
[
"NUM_BLOCKS"
]
=
OpenCLExpressionUtilities
::
intToString
(
cl
.
getNumAtomBlocks
());
string
file
=
(
cl
.
getSIMDWidth
()
==
32
?
OpenCLKernelSources
::
customGBEnergyN2_nvidia
:
OpenCLKernelSources
::
customGBEnergyN2_default
);
string
file
;
if
(
deviceIsCpu
)
file
=
OpenCLKernelSources
::
customGBEnergyN2_cpu
;
else
if
(
cl
.
getSIMDWidth
()
==
32
)
file
=
OpenCLKernelSources
::
customGBEnergyN2_nvidia
;
else
file
=
OpenCLKernelSources
::
customGBEnergyN2_default
;
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
file
,
replacements
),
defines
);
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
file
,
replacements
),
defines
);
pairEnergyKernel
=
cl
::
Kernel
(
program
,
"computeN2Energy"
);
pairEnergyKernel
=
cl
::
Kernel
(
program
,
"computeN2Energy"
);
}
}
...
@@ -2408,6 +2421,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2408,6 +2421,7 @@ void OpenCLCalcCustomGBForceKernel::initialize(const System& system, const Custo
}
}
double
OpenCLCalcCustomGBForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
OpenCLCalcCustomGBForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
bool
deviceIsCpu
=
(
cl
.
getDevice
().
getInfo
<
CL_DEVICE_TYPE
>
()
==
CL_DEVICE_TYPE_CPU
);
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
OpenCLNonbondedUtilities
&
nb
=
cl
.
getNonbondedUtilities
();
if
(
!
hasInitializedKernels
)
{
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
hasInitializedKernels
=
true
;
...
@@ -2422,14 +2436,14 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2422,14 +2436,14 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionIndices
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionIndices
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionRowIndices
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionRowIndices
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
valueBuffers
->
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
valueBuffers
->
getDeviceBuffer
());
pairValueKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
pairValueKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
sizeof
(
cl_float
),
NULL
);
pairValueKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float
),
NULL
);
pairValueKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
sizeof
(
cl_float
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
pairValueKernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
||
deviceIsCpu
)
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
pairValueKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
...
@@ -2465,19 +2479,19 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2465,19 +2479,19 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
index
=
0
;
index
=
0
;
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getForceBuffers
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
sizeof
(
cl_float4
),
NULL
);
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
sizeof
(
cl_float4
),
NULL
);
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusions
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusions
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionIndices
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionIndices
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionRowIndices
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getNonbondedUtilities
().
getExclusionRowIndices
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
sizeof
(
cl_float4
),
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
sizeof
(
cl_float4
),
NULL
);
if
(
nb
.
getUseCutoff
())
{
if
(
nb
.
getUseCutoff
())
{
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractingTiles
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionCount
().
getDeviceBuffer
());
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
index
+=
2
;
// Periodic box size arguments are set when the kernel is executed.
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
pairEnergyKernel
.
setArg
<
cl_uint
>
(
index
++
,
maxTiles
);
if
(
cl
.
getSIMDWidth
()
==
32
)
if
(
cl
.
getSIMDWidth
()
==
32
||
deviceIsCpu
)
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
pairEnergyKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
nb
.
getInteractionFlags
().
getDeviceBuffer
());
}
}
else
else
...
@@ -2487,17 +2501,17 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2487,17 +2501,17 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
pairEnergyKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
buffer
.
getMemory
());
pairEnergyKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
buffer
.
getMemory
());
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
buffer
.
getSize
(),
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
buffer
.
getSize
(),
NULL
);
}
}
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
computedValues
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
computedValues
->
getBuffers
()[
i
];
pairEnergyKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
buffer
.
getMemory
());
pairEnergyKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
buffer
.
getMemory
());
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
buffer
.
getSize
(),
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
buffer
.
getSize
(),
NULL
);
}
}
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
energyDerivs
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
energyDerivs
->
getBuffers
()[
i
];
pairEnergyKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
buffer
.
getMemory
());
pairEnergyKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
buffer
.
getMemory
());
pairEnergyKernel
.
setArg
(
index
++
,
OpenCLContext
::
ThreadBlockSize
*
buffer
.
getSize
(),
NULL
);
pairEnergyKernel
.
setArg
(
index
++
,
(
deviceIsCpu
?
OpenCLContext
::
TileSize
:
OpenCLContext
::
ThreadBlockSize
)
*
buffer
.
getSize
(),
NULL
);
}
}
if
(
tabulatedFunctionParams
!=
NULL
)
{
if
(
tabulatedFunctionParams
!=
NULL
)
{
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
...
@@ -2560,9 +2574,9 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
...
@@ -2560,9 +2574,9 @@ double OpenCLCalcCustomGBForceKernel::execute(ContextImpl& context, bool include
}
}
}
}
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
numTiles
=
cl
.
getNumAtomBlocks
()
*
(
cl
.
getNumAtomBlocks
()
+
1
)
/
2
;
cl
.
executeKernel
(
pairValueKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
pairValueKernel
,
numTiles
*
OpenCLContext
::
TileSize
,
deviceIsCpu
?
1
:
-
1
);
cl
.
executeKernel
(
perParticleValueKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
perParticleValueKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
pairEnergyKernel
,
numTiles
*
OpenCLContext
::
TileSize
);
cl
.
executeKernel
(
pairEnergyKernel
,
numTiles
*
OpenCLContext
::
TileSize
,
deviceIsCpu
?
1
:
-
1
);
cl
.
executeKernel
(
perParticleEnergyKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
perParticleEnergyKernel
,
cl
.
getPaddedNumAtoms
());
if
(
needParameterGradient
)
if
(
needParameterGradient
)
cl
.
executeKernel
(
gradientChainRuleKernel
,
cl
.
getPaddedNumAtoms
());
cl
.
executeKernel
(
gradientChainRuleKernel
,
cl
.
getPaddedNumAtoms
());
...
...
platforms/opencl/src/kernels/customGBEnergyN2_cpu.cl
0 → 100644
View file @
562cfb39
#define TILE_SIZE 32
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset1] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset2] += local_deriv##INDEX[tgx];

/**
 * Compute forces, energy and per-atom energy derivatives from pair interactions.
 *
 * Every work-group processes a contiguous range of tiles [pos, end) and walks
 * each tile serially, one atom pair at a time.  Results are accumulated into the
 * work-group's own slice of forceBuffers (offset get_group_id(0)*PADDED_NUM_ATOMS)
 * and the energy into energyBuffer[get_global_id(0)].
 *
 * A tile is identified by block indices (x, y).  The TILE_SIZE atoms of block y
 * are cached in local memory and reused while consecutive tiles share the same y.
 * Three cases are handled:
 *   - diagonal tiles (x == y): every ordered pair is visited, so only atom1's
 *     force is stored and each pair contributes half of its energy;
 *   - off-diagonal tiles with interaction flags: only the rows/columns whose
 *     flag bit is set are visited;
 *   - all other off-diagonal tiles: every pair is visited.
 *
 * NOTE(review): the bare upper-case tokens that appear in statement position
 * (extra kernel arguments, per-atom parameter loads, the interaction body, the
 * derivative clear/record/store steps) are not defined in this file; presumably
 * the host substitutes them textually before compilation.  They are deliberately
 * not spelled out in comments so that such a substitution cannot expand inside a
 * comment.
 *
 * tempBuffer is not referenced directly by the code in this file.
 */

__kernel void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer, __local float4* local_force,
        __global float4* posq, __local float4* local_posq, __global unsigned int* exclusions, __global unsigned int* exclusionIndices,
        __global unsigned int* exclusionRowIndices, __local float4* tempBuffer,
#ifdef USE_CUTOFF
        __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize, unsigned int maxTiles, __global unsigned int* interactionFlags
#else
        unsigned int numTiles
#endif
        PARAMETER_ARGUMENTS) {
#ifdef USE_CUTOFF
    // If the neighbor list overflowed (numTiles > maxTiles) fall back to processing every tile.
    unsigned int numTiles = interactionCount[0];
    unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
    unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
#else
    unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0);
    unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
#endif
    float energy = 0.0f;
    unsigned int lasty = 0xFFFFFFFF; // Block index whose atoms are currently cached in local memory.

    while (pos < end) {
        // Extract the coordinates of this tile
        unsigned int x, y;
#ifdef USE_CUTOFF
        if (numTiles <= maxTiles) {
            ushort2 tileIndices = tiles[pos];
            x = tileIndices.x;
            y = tileIndices.y;
        }
        else
#endif
        {
            // Convert the linear tile index into (x, y) block indices.
            y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
            if (x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
                y++;
                x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
            }
        }

        // Locate the exclusion data for this tile.

#ifdef USE_EXCLUSIONS
        unsigned int exclusionStart = exclusionRowIndices[x];
        unsigned int exclusionEnd = exclusionRowIndices[x+1];
        int exclusionIndex = -1;
        for (int i = exclusionStart; i < exclusionEnd; i++)
            if (exclusionIndices[i] == y) {
                exclusionIndex = i*TILE_SIZE;
                break;
            }
        bool hasExclusions = (exclusionIndex > -1);
#else
        bool hasExclusions = false;
#endif

        // Load the data for this tile if we don't already have it cached.

        if (lasty != y) {
            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
                unsigned int j = y*TILE_SIZE + localAtomIndex;
                local_posq[localAtomIndex] = posq[j];
                LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
            }
        }
        if (x == y) {
            // This tile is on the diagonal.

            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
                unsigned int excl = exclusions[exclusionIndex+tgx];
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
                float4 force = 0.0f;
                float4 posq1 = posq[atom1];
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
                    // A cleared low bit marks the pair as excluded.
                    bool isExcluded = !(excl & 0x1);
#endif
                    float4 posq2 = local_posq[j];
                    float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
                    delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
                    float r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
                    if (r2 < CUTOFF_SQUARED) {
#endif
                    float r = SQRT(r2);
                    // atom2 is the local index while the parameters are loaded, then the global index.
                    unsigned int atom2 = j;
                    LOAD_ATOM2_PARAMETERS
                    atom2 = y*TILE_SIZE+j;
                    float dEdR = 0.0f;
                    float tempEnergy = 0.0f;
                    if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
                        COMPUTE_INTERACTION
                        dEdR /= -r;
                    }
                    // Each pair is visited twice on the diagonal, so count half of its energy per visit.
                    energy += 0.5f*tempEnergy;
                    delta.xyz *= dEdR;
                    force.xyz -= delta.xyz;
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }

                // Write results

                unsigned int offset1 = x*TILE_SIZE + tgx + get_group_id(0)*PADDED_NUM_ATOMS;
                forceBuffers[offset1].xyz += force.xyz;
                STORE_DERIVATIVES_1
            }
        }
        else {
            // This is an off-diagonal tile.

            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
                local_force[localAtomIndex] = 0.0f;
                CLEAR_LOCAL_DERIVATIVES
            }
#if defined(USE_CUTOFF) && defined(USE_EXCLUSIONS)
            unsigned int flags1 = (numTiles <= maxTiles ? interactionFlags[2*pos] : 0xFFFFFFFF);
            unsigned int flags2 = (numTiles <= maxTiles ? interactionFlags[2*pos+1] : 0xFFFFFFFF);
            if (!hasExclusions && (flags1 != 0xFFFFFFFF || flags2 != 0xFFFFFFFF)) {
                // Compute only a subset of the interactions in this tile.
                // flags2 selects the atoms of block x, flags1 the atoms of block y.

                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    if ((flags2&(1<<tgx)) != 0) {
                        unsigned int atom1 = x*TILE_SIZE+tgx;
                        // Accumulate a force vector, as the other two branches do.
                        float4 force = 0.0f;
                        float4 posq1 = posq[atom1];
                        LOAD_ATOM1_PARAMETERS
                        for (unsigned int j = 0; j < TILE_SIZE; j++) {
                            if ((flags1&(1<<j)) != 0) {
                                // This branch only runs for tiles without exclusion data, so no
                                // pair is excluded.  NOTE(review): declared because the other
                                // branches define it for the interaction body - confirm it is
                                // referenced there.
                                bool isExcluded = false;
                                float4 posq2 = local_posq[j];
                                float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
                                delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
                                float r2 = dot(delta.xyz, delta.xyz);
                                if (r2 < CUTOFF_SQUARED) {
                                    float r = SQRT(r2);
                                    unsigned int atom2 = j;
                                    LOAD_ATOM2_PARAMETERS
                                    atom2 = y*TILE_SIZE+j;
                                    float dEdR = 0.0f;
                                    float tempEnergy = 0.0f;
                                    if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
                                        COMPUTE_INTERACTION
                                        dEdR /= -r;
                                    }
                                    energy += tempEnergy;
                                    delta.xyz *= dEdR;
                                    force.xyz -= delta.xyz;
                                    // Back to the local index for the local accumulators.
                                    atom2 = j;
                                    local_force[atom2].xyz += delta.xyz;
                                    RECORD_DERIVATIVE_2
                                }
                            }
                        }

                        // Write results for atom1.

                        unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                        forceBuffers[offset1].xyz += force.xyz;
                        STORE_DERIVATIVES_1
                    }
                }
            }
            else
#endif
            {
                // Compute the full set of interactions in this tile.

                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
                    float4 force = 0.0f;
                    float4 posq1 = posq[atom1];
                    LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
                    // Without exclusion data for this tile, treat every pair as included.
                    unsigned int excl = (hasExclusions ? exclusions[exclusionIndex+tgx] : 0xFFFFFFFF);
#endif
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
                        bool isExcluded = !(excl & 0x1);
#endif
                        float4 posq2 = local_posq[j];
                        float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
                        delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
                        float r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
                        if (r2 < CUTOFF_SQUARED) {
#endif
                        float r = SQRT(r2);
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
                        float dEdR = 0.0f;
                        float tempEnergy = 0.0f;
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
                            COMPUTE_INTERACTION
                            dEdR /= -r;
                        }
                        energy += tempEnergy;
                        delta.xyz *= dEdR;
                        force.xyz -= delta.xyz;
                        // Back to the local index for the local accumulators.
                        atom2 = j;
                        local_force[atom2].xyz += delta.xyz;
                        RECORD_DERIVATIVE_2
#ifdef USE_CUTOFF
                        }
#endif
#ifdef USE_EXCLUSIONS
                        excl >>= 1;
#endif
                    }

                    // Write results for atom1.

                    unsigned int offset1 = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                    forceBuffers[offset1].xyz += force.xyz;
                    STORE_DERIVATIVES_1
                }
            }

            // Write results

            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
                unsigned int offset2 = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
                forceBuffers[offset2].xyz += local_force[tgx].xyz;
                STORE_DERIVATIVES_2
            }
        }
        lasty = y;
        pos++;
    }
    energyBuffer[get_global_id(0)] += energy;
}
platforms/opencl/src/kernels/customGBEnergyN2_default.cl
View file @
562cfb39
...
@@ -74,7 +74,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -74,7 +74,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
if
(
x
==
y
)
{
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
local_posq[localAtomIndex]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
...
@@ -136,12 +137,13 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -136,12 +137,13 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
else
{
else
{
//
This
is
an
off-diagonal
tile.
//
This
is
an
off-diagonal
tile.
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
local_posq[
get_
local
_id
(
0
)
]
=
posq[j]
;
local_posq[local
AtomIndex
]
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
local_force[
get_
local
_id
(
0
)
]
=
0.0f
;
local_force[local
AtomIndex
]
=
0.0f
;
CLEAR_LOCAL_DERIVATIVES
CLEAR_LOCAL_DERIVATIVES
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
...
...
platforms/opencl/src/kernels/customGBEnergyN2_nvidia.cl
View file @
562cfb39
...
@@ -75,7 +75,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -75,7 +75,8 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
if
(
x
==
y
)
{
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
local_posq[localAtomIndex]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndex[localGroupIndex]+tgx]
;
unsigned
int
excl
=
exclusions[exclusionIndex[localGroupIndex]+tgx]
;
...
@@ -128,12 +129,13 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
...
@@ -128,12 +129,13 @@ void computeN2Energy(__global float4* forceBuffers, __global float* energyBuffer
else
{
else
{
//
This
is
an
off-diagonal
tile.
//
This
is
an
off-diagonal
tile.
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
if
(
lasty
!=
y
)
{
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
local_posq[
get_
local
_id
(
0
)
]
=
posq[j]
;
local_posq[local
AtomIndex
]
=
posq[j]
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
local_force[
get_
local
_id
(
0
)
]
=
0.0f
;
local_force[local
AtomIndex
]
=
0.0f
;
CLEAR_LOCAL_DERIVATIVES
CLEAR_LOCAL_DERIVATIVES
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
unsigned
int
flags
=
(
numTiles
<=
maxTiles
?
interactionFlags[pos]
:
0xFFFFFFFF
)
;
...
...
platforms/opencl/src/kernels/customGBValueN2_cpu.cl
0 → 100644
View file @
562cfb39
#define TILE_SIZE 32

/**
 * Compute a value based on pair interactions.
 *
 * CPU-oriented variant: the kernel never queries get_local_id(); each work-group
 * walks its own contiguous range of tiles [pos, end) and loops serially over the
 * TILE_SIZE atoms of every tile.
 *
 * The upper-case identifiers that are not declared here (PARAMETER_ARGUMENTS,
 * LOAD_LOCAL_PARAMETERS_FROM_GLOBAL, LOAD_ATOM1_PARAMETERS, LOAD_ATOM2_PARAMETERS,
 * COMPUTE_VALUE, SQRT, NUM_ATOMS, PADDED_NUM_ATOMS, NUM_BLOCKS, CUTOFF_SQUARED) are
 * expected to be supplied when the program is built.
 * NOTE(review): COMPUTE_VALUE presumably assigns tempValue1 (contribution to atom1)
 * and tempValue2 (contribution to atom2) - confirm against the host-side code generator.
 *
 * @param posq                 per-atom float4 values; only .xyz is read here, as a position
 * @param local_posq           local-memory cache of posq for the atoms of tile row y
 * @param exclusions           per-atom exclusion bit masks, indexed by exclusionIndex+tgx
 * @param exclusionIndices     y indices of the tiles that have exclusion data
 * @param exclusionRowIndices  for tile x, the range [x], [x+1] to scan in exclusionIndices
 * @param global_value         output; results are accumulated at atom + get_group_id(0)*PADDED_NUM_ATOMS
 * @param local_value          local-memory accumulator for the atoms of tile row y
 * @param tempBuffer           not referenced by this kernel
 * @param tiles                (USE_CUTOFF) x/y coordinates of each tile to process
 * @param interactionCount     (USE_CUTOFF) element 0 holds the number of tiles
 * @param periodicBoxSize      (USE_CUTOFF) box size, used when USE_PERIODIC is defined
 * @param invPeriodicBoxSize   (USE_CUTOFF) reciprocal box size, used when USE_PERIODIC is defined
 * @param maxTiles             (USE_CUTOFF) if numTiles exceeds this, all tiles are processed instead
 * @param interactionFlags     (USE_CUTOFF) two flag words per tile, at 2*pos and 2*pos+1
 * @param numTiles             (no cutoff) number of tiles to process
 */

__kernel void computeN2Value(__global float4* posq, __local float4* local_posq, __global unsigned int* exclusions,
        __global unsigned int* exclusionIndices, __global unsigned int* exclusionRowIndices, __global float* global_value,
        __local float* local_value, __local float* tempBuffer,
#ifdef USE_CUTOFF
        __global ushort2* tiles, __global unsigned int* interactionCount, float4 periodicBoxSize, float4 invPeriodicBoxSize,
        unsigned int maxTiles, __global unsigned int* interactionFlags
#else
        unsigned int numTiles
#endif
        PARAMETER_ARGUMENTS) {
    // Split the list of tiles evenly between the work-groups.
#ifdef USE_CUTOFF
    unsigned int numTiles = interactionCount[0];
    unsigned int pos = get_group_id(0)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
    unsigned int end = (get_group_id(0)+1)*(numTiles > maxTiles ? NUM_BLOCKS*(NUM_BLOCKS+1)/2 : numTiles)/get_num_groups(0);
#else
    unsigned int pos = get_group_id(0)*numTiles/get_num_groups(0);
    unsigned int end = (get_group_id(0)+1)*numTiles/get_num_groups(0);
#endif
    // y index of the tile whose atoms are currently cached in local_posq (none yet).
    unsigned int lasty = 0xFFFFFFFF;

    while (pos < end) {
        // Extract the coordinates of this tile
        unsigned int x, y;
#ifdef USE_CUTOFF
        if (numTiles <= maxTiles) {
            ushort2 tileIndices = tiles[pos];
            x = tileIndices.x;
            y = tileIndices.y;
        }
        else
#endif
        {
            // Convert the linear tile index into (x, y) coordinates of the triangular tile matrix.
            y = (unsigned int) floor(NUM_BLOCKS+0.5f-sqrt((NUM_BLOCKS+0.5f)*(NUM_BLOCKS+0.5f)-2*pos));
            x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
            if (x >= NUM_BLOCKS) { // Occasionally happens due to roundoff error.
                y++;
                x = (pos-y*NUM_BLOCKS+y*(y+1)/2);
            }
        }

        // Locate the exclusion data for this tile.
#ifdef USE_EXCLUSIONS
        unsigned int exclusionStart = exclusionRowIndices[x];
        unsigned int exclusionEnd = exclusionRowIndices[x+1];
        int exclusionIndex = -1;
        for (int i = exclusionStart; i < exclusionEnd; i++)
            if (exclusionIndices[i] == y) {
                exclusionIndex = i*TILE_SIZE;
                break;
            }
        bool hasExclusions = (exclusionIndex > -1);
#else
        bool hasExclusions = false;
#endif

        // Load the data for this tile if we don't already have it cached.
        if (lasty != y) {
            for (int localAtomIndex = 0; localAtomIndex < TILE_SIZE; localAtomIndex++) {
                unsigned int j = y*TILE_SIZE + localAtomIndex;
                local_posq[localAtomIndex] = posq[j];
                LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
            }
        }
        if (x == y) {
            // This tile is on the diagonal.
            for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef USE_EXCLUSIONS
                unsigned int excl = exclusions[exclusionIndex+tgx];
#endif
                unsigned int atom1 = x*TILE_SIZE+tgx;
                float value = 0.0f;
                float4 posq1 = posq[atom1];
                LOAD_ATOM1_PARAMETERS
                for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
                    // A cleared low bit marks the pair (atom1, j) as excluded.
                    bool isExcluded = !(excl & 0x1);
#endif
                    float4 posq2 = local_posq[j];
                    float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
                    delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
                    float r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
                    if (r2 < CUTOFF_SQUARED) {
#endif
                    float r = SQRT(r2);
                    // atom2 is first the tile-local index (for the parameter load), then the global index.
                    unsigned int atom2 = j;
                    LOAD_ATOM2_PARAMETERS
                    atom2 = y*TILE_SIZE+j;
                    float tempValue1 = 0.0f;
                    float tempValue2 = 0.0f;
#ifdef USE_EXCLUSIONS
                    if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#else
                    if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS && atom1 != atom2) {
#endif
                        COMPUTE_VALUE
                    }
                    // On the diagonal every ordered pair is visited, so only atom1's contribution is kept.
                    value += tempValue1;
#ifdef USE_CUTOFF
                    }
#endif
#ifdef USE_EXCLUSIONS
                    excl >>= 1;
#endif
                }

                // Write results
                unsigned int offset = x*TILE_SIZE + tgx + get_group_id(0)*PADDED_NUM_ATOMS;
                global_value[offset] += value;
            }
        }
        else {
            // This is an off-diagonal tile.
            for (int tgx = 0; tgx < TILE_SIZE; tgx++)
                local_value[tgx] = 0.0f;
#if defined(USE_CUTOFF) && defined(USE_EXCLUSIONS)
            unsigned int flags1 = (numTiles <= maxTiles ? interactionFlags[2*pos] : 0xFFFFFFFF);
            unsigned int flags2 = (numTiles <= maxTiles ? interactionFlags[2*pos+1] : 0xFFFFFFFF);
            if (!hasExclusions && (flags1 != 0xFFFFFFFF || flags2 != 0xFFFFFFFF)) {
                // Compute only a subset of the interactions in this tile.
                // flags2 is tested per atom of tile x (tgx), flags1 per atom of tile y (j).
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    if ((flags2&(1<<tgx)) != 0) {
                        unsigned int atom1 = x*TILE_SIZE+tgx;
                        float value = 0.0f;
                        float4 posq1 = posq[atom1];
                        LOAD_ATOM1_PARAMETERS
                        for (unsigned int j = 0; j < TILE_SIZE; j++) {
                            // Must test flags1: no variable named "flags" exists in this kernel.
                            if ((flags1&(1<<j)) != 0) {
                                float4 posq2 = local_posq[j];
                                float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
                                delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
                                float r2 = dot(delta.xyz, delta.xyz);
                                float tempValue1 = 0.0f;
                                float tempValue2 = 0.0f;
                                if (r2 < CUTOFF_SQUARED) {
                                    float r = SQRT(r2);
                                    unsigned int atom2 = j;
                                    LOAD_ATOM2_PARAMETERS
                                    atom2 = y*TILE_SIZE+j;
                                    if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
                                        COMPUTE_VALUE
                                    }
                                    value += tempValue1;
                                    local_value[j] += tempValue2;
                                }
                            }
                        }

                        // Write results for atom1.
                        unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                        global_value[offset] += value;
                    }
                }
            }
            else
#endif
            {
                // Compute the full set of interactions in this tile.
                for (unsigned int tgx = 0; tgx < TILE_SIZE; tgx++) {
                    unsigned int atom1 = x*TILE_SIZE+tgx;
                    float value = 0.0f;
                    float4 posq1 = posq[atom1];
                    LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
                    // Without exclusion data for this tile, all bits are set (nothing excluded).
                    unsigned int excl = (hasExclusions ? exclusions[exclusionIndex+tgx] : 0xFFFFFFFF);
#endif
                    for (unsigned int j = 0; j < TILE_SIZE; j++) {
#ifdef USE_EXCLUSIONS
                        bool isExcluded = !(excl & 0x1);
#endif
                        float4 posq2 = local_posq[j];
                        float4 delta = (float4) (posq2.xyz - posq1.xyz, 0.0f);
#ifdef USE_PERIODIC
                        delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
                        float r2 = dot(delta.xyz, delta.xyz);
#ifdef USE_CUTOFF
                        if (r2 < CUTOFF_SQUARED) {
#endif
                        float r = SQRT(r2);
                        unsigned int atom2 = j;
                        LOAD_ATOM2_PARAMETERS
                        atom2 = y*TILE_SIZE+j;
                        float tempValue1 = 0.0f;
                        float tempValue2 = 0.0f;
#ifdef USE_EXCLUSIONS
                        if (!isExcluded && atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#else
                        if (atom1 < NUM_ATOMS && atom2 < NUM_ATOMS) {
#endif
                            COMPUTE_VALUE
                        }
                        value += tempValue1;
                        local_value[j] += tempValue2;
#ifdef USE_CUTOFF
                        }
#endif
#ifdef USE_EXCLUSIONS
                        excl >>= 1;
#endif
                    }

                    // Write results for atom1.
                    unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
                    global_value[offset] += value;
                }
            }

            // Write results
            for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
                unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
                global_value[offset] += local_value[tgx];
            }
        }
        lasty = y;
        pos++;
    }
}
platforms/opencl/src/kernels/customGBValueN2_default.cl
View file @
562cfb39
...
@@ -71,7 +71,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -71,7 +71,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
if
(
x
==
y
)
{
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
local_posq[localAtomIndex]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
...
@@ -134,6 +135,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -134,6 +135,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
if
(
lasty
!=
y
&&
get_local_id
(
0
)
<
TILE_SIZE
)
{
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
local_value[get_local_id
(
0
)
]
=
0.0f
;
local_value[get_local_id
(
0
)
]
=
0.0f
;
...
...
platforms/opencl/src/kernels/customGBValueN2_nvidia.cl
View file @
562cfb39
...
@@ -73,7 +73,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -73,7 +73,8 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
if
(
x
==
y
)
{
if
(
x
==
y
)
{
//
This
tile
is
on
the
diagonal.
//
This
tile
is
on
the
diagonal.
local_posq[get_local_id
(
0
)
]
=
posq1
;
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
local_posq[localAtomIndex]
=
posq1
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
#
ifdef
USE_EXCLUSIONS
#
ifdef
USE_EXCLUSIONS
unsigned
int
excl
=
exclusions[exclusionIndex[localGroupIndex]+tgx]
;
unsigned
int
excl
=
exclusions[exclusionIndex[localGroupIndex]+tgx]
;
...
@@ -129,6 +130,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
...
@@ -129,6 +130,7 @@ void computeN2Value(__global float4* posq, __local float4* local_posq, __global
if
(
lasty
!=
y
)
{
if
(
lasty
!=
y
)
{
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
unsigned
int
j
=
y*TILE_SIZE
+
tgx
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
local_posq[get_local_id
(
0
)
]
=
posq[j]
;
const
unsigned
int
localAtomIndex
=
get_local_id
(
0
)
;
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
}
}
local_value[get_local_id
(
0
)
]
=
0.0f
;
local_value[get_local_id
(
0
)
]
=
0.0f
;
...
...
platforms/opencl/src/kernels/gbsaObc_cpu.cl
View file @
562cfb39
...
@@ -14,8 +14,7 @@ typedef struct {
...
@@ -14,8 +14,7 @@ typedef struct {
*
Compute
the
Born
sum.
*
Compute
the
Born
sum.
*/
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
void
computeBornSum
(
__global
float*
global_bornSum,
__global
float4*
posq,
__global
float2*
global_params,
__local
AtomData*
localData,
__local
float*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
__global
ushort2*
tiles,
__global
unsigned
int*
interactionCount,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
unsigned
int
maxTiles,
__global
unsigned
int*
interactionFlags
)
{
#
else
#
else
...
@@ -171,8 +170,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -171,8 +170,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
//
Write
results
for
atom1.
//
Write
results
for
atom1.
unsigned
int
offset
=
atom1
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
global_bornSum[offset]
+=
localData[tgx].bornSum
;
global_bornSum[offset]
+=
bornSum
;
}
}
}
//
Write
results
//
Write
results
...
@@ -181,6 +179,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -181,6 +179,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
unsigned
int
offset
=
y*TILE_SIZE+tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset
=
y*TILE_SIZE+tgx
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
global_bornSum[offset]
+=
localData[tgx].bornSum
;
global_bornSum[offset]
+=
localData[tgx].bornSum
;
}
}
}
lasty
=
y
;
lasty
=
y
;
pos++
;
pos++
;
}
}
...
@@ -190,8 +189,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
...
@@ -190,8 +189,7 @@ void computeBornSum(__global float* global_bornSum, __global float4* posq, __glo
*
First
part
of
computing
the
GBSA
interaction.
*
First
part
of
computing
the
GBSA
interaction.
*/
*/
__kernel
__attribute__
((
reqd_work_group_size
(
WORK_GROUP_SIZE,
1
,
1
)))
__kernel
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
void
computeGBSAForce1
(
__global
float4*
forceBuffers,
__global
float*
energyBuffer,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float4*
posq,
__global
float*
global_bornRadii,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
__global
float*
global_bornForce,
__local
AtomData*
localData,
__local
float4*
tempBuffer,
#
ifdef
USE_CUTOFF
#
ifdef
USE_CUTOFF
...
@@ -344,7 +342,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -344,7 +342,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
unsigned
int
offset
=
atom1
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
=
forceBuffers[offset].xyz+force.xyz
;
forceBuffers[offset].xyz
=
forceBuffers[offset].xyz+force.xyz
;
}
global_bornForce[offset]
+=
force.w
;
}
}
//
Write
results
//
Write
results
...
@@ -358,6 +356,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
...
@@ -358,6 +356,7 @@ void computeGBSAForce1(__global float4* forceBuffers, __global float* energyBuff
forceBuffers[offset]
=
f
;
forceBuffers[offset]
=
f
;
global_bornForce[offset]
+=
localData[tgx].fw
;
global_bornForce[offset]
+=
localData[tgx].fw
;
}
}
}
lasty
=
y
;
lasty
=
y
;
pos++
;
pos++
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment