Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ae686364
"platforms/vscode:/vscode.git/clone" did not exist on "f38f956dbeb64b8de703b22180be5152f840409e"
Unverified
Commit
ae686364
authored
Aug 17, 2022
by
Peter Eastman
Committed by
GitHub
Aug 17, 2022
Browse files
Improved support for devices without 64 bit atomics (#3737)
parent
48664a1f
Changes
29
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
117 additions
and
1037 deletions
+117
-1037
platforms/common/include/openmm/common/CommonKernels.h
platforms/common/include/openmm/common/CommonKernels.h
+0
-2
platforms/common/src/CommonKernels.cpp
platforms/common/src/CommonKernels.cpp
+49
-198
platforms/common/src/IntegrationUtilities.cpp
platforms/common/src/IntegrationUtilities.cpp
+0
-2
platforms/common/src/kernels/customGBEnergyN2.cc
platforms/common/src/kernels/customGBEnergyN2.cc
+0
-35
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
+0
-45
platforms/common/src/kernels/customGBEnergyPerParticle.cc
platforms/common/src/kernels/customGBEnergyPerParticle.cc
+0
-7
platforms/common/src/kernels/customGBGradientChainRule.cc
platforms/common/src/kernels/customGBGradientChainRule.cc
+0
-12
platforms/common/src/kernels/customGBValueN2.cc
platforms/common/src/kernels/customGBValueN2.cc
+0
-26
platforms/common/src/kernels/customGBValueN2_cpu.cc
platforms/common/src/kernels/customGBValueN2_cpu.cc
+0
-34
platforms/common/src/kernels/customGBValuePerParticle.cc
platforms/common/src/kernels/customGBValuePerParticle.cc
+0
-11
platforms/common/src/kernels/customHbondForce.cc
platforms/common/src/kernels/customHbondForce.cc
+0
-52
platforms/common/src/kernels/customNonbondedGroups.cc
platforms/common/src/kernels/customNonbondedGroups.cc
+0
-38
platforms/common/src/kernels/gbsaObc.cc
platforms/common/src/kernels/gbsaObc.cc
+0
-46
platforms/common/src/kernels/gbsaObc2.cc
platforms/common/src/kernels/gbsaObc2.cc
+0
-5
platforms/common/src/kernels/gbsaObcReductions.cc
platforms/common/src/kernels/gbsaObcReductions.cc
+1
-26
platforms/common/src/kernels/gbsaObc_cpu.cc
platforms/common/src/kernels/gbsaObc_cpu.cc
+0
-82
platforms/common/src/kernels/pme.cc
platforms/common/src/kernels/pme.cc
+0
-232
platforms/opencl/include/OpenCLBondedUtilities.h
platforms/opencl/include/OpenCLBondedUtilities.h
+3
-11
platforms/opencl/include/OpenCLNonbondedUtilities.h
platforms/opencl/include/OpenCLNonbondedUtilities.h
+2
-2
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+62
-171
No files found.
platforms/common/include/openmm/common/CommonKernels.h
View file @
ae686364
...
@@ -786,8 +786,6 @@ private:
...
@@ -786,8 +786,6 @@ private:
ComputeArray
globals
;
ComputeArray
globals
;
ComputeArray
donors
;
ComputeArray
donors
;
ComputeArray
acceptors
;
ComputeArray
acceptors
;
ComputeArray
donorBufferIndices
;
ComputeArray
acceptorBufferIndices
;
ComputeArray
donorExclusions
;
ComputeArray
donorExclusions
;
ComputeArray
acceptorExclusions
;
ComputeArray
acceptorExclusions
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
...
...
platforms/common/src/CommonKernels.cpp
View file @
ae686364
This diff is collapsed.
Click to expand it.
platforms/common/src/IntegrationUtilities.cpp
View file @
ae686364
...
@@ -528,8 +528,6 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
...
@@ -528,8 +528,6 @@ IntegrationUtilities::IntegrationUtilities(ComputeContext& context, const System
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
if
(
atomCounts
[
i
]
>
1
)
if
(
atomCounts
[
i
]
>
1
)
hasOverlappingVsites
=
true
;
hasOverlappingVsites
=
true
;
if
(
hasOverlappingVsites
&&
!
context
.
getSupports64BitGlobalAtomics
())
throw
OpenMMException
(
"This device does not support 64 bit atomics. Cannot have multiple virtual sites that depend on the same atom."
);
// Create the kernels used by this class.
// Create the kernels used by this class.
...
...
platforms/common/src/kernels/customGBEnergyN2.cc
View file @
ae686364
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[LOCAL_ID]));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[LOCAL_ID]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID];
#endif
/**
/**
* Compute a force based on pair interactions.
* Compute a force based on pair interactions.
*/
*/
KERNEL
void
computeN2Energy
(
KERNEL
void
computeN2Energy
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
int
needEnergy
,
GLOBAL
const
int2
*
exclusionTiles
,
int
needEnergy
,
...
@@ -160,7 +151,6 @@ KERNEL void computeN2Energy(
...
@@ -160,7 +151,6 @@ KERNEL void computeN2Energy(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
...
@@ -173,18 +163,6 @@ KERNEL void computeN2Energy(
...
@@ -173,18 +163,6 @@ KERNEL void computeN2Energy(
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
z
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
z
));
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
}
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
offset1
;
forceBuffers
[
offset1
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
if
(
x
!=
y
)
{
offset
=
offset2
;
forceBuffers
[
offset2
]
+=
(
real4
)
(
local_force
[
LOCAL_ID
].
x
,
local_force
[
LOCAL_ID
].
y
,
local_force
[
LOCAL_ID
].
z
,
0.0
f
);
STORE_DERIVATIVES_2
}
#endif
}
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
@@ -363,7 +341,6 @@ KERNEL void computeN2Energy(
...
@@ -363,7 +341,6 @@ KERNEL void computeN2Energy(
#else
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
...
@@ -376,18 +353,6 @@ KERNEL void computeN2Energy(
...
@@ -376,18 +353,6 @@ KERNEL void computeN2Energy(
offset
=
atom2
;
offset
=
atom2
;
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
}
}
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset1
].
xyz
+=
force
.
xyz
;
unsigned
int
offset
=
offset1
;
STORE_DERIVATIVES_1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
forceBuffers
[
offset2
]
+=
(
real4
)
(
local_force
[
LOCAL_ID
].
x
,
local_force
[
LOCAL_ID
].
y
,
local_force
[
LOCAL_ID
].
z
,
0.0
f
);
offset
=
offset2
;
STORE_DERIVATIVES_2
}
#endif
}
}
pos
++
;
pos
++
;
}
}
...
...
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
View file @
ae686364
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[tgx]));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(local_deriv##INDEX[tgx]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx];
#endif
/**
/**
* Compute a force based on pair interactions.
* Compute a force based on pair interactions.
*/
*/
KERNEL
void
computeN2Energy
(
KERNEL
void
computeN2Energy
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
int
needEnergy
,
GLOBAL
const
int2
*
exclusionTiles
,
int
needEnergy
,
...
@@ -100,17 +91,11 @@ KERNEL void computeN2Energy(
...
@@ -100,17 +91,11 @@ KERNEL void computeN2Energy(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
}
}
}
else
{
else
{
...
@@ -174,33 +159,21 @@ KERNEL void computeN2Energy(
...
@@ -174,33 +159,21 @@ KERNEL void computeN2Energy(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
}
// Write results.
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
local_force
[
tgx
].
xyz
;
STORE_DERIVATIVES_2
#endif
}
}
}
}
}
}
...
@@ -316,17 +289,11 @@ KERNEL void computeN2Energy(
...
@@ -316,17 +289,11 @@ KERNEL void computeN2Energy(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
}
}
}
else
else
...
@@ -375,17 +342,11 @@ KERNEL void computeN2Energy(
...
@@ -375,17 +342,11 @@ KERNEL void computeN2Energy(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
force
.
xyz
;
STORE_DERIVATIVES_1
#endif
}
}
}
}
...
@@ -398,17 +359,11 @@ KERNEL void computeN2Energy(
...
@@ -398,17 +359,11 @@ KERNEL void computeN2Energy(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
unsigned
int
offset
=
atom2
;
unsigned
int
offset
=
atom2
;
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
].
xyz
+=
local_force
[
tgx
].
xyz
;
STORE_DERIVATIVES_2
#endif
}
}
}
}
}
}
...
...
platforms/common/src/kernels/customGBEnergyPerParticle.cc
View file @
ae686364
...
@@ -10,20 +10,13 @@
...
@@ -10,20 +10,13 @@
*/
*/
KERNEL
void
computePerParticleEnergy
(
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
KERNEL
void
computePerParticleEnergy
(
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
forceBuffers
GLOBAL
mm_long
*
RESTRICT
forceBuffers
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
int
bufferSize
,
int
numBuffers
#endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
mixed
energy
=
0
;
mixed
energy
=
0
;
INIT_PARAM_DERIVS
INIT_PARAM_DERIVS
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Reduce the derivatives
// Reduce the derivatives
#ifndef SUPPORTS_64_BIT_ATOMICS
int
totalSize
=
bufferSize
*
numBuffers
;
#endif
REDUCE_DERIVATIVES
REDUCE_DERIVATIVES
// Now calculate the per-particle energy terms.
// Now calculate the per-particle energy terms.
...
...
platforms/common/src/kernels/customGBGradientChainRule.cc
View file @
ae686364
...
@@ -3,29 +3,17 @@
...
@@ -3,29 +3,17 @@
*/
*/
KERNEL
void
computeGradientChainRuleTerms
(
GLOBAL
const
real4
*
RESTRICT
posq
,
KERNEL
void
computeGradientChainRuleTerms
(
GLOBAL
const
real4
*
RESTRICT
posq
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
forceBuffers
GLOBAL
mm_long
*
RESTRICT
forceBuffers
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
#endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
INIT_PARAM_DERIVS
INIT_PARAM_DERIVS
const
real
scale
=
RECIP
((
real
)
0x100000000
);
const
real
scale
=
RECIP
((
real
)
0x100000000
);
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
real4
pos
=
posq
[
index
];
real4
pos
=
posq
[
index
];
#ifdef SUPPORTS_64_BIT_ATOMICS
real3
force
=
make_real3
(
scale
*
forceBuffers
[
index
],
scale
*
forceBuffers
[
index
+
PADDED_NUM_ATOMS
],
scale
*
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]);
real3
force
=
make_real3
(
scale
*
forceBuffers
[
index
],
scale
*
forceBuffers
[
index
+
PADDED_NUM_ATOMS
],
scale
*
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]);
#else
real3
force
=
trimTo3
(
forceBuffers
[
index
]);
#endif
COMPUTE_FORCES
COMPUTE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
forceBuffers
[
index
]
=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
index
]
=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
realToFixedPoint
(
force
.
z
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
realToFixedPoint
(
force
.
z
);
#else
forceBuffers
[
index
]
=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
#endif
}
}
SAVE_PARAM_DERIVS
SAVE_PARAM_DERIVS
}
}
platforms/common/src/kernels/customGBValueN2.cc
View file @
ae686364
...
@@ -3,11 +3,7 @@
...
@@ -3,11 +3,7 @@
*/
*/
KERNEL
void
computeN2Value
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
KERNEL
void
computeN2Value
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
GLOBAL
const
int2
*
exclusionTiles
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
global_value
,
GLOBAL
mm_ulong
*
RESTRICT
global_value
,
#else
GLOBAL
real
*
RESTRICT
global_value
,
#endif
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
GLOBAL
const
real4
*
RESTRICT
blockCenter
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
GLOBAL
const
real4
*
RESTRICT
blockCenter
,
...
@@ -137,7 +133,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -137,7 +133,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
...
@@ -146,16 +141,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -146,16 +141,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
STORE_PARAM_DERIVS2
}
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
global_value
[
offset2
]
+=
local_value
[
LOCAL_ID
];
STORE_PARAM_DERIVS2
}
#endif
}
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
@@ -317,7 +302,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -317,7 +302,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#else
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
...
@@ -326,16 +310,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -326,16 +310,6 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
STORE_PARAM_DERIVS2
}
}
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
STORE_PARAM_DERIVS1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
LOCAL_ID
];
STORE_PARAM_DERIVS2
}
#endif
}
}
pos
++
;
pos
++
;
}
}
...
...
platforms/common/src/kernels/customGBValueN2_cpu.cc
View file @
ae686364
...
@@ -3,11 +3,7 @@
...
@@ -3,11 +3,7 @@
*/
*/
KERNEL
void
computeN2Value
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
KERNEL
void
computeN2Value
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
unsigned
int
*
RESTRICT
exclusions
,
GLOBAL
const
int2
*
exclusionTiles
,
GLOBAL
const
int2
*
exclusionTiles
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
global_value
,
GLOBAL
mm_ulong
*
RESTRICT
global_value
,
#else
GLOBAL
real
*
RESTRICT
global_value
,
#endif
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
GLOBAL
const
real4
*
RESTRICT
blockCenter
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
unsigned
int
maxTiles
,
GLOBAL
const
real4
*
RESTRICT
blockCenter
,
...
@@ -84,13 +80,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -84,13 +80,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
}
}
}
}
...
@@ -146,26 +137,16 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -146,26 +137,16 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
}
}
// Write results.
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
#endif
STORE_PARAM_DERIVS2
STORE_PARAM_DERIVS2
}
}
}
}
...
@@ -273,13 +254,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -273,13 +254,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
}
}
}
}
...
@@ -322,13 +298,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -322,13 +298,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
#endif
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
}
}
}
}
...
@@ -342,13 +313,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -342,13 +313,8 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
atom2
;
unsigned
int
offset2
=
atom2
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
unsigned
int
offset2
=
atom2
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
#endif
STORE_PARAM_DERIVS2
STORE_PARAM_DERIVS2
}
}
}
}
...
...
platforms/common/src/kernels/customGBValuePerParticle.cc
View file @
ae686364
...
@@ -3,23 +3,12 @@
...
@@ -3,23 +3,12 @@
*/
*/
KERNEL
void
computePerParticleValues
(
GLOBAL
real4
*
posq
,
KERNEL
void
computePerParticleValues
(
GLOBAL
real4
*
posq
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
valueBuffers
GLOBAL
mm_long
*
valueBuffers
#else
GLOBAL
real
*
valueBuffers
,
int
bufferSize
,
int
numBuffers
#endif
PARAMETER_ARGUMENTS
)
{
PARAMETER_ARGUMENTS
)
{
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
for
(
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Reduce the pairwise value
// Reduce the pairwise value
#ifdef SUPPORTS_64_BIT_ATOMICS
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
real
sum
=
valueBuffers
[
index
]
/
(
real
)
0x100000000
;
#else
int
totalSize
=
bufferSize
*
numBuffers
;
real
sum
=
valueBuffers
[
index
];
for
(
int
i
=
index
+
bufferSize
;
i
<
totalSize
;
i
+=
bufferSize
)
sum
+=
valueBuffers
[
i
];
#endif
REDUCE_PARAM0_DERIV
REDUCE_PARAM0_DERIV
// Now calculate other values
// Now calculate other values
...
...
platforms/common/src/kernels/customHbondForce.cc
View file @
ae686364
...
@@ -44,11 +44,7 @@ inline DEVICE real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -44,11 +44,7 @@ inline DEVICE real4 computeCross(real4 vec1, real4 vec2) {
* Compute forces on donors.
* Compute forces on donors.
*/
*/
KERNEL
void
computeDonorForces
(
KERNEL
void
computeDonorForces
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
force
,
GLOBAL
mm_ulong
*
RESTRICT
force
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
const
int4
*
RESTRICT
donorBufferIndices
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
exclusions
,
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
exclusions
,
GLOBAL
const
int4
*
RESTRICT
donorAtoms
,
GLOBAL
const
int4
*
RESTRICT
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
GLOBAL
const
int4
*
RESTRICT
donorAtoms
,
GLOBAL
const
int4
*
RESTRICT
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
...
@@ -114,7 +110,6 @@ KERNEL void computeDonorForces(
...
@@ -114,7 +110,6 @@ KERNEL void computeDonorForces(
// Write results
// Write results
if
(
donorIndex
<
NUM_DONORS
)
{
if
(
donorIndex
<
NUM_DONORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
...
@@ -133,27 +128,6 @@ KERNEL void computeDonorForces(
...
@@ -133,27 +128,6 @@ KERNEL void computeDonorForces(
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
#else
int4
bufferIndices
=
donorBufferIndices
[
donorIndex
];
if
(
atoms
.
x
>
-
1
)
{
unsigned
int
offset
=
atoms
.
x
+
bufferIndices
.
x
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f1
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
y
>
-
1
)
{
unsigned
int
offset
=
atoms
.
y
+
bufferIndices
.
y
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f2
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
z
>
-
1
)
{
unsigned
int
offset
=
atoms
.
z
+
bufferIndices
.
z
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f3
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
#endif
}
}
}
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
;
energyBuffer
[
GLOBAL_ID
]
+=
energy
;
...
@@ -162,11 +136,7 @@ KERNEL void computeDonorForces(
...
@@ -162,11 +136,7 @@ KERNEL void computeDonorForces(
* Compute forces on acceptors.
* Compute forces on acceptors.
*/
*/
KERNEL
void
computeAcceptorForces
(
KERNEL
void
computeAcceptorForces
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
force
,
GLOBAL
mm_ulong
*
RESTRICT
force
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
const
int4
*
RESTRICT
acceptorBufferIndices
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
exclusions
,
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
exclusions
,
GLOBAL
const
int4
*
RESTRICT
donorAtoms
,
GLOBAL
const
int4
*
RESTRICT
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
GLOBAL
const
int4
*
RESTRICT
donorAtoms
,
GLOBAL
const
int4
*
RESTRICT
acceptorAtoms
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
...
@@ -231,7 +201,6 @@ KERNEL void computeAcceptorForces(
...
@@ -231,7 +201,6 @@ KERNEL void computeAcceptorForces(
// Write results
// Write results
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
...
@@ -250,27 +219,6 @@ KERNEL void computeAcceptorForces(
...
@@ -250,27 +219,6 @@ KERNEL void computeAcceptorForces(
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
#else
int4
bufferIndices
=
acceptorBufferIndices
[
acceptorIndex
];
if
(
atoms
.
x
>
-
1
)
{
unsigned
int
offset
=
atoms
.
x
+
bufferIndices
.
x
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f1
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
y
>
-
1
)
{
unsigned
int
offset
=
atoms
.
y
+
bufferIndices
.
y
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f2
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
if
(
atoms
.
z
>
-
1
)
{
unsigned
int
offset
=
atoms
.
z
+
bufferIndices
.
z
*
PADDED_NUM_ATOMS
;
real4
force
=
forceBuffers
[
offset
];
force
.
xyz
+=
f3
.
xyz
;
forceBuffers
[
offset
]
=
force
;
}
#endif
}
}
}
}
}
}
platforms/common/src/kernels/customNonbondedGroups.cc
View file @
ae686364
...
@@ -35,38 +35,8 @@ DEVICE int reduceMax(int val, LOCAL_ARG int* temp) {
...
@@ -35,38 +35,8 @@ DEVICE int reduceMax(int val, LOCAL_ARG int* temp) {
#endif
#endif
}
}
#ifndef SUPPORTS_64_BIT_ATOMICS
/**
* This function is used on devices that don't support 64 bit atomics. Multiple threads within
* a single tile might have computed forces on the same atom. This loops over them and makes sure
* that only one thread updates the force on any given atom.
*/
void
writeForces
(
GLOBAL
real4
*
forceBuffers
,
LOCAL
AtomData
*
localData
,
int
atomIndex
)
{
localData
[
LOCAL_ID
].
x
=
atomIndex
;
SYNC_WARPS
;
real4
forceSum
=
make_real4
(
0
);
int
start
=
(
LOCAL_ID
/
TILE_SIZE
)
*
TILE_SIZE
;
int
end
=
start
+
32
;
bool
isFirst
=
true
;
for
(
int
i
=
start
;
i
<
end
;
i
++
)
if
(
localData
[
i
].
x
==
atomIndex
)
{
forceSum
+=
(
real4
)
(
localData
[
i
].
fx
,
localData
[
i
].
fy
,
localData
[
i
].
fz
,
0
);
isFirst
&=
(
i
>=
LOCAL_ID
);
}
const
unsigned
int
warp
=
GLOBAL_ID
/
TILE_SIZE
;
unsigned
int
offset
=
atomIndex
+
warp
*
PADDED_NUM_ATOMS
;
if
(
isFirst
)
forceBuffers
[
offset
]
+=
forceSum
;
SYNC_WARPS
;
}
#endif
KERNEL
void
computeInteractionGroups
(
KERNEL
void
computeInteractionGroups
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
groupData
,
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
int4
*
RESTRICT
groupData
,
GLOBAL
const
int
*
RESTRICT
numGroupTiles
,
int
useNeighborList
,
GLOBAL
const
int
*
RESTRICT
numGroupTiles
,
int
useNeighborList
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
...
@@ -139,7 +109,6 @@ KERNEL void computeInteractionGroups(
...
@@ -139,7 +109,6 @@ KERNEL void computeInteractionGroups(
}
}
SYNC_WARPS
;
SYNC_WARPS
;
}
}
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
exclusions
!=
0
)
{
if
(
exclusions
!=
0
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
...
@@ -149,13 +118,6 @@ KERNEL void computeInteractionGroups(
...
@@ -149,13 +118,6 @@ KERNEL void computeInteractionGroups(
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
SYNC_WARPS
;
SYNC_WARPS
;
#else
writeForces
(
forceBuffers
,
localData
,
atom2
);
localData
[
LOCAL_ID
].
fx
=
force
.
x
;
localData
[
LOCAL_ID
].
fy
=
force
.
y
;
localData
[
LOCAL_ID
].
fz
=
force
.
z
;
writeForces
(
forceBuffers
,
localData
,
atom1
);
#endif
}
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
;
energyBuffer
[
GLOBAL_ID
]
+=
energy
;
SAVE_DERIVATIVES
SAVE_DERIVATIVES
...
...
platforms/common/src/kernels/gbsaObc.cc
View file @
ae686364
...
@@ -17,11 +17,7 @@ typedef struct ALIGN {
...
@@ -17,11 +17,7 @@ typedef struct ALIGN {
* Compute the Born sum.
* Compute the Born sum.
*/
*/
KERNEL
void
computeBornSum
(
KERNEL
void
computeBornSum
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
global_bornSum
,
GLOBAL
mm_ulong
*
RESTRICT
global_bornSum
,
#else
GLOBAL
real
*
RESTRICT
global_bornSum
,
#endif
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
float2
*
RESTRICT
global_params
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
float2
*
RESTRICT
global_params
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
@@ -152,20 +148,12 @@ KERNEL void computeBornSum(
...
@@ -152,20 +148,12 @@ KERNEL void computeBornSum(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
}
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset1
]
+=
bornSum
;
if
(
x
!=
y
)
global_bornSum
[
offset2
]
+=
localData
[
LOCAL_ID
].
bornSum
;
#endif
}
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
@@ -357,17 +345,9 @@ KERNEL void computeBornSum(
...
@@ -357,17 +345,9 @@ KERNEL void computeBornSum(
#else
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
atom2
<
PADDED_NUM_ATOMS
)
if
(
atom2
<
PADDED_NUM_ATOMS
)
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset1
]
+=
bornSum
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
global_bornSum
[
offset2
]
+=
localData
[
LOCAL_ID
].
bornSum
;
#endif
}
}
pos
++
;
pos
++
;
}
}
...
@@ -385,11 +365,7 @@ typedef struct ALIGN {
...
@@ -385,11 +365,7 @@ typedef struct ALIGN {
*/
*/
KERNEL
void
computeGBSAForce1
(
KERNEL
void
computeGBSAForce1
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
GLOBAL
mm_ulong
*
RESTRICT
global_bornForce
,
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
,
GLOBAL
mm_ulong
*
RESTRICT
global_bornForce
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
real
*
RESTRICT
global_bornForce
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
real
*
RESTRICT
global_bornRadii
,
int
needEnergy
,
GLOBAL
const
real
*
RESTRICT
global_bornRadii
,
int
needEnergy
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -538,7 +514,6 @@ KERNEL void computeGBSAForce1(
...
@@ -538,7 +514,6 @@ KERNEL void computeGBSAForce1(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
...
@@ -551,16 +526,6 @@ KERNEL void computeGBSAForce1(
...
@@ -551,16 +526,6 @@ KERNEL void computeGBSAForce1(
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset1
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset1
]
+=
force
.
w
;
if
(
x
!=
y
)
{
forceBuffers
[
offset2
]
+=
(
real4
)
(
localData
[
LOCAL_ID
].
fx
,
localData
[
LOCAL_ID
].
fy
,
localData
[
LOCAL_ID
].
fz
,
0.0
f
);
global_bornForce
[
offset2
]
+=
localData
[
LOCAL_ID
].
fw
;
}
#endif
}
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
@@ -763,7 +728,6 @@ KERNEL void computeGBSAForce1(
...
@@ -763,7 +728,6 @@ KERNEL void computeGBSAForce1(
#else
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
...
@@ -774,16 +738,6 @@ KERNEL void computeGBSAForce1(
...
@@ -774,16 +738,6 @@ KERNEL void computeGBSAForce1(
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
}
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset1
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset1
]
+=
force
.
w
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
forceBuffers
[
offset2
]
+=
(
real4
)
(
localData
[
LOCAL_ID
].
fx
,
localData
[
LOCAL_ID
].
fy
,
localData
[
LOCAL_ID
].
fz
,
0.0
f
);
global_bornForce
[
offset2
]
+=
localData
[
LOCAL_ID
].
fw
;
}
#endif
}
}
pos
++
;
pos
++
;
}
}
...
...
platforms/common/src/kernels/gbsaObc2.cc
View file @
ae686364
...
@@ -16,13 +16,8 @@
...
@@ -16,13 +16,8 @@
real
t2I
=
(
l_ij2I
-
u_ij2I
);
real
t2I
=
(
l_ij2I
-
u_ij2I
);
real
term1
=
(
0.5
f
*
(
0.25
f
+
OBC_PARAMS2
.
y
*
OBC_PARAMS2
.
y
*
invRSquaredOver4
)
*
t2J
+
t1J
*
invRSquaredOver4
)
*
invR
;
real
term1
=
(
0.5
f
*
(
0.25
f
+
OBC_PARAMS2
.
y
*
OBC_PARAMS2
.
y
*
invRSquaredOver4
)
*
t2J
+
t1J
*
invRSquaredOver4
)
*
invR
;
real
term2
=
(
0.5
f
*
(
0.25
f
+
OBC_PARAMS1
.
y
*
OBC_PARAMS1
.
y
*
invRSquaredOver4
)
*
t2I
+
t1I
*
invRSquaredOver4
)
*
invR
;
real
term2
=
(
0.5
f
*
(
0.25
f
+
OBC_PARAMS1
.
y
*
OBC_PARAMS1
.
y
*
invRSquaredOver4
)
*
t2I
+
t1I
*
invRSquaredOver4
)
*
invR
;
#ifdef SUPPORTS_64_BIT_ATOMICS
real
tempdEdR
=
(
OBC_PARAMS1
.
x
<
rScaledRadiusJ
?
BORN_FORCE1
*
term1
/
0x100000000
:
0
);
real
tempdEdR
=
(
OBC_PARAMS1
.
x
<
rScaledRadiusJ
?
BORN_FORCE1
*
term1
/
0x100000000
:
0
);
tempdEdR
+=
(
OBC_PARAMS2
.
x
<
rScaledRadiusI
?
BORN_FORCE2
*
term2
/
0x100000000
:
0
);
tempdEdR
+=
(
OBC_PARAMS2
.
x
<
rScaledRadiusI
?
BORN_FORCE2
*
term2
/
0x100000000
:
0
);
#else
real
tempdEdR
=
(
OBC_PARAMS1
.
x
<
rScaledRadiusJ
?
BORN_FORCE1
*
term1
:
(
real
)
0
);
tempdEdR
+=
(
OBC_PARAMS2
.
x
<
rScaledRadiusI
?
BORN_FORCE2
*
term2
:
(
real
)
0
);
#endif
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
includeInteraction
=
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
&&
r2
<
CUTOFF_SQUARED
);
unsigned
int
includeInteraction
=
(
atom1
<
NUM_ATOMS
&&
atom2
<
NUM_ATOMS
&&
atom1
!=
atom2
&&
r2
<
CUTOFF_SQUARED
);
#else
#else
...
...
platforms/common/src/kernels/gbsaObcReductions.cc
View file @
ae686364
...
@@ -6,23 +6,12 @@
...
@@ -6,23 +6,12 @@
*/
*/
KERNEL
void
reduceBornSum
(
float
alpha
,
float
beta
,
float
gamma
,
KERNEL
void
reduceBornSum
(
float
alpha
,
float
beta
,
float
gamma
,
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
const
mm_long
*
RESTRICT
bornSum
,
GLOBAL
const
mm_long
*
RESTRICT
bornSum
,
#else
GLOBAL
const
real
*
RESTRICT
bornSum
,
int
bufferSize
,
int
numBuffers
,
#endif
GLOBAL
const
float2
*
RESTRICT
params
,
GLOBAL
real
*
RESTRICT
bornRadii
,
GLOBAL
real
*
RESTRICT
obcChain
)
{
GLOBAL
const
float2
*
RESTRICT
params
,
GLOBAL
real
*
RESTRICT
bornRadii
,
GLOBAL
real
*
RESTRICT
obcChain
)
{
for
(
unsigned
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
for
(
unsigned
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Get summed Born data
// Get summed Born data
#ifdef SUPPORTS_64_BIT_ATOMICS
real
sum
=
RECIP
((
real
)
0x100000000
)
*
bornSum
[
index
];
real
sum
=
RECIP
((
real
)
0x100000000
)
*
bornSum
[
index
];
#else
real
sum
=
bornSum
[
index
];
int
totalSize
=
bufferSize
*
numBuffers
;
for
(
int
i
=
index
+
bufferSize
;
i
<
totalSize
;
i
+=
bufferSize
)
sum
+=
bornSum
[
i
];
#endif
// Now calculate Born radius and OBC term.
// Now calculate Born radius and OBC term.
...
@@ -45,24 +34,14 @@ KERNEL void reduceBornSum(float alpha, float beta, float gamma,
...
@@ -45,24 +34,14 @@ KERNEL void reduceBornSum(float alpha, float beta, float gamma,
*/
*/
KERNEL
void
reduceBornForce
(
KERNEL
void
reduceBornForce
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
bornForce
,
GLOBAL
mm_long
*
RESTRICT
bornForce
,
#else
GLOBAL
real
*
bornForce
,
int
bufferSize
,
int
numBuffers
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
float2
*
RESTRICT
params
,
GLOBAL
const
real
*
RESTRICT
bornRadii
,
GLOBAL
const
real
*
RESTRICT
obcChain
)
{
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
float2
*
RESTRICT
params
,
GLOBAL
const
real
*
RESTRICT
bornRadii
,
GLOBAL
const
real
*
RESTRICT
obcChain
)
{
mixed
energy
=
0
;
mixed
energy
=
0
;
for
(
unsigned
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
for
(
unsigned
int
index
=
GLOBAL_ID
;
index
<
NUM_ATOMS
;
index
+=
GLOBAL_SIZE
)
{
// Get summed Born force
// Get summed Born force
#ifdef SUPPORTS_64_BIT_ATOMICS
real
force
=
RECIP
((
real
)
0x100000000
)
*
bornForce
[
index
];
real
force
=
RECIP
((
real
)
0x100000000
)
*
bornForce
[
index
];
#else
real
force
=
bornForce
[
index
];
int
totalSize
=
bufferSize
*
numBuffers
;
for
(
int
i
=
index
+
bufferSize
;
i
<
totalSize
;
i
+=
bufferSize
)
force
+=
bornForce
[
i
];
#endif
// Now calculate the actual force
// Now calculate the actual force
float
offsetRadius
=
params
[
index
].
x
;
float
offsetRadius
=
params
[
index
].
x
;
...
@@ -73,11 +52,7 @@ KERNEL void reduceBornForce(
...
@@ -73,11 +52,7 @@ KERNEL void reduceBornForce(
force
+=
saTerm
/
bornRadius
;
force
+=
saTerm
/
bornRadius
;
energy
+=
saTerm
;
energy
+=
saTerm
;
force
*=
bornRadius
*
bornRadius
*
obcChain
[
index
];
force
*=
bornRadius
*
bornRadius
*
obcChain
[
index
];
#ifdef SUPPORTS_64_BIT_ATOMICS
bornForce
[
index
]
=
realToFixedPoint
(
force
);
bornForce
[
index
]
=
realToFixedPoint
(
force
);
#else
bornForce
[
index
]
=
force
;
#endif
}
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
/-
6
;
energyBuffer
[
GLOBAL_ID
]
+=
energy
/-
6
;
}
}
platforms/common/src/kernels/gbsaObc_cpu.cc
View file @
ae686364
...
@@ -9,11 +9,7 @@ typedef struct {
...
@@ -9,11 +9,7 @@ typedef struct {
* Compute the Born sum.
* Compute the Born sum.
*/
*/
KERNEL
void
computeBornSum
(
KERNEL
void
computeBornSum
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
global_bornSum
,
GLOBAL
mm_long
*
RESTRICT
global_bornSum
,
#else
GLOBAL
real
*
RESTRICT
global_bornSum
,
#endif
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
float2
*
RESTRICT
global_params
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
float2
*
RESTRICT
global_params
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
GLOBAL
const
int
*
RESTRICT
tiles
,
GLOBAL
const
unsigned
int
*
RESTRICT
interactionCount
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
...
@@ -87,12 +83,7 @@ KERNEL void computeBornSum(
...
@@ -87,12 +83,7 @@ KERNEL void computeBornSum(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
}
}
}
else
{
else
{
...
@@ -149,24 +140,14 @@ KERNEL void computeBornSum(
...
@@ -149,24 +140,14 @@ KERNEL void computeBornSum(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
}
// Write results.
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
#endif
}
}
}
}
}
}
...
@@ -296,12 +277,7 @@ KERNEL void computeBornSum(
...
@@ -296,12 +277,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
}
}
}
else
else
...
@@ -359,12 +335,7 @@ KERNEL void computeBornSum(
...
@@ -359,12 +335,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
#endif
}
}
}
}
...
@@ -377,12 +348,7 @@ KERNEL void computeBornSum(
...
@@ -377,12 +348,7 @@ KERNEL void computeBornSum(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
#endif
}
}
}
}
}
}
...
@@ -402,11 +368,7 @@ typedef struct {
...
@@ -402,11 +368,7 @@ typedef struct {
*/
*/
KERNEL
void
computeGBSAForce1
(
KERNEL
void
computeGBSAForce1
(
#ifdef SUPPORTS_64_BIT_ATOMICS
GLOBAL
mm_long
*
RESTRICT
forceBuffers
,
GLOBAL
mm_long
*
RESTRICT
global_bornForce
,
GLOBAL
mm_long
*
RESTRICT
forceBuffers
,
GLOBAL
mm_long
*
RESTRICT
global_bornForce
,
#else
GLOBAL
real4
*
RESTRICT
forceBuffers
,
GLOBAL
real
*
RESTRICT
global_bornForce
,
#endif
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
mixed
*
RESTRICT
energyBuffer
,
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
const
real
*
RESTRICT
charge
,
GLOBAL
const
real
*
RESTRICT
global_bornRadii
,
int
needEnergy
,
GLOBAL
const
real
*
RESTRICT
global_bornRadii
,
int
needEnergy
,
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -490,16 +452,10 @@ KERNEL void computeGBSAForce1(
...
@@ -490,16 +452,10 @@ KERNEL void computeGBSAForce1(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
}
}
}
else
{
else
{
...
@@ -561,36 +517,20 @@ KERNEL void computeGBSAForce1(
...
@@ -561,36 +517,20 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
}
// Write results.
// Write results.
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
f
.
x
+=
localData
[
tgx
].
fx
;
f
.
y
+=
localData
[
tgx
].
fy
;
f
.
z
+=
localData
[
tgx
].
fz
;
forceBuffers
[
offset
]
=
f
;
global_bornForce
[
offset
]
+=
localData
[
tgx
].
fw
;
#endif
}
}
}
}
}
}
...
@@ -722,16 +662,10 @@ KERNEL void computeGBSAForce1(
...
@@ -722,16 +662,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
}
}
}
else
else
...
@@ -790,16 +724,10 @@ KERNEL void computeGBSAForce1(
...
@@ -790,16 +724,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
global_bornForce
[
offset
]
+=
force
.
w
;
#endif
}
}
}
}
...
@@ -812,20 +740,10 @@ KERNEL void computeGBSAForce1(
...
@@ -812,20 +740,10 @@ KERNEL void computeGBSAForce1(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
f
.
x
+=
localData
[
tgx
].
fx
;
f
.
y
+=
localData
[
tgx
].
fy
;
f
.
z
+=
localData
[
tgx
].
fz
;
forceBuffers
[
offset
]
=
f
;
global_bornForce
[
offset
]
+=
localData
[
tgx
].
fw
;
#endif
}
}
}
}
}
}
...
...
platforms/common/src/kernels/pme.cc
View file @
ae686364
KERNEL
void
findAtomGridIndex
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
int2
*
RESTRICT
pmeAtomGridIndex
,
KERNEL
void
findAtomGridIndex
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
int2
*
RESTRICT
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
recipBoxVecX
,
real4
recipBoxVecY
,
real4
recipBoxVecZ
real4
recipBoxVecX
,
real4
recipBoxVecY
,
real4
recipBoxVecZ
#ifndef SUPPORTS_64_BIT_ATOMICS
,
GLOBAL
real4
*
RESTRICT
pmeBsplineTheta
,
LOCAL
real4
*
RESTRICT
bsplinesCache
,
#ifdef CHARGE_FROM_SIGEPS
GLOBAL
const
float2
*
RESTRICT
sigmaEpsilon
#else
GLOBAL
const
real
*
RESTRICT
charges
#endif
#endif
)
{
)
{
// Compute the index of the grid point each atom is associated with.
// Compute the index of the grid point each atom is associated with.
...
@@ -25,42 +17,9 @@ KERNEL void findAtomGridIndex(GLOBAL const real4* RESTRICT posq, GLOBAL int2* RE
...
@@ -25,42 +17,9 @@ KERNEL void findAtomGridIndex(GLOBAL const real4* RESTRICT posq, GLOBAL int2* RE
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
((
int
)
t
.
z
)
%
GRID_SIZE_Z
);
pmeAtomGridIndex
[
atom
]
=
make_int2
(
atom
,
gridIndex
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
gridIndex
.
y
*
GRID_SIZE_Z
+
gridIndex
.
z
);
pmeAtomGridIndex
[
atom
]
=
make_int2
(
atom
,
gridIndex
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
gridIndex
.
y
*
GRID_SIZE_Z
+
gridIndex
.
z
);
#ifndef SUPPORTS_64_BIT_ATOMICS
// Compute B-splines here for use in the charge spreading kernel.
const
real4
scale
=
1
/
(
real
)
(
PME_ORDER
-
1
);
LOCAL
real4
*
data
=
&
bsplinesCache
[
LOCAL_ID
*
PME_ORDER
];
real4
dr
=
(
real4
)
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
,
0.0
f
);
data
[
PME_ORDER
-
1
]
=
0.0
f
;
data
[
1
]
=
dr
;
data
[
0
]
=
1.0
f
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
real
div
=
RECIP
(
j
-
1.0
f
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
data
[
j
-
k
-
1
]
=
div
*
((
dr
+
make_real4
(
k
))
*
data
[
j
-
k
-
2
]
+
(
-
dr
+
make_real4
(
j
-
k
))
*
data
[
j
-
k
-
1
]);
data
[
0
]
=
div
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
}
data
[
PME_ORDER
-
1
]
=
scale
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
scale
*
((
dr
+
make_real4
(
j
))
*
data
[
PME_ORDER
-
j
-
2
]
+
(
-
dr
+
make_real4
(
PME_ORDER
-
j
))
*
data
[
PME_ORDER
-
j
-
1
]);
data
[
0
]
=
scale
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
for
(
int
j
=
0
;
j
<
PME_ORDER
;
j
++
)
{
#ifdef CHARGE_FROM_SIGEPS
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
charge
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
const
real
charge
=
CHARGE
;
#endif
data
[
j
].
w
=
charge
;
// Storing the charge here improves cache coherency in the charge spreading kernel
pmeBsplineTheta
[
atom
+
j
*
NUM_ATOMS
]
=
data
[
j
];
}
#endif
}
}
}
}
#ifdef SUPPORTS_64_BIT_ATOMICS
#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#if defined(USE_HIP) && !defined(AMD_RDNA)
#if defined(USE_HIP) && !defined(AMD_RDNA)
LAUNCH_BOUNDS_EXACT
(
128
,
1
)
LAUNCH_BOUNDS_EXACT
(
128
,
1
)
#endif
#endif
...
@@ -206,197 +165,6 @@ KERNEL void finishSpreadCharge(
...
@@ -206,197 +165,6 @@ KERNEL void finishSpreadCharge(
#endif
#endif
}
}
}
}
#elif defined(DEVICE_IS_CPU)
KERNEL
void
gridSpreadCharge
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
real
*
RESTRICT
pmeGrid
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real4
recipBoxVecX
,
real4
recipBoxVecY
,
real4
recipBoxVecZ
,
#ifdef CHARGE_FROM_SIGEPS
GLOBAL
const
float2
*
RESTRICT
sigmaEpsilon
#else
GLOBAL
const
real
*
RESTRICT
charges
#endif
)
{
const
int
firstx
=
GLOBAL_ID
*
GRID_SIZE_X
/
GLOBAL_SIZE
;
const
int
lastx
=
(
GLOBAL_ID
+
1
)
*
GRID_SIZE_X
/
GLOBAL_SIZE
;
if
(
firstx
==
lastx
)
return
;
const
real4
scale
=
1
/
(
real
)
(
PME_ORDER
-
1
);
real4
data
[
PME_ORDER
];
// Process the atoms in spatially sorted order. This improves efficiency when writing
// the grid values.
for
(
int
i
=
0
;
i
<
NUM_ATOMS
;
i
++
)
{
int
atom
=
i
;
real4
pos
=
posq
[
atom
];
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
(
real3
)
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
t
.
x
=
(
t
.
x
-
floor
(
t
.
x
))
*
GRID_SIZE_X
;
t
.
y
=
(
t
.
y
-
floor
(
t
.
y
))
*
GRID_SIZE_Y
;
t
.
z
=
(
t
.
z
-
floor
(
t
.
z
))
*
GRID_SIZE_Z
;
int4
gridIndex
=
(
int4
)
(((
int
)
t
.
x
)
%
GRID_SIZE_X
,
((
int
)
t
.
y
)
%
GRID_SIZE_Y
,
((
int
)
t
.
z
)
%
GRID_SIZE_Z
,
0
);
// Spread the charge from this atom onto each grid point.
#ifdef CHARGE_FROM_SIGEPS
const
float2
sigEps
=
sigmaEpsilon
[
atom
];
const
real
charge
=
8
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
x
*
sigEps
.
y
;
#else
const
real
charge
=
(
CHARGE
)
*
EPSILON_FACTOR
;
#endif
if
(
charge
==
0
)
continue
;
bool
hasComputedThetas
=
false
;
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
ix
++
)
{
int
xindex
=
gridIndex
.
x
+
ix
;
xindex
-=
(
xindex
>=
GRID_SIZE_X
?
GRID_SIZE_X
:
0
);
if
(
xindex
<
firstx
||
xindex
>=
lastx
)
continue
;
if
(
!
hasComputedThetas
)
{
hasComputedThetas
=
true
;
// Since we need the full set of thetas, it's faster to compute them here than load them
// from global memory.
real4
dr
=
(
real4
)
(
t
.
x
-
(
int
)
t
.
x
,
t
.
y
-
(
int
)
t
.
y
,
t
.
z
-
(
int
)
t
.
z
,
0.0
f
);
data
[
PME_ORDER
-
1
]
=
0.0
f
;
data
[
1
]
=
dr
;
data
[
0
]
=
1.0
f
-
dr
;
for
(
int
j
=
3
;
j
<
PME_ORDER
;
j
++
)
{
real
div
=
RECIP
(
j
-
1.0
f
);
data
[
j
-
1
]
=
div
*
dr
*
data
[
j
-
2
];
for
(
int
k
=
1
;
k
<
(
j
-
1
);
k
++
)
data
[
j
-
k
-
1
]
=
div
*
((
dr
+
(
real4
)
k
)
*
data
[
j
-
k
-
2
]
+
(
-
dr
+
(
real4
)
(
j
-
k
))
*
data
[
j
-
k
-
1
]);
data
[
0
]
=
div
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
}
data
[
PME_ORDER
-
1
]
=
scale
*
dr
*
data
[
PME_ORDER
-
2
];
for
(
int
j
=
1
;
j
<
(
PME_ORDER
-
1
);
j
++
)
data
[
PME_ORDER
-
j
-
1
]
=
scale
*
((
dr
+
(
real4
)
j
)
*
data
[
PME_ORDER
-
j
-
2
]
+
(
-
dr
+
(
real4
)
(
PME_ORDER
-
j
))
*
data
[
PME_ORDER
-
j
-
1
]);
data
[
0
]
=
scale
*
(
-
dr
+
1.0
f
)
*
data
[
0
];
}
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
iy
++
)
{
int
yindex
=
gridIndex
.
y
+
iy
;
yindex
-=
(
yindex
>=
GRID_SIZE_Y
?
GRID_SIZE_Y
:
0
);
for
(
int
iz
=
0
;
iz
<
PME_ORDER
;
iz
++
)
{
int
zindex
=
gridIndex
.
z
+
iz
;
zindex
-=
(
zindex
>=
GRID_SIZE_Z
?
GRID_SIZE_Z
:
0
);
int
index
=
xindex
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
yindex
*
GRID_SIZE_Z
+
zindex
;
pmeGrid
[
index
]
+=
charge
*
data
[
ix
].
x
*
data
[
iy
].
y
*
data
[
iz
].
z
;
}
}
}
}
}
#else
/**
* For each grid point, find the range of sorted atoms associated with that point.
*/
KERNEL
void
findAtomRangeForGrid
(
GLOBAL
int2
*
RESTRICT
pmeAtomGridIndex
,
GLOBAL
int
*
RESTRICT
pmeAtomRange
,
GLOBAL
const
real4
*
RESTRICT
posq
)
{
int
start
=
(
NUM_ATOMS
*
GLOBAL_ID
)
/
GLOBAL_SIZE
;
int
end
=
(
NUM_ATOMS
*
(
GLOBAL_ID
+
1
))
/
GLOBAL_SIZE
;
int
last
=
(
start
==
0
?
-
1
:
pmeAtomGridIndex
[
start
-
1
].
y
);
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
gridIndex
=
atomData
.
y
;
if
(
gridIndex
!=
last
)
{
for
(
int
j
=
last
+
1
;
j
<=
gridIndex
;
++
j
)
pmeAtomRange
[
j
]
=
i
;
last
=
gridIndex
;
}
}
// Fill in values beyond the last atom.
if
(
GLOBAL_ID
==
GLOBAL_SIZE
-
1
)
{
int
gridSize
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
for
(
int
j
=
last
+
1
;
j
<=
gridSize
;
++
j
)
pmeAtomRange
[
j
]
=
NUM_ATOMS
;
}
}
/**
* The grid index won't be needed again. Reuse that component to hold the z index, thus saving
* some work in the charge spreading kernel.
*/
KERNEL
void
recordZIndex
(
GLOBAL
int2
*
RESTRICT
pmeAtomGridIndex
,
GLOBAL
const
real4
*
RESTRICT
posq
,
real4
periodicBoxSize
,
real4
recipBoxVecZ
)
{
int
start
=
(
NUM_ATOMS
*
GLOBAL_ID
)
/
GLOBAL_SIZE
;
int
end
=
(
NUM_ATOMS
*
(
GLOBAL_ID
+
1
))
/
GLOBAL_SIZE
;
for
(
int
i
=
start
;
i
<
end
;
++
i
)
{
real
posz
=
posq
[
pmeAtomGridIndex
[
i
].
x
].
z
;
posz
-=
floor
(
posz
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
int
z
=
((
int
)
((
posz
*
recipBoxVecZ
.
z
)
*
GRID_SIZE_Z
))
%
GRID_SIZE_Z
;
pmeAtomGridIndex
[
i
].
y
=
z
;
}
}
KERNEL
void
gridSpreadCharge
(
GLOBAL
const
real4
*
RESTRICT
posq
,
GLOBAL
real
*
RESTRICT
pmeGrid
,
GLOBAL
const
int2
*
RESTRICT
pmeAtomGridIndex
,
GLOBAL
const
int
*
RESTRICT
pmeAtomRange
,
GLOBAL
const
real4
*
RESTRICT
pmeBsplineTheta
#ifdef CHARGE_FROM_SIGEPS
,
GLOBAL
const
float2
*
RESTRICT
sigmaEpsilon
#else
,
GLOBAL
const
real
*
RESTRICT
charges
#endif
)
{
unsigned
int
numGridPoints
=
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
for
(
int
gridIndex
=
GLOBAL_ID
;
gridIndex
<
numGridPoints
;
gridIndex
+=
GLOBAL_SIZE
)
{
// Compute the charge on a grid point.
int4
gridPoint
;
gridPoint
.
x
=
gridIndex
/
(
GRID_SIZE_Y
*
GRID_SIZE_Z
);
int
remainder
=
gridIndex
-
gridPoint
.
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
;
gridPoint
.
y
=
remainder
/
GRID_SIZE_Z
;
gridPoint
.
z
=
remainder
-
gridPoint
.
y
*
GRID_SIZE_Z
;
real
result
=
0.0
f
;
// Loop over all atoms that affect this grid point.
for
(
int
ix
=
0
;
ix
<
PME_ORDER
;
++
ix
)
{
int
x
=
gridPoint
.
x
-
ix
+
(
gridPoint
.
x
>=
ix
?
0
:
GRID_SIZE_X
);
for
(
int
iy
=
0
;
iy
<
PME_ORDER
;
++
iy
)
{
int
y
=
gridPoint
.
y
-
iy
+
(
gridPoint
.
y
>=
iy
?
0
:
GRID_SIZE_Y
);
int
z1
=
gridPoint
.
z
-
PME_ORDER
+
1
;
z1
+=
(
z1
>=
0
?
0
:
GRID_SIZE_Z
);
int
z2
=
(
z1
<
gridPoint
.
z
?
gridPoint
.
z
:
GRID_SIZE_Z
-
1
);
int
gridIndex1
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
z1
;
int
gridIndex2
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
z2
;
int
firstAtom
=
pmeAtomRange
[
gridIndex1
];
int
lastAtom
=
pmeAtomRange
[
gridIndex2
+
1
];
for
(
int
i
=
firstAtom
;
i
<
lastAtom
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
GRID_SIZE_Z
);
real
atomCharge
=
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
w
;
result
+=
atomCharge
*
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
x
*
pmeBsplineTheta
[
atomIndex
+
iy
*
NUM_ATOMS
].
y
*
pmeBsplineTheta
[
atomIndex
+
iz
*
NUM_ATOMS
].
z
;
}
if
(
z1
>
gridPoint
.
z
)
{
gridIndex1
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
;
gridIndex2
=
x
*
GRID_SIZE_Y
*
GRID_SIZE_Z
+
y
*
GRID_SIZE_Z
+
gridPoint
.
z
;
firstAtom
=
pmeAtomRange
[
gridIndex1
];
lastAtom
=
pmeAtomRange
[
gridIndex2
+
1
];
for
(
int
i
=
firstAtom
;
i
<
lastAtom
;
++
i
)
{
int2
atomData
=
pmeAtomGridIndex
[
i
];
int
atomIndex
=
atomData
.
x
;
int
z
=
atomData
.
y
;
int
iz
=
gridPoint
.
z
-
z
+
(
gridPoint
.
z
>=
z
?
0
:
GRID_SIZE_Z
);
real
atomCharge
=
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
w
;
result
+=
atomCharge
*
pmeBsplineTheta
[
atomIndex
+
ix
*
NUM_ATOMS
].
x
*
pmeBsplineTheta
[
atomIndex
+
iy
*
NUM_ATOMS
].
y
*
pmeBsplineTheta
[
atomIndex
+
iz
*
NUM_ATOMS
].
z
;
}
}
}
}
pmeGrid
[
gridIndex
]
=
result
*
EPSILON_FACTOR
;
}
}
#endif
KERNEL
void
reciprocalConvolution
(
GLOBAL
real2
*
RESTRICT
pmeGrid
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliX
,
KERNEL
void
reciprocalConvolution
(
GLOBAL
real2
*
RESTRICT
pmeGrid
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliX
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliY
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliZ
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliY
,
GLOBAL
const
real
*
RESTRICT
pmeBsplineModuliZ
,
...
...
platforms/opencl/include/OpenCLBondedUtilities.h
View file @
ae686364
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-20
19
Stanford University and the Authors. *
* Portions copyright (c) 2011-20
22
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -133,12 +133,6 @@ public:
...
@@ -133,12 +133,6 @@ public:
* Initialize this object in preparation for a simulation.
* Initialize this object in preparation for a simulation.
*/
*/
void
initialize
(
const
System
&
system
);
void
initialize
(
const
System
&
system
);
/**
* Get the number of force buffers required for bonded forces.
*/
int
getNumForceBuffers
()
{
return
numForceBuffers
;
}
/**
/**
* Compute the bonded interactions.
* Compute the bonded interactions.
*
*
...
@@ -148,19 +142,17 @@ public:
...
@@ -148,19 +142,17 @@ public:
private:
private:
std
::
string
createForceSource
(
int
forceIndex
,
int
numBonds
,
int
numAtoms
,
int
group
,
const
std
::
string
&
computeForce
);
std
::
string
createForceSource
(
int
forceIndex
,
int
numBonds
,
int
numAtoms
,
int
group
,
const
std
::
string
&
computeForce
);
OpenCLContext
&
context
;
OpenCLContext
&
context
;
std
::
vector
<
cl
::
Kernel
>
kernel
s
;
cl
::
Kernel
kernel
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>
forceAtoms
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>
forceAtoms
;
std
::
vector
<
int
>
indexWidth
;
std
::
vector
<
int
>
indexWidth
;
std
::
vector
<
std
::
string
>
forceSource
;
std
::
vector
<
std
::
string
>
forceSource
;
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
std
::
vector
<
int
>
>
forceSets
;
std
::
vector
<
cl
::
Memory
*>
arguments
;
std
::
vector
<
cl
::
Memory
*>
arguments
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
OpenCLArray
>
atomIndices
;
std
::
vector
<
OpenCLArray
>
atomIndices
;
std
::
vector
<
OpenCLArray
>
bufferIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
std
::
vector
<
std
::
string
>
energyParameterDerivatives
;
int
numForceBuffers
,
maxBonds
,
allGroups
;
int
maxBonds
,
allGroups
;
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
};
};
...
...
platforms/opencl/include/OpenCLNonbondedUtilities.h
View file @
ae686364
...
@@ -125,7 +125,7 @@ public:
...
@@ -125,7 +125,7 @@ public:
* Get the number of force buffers required for nonbonded forces.
* Get the number of force buffers required for nonbonded forces.
*/
*/
int
getNumForceBuffers
()
const
{
int
getNumForceBuffers
()
const
{
return
numForceBuffers
;
return
1
;
}
}
/**
/**
* Get the number of energy buffers required for nonbonded forces.
* Get the number of energy buffers required for nonbonded forces.
...
@@ -331,7 +331,7 @@ private:
...
@@ -331,7 +331,7 @@ private:
std
::
map
<
int
,
std
::
string
>
groupKernelSource
;
std
::
map
<
int
,
std
::
string
>
groupKernelSource
;
double
lastCutoff
;
double
lastCutoff
;
bool
useCutoff
,
usePeriodic
,
deviceIsCpu
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
bool
useCutoff
,
usePeriodic
,
deviceIsCpu
,
anyExclusions
,
usePadding
,
forceRebuildNeighborList
;
int
numForceBuffers
,
startTileIndex
,
startBlockIndex
,
numBlocks
,
maxExclusions
,
numForceThreadBlocks
;
int
startTileIndex
,
startBlockIndex
,
numBlocks
,
maxExclusions
,
numForceThreadBlocks
;
int
forceThreadBlockSize
,
interactingBlocksThreadBlockSize
,
groupFlags
;
int
forceThreadBlockSize
,
interactingBlocksThreadBlockSize
,
groupFlags
;
unsigned
int
tilesAfterReorder
;
unsigned
int
tilesAfterReorder
;
long
long
numTiles
;
long
long
numTiles
;
...
...
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
ae686364
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2011-20
19
Stanford University and the Authors. *
* Portions copyright (c) 2011-20
22
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -34,7 +34,7 @@
...
@@ -34,7 +34,7 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
using
namespace
std
;
using
namespace
std
;
OpenCLBondedUtilities
::
OpenCLBondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
numForceBuffers
(
0
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
OpenCLBondedUtilities
::
OpenCLBondedUtilities
(
OpenCLContext
&
context
)
:
context
(
context
),
maxBonds
(
0
),
allGroups
(
0
),
hasInitializedKernels
(
false
)
{
}
}
void
OpenCLBondedUtilities
::
addInteraction
(
const
vector
<
vector
<
int
>
>&
atoms
,
const
string
&
source
,
int
group
)
{
void
OpenCLBondedUtilities
::
addInteraction
(
const
vector
<
vector
<
int
>
>&
atoms
,
const
string
&
source
,
int
group
)
{
...
@@ -85,11 +85,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -85,11 +85,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
if
(
numForces
==
0
)
if
(
numForces
==
0
)
return
;
return
;
// Build the lists of atom
indices and buffer
indices.
// Build the lists of atom indices.
vector
<
vector
<
cl_uint
>
>
bufferVec
(
numForces
);
vector
<
vector
<
int
>
>
bufferCounter
(
numForces
,
vector
<
int
>
(
system
.
getNumParticles
(),
0
));
vector
<
int
>
numBuffers
(
numForces
,
0
);
atomIndices
.
resize
(
numForces
);
atomIndices
.
resize
(
numForces
);
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
int
numBonds
=
forceAtoms
[
i
].
size
();
int
numBonds
=
forceAtoms
[
i
].
size
();
...
@@ -102,100 +99,17 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -102,100 +99,17 @@ void OpenCLBondedUtilities::initialize(const System& system) {
}
}
atomIndices
[
i
].
initialize
<
cl_uint
>
(
context
,
indexVec
.
size
(),
"bondedIndices"
);
atomIndices
[
i
].
initialize
<
cl_uint
>
(
context
,
indexVec
.
size
(),
"bondedIndices"
);
atomIndices
[
i
].
upload
(
indexVec
);
atomIndices
[
i
].
upload
(
indexVec
);
bufferVec
[
i
].
resize
(
width
*
numBonds
,
0
);
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
{
for
(
int
atom
=
0
;
atom
<
numAtoms
;
atom
++
)
bufferVec
[
i
][
bond
*
width
+
atom
]
=
bufferCounter
[
i
][
forceAtoms
[
i
][
bond
][
atom
]]
++
;
}
for
(
int
j
=
0
;
j
<
(
int
)
bufferCounter
[
i
].
size
();
j
++
)
numBuffers
[
i
]
=
max
(
numBuffers
[
i
],
bufferCounter
[
i
][
j
]);
}
// For efficiency, we want to merge multiple forces into a single kernel - but only if that
// won't increase the number of force buffers.
if
(
context
.
getSupports64BitGlobalAtomics
())
{
// Put all the forces in the same set.
numForceBuffers
=
1
;
forceSets
.
push_back
(
vector
<
int
>
());
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
forceSets
[
0
].
push_back
(
i
);
}
else
{
// Figure out how many force buffers will be required.
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
numForceBuffers
=
max
(
numForceBuffers
,
numBuffers
[
i
]);
int
bufferLimit
=
max
(
numForceBuffers
,
(
int
)
context
.
getPlatformData
().
contexts
.
size
());
if
(
context
.
getNonbondedUtilities
().
getHasInteractions
())
bufferLimit
=
max
(
bufferLimit
,
context
.
getNonbondedUtilities
().
getNumForceBuffers
());
// Figure out sets of forces that can be merged.
vector
<
int
>
unmerged
(
numForces
);
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
unmerged
[
i
]
=
i
;
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
for
(
int
j
=
i
-
1
;
j
>=
0
;
j
--
)
{
if
(
numBuffers
[
unmerged
[
j
]]
<=
numBuffers
[
unmerged
[
j
+
1
]])
break
;
int
temp
=
unmerged
[
j
+
1
];
unmerged
[
j
+
1
]
=
unmerged
[
j
];
unmerged
[
j
]
=
temp
;
}
while
(
unmerged
.
size
()
>
0
)
{
int
sum
=
numBuffers
[
unmerged
.
back
()];
int
i
;
for
(
i
=
0
;
i
<
(
int
)
unmerged
.
size
()
-
1
;
i
++
)
{
if
(
sum
+
numBuffers
[
unmerged
[
i
]]
>
bufferLimit
)
break
;
sum
+=
numBuffers
[
unmerged
[
i
]];
}
forceSets
.
push_back
(
vector
<
int
>
());
for
(
int
j
=
0
;
j
<
i
;
j
++
)
forceSets
.
back
().
push_back
(
unmerged
[
j
]);
forceSets
.
back
().
push_back
(
unmerged
.
back
());
for
(
int
j
=
0
;
j
<
i
;
j
++
)
unmerged
.
erase
(
unmerged
.
begin
());
unmerged
.
pop_back
();
}
}
// Update the buffer indices based on merged sets.
bufferIndices
.
resize
(
numForces
);
for
(
int
i
=
0
;
i
<
(
int
)
forceSets
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
forceSets
[
i
].
size
();
j
++
)
{
int
force
=
forceSets
[
i
][
j
];
int
numBonds
=
forceAtoms
[
force
].
size
();
int
numAtoms
=
forceAtoms
[
force
][
0
].
size
();
int
width
=
indexWidth
[
force
];
for
(
int
k
=
0
;
k
<
j
;
k
++
)
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
for
(
int
atom
=
0
;
atom
<
numAtoms
;
atom
++
)
bufferVec
[
force
][
bond
*
width
+
atom
]
+=
bufferCounter
[
forceSets
[
i
][
k
]][
forceAtoms
[
force
][
bond
][
atom
]];
bufferIndices
[
force
].
initialize
<
cl_uint
>
(
context
,
bufferVec
[
force
].
size
(),
"bondedBufferIndices"
);
bufferIndices
[
force
].
upload
(
bufferVec
[
force
]);
}
}
// Create the kernel
s
.
// Create the kernel.
for
(
auto
&
set
:
forceSets
)
{
int
setSize
=
set
.
size
();
stringstream
s
;
stringstream
s
;
s
<<
"#ifdef SUPPORTS_64_BIT_ATOMICS
\n
"
;
s
<<
"#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
\n
"
;
s
<<
"#endif
\n
"
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
prefixCode
[
i
];
string
bufferType
=
(
context
.
getSupports64BitGlobalAtomics
()
?
"long"
:
"real4"
);
s
<<
"__kernel void computeBondedForces(__global long* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
s
<<
"__kernel void computeBondedForces(__global "
<<
bufferType
<<
"* restrict forceBuffers, __global mixed* restrict energyBuffer, __global const real4* restrict posq, int groups, real4 periodicBoxSize, real4 invPeriodicBoxSize, real4 periodicBoxVecX, real4 periodicBoxVecY, real4 periodicBoxVecZ"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
int
force
=
set
[
i
];
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
string
indexType
=
"uint"
+
(
indexWidth
[
force
]
==
1
?
""
:
context
.
intToString
(
indexWidth
[
force
]));
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
i
;
s
<<
", __global const "
<<
indexType
<<
"* restrict atomIndices"
<<
force
;
s
<<
", __global const "
<<
indexType
<<
"* restrict bufferIndices"
<<
i
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
", __global "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
...
@@ -205,10 +119,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -205,10 +119,8 @@ void OpenCLBondedUtilities::initialize(const System& system) {
s
<<
"mixed energy = 0;
\n
"
;
s
<<
"mixed energy = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
energyParameterDerivatives
.
size
();
i
++
)
s
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
s
<<
"mixed energyParamDeriv"
<<
i
<<
" = 0;
\n
"
;
for
(
int
i
=
0
;
i
<
setSize
;
i
++
)
{
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
int
force
=
set
[
i
];
s
<<
createForceSource
(
force
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
createForceSource
(
i
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
}
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
s
<<
"energyBuffer[get_global_id(0)] += energy;
\n
"
;
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
const
vector
<
string
>&
allParamDerivNames
=
context
.
getEnergyParamDerivNames
();
int
numDerivs
=
allParamDerivNames
.
size
();
int
numDerivs
=
allParamDerivNames
.
size
();
...
@@ -220,8 +132,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
...
@@ -220,8 +132,7 @@ void OpenCLBondedUtilities::initialize(const System& system) {
map
<
string
,
string
>
defines
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
cl
::
Program
program
=
context
.
createProgram
(
s
.
str
(),
defines
);
kernels
.
push_back
(
cl
::
Kernel
(
program
,
"computeBondedForces"
));
kernel
=
cl
::
Kernel
(
program
,
"computeBondedForces"
);
}
forceAtoms
.
clear
();
forceAtoms
.
clear
();
forceSource
.
clear
();
forceSource
.
clear
();
}
}
...
@@ -247,7 +158,6 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
...
@@ -247,7 +158,6 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s
<<
"if ((groups&"
<<
(
1
<<
group
)
<<
") != 0)
\n
"
;
s
<<
"if ((groups&"
<<
(
1
<<
group
)
<<
") != 0)
\n
"
;
s
<<
"for (unsigned int index = get_global_id(0); index < "
<<
numBonds
<<
"; index += get_global_size(0)) {
\n
"
;
s
<<
"for (unsigned int index = get_global_id(0); index < "
<<
numBonds
<<
"; index += get_global_size(0)) {
\n
"
;
s
<<
" "
<<
indexType
<<
" atoms = atomIndices"
<<
forceIndex
<<
"[index];
\n
"
;
s
<<
" "
<<
indexType
<<
" atoms = atomIndices"
<<
forceIndex
<<
"[index];
\n
"
;
s
<<
" "
<<
indexType
<<
" buffers = bufferIndices"
<<
forceIndex
<<
"[index];
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" unsigned int atom"
<<
(
i
+
1
)
<<
" = atoms"
<<
suffix
[
i
]
<<
";
\n
"
;
s
<<
" unsigned int atom"
<<
(
i
+
1
)
<<
" = atoms"
<<
suffix
[
i
]
<<
";
\n
"
;
s
<<
" real4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
s
<<
" real4 pos"
<<
(
i
+
1
)
<<
" = posq[atom"
<<
(
i
+
1
)
<<
"];
\n
"
;
...
@@ -255,17 +165,9 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
...
@@ -255,17 +165,9 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
s
<<
computeForce
<<
"
\n
"
;
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" {
\n
"
;
s
<<
" {
\n
"
;
if
(
context
.
getSupports64BitGlobalAtomics
())
{
s
<<
" ATOMIC_ADD(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"], (mm_ulong) realToFixedPoint(force"
<<
(
i
+
1
)
<<
".x));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"], realToFixedPoint(force"
<<
(
i
+
1
)
<<
".x));
\n
"
;
s
<<
" ATOMIC_ADD(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force"
<<
(
i
+
1
)
<<
".y));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], realToFixedPoint(force"
<<
(
i
+
1
)
<<
".y));
\n
"
;
s
<<
" ATOMIC_ADD(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+2*PADDED_NUM_ATOMS], (mm_ulong) realToFixedPoint(force"
<<
(
i
+
1
)
<<
".z));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+2*PADDED_NUM_ATOMS], realToFixedPoint(force"
<<
(
i
+
1
)
<<
".z));
\n
"
;
}
else
{
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
s
<<
" real4 force = forceBuffers[offset];
\n
"
;
s
<<
" force.xyz += force"
<<
(
i
+
1
)
<<
".xyz;
\n
"
;
s
<<
" forceBuffers[offset] = force;
\n
"
;
}
s
<<
" }
\n
"
;
s
<<
" }
\n
"
;
}
}
s
<<
"}
\n
"
;
s
<<
"}
\n
"
;
...
@@ -277,28 +179,18 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
...
@@ -277,28 +179,18 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
return
;
return
;
if
(
!
hasInitializedKernels
)
{
if
(
!
hasInitializedKernels
)
{
hasInitializedKernels
=
true
;
hasInitializedKernels
=
true
;
for
(
int
i
=
0
;
i
<
(
int
)
forceSets
.
size
();
i
++
)
{
int
index
=
0
;
int
index
=
0
;
cl
::
Kernel
&
kernel
=
kernels
[
i
];
if
(
context
.
getSupports64BitGlobalAtomics
())
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getLongForceBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getLongForceBuffer
().
getDeviceBuffer
());
else
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getForceBuffers
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getEnergyBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
context
.
getPosq
().
getDeviceBuffer
());
index
+=
6
;
index
+=
6
;
for
(
int
j
=
0
;
j
<
(
int
)
forceSets
[
i
].
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
atomIndices
[
forceSets
[
i
][
j
]].
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
atomIndices
[
j
].
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
bufferIndices
[
forceSets
[
i
][
j
]].
getDeviceBuffer
());
}
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
for
(
int
j
=
0
;
j
<
(
int
)
arguments
.
size
();
j
++
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
*
arguments
[
j
]);
if
(
energyParameterDerivatives
.
size
()
>
0
)
if
(
energyParameterDerivatives
.
size
()
>
0
)
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
kernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
context
.
getEnergyParamDerivBuffer
().
getDeviceBuffer
());
}
}
}
for
(
int
i
=
0
;
i
<
(
int
)
kernels
.
size
();
i
++
)
{
cl
::
Kernel
&
kernel
=
kernels
[
i
];
kernel
.
setArg
<
cl_int
>
(
3
,
groups
);
kernel
.
setArg
<
cl_int
>
(
3
,
groups
);
if
(
context
.
getUseDoublePrecision
())
{
if
(
context
.
getUseDoublePrecision
())
{
kernel
.
setArg
<
mm_double4
>
(
4
,
context
.
getPeriodicBoxSizeDouble
());
kernel
.
setArg
<
mm_double4
>
(
4
,
context
.
getPeriodicBoxSizeDouble
());
...
@@ -314,6 +206,5 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
...
@@ -314,6 +206,5 @@ void OpenCLBondedUtilities::computeInteractions(int groups) {
kernel
.
setArg
<
mm_float4
>
(
7
,
context
.
getPeriodicBoxVecY
());
kernel
.
setArg
<
mm_float4
>
(
7
,
context
.
getPeriodicBoxVecY
());
kernel
.
setArg
<
mm_float4
>
(
8
,
context
.
getPeriodicBoxVecZ
());
kernel
.
setArg
<
mm_float4
>
(
8
,
context
.
getPeriodicBoxVecZ
());
}
}
context
.
executeKernel
(
kernels
[
i
],
maxBonds
);
context
.
executeKernel
(
kernel
,
maxBonds
);
}
}
}
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment