Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
434d7afb
Unverified
Commit
434d7afb
authored
Mar 08, 2022
by
Anton Gorenko
Committed by
GitHub
Mar 07, 2022
Browse files
Add realToFixedPoint to all platforms (#3504)
It allows to use a faster float-to-int64 in the HIP platform.
parent
ca80579a
Changes
37
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
231 additions
and
227 deletions
+231
-227
platforms/common/src/CommonKernels.cpp
platforms/common/src/CommonKernels.cpp
+9
-9
platforms/common/src/kernels/customGBEnergyN2.cc
platforms/common/src/kernels/customGBEnergyN2.cc
+14
-14
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
+20
-20
platforms/common/src/kernels/customGBGradientChainRule.cc
platforms/common/src/kernels/customGBGradientChainRule.cc
+3
-3
platforms/common/src/kernels/customGBValueN2.cc
platforms/common/src/kernels/customGBValueN2.cc
+4
-4
platforms/common/src/kernels/customGBValueN2_cpu.cc
platforms/common/src/kernels/customGBValueN2_cpu.cc
+6
-6
platforms/common/src/kernels/customHbondForce.cc
platforms/common/src/kernels/customHbondForce.cc
+18
-18
platforms/common/src/kernels/customManyParticle.cc
platforms/common/src/kernels/customManyParticle.cc
+3
-3
platforms/common/src/kernels/customNonbondedGroups.cc
platforms/common/src/kernels/customNonbondedGroups.cc
+6
-6
platforms/common/src/kernels/ewald.cc
platforms/common/src/kernels/ewald.cc
+3
-3
platforms/common/src/kernels/gayBerne.cc
platforms/common/src/kernels/gayBerne.cc
+45
-45
platforms/common/src/kernels/gbsaObc.cc
platforms/common/src/kernels/gbsaObc.cc
+20
-20
platforms/common/src/kernels/gbsaObcReductions.cc
platforms/common/src/kernels/gbsaObcReductions.cc
+2
-2
platforms/common/src/kernels/gbsaObc_cpu.cc
platforms/common/src/kernels/gbsaObc_cpu.cc
+30
-30
platforms/common/src/kernels/integrationUtilities.cc
platforms/common/src/kernels/integrationUtilities.cc
+6
-6
platforms/common/src/kernels/pme.cc
platforms/common/src/kernels/pme.cc
+10
-10
platforms/common/src/kernels/rmsd.cc
platforms/common/src/kernels/rmsd.cc
+3
-3
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+3
-3
platforms/cuda/src/kernels/common.cu
platforms/cuda/src/kernels/common.cu
+4
-0
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+22
-22
No files found.
platforms/common/src/CommonKernels.cpp
View file @
434d7afb
...
...
@@ -1651,9 +1651,9 @@ void CommonCalcCustomCentroidBondForceKernel::initialize(const System& system, c
// Save the forces to global memory.
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"], (mm_ulong)
((mm_long)
(force"
<<
(
i
+
1
)
<<
".x
*0x100000000)
));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups], (mm_ulong)
((mm_long)
(force"
<<
(
i
+
1
)
<<
".y
*0x100000000)
));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups*2], (mm_ulong)
((mm_long)
(force"
<<
(
i
+
1
)
<<
".z
*0x100000000)
));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"], (mm_ulong)
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".x));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups], (mm_ulong)
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".y));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups*2], (mm_ulong)
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".z));
\n
"
;
compute
<<
"MEM_FENCE;
\n
"
;
}
map
<
string
,
string
>
replacements
;
...
...
@@ -2975,11 +2975,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
else
tempDerivs2
<<
"local_"
<<
derivName
<<
"[tbx+tj] += temp_"
<<
derivName
<<
"_2;
\n
"
;
if
(
useLong
)
{
storeDeriv1
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset1], (mm_ulong)
((mm_long)
("
<<
derivName
<<
"
*0x100000000)
));
\n
"
;
storeDeriv1
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset1], (mm_ulong)
realToFixedPoint
("
<<
derivName
<<
"));
\n
"
;
if
(
deviceIsCpu
)
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
((mm_long)
(local_"
<<
derivName
<<
"[tgx]
*0x100000000)
));
\n
"
;
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
realToFixedPoint
(local_"
<<
derivName
<<
"[tgx]));
\n
"
;
else
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
((mm_long)
(local_"
<<
derivName
<<
"[LOCAL_ID]
*0x100000000)
));
\n
"
;
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
realToFixedPoint
(local_"
<<
derivName
<<
"[LOCAL_ID]));
\n
"
;
}
else
{
storeDeriv1
<<
"global_"
<<
derivName
<<
"[offset1] += "
<<
derivName
<<
";
\n
"
;
...
...
@@ -3353,9 +3353,9 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
}
if
(
useLong
)
{
compute
<<
"forceBuffers[index] +=
(mm_long) (force.x*0x100000000
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS] +=
(mm_long) (force.y*0x100000000
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS*2] +=
(mm_long) (force.z*0x100000000
);
\n
"
;
compute
<<
"forceBuffers[index] +=
realToFixedPoint(force.x
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS] +=
realToFixedPoint(force.y
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS*2] +=
realToFixedPoint(force.z
);
\n
"
;
}
else
compute
<<
"forceBuffers[index] = forceBuffers[index]+make_real4(force.x, force.y, force.z, 0);
\n
"
;
...
...
platforms/common/src/kernels/customGBEnergyN2.cc
View file @
434d7afb
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(deriv##INDEX##_1
*0x100000000)
));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(local_deriv##INDEX[LOCAL_ID]
*0x100000000)
));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(local_deriv##INDEX[LOCAL_ID]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID];
...
...
@@ -162,15 +162,15 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
z
));
STORE_DERIVATIVES_2
}
#else
...
...
@@ -364,15 +364,15 @@ KERNEL void computeN2Energy(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
unsigned
int
offset
=
atom1
;
STORE_DERIVATIVES_1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
z
));
offset
=
atom2
;
STORE_DERIVATIVES_2
}
...
...
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
View file @
434d7afb
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(deriv##INDEX##_1
*0x100000000)
));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(local_deriv##INDEX[tgx]
*0x100000000)
));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(local_deriv##INDEX[tgx]));
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx];
...
...
@@ -102,9 +102,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
...
@@ -176,9 +176,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
...
@@ -192,9 +192,9 @@ KERNEL void computeN2Energy(
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
STORE_DERIVATIVES_2
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
...
@@ -318,9 +318,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
...
@@ -377,9 +377,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
...
@@ -399,9 +399,9 @@ KERNEL void computeN2Energy(
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
unsigned
int
offset
=
atom2
;
STORE_DERIVATIVES_2
#else
...
...
platforms/common/src/kernels/customGBGradientChainRule.cc
View file @
434d7afb
...
...
@@ -20,9 +20,9 @@ KERNEL void computeGradientChainRuleTerms(GLOBAL const real4* RESTRICT posq,
#endif
COMPUTE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
forceBuffers
[
index
]
=
(
mm_long
)
(
force
.
x
*
0x100000000
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
(
mm_long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
(
mm_long
)
(
force
.
z
*
0x100000000
);
forceBuffers
[
index
]
=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
realToFixedPoint
(
force
.
z
);
#else
forceBuffers
[
index
]
=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
#endif
...
...
platforms/common/src/kernels/customGBValueN2.cc
View file @
434d7afb
...
...
@@ -139,11 +139,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
LOCAL_ID
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
}
#else
...
...
@@ -319,11 +319,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
LOCAL_ID
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
}
#else
...
...
platforms/common/src/kernels/customGBValueN2_cpu.cc
View file @
434d7afb
...
...
@@ -86,7 +86,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
...
...
@@ -148,7 +148,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
...
...
@@ -161,7 +161,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
tgx
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
...
...
@@ -275,7 +275,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
...
...
@@ -324,7 +324,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
...
...
@@ -344,7 +344,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
atom2
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
tgx
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
unsigned
int
offset2
=
atom2
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
...
...
platforms/common/src/kernels/customHbondForce.cc
View file @
434d7afb
...
...
@@ -116,21 +116,21 @@ KERNEL void computeDonorForces(
if
(
donorIndex
<
NUM_DONORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
z
));
MEM_FENCE
;
}
if
(
atoms
.
y
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
z
));
MEM_FENCE
;
}
if
(
atoms
.
z
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
}
#else
...
...
@@ -233,21 +233,21 @@ KERNEL void computeAcceptorForces(
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
z
));
MEM_FENCE
;
}
if
(
atoms
.
y
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
z
));
MEM_FENCE
;
}
if
(
atoms
.
z
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
}
#else
...
...
platforms/common/src/kernels/customManyParticle.cc
View file @
434d7afb
...
...
@@ -2,9 +2,9 @@
* Record the force on an atom to global memory.
*/
inline
DEVICE
void
storeForce
(
int
atom
,
real3
force
,
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
}
/**
...
...
platforms/common/src/kernels/customNonbondedGroups.cc
View file @
434d7afb
...
...
@@ -137,13 +137,13 @@ KERNEL void computeInteractionGroups(
}
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
exclusions
!=
0
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
}
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
SYNC_WARPS
;
#else
writeForces
(
forceBuffers
,
localData
,
atom2
);
...
...
platforms/common/src/kernels/ewald.cc
View file @
434d7afb
...
...
@@ -102,9 +102,9 @@ KERNEL void calculateEwaldForces(GLOBAL mm_long* RESTRICT forceBuffers, GLOBAL c
// Record the force on the atom.
forceBuffers
[
atom
]
+=
(
mm_long
)
(
force
.
x
*
0x100000000
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
force
.
z
*
0x100000000
);
forceBuffers
[
atom
]
+=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
force
.
z
);
atom
+=
GLOBAL_SIZE
;
}
}
platforms/common/src/kernels/gayBerne.cc
View file @
434d7afb
...
...
@@ -384,19 +384,19 @@ KERNEL void computeForce(
real
sigma
=
data1
.
sig
.
x
+
data2
.
sig
.
x
;
real
epsilon
=
data1
.
eps
.
x
*
data2
.
eps
.
x
;
computeOneInteraction
(
&
data1
,
&
data2
,
sigma
,
epsilon
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
z
));
}
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
z
));
}
#else
for
(
int
atom1
=
GLOBAL_ID
;
atom1
<
numAtoms
;
atom1
+=
GLOBAL_SIZE
)
{
...
...
@@ -432,19 +432,19 @@ KERNEL void computeForce(
real
sigma
=
data1
.
sig
.
x
+
data2
.
sig
.
x
;
real
epsilon
=
data1
.
eps
.
x
*
data2
.
eps
.
x
;
computeOneInteraction
(
&
data1
,
&
data2
,
sigma
,
epsilon
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
z
));
}
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
z
));
}
#endif
...
...
@@ -466,18 +466,18 @@ KERNEL void computeForce(
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
computeOneInteraction
(
&
data1
,
&
data2
,
params
.
x
,
params
.
y
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
z
));
#ifdef USE_CUTOFF
}
#endif
...
...
@@ -522,16 +522,16 @@ KERNEL void applyTorques(
yforce
+=
f
;
force
-=
f
;
}
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
],
(
mm_ulong
)
((
mm_long
)
(
xforce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
xforce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
xforce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
],
(
mm_ulong
)
realToFixedPoint
(
xforce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
xforce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
xforce
.
z
));
if
(
axisParticles
.
y
!=
-
1
)
{
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
],
(
mm_ulong
)
((
mm_long
)
(
yforce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
yforce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
yforce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
],
(
mm_ulong
)
realToFixedPoint
(
yforce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
yforce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
yforce
.
z
));
}
}
}
...
...
platforms/common/src/kernels/gbsaObc.cc
View file @
434d7afb
...
...
@@ -148,10 +148,10 @@ KERNEL void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
...
...
@@ -352,9 +352,9 @@ KERNEL void computeBornSum(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
atom2
<
PADDED_NUM_ATOMS
)
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
...
...
@@ -534,16 +534,16 @@ KERNEL void computeGBSAForce1(
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
w
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
w
));
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fw
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
...
...
@@ -758,15 +758,15 @@ KERNEL void computeGBSAForce1(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
w
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
w
));
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fw
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
...
...
platforms/common/src/kernels/gbsaObcReductions.cc
View file @
434d7afb
...
...
@@ -74,7 +74,7 @@ KERNEL void reduceBornForce(
energy
+=
saTerm
;
force
*=
bornRadius
*
bornRadius
*
obcChain
[
index
];
#ifdef SUPPORTS_64_BIT_ATOMICS
bornForce
[
index
]
=
(
mm_long
)
(
force
*
0x100000000
);
bornForce
[
index
]
=
realToFixedPoint
(
force
);
#else
bornForce
[
index
]
=
force
;
#endif
...
...
platforms/common/src/kernels/gbsaObc_cpu.cc
View file @
434d7afb
...
...
@@ -88,7 +88,7 @@ KERNEL void computeBornSum(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
...
...
@@ -150,7 +150,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
...
...
@@ -162,7 +162,7 @@ KERNEL void computeBornSum(
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_long
)
(
localData
[
tgx
].
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
...
...
@@ -297,7 +297,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
...
...
@@ -360,7 +360,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
...
...
@@ -378,7 +378,7 @@ KERNEL void computeBornSum(
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_long
)
(
localData
[
tgx
].
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
...
...
@@ -491,10 +491,10 @@ KERNEL void computeGBSAForce1(
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
...
@@ -562,10 +562,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
...
@@ -578,10 +578,10 @@ KERNEL void computeGBSAForce1(
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_long
)
(
localData
[
tgx
].
fx
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fy
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fz
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_long
)
(
localData
[
tgx
].
fw
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
...
...
@@ -723,10 +723,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
...
@@ -791,10 +791,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
...
@@ -813,10 +813,10 @@ KERNEL void computeGBSAForce1(
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_long
)
(
localData
[
tgx
].
fx
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fy
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fz
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_long
)
(
localData
[
tgx
].
fw
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
...
...
platforms/common/src/kernels/integrationUtilities.cc
View file @
434d7afb
...
...
@@ -896,13 +896,13 @@ inline DEVICE real3 loadForce(int index, GLOBAL const mm_long* RESTRICT force) {
inline
DEVICE
void
addForce
(
int
index
,
GLOBAL
mm_long
*
RESTRICT
force
,
real3
value
)
{
GLOBAL
mm_ulong
*
f
=
(
GLOBAL
mm_ulong
*
)
force
;
#ifdef HAS_OVERLAPPING_VSITES
ATOMIC_ADD
(
&
f
[
index
],
(
mm_ulong
)
((
mm_long
)
(
value
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
value
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
*
2
],
(
mm_ulong
)
((
mm_long
)
(
value
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
f
[
index
],
(
mm_ulong
)
realToFixedPoint
(
value
.
x
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
value
.
y
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
*
2
],
(
mm_ulong
)
realToFixedPoint
(
value
.
z
));
#else
f
[
index
]
+=
(
mm_ulong
)
((
mm_long
)
(
value
.
x
*
0x100000000
)
);
f
[
index
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
((
mm_long
)
(
value
.
y
*
0x100000000
)
);
f
[
index
+
PADDED_NUM_ATOMS
*
2
]
+=
(
mm_ulong
)
((
mm_long
)
(
value
.
z
*
0x100000000
)
);
f
[
index
]
+=
(
mm_ulong
)
realToFixedPoint
(
value
.
x
);
f
[
index
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
realToFixedPoint
(
value
.
y
);
f
[
index
+
PADDED_NUM_ATOMS
*
2
]
+=
(
mm_ulong
)
realToFixedPoint
(
value
.
z
);
#endif
}
...
...
platforms/common/src/kernels/pme.cc
View file @
434d7afb
...
...
@@ -154,7 +154,7 @@ KERNEL void gridSpreadCharge(GLOBAL const real4* RESTRICT posq,
int
index
=
ybase
+
zindexTable
[
zindex
];
real
add
=
dxdy
*
data
[
iz
].
z
;
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
((
mm_long
)
(
add
*
0x100000000
)
));
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
realToFixedPoint
(
add
));
#else
ATOMIC_ADD
(
&
pmeGrid
[
index
],
add
);
#endif
...
...
@@ -593,13 +593,13 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
#ifdef USE_PME_STREAM
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
((
mm_long
)
(
forceX
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forceY
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forceZ
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
realToFixedPoint
(
forceX
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forceY
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forceZ
));
#else
forceBuffers
[
atom
]
+=
(
mm_ulong
)
((
mm_long
)
(
forceX
*
0x100000000
)
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
((
mm_long
)
(
forceY
*
0x100000000
)
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
((
mm_long
)
(
forceZ
*
0x100000000
)
);
forceBuffers
[
atom
]
+=
(
mm_ulong
)
realToFixedPoint
(
forceX
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
realToFixedPoint
(
forceY
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
realToFixedPoint
(
forceZ
);
#endif
}
}
...
...
@@ -607,9 +607,9 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
KERNEL
void
addForces
(
GLOBAL
const
real4
*
RESTRICT
forces
,
GLOBAL
mm_long
*
RESTRICT
forceBuffers
)
{
for
(
int
atom
=
GLOBAL_ID
;
atom
<
NUM_ATOMS
;
atom
+=
GLOBAL_SIZE
)
{
real4
f
=
forces
[
atom
];
forceBuffers
[
atom
]
+=
(
mm_long
)
(
f
.
x
*
0x100000000
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
f
.
y
*
0x100000000
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
f
.
z
*
0x100000000
);
forceBuffers
[
atom
]
+=
realToFixedPoint
(
f
.
x
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
f
.
y
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
f
.
z
);
}
}
...
...
platforms/common/src/kernels/rmsd.cc
View file @
434d7afb
...
...
@@ -90,8 +90,8 @@ KERNEL void computeRMSDForces(int numParticles, int paddedNumAtoms, GLOBAL const
buffer
[
1
]
*
refPos
.
x
+
buffer
[
4
]
*
refPos
.
y
+
buffer
[
7
]
*
refPos
.
z
,
buffer
[
2
]
*
refPos
.
x
+
buffer
[
5
]
*
refPos
.
y
+
buffer
[
8
]
*
refPos
.
z
);
real3
force
=
(
rotatedRef
-
pos
)
*
scale
;
forceBuffers
[
index
]
+=
(
mm_long
)
(
force
.
x
*
0x100000000
);
forceBuffers
[
index
+
paddedNumAtoms
]
+=
(
mm_long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
index
+
2
*
paddedNumAtoms
]
+=
(
mm_long
)
(
force
.
z
*
0x100000000
);
forceBuffers
[
index
]
+=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
index
+
paddedNumAtoms
]
+=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
index
+
2
*
paddedNumAtoms
]
+=
realToFixedPoint
(
force
.
z
);
}
}
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
434d7afb
...
...
@@ -166,9 +166,9 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
}
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"], static_cast<unsigned long long>(
(long long)
(force"
<<
(
i
+
1
)
<<
".x
*0x100000000
)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], static_cast<unsigned long long>(
(long long)
(force"
<<
(
i
+
1
)
<<
".y
*0x100000000
)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>(
(long long)
(force"
<<
(
i
+
1
)
<<
".z
*0x100000000
)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"], static_cast<unsigned long long>(
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".x)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], static_cast<unsigned long long>(
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".y)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>(
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".z)));
\n
"
;
s
<<
" __threadfence_block();
\n
"
;
}
s
<<
"}
\n
"
;
...
...
platforms/cuda/src/kernels/common.cu
View file @
434d7afb
...
...
@@ -24,3 +24,7 @@ typedef unsigned long long mm_ulong;
#define SUPPORTS_64_BIT_ATOMICS 1
#define SUPPORTS_DOUBLE_PRECISION 1
__device__
inline
long
long
realToFixedPoint
(
real
x
)
{
return
static_cast
<
long
long
>
(
x
*
0x100000000
);
}
platforms/cuda/src/kernels/nonbonded.cu
View file @
434d7afb
...
...
@@ -46,11 +46,11 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
*/
__device__
void
saveSingleForce
(
int
atom
,
real3
force
,
unsigned
long
long
*
forceBuffers
)
{
if
(
force
.
x
!=
0
)
atomicAdd
(
&
forceBuffers
[
atom
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
x
)));
if
(
force
.
y
!=
0
)
atomicAdd
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
y
)));
if
(
force
.
z
!=
0
)
atomicAdd
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
z
)));
}
/**
...
...
@@ -310,22 +310,22 @@ extern "C" __global__ void computeNonbonded(
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
x
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
y
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
z
)));
#else
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fx
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fy
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fz
)));
#endif
#endif
}
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
x
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
y
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
z
)));
#endif
}
...
...
@@ -581,9 +581,9 @@ extern "C" __global__ void computeNonbonded(
// Write results.
#ifdef INCLUDE_FORCES
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
x
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
y
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
z
)));
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
#else
...
...
@@ -591,13 +591,13 @@ extern "C" __global__ void computeNonbonded(
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef ENABLE_SHUFFLE
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
x
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
y
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
z
)));
#else
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fx
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fy
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fz
)));
#endif
}
#endif
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment