Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
434d7afb
Unverified
Commit
434d7afb
authored
Mar 08, 2022
by
Anton Gorenko
Committed by
GitHub
Mar 07, 2022
Browse files
Add realToFixedPoint to all platforms (#3504)
It allows to use a faster float-to-int64 in the HIP platform.
parent
ca80579a
Changes
37
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
231 additions
and
227 deletions
+231
-227
platforms/common/src/CommonKernels.cpp
platforms/common/src/CommonKernels.cpp
+9
-9
platforms/common/src/kernels/customGBEnergyN2.cc
platforms/common/src/kernels/customGBEnergyN2.cc
+14
-14
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
+20
-20
platforms/common/src/kernels/customGBGradientChainRule.cc
platforms/common/src/kernels/customGBGradientChainRule.cc
+3
-3
platforms/common/src/kernels/customGBValueN2.cc
platforms/common/src/kernels/customGBValueN2.cc
+4
-4
platforms/common/src/kernels/customGBValueN2_cpu.cc
platforms/common/src/kernels/customGBValueN2_cpu.cc
+6
-6
platforms/common/src/kernels/customHbondForce.cc
platforms/common/src/kernels/customHbondForce.cc
+18
-18
platforms/common/src/kernels/customManyParticle.cc
platforms/common/src/kernels/customManyParticle.cc
+3
-3
platforms/common/src/kernels/customNonbondedGroups.cc
platforms/common/src/kernels/customNonbondedGroups.cc
+6
-6
platforms/common/src/kernels/ewald.cc
platforms/common/src/kernels/ewald.cc
+3
-3
platforms/common/src/kernels/gayBerne.cc
platforms/common/src/kernels/gayBerne.cc
+45
-45
platforms/common/src/kernels/gbsaObc.cc
platforms/common/src/kernels/gbsaObc.cc
+20
-20
platforms/common/src/kernels/gbsaObcReductions.cc
platforms/common/src/kernels/gbsaObcReductions.cc
+2
-2
platforms/common/src/kernels/gbsaObc_cpu.cc
platforms/common/src/kernels/gbsaObc_cpu.cc
+30
-30
platforms/common/src/kernels/integrationUtilities.cc
platforms/common/src/kernels/integrationUtilities.cc
+6
-6
platforms/common/src/kernels/pme.cc
platforms/common/src/kernels/pme.cc
+10
-10
platforms/common/src/kernels/rmsd.cc
platforms/common/src/kernels/rmsd.cc
+3
-3
platforms/cuda/src/CudaBondedUtilities.cpp
platforms/cuda/src/CudaBondedUtilities.cpp
+3
-3
platforms/cuda/src/kernels/common.cu
platforms/cuda/src/kernels/common.cu
+4
-0
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+22
-22
No files found.
platforms/common/src/CommonKernels.cpp
View file @
434d7afb
...
@@ -1651,9 +1651,9 @@ void CommonCalcCustomCentroidBondForceKernel::initialize(const System& system, c
...
@@ -1651,9 +1651,9 @@ void CommonCalcCustomCentroidBondForceKernel::initialize(const System& system, c
// Save the forces to global memory.
// Save the forces to global memory.
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
for
(
int
i
=
0
;
i
<
groupsPerBond
;
i
++
)
{
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"], (mm_ulong)
((mm_long)
(force"
<<
(
i
+
1
)
<<
".x
*0x100000000)
));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"], (mm_ulong)
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".x));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups], (mm_ulong)
((mm_long)
(force"
<<
(
i
+
1
)
<<
".y
*0x100000000)
));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups], (mm_ulong)
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".y));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups*2], (mm_ulong)
((mm_long)
(force"
<<
(
i
+
1
)
<<
".z
*0x100000000)
));
\n
"
;
compute
<<
"ATOMIC_ADD(&groupForce[group"
<<
(
i
+
1
)
<<
"+numParticleGroups*2], (mm_ulong)
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".z));
\n
"
;
compute
<<
"MEM_FENCE;
\n
"
;
compute
<<
"MEM_FENCE;
\n
"
;
}
}
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
...
@@ -2975,11 +2975,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -2975,11 +2975,11 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
else
else
tempDerivs2
<<
"local_"
<<
derivName
<<
"[tbx+tj] += temp_"
<<
derivName
<<
"_2;
\n
"
;
tempDerivs2
<<
"local_"
<<
derivName
<<
"[tbx+tj] += temp_"
<<
derivName
<<
"_2;
\n
"
;
if
(
useLong
)
{
if
(
useLong
)
{
storeDeriv1
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset1], (mm_ulong)
((mm_long)
("
<<
derivName
<<
"
*0x100000000)
));
\n
"
;
storeDeriv1
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset1], (mm_ulong)
realToFixedPoint
("
<<
derivName
<<
"));
\n
"
;
if
(
deviceIsCpu
)
if
(
deviceIsCpu
)
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
((mm_long)
(local_"
<<
derivName
<<
"[tgx]
*0x100000000)
));
\n
"
;
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
realToFixedPoint
(local_"
<<
derivName
<<
"[tgx]));
\n
"
;
else
else
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
((mm_long)
(local_"
<<
derivName
<<
"[LOCAL_ID]
*0x100000000)
));
\n
"
;
storeDeriv2
<<
"ATOMIC_ADD(&global_"
<<
derivName
<<
"[offset2], (mm_ulong)
realToFixedPoint
(local_"
<<
derivName
<<
"[LOCAL_ID]));
\n
"
;
}
}
else
{
else
{
storeDeriv1
<<
"global_"
<<
derivName
<<
"[offset1] += "
<<
derivName
<<
";
\n
"
;
storeDeriv1
<<
"global_"
<<
derivName
<<
"[offset1] += "
<<
derivName
<<
";
\n
"
;
...
@@ -3353,9 +3353,9 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
...
@@ -3353,9 +3353,9 @@ void CommonCalcCustomGBForceKernel::initialize(const System& system, const Custo
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
compute
<<
"derivBuffers"
<<
index
<<
"[index] = deriv"
<<
index
<<
";
\n
"
;
}
}
if
(
useLong
)
{
if
(
useLong
)
{
compute
<<
"forceBuffers[index] +=
(mm_long) (force.x*0x100000000
);
\n
"
;
compute
<<
"forceBuffers[index] +=
realToFixedPoint(force.x
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS] +=
(mm_long) (force.y*0x100000000
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS] +=
realToFixedPoint(force.y
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS*2] +=
(mm_long) (force.z*0x100000000
);
\n
"
;
compute
<<
"forceBuffers[index+PADDED_NUM_ATOMS*2] +=
realToFixedPoint(force.z
);
\n
"
;
}
}
else
else
compute
<<
"forceBuffers[index] = forceBuffers[index]+make_real4(force.x, force.y, force.z, 0);
\n
"
;
compute
<<
"forceBuffers[index] = forceBuffers[index]+make_real4(force.x, force.y, force.z, 0);
\n
"
;
...
...
platforms/common/src/kernels/customGBEnergyN2.cc
View file @
434d7afb
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(deriv##INDEX##_1
*0x100000000)
));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(local_deriv##INDEX[LOCAL_ID]
*0x100000000)
));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(local_deriv##INDEX[LOCAL_ID]));
#else
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID];
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[LOCAL_ID];
...
@@ -162,15 +162,15 @@ KERNEL void computeN2Energy(
...
@@ -162,15 +162,15 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
z
));
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
}
}
#else
#else
...
@@ -364,15 +364,15 @@ KERNEL void computeN2Energy(
...
@@ -364,15 +364,15 @@ KERNEL void computeN2Energy(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
LOCAL_ID
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
LOCAL_ID
].
z
));
offset
=
atom2
;
offset
=
atom2
;
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
}
}
...
...
platforms/common/src/kernels/customGBEnergyN2_cpu.cc
View file @
434d7afb
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(deriv##INDEX##_1
*0x100000000)
));
#define STORE_DERIVATIVE_1(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(deriv##INDEX##_1));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
((mm_long)
(local_deriv##INDEX[tgx]
*0x100000000)
));
#define STORE_DERIVATIVE_2(INDEX) ATOMIC_ADD(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], (mm_ulong)
realToFixedPoint
(local_deriv##INDEX[tgx]));
#else
#else
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_1(INDEX) derivBuffers##INDEX[offset] += deriv##INDEX##_1;
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx];
#define STORE_DERIVATIVE_2(INDEX) derivBuffers##INDEX[offset] += local_deriv##INDEX[tgx];
...
@@ -102,9 +102,9 @@ KERNEL void computeN2Energy(
...
@@ -102,9 +102,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
@@ -176,9 +176,9 @@ KERNEL void computeN2Energy(
...
@@ -176,9 +176,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
@@ -192,9 +192,9 @@ KERNEL void computeN2Energy(
...
@@ -192,9 +192,9 @@ KERNEL void computeN2Energy(
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
#else
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
@@ -318,9 +318,9 @@ KERNEL void computeN2Energy(
...
@@ -318,9 +318,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
@@ -377,9 +377,9 @@ KERNEL void computeN2Energy(
...
@@ -377,9 +377,9 @@ KERNEL void computeN2Energy(
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
atom1
;
unsigned
int
offset
=
atom1
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
STORE_DERIVATIVES_1
STORE_DERIVATIVES_1
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
...
@@ -399,9 +399,9 @@ KERNEL void computeN2Energy(
...
@@ -399,9 +399,9 @@ KERNEL void computeN2Energy(
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
local_force
[
tgx
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
local_force
[
tgx
].
z
));
unsigned
int
offset
=
atom2
;
unsigned
int
offset
=
atom2
;
STORE_DERIVATIVES_2
STORE_DERIVATIVES_2
#else
#else
...
...
platforms/common/src/kernels/customGBGradientChainRule.cc
View file @
434d7afb
...
@@ -20,9 +20,9 @@ KERNEL void computeGradientChainRuleTerms(GLOBAL const real4* RESTRICT posq,
...
@@ -20,9 +20,9 @@ KERNEL void computeGradientChainRuleTerms(GLOBAL const real4* RESTRICT posq,
#endif
#endif
COMPUTE_FORCES
COMPUTE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
forceBuffers
[
index
]
=
(
mm_long
)
(
force
.
x
*
0x100000000
);
forceBuffers
[
index
]
=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
(
mm_long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
]
=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
(
mm_long
)
(
force
.
z
*
0x100000000
);
forceBuffers
[
index
+
PADDED_NUM_ATOMS
*
2
]
=
realToFixedPoint
(
force
.
z
);
#else
#else
forceBuffers
[
index
]
=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
forceBuffers
[
index
]
=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
#endif
#endif
...
...
platforms/common/src/kernels/customGBValueN2.cc
View file @
434d7afb
...
@@ -139,11 +139,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -139,11 +139,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
LOCAL_ID
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
STORE_PARAM_DERIVS2
}
}
#else
#else
...
@@ -319,11 +319,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -319,11 +319,11 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
STORE_PARAM_DERIVS1
STORE_PARAM_DERIVS1
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
unsigned
int
offset2
=
atom2
;
unsigned
int
offset2
=
atom2
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
LOCAL_ID
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
LOCAL_ID
]));
STORE_PARAM_DERIVS2
STORE_PARAM_DERIVS2
}
}
#else
#else
...
...
platforms/common/src/kernels/customGBValueN2_cpu.cc
View file @
434d7afb
...
@@ -86,7 +86,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -86,7 +86,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
global_value
[
offset1
]
+=
value
;
...
@@ -148,7 +148,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -148,7 +148,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
global_value
[
offset1
]
+=
value
;
...
@@ -161,7 +161,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -161,7 +161,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
tgx
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
#else
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
global_value
[
offset2
]
+=
local_value
[
tgx
];
...
@@ -275,7 +275,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -275,7 +275,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
global_value
[
offset1
]
+=
value
;
...
@@ -324,7 +324,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -324,7 +324,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset1
=
atom1
;
unsigned
int
offset1
=
atom1
;
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
value
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
value
));
#else
#else
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
atom1
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset1
]
+=
value
;
global_value
[
offset1
]
+=
value
;
...
@@ -344,7 +344,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
...
@@ -344,7 +344,7 @@ KERNEL void computeN2Value(GLOBAL const real4* RESTRICT posq, GLOBAL const unsig
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset2
=
atom2
;
unsigned
int
offset2
=
atom2
;
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
local_value
[
tgx
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_value
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
local_value
[
tgx
]));
#else
#else
unsigned
int
offset2
=
atom2
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
get_group_id
(
0
)
*
PADDED_NUM_ATOMS
;
global_value
[
offset2
]
+=
local_value
[
tgx
];
global_value
[
offset2
]
+=
local_value
[
tgx
];
...
...
platforms/common/src/kernels/customHbondForce.cc
View file @
434d7afb
...
@@ -116,21 +116,21 @@ KERNEL void computeDonorForces(
...
@@ -116,21 +116,21 @@ KERNEL void computeDonorForces(
if
(
donorIndex
<
NUM_DONORS
)
{
if
(
donorIndex
<
NUM_DONORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
if
(
atoms
.
y
>
-
1
)
{
if
(
atoms
.
y
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
if
(
atoms
.
z
>
-
1
)
{
if
(
atoms
.
z
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
#else
#else
...
@@ -233,21 +233,21 @@ KERNEL void computeAcceptorForces(
...
@@ -233,21 +233,21 @@ KERNEL void computeAcceptorForces(
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
if
(
acceptorIndex
<
NUM_ACCEPTORS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
atoms
.
x
>
-
1
)
{
if
(
atoms
.
x
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f1
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
if
(
atoms
.
y
>
-
1
)
{
if
(
atoms
.
y
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f2
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
if
(
atoms
.
z
>
-
1
)
{
if
(
atoms
.
z
>
-
1
)
{
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
x
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
y
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
f3
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
force
[
atoms
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
f3
.
z
));
MEM_FENCE
;
MEM_FENCE
;
}
}
#else
#else
...
...
platforms/common/src/kernels/customManyParticle.cc
View file @
434d7afb
...
@@ -2,9 +2,9 @@
...
@@ -2,9 +2,9 @@
* Record the force on an atom to global memory.
* Record the force on an atom to global memory.
*/
*/
inline
DEVICE
void
storeForce
(
int
atom
,
real3
force
,
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
)
{
inline
DEVICE
void
storeForce
(
int
atom
,
real3
force
,
GLOBAL
mm_ulong
*
RESTRICT
forceBuffers
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
}
}
/**
/**
...
...
platforms/common/src/kernels/customNonbondedGroups.cc
View file @
434d7afb
...
@@ -137,13 +137,13 @@ KERNEL void computeInteractionGroups(
...
@@ -137,13 +137,13 @@ KERNEL void computeInteractionGroups(
}
}
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
if
(
exclusions
!=
0
)
{
if
(
exclusions
!=
0
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
}
}
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
SYNC_WARPS
;
SYNC_WARPS
;
#else
#else
writeForces
(
forceBuffers
,
localData
,
atom2
);
writeForces
(
forceBuffers
,
localData
,
atom2
);
...
...
platforms/common/src/kernels/ewald.cc
View file @
434d7afb
...
@@ -102,9 +102,9 @@ KERNEL void calculateEwaldForces(GLOBAL mm_long* RESTRICT forceBuffers, GLOBAL c
...
@@ -102,9 +102,9 @@ KERNEL void calculateEwaldForces(GLOBAL mm_long* RESTRICT forceBuffers, GLOBAL c
// Record the force on the atom.
// Record the force on the atom.
forceBuffers
[
atom
]
+=
(
mm_long
)
(
force
.
x
*
0x100000000
);
forceBuffers
[
atom
]
+=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
force
.
z
*
0x100000000
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
force
.
z
);
atom
+=
GLOBAL_SIZE
;
atom
+=
GLOBAL_SIZE
;
}
}
}
}
platforms/common/src/kernels/gayBerne.cc
View file @
434d7afb
...
@@ -384,19 +384,19 @@ KERNEL void computeForce(
...
@@ -384,19 +384,19 @@ KERNEL void computeForce(
real
sigma
=
data1
.
sig
.
x
+
data2
.
sig
.
x
;
real
sigma
=
data1
.
sig
.
x
+
data2
.
sig
.
x
;
real
epsilon
=
data1
.
eps
.
x
*
data2
.
eps
.
x
;
real
epsilon
=
data1
.
eps
.
x
*
data2
.
eps
.
x
;
computeOneInteraction
(
&
data1
,
&
data2
,
sigma
,
epsilon
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
computeOneInteraction
(
&
data1
,
&
data2
,
sigma
,
epsilon
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
z
));
}
}
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
z
));
}
}
#else
#else
for
(
int
atom1
=
GLOBAL_ID
;
atom1
<
numAtoms
;
atom1
+=
GLOBAL_SIZE
)
{
for
(
int
atom1
=
GLOBAL_ID
;
atom1
<
numAtoms
;
atom1
+=
GLOBAL_SIZE
)
{
...
@@ -432,19 +432,19 @@ KERNEL void computeForce(
...
@@ -432,19 +432,19 @@ KERNEL void computeForce(
real
sigma
=
data1
.
sig
.
x
+
data2
.
sig
.
x
;
real
sigma
=
data1
.
sig
.
x
+
data2
.
sig
.
x
;
real
epsilon
=
data1
.
eps
.
x
*
data2
.
eps
.
x
;
real
epsilon
=
data1
.
eps
.
x
*
data2
.
eps
.
x
;
computeOneInteraction
(
&
data1
,
&
data2
,
sigma
,
epsilon
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
computeOneInteraction
(
&
data1
,
&
data2
,
sigma
,
epsilon
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
z
));
}
}
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
z
));
}
}
#endif
#endif
...
@@ -466,18 +466,18 @@ KERNEL void computeForce(
...
@@ -466,18 +466,18 @@ KERNEL void computeForce(
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
#endif
#endif
computeOneInteraction
(
&
data1
,
&
data2
,
params
.
x
,
params
.
y
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
computeOneInteraction
(
&
data1
,
&
data2
,
params
.
x
,
params
.
y
,
delta
,
r2
,
&
force1
,
&
force2
,
&
torque1
,
&
torque2
,
&
energy
);
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force1
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force2
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque1
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
index2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque2
.
z
));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
#endif
#endif
...
@@ -522,16 +522,16 @@ KERNEL void applyTorques(
...
@@ -522,16 +522,16 @@ KERNEL void applyTorques(
yforce
+=
f
;
yforce
+=
f
;
force
-=
f
;
force
-=
f
;
}
}
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
originalIndex
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
],
(
mm_ulong
)
((
mm_long
)
(
xforce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
],
(
mm_ulong
)
realToFixedPoint
(
xforce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
xforce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
xforce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
xforce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
xforce
.
z
));
if
(
axisParticles
.
y
!=
-
1
)
{
if
(
axisParticles
.
y
!=
-
1
)
{
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
],
(
mm_ulong
)
((
mm_long
)
(
yforce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
],
(
mm_ulong
)
realToFixedPoint
(
yforce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
yforce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
yforce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
yforce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
axisParticles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
yforce
.
z
));
}
}
}
}
}
}
...
...
platforms/common/src/kernels/gbsaObc.cc
View file @
434d7afb
...
@@ -148,10 +148,10 @@ KERNEL void computeBornSum(
...
@@ -148,10 +148,10 @@ KERNEL void computeBornSum(
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
}
}
#else
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
...
@@ -352,9 +352,9 @@ KERNEL void computeBornSum(
...
@@ -352,9 +352,9 @@ KERNEL void computeBornSum(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
bornSum
));
if
(
atom2
<
PADDED_NUM_ATOMS
)
if
(
atom2
<
PADDED_NUM_ATOMS
)
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
#else
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset2
=
atom2
+
warp
*
PADDED_NUM_ATOMS
;
...
@@ -534,16 +534,16 @@ KERNEL void computeGBSAForce1(
...
@@ -534,16 +534,16 @@ KERNEL void computeGBSAForce1(
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
w
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
w
));
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fw
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
}
#else
#else
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
+
warp
*
PADDED_NUM_ATOMS
;
...
@@ -758,15 +758,15 @@ KERNEL void computeGBSAForce1(
...
@@ -758,15 +758,15 @@ KERNEL void computeGBSAForce1(
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
w
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
w
));
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fw
*
0x100000000
)
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fw
));
}
}
#else
#else
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
atom1
+
warp
*
PADDED_NUM_ATOMS
;
...
...
platforms/common/src/kernels/gbsaObcReductions.cc
View file @
434d7afb
...
@@ -74,10 +74,10 @@ KERNEL void reduceBornForce(
...
@@ -74,10 +74,10 @@ KERNEL void reduceBornForce(
energy
+=
saTerm
;
energy
+=
saTerm
;
force
*=
bornRadius
*
bornRadius
*
obcChain
[
index
];
force
*=
bornRadius
*
bornRadius
*
obcChain
[
index
];
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
bornForce
[
index
]
=
(
mm_long
)
(
force
*
0x100000000
);
bornForce
[
index
]
=
realToFixedPoint
(
force
);
#else
#else
bornForce
[
index
]
=
force
;
bornForce
[
index
]
=
force
;
#endif
#endif
}
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
/-
6
;
energyBuffer
[
GLOBAL_ID
]
+=
energy
/-
6
;
}
}
\ No newline at end of file
platforms/common/src/kernels/gbsaObc_cpu.cc
View file @
434d7afb
...
@@ -88,7 +88,7 @@ KERNEL void computeBornSum(
...
@@ -88,7 +88,7 @@ KERNEL void computeBornSum(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
global_bornSum
[
offset
]
+=
bornSum
;
...
@@ -150,7 +150,7 @@ KERNEL void computeBornSum(
...
@@ -150,7 +150,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
global_bornSum
[
offset
]
+=
bornSum
;
...
@@ -162,7 +162,7 @@ KERNEL void computeBornSum(
...
@@ -162,7 +162,7 @@ KERNEL void computeBornSum(
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
(
mm_long
)
(
localData
[
tgx
].
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
...
@@ -297,7 +297,7 @@ KERNEL void computeBornSum(
...
@@ -297,7 +297,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
global_bornSum
[
offset
]
+=
bornSum
;
...
@@ -360,7 +360,7 @@ KERNEL void computeBornSum(
...
@@ -360,7 +360,7 @@ KERNEL void computeBornSum(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
(
mm_long
)
(
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom1
],
realToFixedPoint
(
bornSum
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
bornSum
;
global_bornSum
[
offset
]
+=
bornSum
;
...
@@ -378,7 +378,7 @@ KERNEL void computeBornSum(
...
@@ -378,7 +378,7 @@ KERNEL void computeBornSum(
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
(
mm_long
)
(
localData
[
tgx
].
bornSum
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornSum
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
bornSum
));
#else
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
global_bornSum
[
offset
]
+=
localData
[
tgx
].
bornSum
;
...
@@ -491,10 +491,10 @@ KERNEL void computeGBSAForce1(
...
@@ -491,10 +491,10 @@ KERNEL void computeGBSAForce1(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
@@ -562,10 +562,10 @@ KERNEL void computeGBSAForce1(
...
@@ -562,10 +562,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
@@ -578,10 +578,10 @@ KERNEL void computeGBSAForce1(
...
@@ -578,10 +578,10 @@ KERNEL void computeGBSAForce1(
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
for
(
int
tgx
=
0
;
tgx
<
TILE_SIZE
;
tgx
++
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_long
)
(
localData
[
tgx
].
fx
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fy
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fz
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
(
mm_long
)
(
localData
[
tgx
].
fw
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
offset
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
#else
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
real4
f
=
forceBuffers
[
offset
];
...
@@ -723,10 +723,10 @@ KERNEL void computeGBSAForce1(
...
@@ -723,10 +723,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
@@ -791,10 +791,10 @@ KERNEL void computeGBSAForce1(
...
@@ -791,10 +791,10 @@ KERNEL void computeGBSAForce1(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_long
)
(
force
.
x
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
y
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
force
.
z
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
(
mm_long
)
(
force
.
w
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom1
],
realToFixedPoint
(
force
.
w
));
#else
#else
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
forceBuffers
[
offset
]
+=
make_real4
(
force
.
x
,
force
.
y
,
force
.
z
,
0
);
...
@@ -813,10 +813,10 @@ KERNEL void computeGBSAForce1(
...
@@ -813,10 +813,10 @@ KERNEL void computeGBSAForce1(
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_long
)
(
localData
[
tgx
].
fx
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fy
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_long
)
(
localData
[
tgx
].
fz
*
0x100000000
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
realToFixedPoint
(
localData
[
tgx
].
fz
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
(
mm_long
)
(
localData
[
tgx
].
fw
*
0x100000000
));
ATOMIC_ADD
(
&
global_bornForce
[
atom2
],
realToFixedPoint
(
localData
[
tgx
].
fw
));
#else
#else
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom2
+
GROUP_ID
*
PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers
[
offset
];
real4
f
=
forceBuffers
[
offset
];
...
...
platforms/common/src/kernels/integrationUtilities.cc
View file @
434d7afb
...
@@ -896,13 +896,13 @@ inline DEVICE real3 loadForce(int index, GLOBAL const mm_long* RESTRICT force) {
...
@@ -896,13 +896,13 @@ inline DEVICE real3 loadForce(int index, GLOBAL const mm_long* RESTRICT force) {
inline
DEVICE
void
addForce
(
int
index
,
GLOBAL
mm_long
*
RESTRICT
force
,
real3
value
)
{
inline
DEVICE
void
addForce
(
int
index
,
GLOBAL
mm_long
*
RESTRICT
force
,
real3
value
)
{
GLOBAL
mm_ulong
*
f
=
(
GLOBAL
mm_ulong
*
)
force
;
GLOBAL
mm_ulong
*
f
=
(
GLOBAL
mm_ulong
*
)
force
;
#ifdef HAS_OVERLAPPING_VSITES
#ifdef HAS_OVERLAPPING_VSITES
ATOMIC_ADD
(
&
f
[
index
],
(
mm_ulong
)
((
mm_long
)
(
value
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
f
[
index
],
(
mm_ulong
)
realToFixedPoint
(
value
.
x
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
value
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
value
.
y
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
*
2
],
(
mm_ulong
)
((
mm_long
)
(
value
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
f
[
index
+
PADDED_NUM_ATOMS
*
2
],
(
mm_ulong
)
realToFixedPoint
(
value
.
z
));
#else
#else
f
[
index
]
+=
(
mm_ulong
)
((
mm_long
)
(
value
.
x
*
0x100000000
)
);
f
[
index
]
+=
(
mm_ulong
)
realToFixedPoint
(
value
.
x
);
f
[
index
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
((
mm_long
)
(
value
.
y
*
0x100000000
)
);
f
[
index
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
realToFixedPoint
(
value
.
y
);
f
[
index
+
PADDED_NUM_ATOMS
*
2
]
+=
(
mm_ulong
)
((
mm_long
)
(
value
.
z
*
0x100000000
)
);
f
[
index
+
PADDED_NUM_ATOMS
*
2
]
+=
(
mm_ulong
)
realToFixedPoint
(
value
.
z
);
#endif
#endif
}
}
...
...
platforms/common/src/kernels/pme.cc
View file @
434d7afb
...
@@ -154,7 +154,7 @@ KERNEL void gridSpreadCharge(GLOBAL const real4* RESTRICT posq,
...
@@ -154,7 +154,7 @@ KERNEL void gridSpreadCharge(GLOBAL const real4* RESTRICT posq,
int
index
=
ybase
+
zindexTable
[
zindex
];
int
index
=
ybase
+
zindexTable
[
zindex
];
real
add
=
dxdy
*
data
[
iz
].
z
;
real
add
=
dxdy
*
data
[
iz
].
z
;
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
((
mm_long
)
(
add
*
0x100000000
)
));
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
realToFixedPoint
(
add
));
#else
#else
ATOMIC_ADD
(
&
pmeGrid
[
index
],
add
);
ATOMIC_ADD
(
&
pmeGrid
[
index
],
add
);
#endif
#endif
...
@@ -593,13 +593,13 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
...
@@ -593,13 +593,13 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceY
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecY
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecY
.
y
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
real
forceZ
=
-
q
*
(
force
.
x
*
GRID_SIZE_X
*
recipBoxVecZ
.
x
+
force
.
y
*
GRID_SIZE_Y
*
recipBoxVecZ
.
y
+
force
.
z
*
GRID_SIZE_Z
*
recipBoxVecZ
.
z
);
#ifdef USE_PME_STREAM
#ifdef USE_PME_STREAM
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
((
mm_long
)
(
forceX
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
realToFixedPoint
(
forceX
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forceY
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forceY
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forceZ
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forceZ
));
#else
#else
forceBuffers
[
atom
]
+=
(
mm_ulong
)
((
mm_long
)
(
forceX
*
0x100000000
)
);
forceBuffers
[
atom
]
+=
(
mm_ulong
)
realToFixedPoint
(
forceX
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
((
mm_long
)
(
forceY
*
0x100000000
)
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
realToFixedPoint
(
forceY
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
((
mm_long
)
(
forceZ
*
0x100000000
)
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_ulong
)
realToFixedPoint
(
forceZ
);
#endif
#endif
}
}
}
}
...
@@ -607,9 +607,9 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
...
@@ -607,9 +607,9 @@ KERNEL void gridInterpolateForce(GLOBAL const real4* RESTRICT posq, GLOBAL mm_ul
KERNEL
void
addForces
(
GLOBAL
const
real4
*
RESTRICT
forces
,
GLOBAL
mm_long
*
RESTRICT
forceBuffers
)
{
KERNEL
void
addForces
(
GLOBAL
const
real4
*
RESTRICT
forces
,
GLOBAL
mm_long
*
RESTRICT
forceBuffers
)
{
for
(
int
atom
=
GLOBAL_ID
;
atom
<
NUM_ATOMS
;
atom
+=
GLOBAL_SIZE
)
{
for
(
int
atom
=
GLOBAL_ID
;
atom
<
NUM_ATOMS
;
atom
+=
GLOBAL_SIZE
)
{
real4
f
=
forces
[
atom
];
real4
f
=
forces
[
atom
];
forceBuffers
[
atom
]
+=
(
mm_long
)
(
f
.
x
*
0x100000000
);
forceBuffers
[
atom
]
+=
realToFixedPoint
(
f
.
x
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
f
.
y
*
0x100000000
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
f
.
y
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
f
.
z
*
0x100000000
);
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
f
.
z
);
}
}
}
}
...
...
platforms/common/src/kernels/rmsd.cc
View file @
434d7afb
...
@@ -90,8 +90,8 @@ KERNEL void computeRMSDForces(int numParticles, int paddedNumAtoms, GLOBAL const
...
@@ -90,8 +90,8 @@ KERNEL void computeRMSDForces(int numParticles, int paddedNumAtoms, GLOBAL const
buffer
[
1
]
*
refPos
.
x
+
buffer
[
4
]
*
refPos
.
y
+
buffer
[
7
]
*
refPos
.
z
,
buffer
[
1
]
*
refPos
.
x
+
buffer
[
4
]
*
refPos
.
y
+
buffer
[
7
]
*
refPos
.
z
,
buffer
[
2
]
*
refPos
.
x
+
buffer
[
5
]
*
refPos
.
y
+
buffer
[
8
]
*
refPos
.
z
);
buffer
[
2
]
*
refPos
.
x
+
buffer
[
5
]
*
refPos
.
y
+
buffer
[
8
]
*
refPos
.
z
);
real3
force
=
(
rotatedRef
-
pos
)
*
scale
;
real3
force
=
(
rotatedRef
-
pos
)
*
scale
;
forceBuffers
[
index
]
+=
(
mm_long
)
(
force
.
x
*
0x100000000
);
forceBuffers
[
index
]
+=
realToFixedPoint
(
force
.
x
);
forceBuffers
[
index
+
paddedNumAtoms
]
+=
(
mm_long
)
(
force
.
y
*
0x100000000
);
forceBuffers
[
index
+
paddedNumAtoms
]
+=
realToFixedPoint
(
force
.
y
);
forceBuffers
[
index
+
2
*
paddedNumAtoms
]
+=
(
mm_long
)
(
force
.
z
*
0x100000000
);
forceBuffers
[
index
+
2
*
paddedNumAtoms
]
+=
realToFixedPoint
(
force
.
z
);
}
}
}
}
platforms/cuda/src/CudaBondedUtilities.cpp
View file @
434d7afb
...
@@ -166,9 +166,9 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
...
@@ -166,9 +166,9 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
}
}
s
<<
computeForce
<<
"
\n
"
;
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"], static_cast<unsigned long long>(
(long long)
(force"
<<
(
i
+
1
)
<<
".x
*0x100000000
)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"], static_cast<unsigned long long>(
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".x)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], static_cast<unsigned long long>(
(long long)
(force"
<<
(
i
+
1
)
<<
".y
*0x100000000
)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], static_cast<unsigned long long>(
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".y)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>(
(long long)
(force"
<<
(
i
+
1
)
<<
".z
*0x100000000
)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>(
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".z)));
\n
"
;
s
<<
" __threadfence_block();
\n
"
;
s
<<
" __threadfence_block();
\n
"
;
}
}
s
<<
"}
\n
"
;
s
<<
"}
\n
"
;
...
...
platforms/cuda/src/kernels/common.cu
View file @
434d7afb
...
@@ -24,3 +24,7 @@ typedef unsigned long long mm_ulong;
...
@@ -24,3 +24,7 @@ typedef unsigned long long mm_ulong;
#define SUPPORTS_64_BIT_ATOMICS 1
#define SUPPORTS_64_BIT_ATOMICS 1
#define SUPPORTS_DOUBLE_PRECISION 1
#define SUPPORTS_DOUBLE_PRECISION 1
__device__
inline
long
long
realToFixedPoint
(
real
x
)
{
return
static_cast
<
long
long
>
(
x
*
0x100000000
);
}
platforms/cuda/src/kernels/nonbonded.cu
View file @
434d7afb
...
@@ -46,11 +46,11 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
...
@@ -46,11 +46,11 @@ static __inline__ __device__ long long real_shfl(long long var, int srcLane) {
*/
*/
__device__
void
saveSingleForce
(
int
atom
,
real3
force
,
unsigned
long
long
*
forceBuffers
)
{
__device__
void
saveSingleForce
(
int
atom
,
real3
force
,
unsigned
long
long
*
forceBuffers
)
{
if
(
force
.
x
!=
0
)
if
(
force
.
x
!=
0
)
atomicAdd
(
&
forceBuffers
[
atom
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
x
)));
if
(
force
.
y
!=
0
)
if
(
force
.
y
!=
0
)
atomicAdd
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
y
)));
if
(
force
.
z
!=
0
)
if
(
force
.
z
!=
0
)
atomicAdd
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
z
)));
}
}
/**
/**
...
@@ -310,22 +310,22 @@ extern "C" __global__ void computeNonbonded(
...
@@ -310,22 +310,22 @@ extern "C" __global__ void computeNonbonded(
// write results for off diagonal tiles
// write results for off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
x
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
y
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
z
)));
#else
#else
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fx
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fy
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fz
)));
#endif
#endif
#endif
#endif
}
}
// Write results for on and off diagonal tiles
// Write results for on and off diagonal tiles
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
x
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
y
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
z
)));
#endif
#endif
}
}
...
@@ -581,9 +581,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -581,9 +581,9 @@ extern "C" __global__ void computeNonbonded(
// Write results.
// Write results.
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
x
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
y
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
force
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
force
.
z
)));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
unsigned
int
atom2
=
atomIndices
[
threadIdx
.
x
];
#else
#else
...
@@ -591,13 +591,13 @@ extern "C" __global__ void computeNonbonded(
...
@@ -591,13 +591,13 @@ extern "C" __global__ void computeNonbonded(
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
x
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
y
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
shflForce
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
shflForce
.
z
)));
#else
#else
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fx
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fy
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
(
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
(
realToFixedPoint
(
localData
[
threadIdx
.
x
].
fz
)));
#endif
#endif
}
}
#endif
#endif
...
@@ -652,4 +652,4 @@ extern "C" __global__ void computeNonbonded(
...
@@ -652,4 +652,4 @@ extern "C" __global__ void computeNonbonded(
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
#endif
#endif
SAVE_DERIVATIVES
SAVE_DERIVATIVES
}
}
\ No newline at end of file
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment