Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
434d7afb
Unverified
Commit
434d7afb
authored
Mar 08, 2022
by
Anton Gorenko
Committed by
GitHub
Mar 07, 2022
Browse files
Add realToFixedPoint to all platforms (#3504)
It allows a faster float-to-int64 conversion to be used in the HIP platform.
parent
ca80579a
Changes
37
Hide whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
370 additions
and
366 deletions
+370
-366
platforms/opencl/src/OpenCLBondedUtilities.cpp
platforms/opencl/src/OpenCLBondedUtilities.cpp
+3
-3
platforms/opencl/src/kernels/common.cl
platforms/opencl/src/kernels/common.cl
+4
-0
platforms/opencl/src/kernels/nonbonded.cl
platforms/opencl/src/kernels/nonbonded.cl
+12
-12
platforms/opencl/src/kernels/nonbonded_cpu.cl
platforms/opencl/src/kernels/nonbonded_cpu.cl
+18
-18
platforms/opencl/src/kernels/utilities.cl
platforms/opencl/src/kernels/utilities.cl
+4
-4
plugins/amoeba/platforms/common/src/kernels/amoebaGk.cc
plugins/amoeba/platforms/common/src/kernels/amoebaGk.cc
+65
-65
plugins/amoeba/platforms/common/src/kernels/amoebaWcaForce.cc
...ins/amoeba/platforms/common/src/kernels/amoebaWcaForce.cc
+6
-6
plugins/amoeba/platforms/common/src/kernels/hippoComputeField.cc
.../amoeba/platforms/common/src/kernels/hippoComputeField.cc
+18
-18
plugins/amoeba/platforms/common/src/kernels/hippoMultipoles.cc
...ns/amoeba/platforms/common/src/kernels/hippoMultipoles.cc
+12
-12
plugins/amoeba/platforms/common/src/kernels/hippoNonbonded.cc
...ins/amoeba/platforms/common/src/kernels/hippoNonbonded.cc
+36
-36
plugins/amoeba/platforms/common/src/kernels/hippoNonbondedExceptions.cc
.../platforms/common/src/kernels/hippoNonbondedExceptions.cc
+12
-12
plugins/amoeba/platforms/common/src/kernels/multipoleElectrostatics.cc
...a/platforms/common/src/kernels/multipoleElectrostatics.cc
+31
-31
plugins/amoeba/platforms/common/src/kernels/multipoleFixedField.cc
...moeba/platforms/common/src/kernels/multipoleFixedField.cc
+36
-36
plugins/amoeba/platforms/common/src/kernels/multipoleInducedField.cc
...eba/platforms/common/src/kernels/multipoleInducedField.cc
+19
-19
plugins/amoeba/platforms/common/src/kernels/multipolePme.cc
plugins/amoeba/platforms/common/src/kernels/multipolePme.cc
+51
-51
plugins/amoeba/platforms/common/src/kernels/multipoles.cc
plugins/amoeba/platforms/common/src/kernels/multipoles.cc
+13
-13
plugins/amoeba/platforms/common/src/kernels/pmeMultipoleElectrostatics.cc
...latforms/common/src/kernels/pmeMultipoleElectrostatics.cc
+30
-30
No files found.
platforms/opencl/src/OpenCLBondedUtilities.cpp
View file @
434d7afb
...
@@ -256,9 +256,9 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
...
@@ -256,9 +256,9 @@ string OpenCLBondedUtilities::createForceSource(int forceIndex, int numBonds, in
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" {
\n
"
;
s
<<
" {
\n
"
;
if
(
context
.
getSupports64BitGlobalAtomics
())
{
if
(
context
.
getSupports64BitGlobalAtomics
())
{
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"],
(long)
(force"
<<
(
i
+
1
)
<<
".x
*0x100000000
));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"],
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".x));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS],
(long)
(force"
<<
(
i
+
1
)
<<
".y
*0x100000000
));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS],
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".y));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+2*PADDED_NUM_ATOMS],
(long)
(force"
<<
(
i
+
1
)
<<
".z
*0x100000000
));
\n
"
;
s
<<
" atom_add(&forceBuffers[atom"
<<
(
i
+
1
)
<<
"+2*PADDED_NUM_ATOMS],
realToFixedPoint
(force"
<<
(
i
+
1
)
<<
".z));
\n
"
;
}
}
else
{
else
{
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
s
<<
" unsigned int offset = atom"
<<
(
i
+
1
)
<<
"+buffers"
<<
suffix
[
i
]
<<
"*PADDED_NUM_ATOMS;
\n
"
;
...
...
platforms/opencl/src/kernels/common.cl
View file @
434d7afb
...
@@ -59,3 +59,7 @@ typedef unsigned long mm_ulong;
...
@@ -59,3 +59,7 @@ typedef unsigned long mm_ulong;
#
define
asinf
(
x
)
asin
(
x
)
#
define
asinf
(
x
)
asin
(
x
)
#
define
atanf
(
x
)
atan
(
x
)
#
define
atanf
(
x
)
atan
(
x
)
#
define
atan2f
(
x,
y
)
atan2
(
x,
y
)
#
define
atan2f
(
x,
y
)
atan2
(
x,
y
)
/**
 * Convert a real value to a 64-bit fixed-point integer.
 *
 * The argument is multiplied by 0x100000000 (2^32) and the product is cast
 * to long. The kernels in this change pass the result to atom_add() on
 * forceBuffers in place of the previously inlined
 * "(long) (value*0x100000000)" expression.
 *
 * NOTE(review): no range check is performed here; presumably callers keep
 * x small enough that x*2^32 fits in a long -- confirm against callers.
 *
 * @param x  the value to convert
 * @return   x scaled by 2^32, cast to long
 */
inline
long
realToFixedPoint
(
real
x
)
{
return
(
long
)
(
x
*
0x100000000
)
;
}
platforms/opencl/src/kernels/nonbonded.cl
View file @
434d7afb
...
@@ -178,14 +178,14 @@ __kernel void computeNonbonded(
...
@@ -178,14 +178,14 @@ __kernel void computeNonbonded(
#ifdef INCLUDE_FORCES
#ifdef INCLUDE_FORCES
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = x*TILE_SIZE + tgx;
unsigned int offset = x*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset],
(long) (force.x*0x100000000
));
atom_add(&forceBuffers[offset],
realToFixedPoint(force.x
));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS],
(long) (force.y*0x100000000
));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS],
realToFixedPoint(force.y
));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS],
(long) (force.z*0x100000000
));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS],
realToFixedPoint(force.z
));
if (x != y) {
if (x != y) {
offset = y*TILE_SIZE + tgx;
offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset],
(long)
(localData[get_local_id(0)].fx
*0x100000000
));
atom_add(&forceBuffers[offset],
realToFixedPoint
(localData[get_local_id(0)].fx));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS],
(long)
(localData[get_local_id(0)].fy
*0x100000000
));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS],
realToFixedPoint
(localData[get_local_id(0)].fy));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS],
(long)
(localData[get_local_id(0)].fz
*0x100000000
));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS],
realToFixedPoint
(localData[get_local_id(0)].fz));
}
}
#else
#else
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
unsigned int offset1 = x*TILE_SIZE + tgx + warp*PADDED_NUM_ATOMS;
...
@@ -410,13 +410,13 @@ __kernel void computeNonbonded(
...
@@ -410,13 +410,13 @@ __kernel void computeNonbonded(
unsigned
int
atom2
=
y*TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y*TILE_SIZE
+
tgx
;
#
endif
#
endif
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[atom1],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[atom1],
realToFixedPoint
(
force.x
))
;
atom_add
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
atom_add
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
realToFixedPoint
(
force.y
))
;
atom_add
(
&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
(
long
)
(
force.z*0x100000000
))
;
atom_add
(
&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
realToFixedPoint
(
force.z
))
;
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
atom_add
(
&forceBuffers[atom2],
(
long
)
(
localData[get_local_id
(
0
)
].fx
*0x100000000
))
;
atom_add
(
&forceBuffers[atom2],
realToFixedPoint
(
localData[get_local_id
(
0
)
].fx
))
;
atom_add
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fy
*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
realToFixedPoint
(
localData[get_local_id
(
0
)
].fy
))
;
atom_add
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
(
long
)
(
localData[get_local_id
(
0
)
].fz
*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
realToFixedPoint
(
localData[get_local_id
(
0
)
].fz
))
;
}
}
#
else
#
else
unsigned
int
offset1
=
atom1
+
warp*PADDED_NUM_ATOMS
;
unsigned
int
offset1
=
atom1
+
warp*PADDED_NUM_ATOMS
;
...
...
platforms/opencl/src/kernels/nonbonded_cpu.cl
View file @
434d7afb
...
@@ -107,9 +107,9 @@ __kernel void computeNonbonded(
...
@@ -107,9 +107,9 @@ __kernel void computeNonbonded(
// Write results.
// Write results.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1],
(long) (force.x*0x100000000
));
atom_add(&forceBuffers[atom1],
realToFixedPoint(force.x
));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS],
(long) (force.y*0x100000000
));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS],
realToFixedPoint(force.y
));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
(long) (force.z*0x100000000
));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
realToFixedPoint(force.z
));
#else
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
...
@@ -183,9 +183,9 @@ __kernel void computeNonbonded(
...
@@ -183,9 +183,9 @@ __kernel void computeNonbonded(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1],
(long) (force.x*0x100000000
));
atom_add(&forceBuffers[atom1],
realToFixedPoint(force.x
));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS],
(long) (force.y*0x100000000
));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS],
realToFixedPoint(force.y
));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
(long) (force.z*0x100000000
));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
realToFixedPoint(force.z
));
#else
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
...
@@ -197,9 +197,9 @@ __kernel void computeNonbonded(
...
@@ -197,9 +197,9 @@ __kernel void computeNonbonded(
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
for (int tgx = 0; tgx < TILE_SIZE; tgx++) {
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
unsigned int offset = y*TILE_SIZE + tgx;
unsigned int offset = y*TILE_SIZE + tgx;
atom_add(&forceBuffers[offset],
(long)
(localData[tgx].fx
*0x100000000
));
atom_add(&forceBuffers[offset],
realToFixedPoint
(localData[tgx].fx));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS],
(long)
(localData[tgx].fy
*0x100000000
));
atom_add(&forceBuffers[offset+PADDED_NUM_ATOMS],
realToFixedPoint
(localData[tgx].fy));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS],
(long)
(localData[tgx].fz
*0x100000000
));
atom_add(&forceBuffers[offset+2*PADDED_NUM_ATOMS],
realToFixedPoint
(localData[tgx].fz));
#else
#else
unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
unsigned int offset = y*TILE_SIZE+tgx + get_group_id(0)*PADDED_NUM_ATOMS;
real4 f = forceBuffers[offset];
real4 f = forceBuffers[offset];
...
@@ -342,9 +342,9 @@ __kernel void computeNonbonded(
...
@@ -342,9 +342,9 @@ __kernel void computeNonbonded(
// Write results for atom1.
// Write results for atom1.
#ifdef SUPPORTS_64_BIT_ATOMICS
#ifdef SUPPORTS_64_BIT_ATOMICS
atom_add(&forceBuffers[atom1],
(long) (force.x*0x100000000
));
atom_add(&forceBuffers[atom1],
realToFixedPoint(force.x
));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS],
(long) (force.y*0x100000000
));
atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS],
realToFixedPoint(force.y
));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
(long) (force.z*0x100000000
));
atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
realToFixedPoint(force.z
));
#else
#else
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
unsigned int offset = atom1 + get_group_id(0)*PADDED_NUM_ATOMS;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
forceBuffers[offset].xyz = forceBuffers[offset].xyz+force.xyz;
...
@@ -409,9 +409,9 @@ __kernel void computeNonbonded(
...
@@ -409,9 +409,9 @@ __kernel void computeNonbonded(
//
Write
results
for
atom1.
//
Write
results
for
atom1.
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[atom1],
(
long
)
(
force.x*0x100000000
))
;
atom_add
(
&forceBuffers[atom1],
realToFixedPoint
(
force.x
))
;
atom_add
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
(
long
)
(
force.y*0x100000000
))
;
atom_add
(
&forceBuffers[atom1+PADDED_NUM_ATOMS],
realToFixedPoint
(
force.y
))
;
atom_add
(
&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
(
long
)
(
force.z*0x100000000
))
;
atom_add
(
&forceBuffers[atom1+2*PADDED_NUM_ATOMS],
realToFixedPoint
(
force.z
))
;
#
else
#
else
unsigned
int
offset
=
atom1
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom1
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
forceBuffers[offset].xyz
=
forceBuffers[offset].xyz+force.xyz
;
forceBuffers[offset].xyz
=
forceBuffers[offset].xyz+force.xyz
;
...
@@ -429,9 +429,9 @@ __kernel void computeNonbonded(
...
@@ -429,9 +429,9 @@ __kernel void computeNonbonded(
#
endif
#
endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
ifdef
SUPPORTS_64_BIT_ATOMICS
atom_add
(
&forceBuffers[atom2],
(
long
)
(
localData[tgx].fx
*0x100000000
))
;
atom_add
(
&forceBuffers[atom2],
realToFixedPoint
(
localData[tgx].fx
))
;
atom_add
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
(
long
)
(
localData[tgx].fy
*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+PADDED_NUM_ATOMS],
realToFixedPoint
(
localData[tgx].fy
))
;
atom_add
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
(
long
)
(
localData[tgx].fz
*0x100000000
))
;
atom_add
(
&forceBuffers[atom2+2*PADDED_NUM_ATOMS],
realToFixedPoint
(
localData[tgx].fz
))
;
#
else
#
else
unsigned
int
offset
=
atom2
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
unsigned
int
offset
=
atom2
+
get_group_id
(
0
)
*PADDED_NUM_ATOMS
;
real4
f
=
forceBuffers[offset]
;
real4
f
=
forceBuffers[offset]
;
...
...
platforms/opencl/src/kernels/utilities.cl
View file @
434d7afb
...
@@ -96,9 +96,9 @@ __kernel void reduceForces(__global long* restrict longBuffer, __global real4* r
...
@@ -96,9 +96,9 @@ __kernel void reduceForces(__global long* restrict longBuffer, __global real4* r
for
(
int
i
=
index
; i < totalSize; i += bufferSize)
for
(
int
i
=
index
; i < totalSize; i += bufferSize)
sum
+=
buffer[i]
;
sum
+=
buffer[i]
;
buffer[index]
=
sum
;
buffer[index]
=
sum
;
longBuffer[index]
=
(
long
)
(
sum.x*0x100000000
)
;
longBuffer[index]
=
realToFixedPoint
(
sum.x
)
;
longBuffer[index+bufferSize]
=
(
long
)
(
sum.y*0x100000000
)
;
longBuffer[index+bufferSize]
=
realToFixedPoint
(
sum.y
)
;
longBuffer[index+2*bufferSize]
=
(
long
)
(
sum.z*0x100000000
)
;
longBuffer[index+2*bufferSize]
=
realToFixedPoint
(
sum.z
)
;
}
}
}
}
...
@@ -137,4 +137,4 @@ __kernel void determineNativeAccuracy(__global float8* restrict values, int numV
...
@@ -137,4 +137,4 @@ __kernel void determineNativeAccuracy(__global float8* restrict values, int numV
__kernel
void
setCharges
(
__global
real*
restrict
charges,
__global
real4*
restrict
posq,
__global
int*
restrict
atomOrder,
int
numAtoms
)
{
__kernel
void
setCharges
(
__global
real*
restrict
charges,
__global
real4*
restrict
posq,
__global
int*
restrict
atomOrder,
int
numAtoms
)
{
for
(
int
i
=
get_global_id
(
0
)
; i < numAtoms; i += get_global_size(0))
for
(
int
i
=
get_global_id
(
0
)
; i < numAtoms; i += get_global_size(0))
posq[i].w
=
charges[atomOrder[i]]
;
posq[i].w
=
charges[atomOrder[i]]
;
}
}
\ No newline at end of file
plugins/amoeba/platforms/common/src/kernels/amoebaGk.cc
View file @
434d7afb
...
@@ -33,7 +33,7 @@ KERNEL void computeSurfaceAreaForce(GLOBAL mm_long* RESTRICT bornForce, GLOBAL m
...
@@ -33,7 +33,7 @@ KERNEL void computeSurfaceAreaForce(GLOBAL mm_long* RESTRICT bornForce, GLOBAL m
ratio6
=
ratio6
*
ratio6
*
ratio6
;
ratio6
=
ratio6
*
ratio6
*
ratio6
;
ratio6
=
ratio6
*
ratio6
;
ratio6
=
ratio6
*
ratio6
;
real
saTerm
=
SURFACE_AREA_FACTOR
*
r
*
r
*
ratio6
;
real
saTerm
=
SURFACE_AREA_FACTOR
*
r
*
r
*
ratio6
;
bornForce
[
index
]
+=
(
mm_long
)
(
saTerm
*
0x100000000
/
bornRadius
);
bornForce
[
index
]
+=
realToFixedPoint
(
saTerm
/
bornRadius
);
energy
+=
saTerm
;
energy
+=
saTerm
;
}
}
energyBuffer
[
GLOBAL_ID
]
-=
energy
/
6
;
energyBuffer
[
GLOBAL_ID
]
-=
energy
/
6
;
...
@@ -169,11 +169,11 @@ KERNEL void computeBornSum(GLOBAL mm_ulong* RESTRICT bornSum, GLOBAL const real4
...
@@ -169,11 +169,11 @@ KERNEL void computeBornSum(GLOBAL mm_ulong* RESTRICT bornSum, GLOBAL const real4
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
bornSum
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
bornSum
));
}
}
if
(
pos
<
end
&&
x
!=
y
)
{
if
(
pos
<
end
&&
x
!=
y
)
{
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
bornSum
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
bornSum
*
0x100000000
)
));
ATOMIC_ADD
(
&
bornSum
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornSum
));
}
}
lasty
=
y
;
lasty
=
y
;
pos
++
;
pos
++
;
...
@@ -283,10 +283,10 @@ KERNEL void computeGKForces(
...
@@ -283,10 +283,10 @@ KERNEL void computeGKForces(
}
}
SYNC_WARPS
;
SYNC_WARPS
;
data
.
force
*=
0.5
f
;
data
.
force
*=
0.5
f
;
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
// Compute torques.
// Compute torques.
data
.
force
=
make_real3
(
0
);
data
.
force
=
make_real3
(
0
);
...
@@ -301,10 +301,10 @@ KERNEL void computeGKForces(
...
@@ -301,10 +301,10 @@ KERNEL void computeGKForces(
}
}
}
}
SYNC_WARPS
;
SYNC_WARPS
;
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
// Compute chain rule terms.
// Compute chain rule terms.
data
.
force
=
make_real3
(
0
);
data
.
force
=
make_real3
(
0
);
...
@@ -319,7 +319,7 @@ KERNEL void computeGKForces(
...
@@ -319,7 +319,7 @@ KERNEL void computeGKForces(
SYNC_WARPS
;
SYNC_WARPS
;
}
}
}
}
ATOMIC_ADD
(
&
bornForce
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
bornForce
*
0x100000000
)
));
ATOMIC_ADD
(
&
bornForce
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
bornForce
));
}
}
else
{
else
{
// This is an off-diagonal tile.
// This is an off-diagonal tile.
...
@@ -348,13 +348,13 @@ KERNEL void computeGKForces(
...
@@ -348,13 +348,13 @@ KERNEL void computeGKForces(
localData
[
LOCAL_ID
].
force
*=
0.5
f
;
localData
[
LOCAL_ID
].
force
*=
0.5
f
;
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
}
}
// Compute torques.
// Compute torques.
...
@@ -380,13 +380,13 @@ KERNEL void computeGKForces(
...
@@ -380,13 +380,13 @@ KERNEL void computeGKForces(
}
}
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
}
}
// Compute chain rule terms.
// Compute chain rule terms.
...
@@ -409,9 +409,9 @@ KERNEL void computeGKForces(
...
@@ -409,9 +409,9 @@ KERNEL void computeGKForces(
}
}
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
bornForce
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
bornForce
*
0x100000000
)
));
ATOMIC_ADD
(
&
bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
bornForce
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
bornForce
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
bornForce
*
0x100000000
)
));
ATOMIC_ADD
(
&
bornForce
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
bornForce
));
}
}
}
}
}
}
...
@@ -543,9 +543,9 @@ KERNEL void computeChainRuleForce(
...
@@ -543,9 +543,9 @@ KERNEL void computeChainRuleForce(
}
}
SYNC_WARPS
;
SYNC_WARPS
;
}
}
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
((
data
.
force
.
x
+
localData
[
LOCAL_ID
].
force
.
x
)
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
((
data
.
force
.
x
+
localData
[
LOCAL_ID
].
force
.
x
)));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
((
data
.
force
.
y
+
localData
[
LOCAL_ID
].
force
.
y
)
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
((
data
.
force
.
y
+
localData
[
LOCAL_ID
].
force
.
y
)));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
((
data
.
force
.
z
+
localData
[
LOCAL_ID
].
force
.
z
)
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
((
data
.
force
.
z
+
localData
[
LOCAL_ID
].
force
.
z
)));
}
}
else
{
else
{
// This is an off-diagonal tile.
// This is an off-diagonal tile.
...
@@ -571,13 +571,13 @@ KERNEL void computeChainRuleForce(
...
@@ -571,13 +571,13 @@ KERNEL void computeChainRuleForce(
}
}
if
(
pos
<
end
)
{
if
(
pos
<
end
)
{
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
}
}
}
}
}
}
...
@@ -700,9 +700,9 @@ KERNEL void computeEDiffForce(
...
@@ -700,9 +700,9 @@ KERNEL void computeEDiffForce(
}
}
SYNC_WARPS
;
SYNC_WARPS
;
data
.
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
ENERGY_SCALE_FACTOR
;
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
// Compute torques.
// Compute torques.
...
@@ -718,9 +718,9 @@ KERNEL void computeEDiffForce(
...
@@ -718,9 +718,9 @@ KERNEL void computeEDiffForce(
}
}
}
}
data
.
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
ENERGY_SCALE_FACTOR
;
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
SYNC_WARPS
;
SYNC_WARPS
;
}
}
else
{
else
{
...
@@ -753,13 +753,13 @@ KERNEL void computeEDiffForce(
...
@@ -753,13 +753,13 @@ KERNEL void computeEDiffForce(
data
.
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
// Compute torques.
// Compute torques.
...
@@ -783,13 +783,13 @@ KERNEL void computeEDiffForce(
...
@@ -783,13 +783,13 @@ KERNEL void computeEDiffForce(
data
.
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
offset
=
x
*
TILE_SIZE
+
tgx
;
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
SYNC_WARPS
;
SYNC_WARPS
;
}
}
}
}
...
@@ -865,13 +865,13 @@ KERNEL void computeEDiffForce(
...
@@ -865,13 +865,13 @@ KERNEL void computeEDiffForce(
data
.
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
// Compute torques.
// Compute torques.
...
@@ -893,13 +893,13 @@ KERNEL void computeEDiffForce(
...
@@ -893,13 +893,13 @@ KERNEL void computeEDiffForce(
data
.
force
*=
ENERGY_SCALE_FACTOR
;
data
.
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
ENERGY_SCALE_FACTOR
;
offset
=
x
*
TILE_SIZE
+
tgx
;
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
}
}
pos
++
;
pos
++
;
}
}
...
...
plugins/amoeba/platforms/common/src/kernels/amoebaWcaForce.cc
View file @
434d7afb
...
@@ -250,14 +250,14 @@ KERNEL void computeWCAForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL mixed
...
@@ -250,14 +250,14 @@ KERNEL void computeWCAForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL mixed
SYNC_WARPS
;
SYNC_WARPS
;
}
}
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
}
}
}
}
pos
++
;
pos
++
;
...
...
plugins/amoeba/platforms/common/src/kernels/hippoComputeField.cc
View file @
434d7afb
...
@@ -174,14 +174,14 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne
...
@@ -174,14 +174,14 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne
// Write results.
// Write results.
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset1
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
fieldBuffers
[
offset1
],
(
mm_ulong
)
((
mm_long
)
(
field
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset1
],
(
mm_ulong
)
realToFixedPoint
(
field
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
field
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
field
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
field
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
field
.
z
));
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
offset2
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
fieldBuffers
[
offset2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
}
}
}
}
...
@@ -344,18 +344,18 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne
...
@@ -344,18 +344,18 @@ KERNEL void computeField(GLOBAL const real4* RESTRICT posq, GLOBAL const unsigne
// Write results.
// Write results.
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
field
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
field
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
field
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
field
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
field
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
field
.
z
));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
LOCAL_ID
];
unsigned
int
atom2
=
atomIndices
[
LOCAL_ID
];
#else
#else
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
atom2
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
}
}
}
}
tile
++
;
tile
++
;
...
@@ -395,12 +395,12 @@ KERNEL void computeFieldExceptions(GLOBAL const real4* RESTRICT posq, GLOBAL mm_
...
@@ -395,12 +395,12 @@ KERNEL void computeFieldExceptions(GLOBAL const real4* RESTRICT posq, GLOBAL mm_
real3
tempField1
=
make_real3
(
0
);
real3
tempField1
=
make_real3
(
0
);
real3
tempField2
=
make_real3
(
0
);
real3
tempField2
=
make_real3
(
0
);
COMPUTE_FIELD
COMPUTE_FIELD
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
tempField1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
tempField1
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempField1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempField1
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempField1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempField1
.
z
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
tempField2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
tempField2
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempField2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempField2
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempField2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempField2
.
z
));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
#endif
#endif
...
...
plugins/amoeba/platforms/common/src/kernels/hippoMultipoles.cc
View file @
434d7afb
...
@@ -362,22 +362,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons
...
@@ -362,22 +362,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons
// Store results
// Store results
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Z
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Z
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Z
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Z
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Z
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Z
].
z
));
if
(
axisType
!=
4
)
{
if
(
axisType
!=
4
)
{
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
X
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
X
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
X
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
X
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
X
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
X
].
z
));
}
}
if
((
axisType
==
2
||
axisType
==
3
)
&&
particles
.
y
>
-
1
)
{
if
((
axisType
==
2
||
axisType
==
3
)
&&
particles
.
y
>
-
1
)
{
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Y
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Y
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Y
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Y
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Y
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Y
].
z
));
}
}
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
I
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
I
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
I
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
I
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
I
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
I
].
z
));
}
}
}
}
}
}
plugins/amoeba/platforms/common/src/kernels/hippoNonbonded.cc
View file @
434d7afb
...
@@ -201,30 +201,30 @@ KERNEL void computeNonbonded(
...
@@ -201,30 +201,30 @@ KERNEL void computeNonbonded(
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
const
unsigned
int
offset
=
y
*
TILE_SIZE
+
tgx
;
// write results for off diagonal tiles
// write results for off diagonal tiles
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
shflForce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
shflForce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflForce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflForce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflForce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflForce
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
shflTorque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
shflTorque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflTorque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflTorque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflTorque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflTorque
.
z
));
#else
#else
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
tx
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
tx
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
ty
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
ty
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
tz
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
tz
));
#endif
#endif
}
}
// Write results for on and off diagonal tiles
// Write results for on and off diagonal tiles
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
const
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque
.
z
));
}
}
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
...
@@ -444,12 +444,12 @@ KERNEL void computeNonbonded(
...
@@ -444,12 +444,12 @@ KERNEL void computeNonbonded(
// Write results.
// Write results.
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
torque
.
z
));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
atom2
=
atomIndices
[
LOCAL_ID
];
unsigned
int
atom2
=
atomIndices
[
LOCAL_ID
];
#else
#else
...
@@ -457,19 +457,19 @@ KERNEL void computeNonbonded(
...
@@ -457,19 +457,19 @@ KERNEL void computeNonbonded(
#endif
#endif
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
if
(
atom2
<
PADDED_NUM_ATOMS
)
{
#ifdef ENABLE_SHUFFLE
#ifdef ENABLE_SHUFFLE
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
shflForce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
shflForce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflForce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflForce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflForce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflForce
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
shflTorque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
shflTorque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflTorque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflTorque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
shflTorque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
shflTorque
.
z
));
#else
#else
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fx
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fx
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fy
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fy
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fz
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fz
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
tx
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
tx
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
ty
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
ty
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
tz
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
tz
));
#endif
#endif
}
}
}
}
...
...
plugins/amoeba/platforms/common/src/kernels/hippoNonbondedExceptions.cc
View file @
434d7afb
...
@@ -79,18 +79,18 @@ KERNEL void computeNonbondedExceptions(
...
@@ -79,18 +79,18 @@ KERNEL void computeNonbondedExceptions(
real
tempEnergy
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
energy
+=
tempEnergy
;
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
tempForce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
tempForce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempForce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempForce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempForce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempForce
.
z
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
-
tempForce
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
-
tempForce
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
-
tempForce
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
-
tempForce
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
-
tempForce
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
-
tempForce
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
tempTorque1
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
tempTorque1
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempTorque1
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempTorque1
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempTorque1
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempTorque1
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
],
(
mm_ulong
)
((
mm_long
)
(
tempTorque2
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
],
(
mm_ulong
)
realToFixedPoint
(
tempTorque2
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempTorque2
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempTorque2
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
tempTorque2
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
tempTorque2
.
z
));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
}
}
#endif
#endif
...
...
plugins/amoeba/platforms/common/src/kernels/multipoleElectrostatics.cc
View file @
434d7afb
...
@@ -436,12 +436,12 @@ KERNEL void computeElectrostatics(
...
@@ -436,12 +436,12 @@ KERNEL void computeElectrostatics(
}
}
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
z
));
SYNC_WARPS
;
SYNC_WARPS
;
}
}
else
{
else
{
...
@@ -468,19 +468,19 @@ KERNEL void computeElectrostatics(
...
@@ -468,19 +468,19 @@ KERNEL void computeElectrostatics(
localData
[
LOCAL_ID
].
force
*=
-
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
-
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
torque
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
torque
*=
ENERGY_SCALE_FACTOR
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
z
));
SYNC_WARPS
;
SYNC_WARPS
;
}
}
}
}
...
@@ -578,25 +578,25 @@ KERNEL void computeElectrostatics(
...
@@ -578,25 +578,25 @@ KERNEL void computeElectrostatics(
// Write results.
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
z
));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
offset
=
atomIndices
[
LOCAL_ID
];
offset
=
atomIndices
[
LOCAL_ID
];
#else
#else
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
z
));
}
}
pos
++
;
pos
++
;
}
}
energyBuffer
[
GLOBAL_ID
]
+=
energy
*
ENERGY_SCALE_FACTOR
;
energyBuffer
[
GLOBAL_ID
]
+=
energy
*
ENERGY_SCALE_FACTOR
;
}
}
\ No newline at end of file
plugins/amoeba/platforms/common/src/kernels/multipoleFixedField.cc
View file @
434d7afb
...
@@ -564,29 +564,29 @@ KERNEL void computeFixedField(
...
@@ -564,29 +564,29 @@ KERNEL void computeFixedField(
// Write results.
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
z
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
x
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
y
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
z
));
#ifdef USE_GK
#ifdef USE_GK
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
gkField
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
gkField
.
x
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
gkField
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
gkField
.
y
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
gkField
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
gkField
.
z
));
#endif
#endif
if
(
x
!=
y
)
{
if
(
x
!=
y
)
{
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
field
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
field
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
field
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
field
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
field
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
field
.
z
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fieldPolar
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fieldPolar
.
x
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fieldPolar
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fieldPolar
.
y
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fieldPolar
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fieldPolar
.
z
));
#ifdef USE_GK
#ifdef USE_GK
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
gkField
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
gkField
.
x
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
gkField
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
gkField
.
y
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
gkField
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
gkField
.
z
));
#endif
#endif
}
}
}
}
...
@@ -706,32 +706,32 @@ KERNEL void computeFixedField(
...
@@ -706,32 +706,32 @@ KERNEL void computeFixedField(
// Write results.
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
z
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
x
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
y
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
z
));
#ifdef USE_GK
#ifdef USE_GK
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
gkField
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
gkField
.
x
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
gkField
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
gkField
.
y
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
gkField
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
gkField
.
z
));
#endif
#endif
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
offset
=
atomIndices
[
LOCAL_ID
];
offset
=
atomIndices
[
LOCAL_ID
];
#else
#else
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
field
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
field
.
x
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
field
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
field
.
y
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
field
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
field
.
z
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fieldPolar
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fieldPolar
.
x
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fieldPolar
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fieldPolar
.
y
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
fieldPolar
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
fieldPolar
.
z
));
#ifdef USE_GK
#ifdef USE_GK
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
gkField
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
gkField
.
x
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
gkField
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
gkField
.
y
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
gkField
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
gkFieldBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
gkField
.
z
));
#endif
#endif
}
}
pos
++
;
pos
++
;
...
...
plugins/amoeba/platforms/common/src/kernels/multipoleInducedField.cc
View file @
434d7afb
...
@@ -107,27 +107,27 @@ inline DEVICE void saveAtomData(int index, AtomData data, GLOBAL mm_ulong* RESTR
...
@@ -107,27 +107,27 @@ inline DEVICE void saveAtomData(int index, AtomData data, GLOBAL mm_ulong* RESTR
#endif
#endif
#endif
#endif
)
{
)
{
ATOMIC_ADD
(
&
field
[
index
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
field
[
index
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
x
));
ATOMIC_ADD
(
&
field
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
field
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
y
));
ATOMIC_ADD
(
&
field
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
field
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
field
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
field
.
z
));
ATOMIC_ADD
(
&
fieldPolar
[
index
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolar
[
index
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
x
));
ATOMIC_ADD
(
&
fieldPolar
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolar
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
y
));
ATOMIC_ADD
(
&
fieldPolar
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolar
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolar
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolar
.
z
));
#ifdef USE_GK
#ifdef USE_GK
ATOMIC_ADD
(
&
fieldS
[
index
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldS
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldS
[
index
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldS
.
x
));
ATOMIC_ADD
(
&
fieldS
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldS
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldS
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldS
.
y
));
ATOMIC_ADD
(
&
fieldS
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldS
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldS
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldS
.
z
));
ATOMIC_ADD
(
&
fieldPolarS
[
index
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolarS
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarS
[
index
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolarS
.
x
));
ATOMIC_ADD
(
&
fieldPolarS
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolarS
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarS
[
index
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolarS
.
y
));
ATOMIC_ADD
(
&
fieldPolarS
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldPolarS
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldPolarS
[
index
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldPolarS
.
z
));
#endif
#endif
#ifdef EXTRAPOLATED_POLARIZATION
#ifdef EXTRAPOLATED_POLARIZATION
for
(
int
i
=
0
;
i
<
6
;
i
++
)
{
for
(
int
i
=
0
;
i
<
6
;
i
++
)
{
ATOMIC_ADD
(
&
fieldGradient
[
6
*
index
+
i
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldGradient
[
i
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
index
+
i
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldGradient
[
i
]));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
index
+
i
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldGradientPolar
[
i
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
index
+
i
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldGradientPolar
[
i
]));
#ifdef USE_GK
#ifdef USE_GK
ATOMIC_ADD
(
&
fieldGradientS
[
6
*
index
+
i
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldGradientS
[
i
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientS
[
6
*
index
+
i
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldGradientS
[
i
]));
ATOMIC_ADD
(
&
fieldGradientPolarS
[
6
*
index
+
i
],
(
mm_ulong
)
((
mm_long
)
(
data
.
fieldGradientPolarS
[
i
]
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolarS
[
6
*
index
+
i
],
(
mm_ulong
)
realToFixedPoint
(
data
.
fieldGradientPolarS
[
i
]));
#endif
#endif
}
}
#endif
#endif
...
@@ -995,9 +995,9 @@ KERNEL void addExtrapolatedFieldGradientToForce(GLOBAL mm_long* RESTRICT forceBu
...
@@ -995,9 +995,9 @@ KERNEL void addExtrapolatedFieldGradientToForce(GLOBAL mm_long* RESTRICT forceBu
#endif
#endif
}
}
}
}
forceBuffers
[
atom
]
+=
(
mm_long
)
(
fx
*
0x100000000
);
forceBuffers
[
atom
]
+=
realToFixedPoint
(
fx
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
fy
*
0x100000000
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
fy
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
*
2
]
+=
(
mm_long
)
(
fz
*
0x100000000
);
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
*
2
]
+=
realToFixedPoint
(
fz
);
}
}
}
}
...
...
plugins/amoeba/platforms/common/src/kernels/multipolePme.cc
View file @
434d7afb
...
@@ -275,13 +275,13 @@ KERNEL void gridSpreadFixedMultipoles(GLOBAL const real4* RESTRICT posq, GLOBAL
...
@@ -275,13 +275,13 @@ KERNEL void gridSpreadFixedMultipoles(GLOBAL const real4* RESTRICT posq, GLOBAL
real
add
=
term0
*
v
.
x
+
term1
*
v
.
y
+
term2
*
v
.
z
;
real
add
=
term0
*
v
.
x
+
term1
*
v
.
y
+
term2
*
v
.
z
;
#ifdef HIPPO
#ifdef HIPPO
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
((
mm_long
)
(
add
*
0x100000000
)
));
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
realToFixedPoint
(
add
));
#else
#else
ATOMIC_ADD
(
&
pmeGrid
[
index
],
add
);
ATOMIC_ADD
(
&
pmeGrid
[
index
],
add
);
#endif
#endif
#else
#else
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD
(
&
pmeGrid
[
2
*
index
],
(
mm_ulong
)
((
mm_long
)
(
add
*
0x100000000
)
));
ATOMIC_ADD
(
&
pmeGrid
[
2
*
index
],
(
mm_ulong
)
realToFixedPoint
(
add
));
#else
#else
ATOMIC_ADD
(
&
pmeGrid
[
index
].
x
,
add
);
ATOMIC_ADD
(
&
pmeGrid
[
index
].
x
,
add
);
#endif
#endif
...
@@ -397,15 +397,15 @@ KERNEL void gridSpreadInducedDipoles(GLOBAL const real4* RESTRICT posq, GLOBAL c
...
@@ -397,15 +397,15 @@ KERNEL void gridSpreadInducedDipoles(GLOBAL const real4* RESTRICT posq, GLOBAL c
real
add1
=
term01
*
v
.
x
+
term11
*
v
.
y
;
real
add1
=
term01
*
v
.
x
+
term11
*
v
.
y
;
#ifdef HIPPO
#ifdef HIPPO
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
((
mm_long
)
(
add1
*
0x100000000
)
));
ATOMIC_ADD
(
&
pmeGrid
[
index
],
(
mm_ulong
)
realToFixedPoint
(
add1
));
#else
#else
ATOMIC_ADD
(
&
pmeGrid
[
index
],
add1
);
ATOMIC_ADD
(
&
pmeGrid
[
index
],
add1
);
#endif
#endif
#else
#else
real
add2
=
term02
*
v
.
x
+
term12
*
v
.
y
;
real
add2
=
term02
*
v
.
x
+
term12
*
v
.
y
;
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
#ifdef USE_FIXED_POINT_CHARGE_SPREADING
ATOMIC_ADD
(
&
pmeGrid
[
2
*
index
],
(
mm_ulong
)
((
mm_long
)
(
add1
*
0x100000000
)
));
ATOMIC_ADD
(
&
pmeGrid
[
2
*
index
],
(
mm_ulong
)
realToFixedPoint
(
add1
));
ATOMIC_ADD
(
&
pmeGrid
[
2
*
index
+
1
],
(
mm_ulong
)
((
mm_long
)
(
add2
*
0x100000000
)
));
ATOMIC_ADD
(
&
pmeGrid
[
2
*
index
+
1
],
(
mm_ulong
)
realToFixedPoint
(
add2
));
#else
#else
ATOMIC_ADD
(
&
pmeGrid
[
index
].
x
,
add1
);
ATOMIC_ADD
(
&
pmeGrid
[
index
].
x
,
add1
);
ATOMIC_ADD
(
&
pmeGrid
[
index
].
y
,
add2
);
ATOMIC_ADD
(
&
pmeGrid
[
index
].
y
,
add2
);
...
@@ -648,9 +648,9 @@ KERNEL void computeFixedPotentialFromGrid(
...
@@ -648,9 +648,9 @@ KERNEL void computeFixedPotentialFromGrid(
phi
[
m
+
NUM_ATOMS
*
18
]
=
tuv012
;
phi
[
m
+
NUM_ATOMS
*
18
]
=
tuv012
;
phi
[
m
+
NUM_ATOMS
*
19
]
=
tuv111
;
phi
[
m
+
NUM_ATOMS
*
19
]
=
tuv111
;
real
dipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
real
dipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
mm_long
fieldx
=
(
mm_long
)
(
(
dipoleScale
*
labDipole
[
m
*
3
]
-
tuv100
*
fracToCart
[
0
][
0
]
-
tuv010
*
fracToCart
[
0
][
1
]
-
tuv001
*
fracToCart
[
0
][
2
])
*
0x100000000
)
;
mm_long
fieldx
=
realToFixedPoint
(
dipoleScale
*
labDipole
[
m
*
3
]
-
tuv100
*
fracToCart
[
0
][
0
]
-
tuv010
*
fracToCart
[
0
][
1
]
-
tuv001
*
fracToCart
[
0
][
2
]);
mm_long
fieldy
=
(
mm_long
)
(
(
dipoleScale
*
labDipole
[
m
*
3
+
1
]
-
tuv100
*
fracToCart
[
1
][
0
]
-
tuv010
*
fracToCart
[
1
][
1
]
-
tuv001
*
fracToCart
[
1
][
2
])
*
0x100000000
)
;
mm_long
fieldy
=
realToFixedPoint
(
dipoleScale
*
labDipole
[
m
*
3
+
1
]
-
tuv100
*
fracToCart
[
1
][
0
]
-
tuv010
*
fracToCart
[
1
][
1
]
-
tuv001
*
fracToCart
[
1
][
2
]);
mm_long
fieldz
=
(
mm_long
)
(
(
dipoleScale
*
labDipole
[
m
*
3
+
2
]
-
tuv100
*
fracToCart
[
2
][
0
]
-
tuv010
*
fracToCart
[
2
][
1
]
-
tuv001
*
fracToCart
[
2
][
2
])
*
0x100000000
)
;
mm_long
fieldz
=
realToFixedPoint
(
dipoleScale
*
labDipole
[
m
*
3
+
2
]
-
tuv100
*
fracToCart
[
2
][
0
]
-
tuv010
*
fracToCart
[
2
][
1
]
-
tuv001
*
fracToCart
[
2
][
2
]);
fieldBuffers
[
m
]
=
fieldx
;
fieldBuffers
[
m
]
=
fieldx
;
fieldBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
fieldBuffers
[
m
+
PADDED_NUM_ATOMS
]
=
fieldy
;
fieldBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
fieldBuffers
[
m
+
2
*
PADDED_NUM_ATOMS
]
=
fieldz
;
...
@@ -999,20 +999,20 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO
...
@@ -999,20 +999,20 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO
GLOBAL
const
real
*
cphi
=
&
cphi_global
[
10
*
i
];
GLOBAL
const
real
*
cphi
=
&
cphi_global
[
10
*
i
];
torqueBuffers
[
i
]
=
(
mm_long
)
(
EPSILON_FACTOR
*
(
multipole
[
3
]
*
cphi
[
2
]
-
multipole
[
2
]
*
cphi
[
3
]
torqueBuffers
[
i
]
=
realToFixedPoint
(
EPSILON_FACTOR
*
(
multipole
[
3
]
*
cphi
[
2
]
-
multipole
[
2
]
*
cphi
[
3
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
cphi
[
9
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
cphi
[
9
]
+
multipole
[
8
]
*
cphi
[
7
]
+
multipole
[
9
]
*
cphi
[
5
]
+
multipole
[
8
]
*
cphi
[
7
]
+
multipole
[
9
]
*
cphi
[
5
]
-
multipole
[
7
]
*
cphi
[
8
]
-
multipole
[
9
]
*
cphi
[
6
])
*
0x100000000
);
-
multipole
[
7
]
*
cphi
[
8
]
-
multipole
[
9
]
*
cphi
[
6
]));
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
=
(
mm_long
)
(
EPSILON_FACTOR
*
(
multipole
[
1
]
*
cphi
[
3
]
-
multipole
[
3
]
*
cphi
[
1
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
=
realToFixedPoint
(
EPSILON_FACTOR
*
(
multipole
[
1
]
*
cphi
[
3
]
-
multipole
[
3
]
*
cphi
[
1
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
cphi
[
8
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
cphi
[
8
]
+
multipole
[
7
]
*
cphi
[
9
]
+
multipole
[
8
]
*
cphi
[
6
]
+
multipole
[
7
]
*
cphi
[
9
]
+
multipole
[
8
]
*
cphi
[
6
]
-
multipole
[
8
]
*
cphi
[
4
]
-
multipole
[
9
]
*
cphi
[
7
])
*
0x100000000
);
-
multipole
[
8
]
*
cphi
[
4
]
-
multipole
[
9
]
*
cphi
[
7
]));
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
=
(
mm_long
)
(
EPSILON_FACTOR
*
(
multipole
[
2
]
*
cphi
[
1
]
-
multipole
[
1
]
*
cphi
[
2
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
=
realToFixedPoint
(
EPSILON_FACTOR
*
(
multipole
[
2
]
*
cphi
[
1
]
-
multipole
[
1
]
*
cphi
[
2
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
cphi
[
7
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
cphi
[
7
]
+
multipole
[
7
]
*
cphi
[
4
]
+
multipole
[
9
]
*
cphi
[
8
]
+
multipole
[
7
]
*
cphi
[
4
]
+
multipole
[
9
]
*
cphi
[
8
]
-
multipole
[
7
]
*
cphi
[
5
]
-
multipole
[
8
]
*
cphi
[
9
])
*
0x100000000
);
-
multipole
[
7
]
*
cphi
[
5
]
-
multipole
[
8
]
*
cphi
[
9
]));
// Compute the force and energy.
// Compute the force and energy.
...
@@ -1036,9 +1036,9 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO
...
@@ -1036,9 +1036,9 @@ KERNEL void computeFixedMultipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLO
f
=
make_real3
(
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
f
=
make_real3
(
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
2
][
0
]
+
f
.
y
*
fracToCart
[
2
][
1
]
+
f
.
z
*
fracToCart
[
2
][
2
]));
EPSILON_FACTOR
*
(
f
.
x
*
fracToCart
[
2
][
0
]
+
f
.
y
*
fracToCart
[
2
][
1
]
+
f
.
z
*
fracToCart
[
2
][
2
]));
forceBuffers
[
i
]
-=
(
mm_ulong
)
((
mm_long
)
(
f
.
x
*
0x100000000
)
);
forceBuffers
[
i
]
-=
(
mm_ulong
)
realToFixedPoint
(
f
.
x
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
]
-=
(
mm_ulong
)
((
mm_long
)
(
f
.
y
*
0x100000000
)
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
]
-=
(
mm_ulong
)
realToFixedPoint
(
f
.
y
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
mm_ulong
)
((
mm_long
)
(
f
.
z
*
0x100000000
)
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
mm_ulong
)
realToFixedPoint
(
f
.
z
);
}
}
energyBuffer
[
GLOBAL_ID
]
+=
0.5
f
*
EPSILON_FACTOR
*
energy
;
energyBuffer
[
GLOBAL_ID
]
+=
0.5
f
*
EPSILON_FACTOR
*
energy
;
}
}
...
@@ -1110,20 +1110,20 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB
...
@@ -1110,20 +1110,20 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB
multipole
[
6
]
=
-
(
multipole
[
4
]
+
multipole
[
5
]);
multipole
[
6
]
=
-
(
multipole
[
4
]
+
multipole
[
5
]);
GLOBAL
const
real
*
cphi
=
&
cphi_global
[
10
*
i
];
GLOBAL
const
real
*
cphi
=
&
cphi_global
[
10
*
i
];
torqueBuffers
[
i
]
+=
(
mm_long
)
(
scale
*
(
multipole
[
3
]
*
cphi
[
2
]
-
multipole
[
2
]
*
cphi
[
3
]
torqueBuffers
[
i
]
+=
realToFixedPoint
(
scale
*
(
multipole
[
3
]
*
cphi
[
2
]
-
multipole
[
2
]
*
cphi
[
3
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
cphi
[
9
]
+
2
*
(
multipole
[
6
]
-
multipole
[
5
])
*
cphi
[
9
]
+
multipole
[
8
]
*
cphi
[
7
]
+
multipole
[
9
]
*
cphi
[
5
]
+
multipole
[
8
]
*
cphi
[
7
]
+
multipole
[
9
]
*
cphi
[
5
]
-
multipole
[
7
]
*
cphi
[
8
]
-
multipole
[
9
]
*
cphi
[
6
])
*
0x100000000
);
-
multipole
[
7
]
*
cphi
[
8
]
-
multipole
[
9
]
*
cphi
[
6
]));
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
scale
*
(
multipole
[
1
]
*
cphi
[
3
]
-
multipole
[
3
]
*
cphi
[
1
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
scale
*
(
multipole
[
1
]
*
cphi
[
3
]
-
multipole
[
3
]
*
cphi
[
1
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
cphi
[
8
]
+
2
*
(
multipole
[
4
]
-
multipole
[
6
])
*
cphi
[
8
]
+
multipole
[
7
]
*
cphi
[
9
]
+
multipole
[
8
]
*
cphi
[
6
]
+
multipole
[
7
]
*
cphi
[
9
]
+
multipole
[
8
]
*
cphi
[
6
]
-
multipole
[
8
]
*
cphi
[
4
]
-
multipole
[
9
]
*
cphi
[
7
])
*
0x100000000
);
-
multipole
[
8
]
*
cphi
[
4
]
-
multipole
[
9
]
*
cphi
[
7
]));
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
+=
(
mm_long
)
(
scale
*
(
multipole
[
2
]
*
cphi
[
1
]
-
multipole
[
1
]
*
cphi
[
2
]
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
+=
realToFixedPoint
(
scale
*
(
multipole
[
2
]
*
cphi
[
1
]
-
multipole
[
1
]
*
cphi
[
2
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
cphi
[
7
]
+
2
*
(
multipole
[
5
]
-
multipole
[
4
])
*
cphi
[
7
]
+
multipole
[
7
]
*
cphi
[
4
]
+
multipole
[
9
]
*
cphi
[
8
]
+
multipole
[
7
]
*
cphi
[
4
]
+
multipole
[
9
]
*
cphi
[
8
]
-
multipole
[
7
]
*
cphi
[
5
]
-
multipole
[
8
]
*
cphi
[
9
])
*
0x100000000
);
-
multipole
[
7
]
*
cphi
[
5
]
-
multipole
[
8
]
*
cphi
[
9
]));
// Compute the force and energy.
// Compute the force and energy.
...
@@ -1206,9 +1206,9 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB
...
@@ -1206,9 +1206,9 @@ KERNEL void computeInducedDipoleForceAndEnergy(GLOBAL real4* RESTRICT posq, GLOB
f
=
make_real3
(
scale
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
f
=
make_real3
(
scale
*
(
f
.
x
*
fracToCart
[
0
][
0
]
+
f
.
y
*
fracToCart
[
0
][
1
]
+
f
.
z
*
fracToCart
[
0
][
2
]),
scale
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
scale
*
(
f
.
x
*
fracToCart
[
1
][
0
]
+
f
.
y
*
fracToCart
[
1
][
1
]
+
f
.
z
*
fracToCart
[
1
][
2
]),
scale
*
(
f
.
x
*
fracToCart
[
2
][
0
]
+
f
.
y
*
fracToCart
[
2
][
1
]
+
f
.
z
*
fracToCart
[
2
][
2
]));
scale
*
(
f
.
x
*
fracToCart
[
2
][
0
]
+
f
.
y
*
fracToCart
[
2
][
1
]
+
f
.
z
*
fracToCart
[
2
][
2
]));
forceBuffers
[
i
]
-=
(
mm_ulong
)
((
mm_long
)
(
f
.
x
*
0x100000000
)
);
forceBuffers
[
i
]
-=
(
mm_ulong
)
realToFixedPoint
(
f
.
x
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
]
-=
(
mm_ulong
)
((
mm_long
)
(
f
.
y
*
0x100000000
)
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
]
-=
(
mm_ulong
)
realToFixedPoint
(
f
.
y
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
mm_ulong
)
((
mm_long
)
(
f
.
z
*
0x100000000
)
);
forceBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
mm_ulong
)
realToFixedPoint
(
f
.
z
);
}
}
#ifndef HIPPO
#ifndef HIPPO
energyBuffer
[
GLOBAL_ID
]
+=
0.25
f
*
EPSILON_FACTOR
*
energy
;
energyBuffer
[
GLOBAL_ID
]
+=
0.25
f
*
EPSILON_FACTOR
*
energy
;
...
@@ -1233,9 +1233,9 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phidp, GLOBAL
...
@@ -1233,9 +1233,9 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phidp, GLOBAL
SYNC_THREADS
;
SYNC_THREADS
;
real
selfDipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
real
selfDipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
for
(
int
i
=
GLOBAL_ID
;
i
<
NUM_ATOMS
;
i
+=
GLOBAL_SIZE
)
{
for
(
int
i
=
GLOBAL_ID
;
i
<
NUM_ATOMS
;
i
+=
GLOBAL_SIZE
)
{
inducedField
[
i
]
-=
(
mm_long
)
(
0x100000000
*
(
phidp
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phidp
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phidp
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
])
)
;
inducedField
[
i
]
-=
realToFixedPoint
(
phidp
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phidp
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phidp
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
]);
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
(
mm_long
)
(
0x100000000
*
(
phidp
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phidp
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phidp
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
1
])
)
;
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
realToFixedPoint
(
phidp
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phidp
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phidp
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
1
]);
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
mm_long
)
(
0x100000000
*
(
phidp
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phidp
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phidp
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
2
])
)
;
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
realToFixedPoint
(
phidp
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phidp
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phidp
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
2
]);
}
}
}
}
...
@@ -1264,9 +1264,9 @@ KERNEL void calculateSelfEnergyAndTorque(GLOBAL mm_long* RESTRICT torqueBuffers,
...
@@ -1264,9 +1264,9 @@ KERNEL void calculateSelfEnergyAndTorque(GLOBAL mm_long* RESTRICT torqueBuffers,
qii
+=
qXX
*
qXX
+
qYY
*
qYY
+
qZZ
*
qZZ
+
2
*
(
qXY
*
qXY
+
qXZ
*
qXZ
+
qYZ
*
qYZ
);
qii
+=
qXX
*
qXX
+
qYY
*
qYY
+
qZZ
*
qZZ
+
2
*
(
qXY
*
qXY
+
qXZ
*
qXZ
+
qYZ
*
qYZ
);
c6ii
+=
c6i
*
c6i
;
c6ii
+=
c6i
*
c6i
;
real3
torque
=
torqueScale
*
cross
(
dipole
,
induced
);
real3
torque
=
torqueScale
*
cross
(
dipole
,
induced
);
torqueBuffers
[
i
]
+=
(
mm_long
)
(
torque
.
x
*
0x100000000
);
torqueBuffers
[
i
]
+=
realToFixedPoint
(
torque
.
x
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
+=
(
mm_long
)
(
torque
.
y
*
0x100000000
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
]
+=
realToFixedPoint
(
torque
.
y
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
+=
(
mm_long
)
(
torque
.
z
*
0x100000000
);
torqueBuffers
[
i
+
PADDED_NUM_ATOMS
*
2
]
+=
realToFixedPoint
(
torque
.
z
);
}
}
real
term
=
2
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
term
=
2
*
EWALD_ALPHA
*
EWALD_ALPHA
;
real
fterm
=
-
EPSILON_FACTOR
*
EWALD_ALPHA
/
SQRT_PI
;
real
fterm
=
-
EPSILON_FACTOR
*
EWALD_ALPHA
/
SQRT_PI
;
...
@@ -1296,12 +1296,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
...
@@ -1296,12 +1296,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
SYNC_THREADS
;
SYNC_THREADS
;
real
selfDipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
real
selfDipoleScale
=
(
4
/
(
real
)
3
)
*
(
EWALD_ALPHA
*
EWALD_ALPHA
*
EWALD_ALPHA
)
/
SQRT_PI
;
for
(
int
i
=
GLOBAL_ID
;
i
<
NUM_ATOMS
;
i
+=
GLOBAL_SIZE
)
{
for
(
int
i
=
GLOBAL_ID
;
i
<
NUM_ATOMS
;
i
+=
GLOBAL_SIZE
)
{
inducedField
[
i
]
-=
(
mm_long
)
(
0x100000000
*
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
])
)
;
inducedField
[
i
]
-=
realToFixedPoint
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
]);
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
(
mm_long
)
(
0x100000000
*
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
1
])
)
;
inducedField
[
i
+
PADDED_NUM_ATOMS
]
-=
realToFixedPoint
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
1
]);
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
mm_long
)
(
0x100000000
*
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
2
])
)
;
inducedField
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
realToFixedPoint
(
phid
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phid
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phid
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]
-
selfDipoleScale
*
inducedDipole
[
3
*
i
+
2
]);
inducedFieldPolar
[
i
]
-=
(
mm_long
)
(
0x100000000
*
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]
-
selfDipoleScale
*
inducedDipolePolar
[
3
*
i
])
)
;
inducedFieldPolar
[
i
]
-=
realToFixedPoint
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
0
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
0
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
0
][
2
]
-
selfDipoleScale
*
inducedDipolePolar
[
3
*
i
]);
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
]
-=
(
mm_long
)
(
0x100000000
*
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]
-
selfDipoleScale
*
inducedDipolePolar
[
3
*
i
+
1
])
)
;
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
]
-=
realToFixedPoint
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
1
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
1
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
1
][
2
]
-
selfDipoleScale
*
inducedDipolePolar
[
3
*
i
+
1
]);
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
(
mm_long
)
(
0x100000000
*
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]
-
selfDipoleScale
*
inducedDipolePolar
[
3
*
i
+
2
])
)
;
inducedFieldPolar
[
i
+
PADDED_NUM_ATOMS
*
2
]
-=
realToFixedPoint
(
phip
[
i
+
NUM_ATOMS
]
*
fracToCart
[
2
][
0
]
+
phip
[
i
+
NUM_ATOMS
*
2
]
*
fracToCart
[
2
][
1
]
+
phip
[
i
+
NUM_ATOMS
*
3
]
*
fracToCart
[
2
][
2
]
-
selfDipoleScale
*
inducedDipolePolar
[
3
*
i
+
2
]);
#ifdef EXTRAPOLATED_POLARIZATION
#ifdef EXTRAPOLATED_POLARIZATION
// Compute and store the field gradients for later use.
// Compute and store the field gradients for later use.
...
@@ -1321,12 +1321,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
...
@@ -1321,12 +1321,12 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
Eyz
+=
fracToCart
[
1
][
k
]
*
EmatD
[
k
][
l
]
*
fracToCart
[
2
][
l
];
Eyz
+=
fracToCart
[
1
][
k
]
*
EmatD
[
k
][
l
]
*
fracToCart
[
2
][
l
];
}
}
}
}
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
0
],
(
mm_ulong
)
((
mm_long
)
(
-
Exx
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
0
],
(
mm_ulong
)
realToFixedPoint
(
-
Exx
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
1
],
(
mm_ulong
)
((
mm_long
)
(
-
Eyy
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
1
],
(
mm_ulong
)
realToFixedPoint
(
-
Eyy
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
2
],
(
mm_ulong
)
((
mm_long
)
(
-
Ezz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
2
],
(
mm_ulong
)
realToFixedPoint
(
-
Ezz
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
3
],
(
mm_ulong
)
((
mm_long
)
(
-
Exy
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
3
],
(
mm_ulong
)
realToFixedPoint
(
-
Exy
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
4
],
(
mm_ulong
)
((
mm_long
)
(
-
Exz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
4
],
(
mm_ulong
)
realToFixedPoint
(
-
Exz
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
5
],
(
mm_ulong
)
((
mm_long
)
(
-
Eyz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradient
[
6
*
i
+
5
],
(
mm_ulong
)
realToFixedPoint
(
-
Eyz
));
real
EmatP
[
3
][
3
]
=
{
real
EmatP
[
3
][
3
]
=
{
{
phip
[
i
+
NUM_ATOMS
*
4
],
phip
[
i
+
NUM_ATOMS
*
7
],
phip
[
i
+
NUM_ATOMS
*
8
]},
{
phip
[
i
+
NUM_ATOMS
*
4
],
phip
[
i
+
NUM_ATOMS
*
7
],
phip
[
i
+
NUM_ATOMS
*
8
]},
...
@@ -1344,13 +1344,13 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
...
@@ -1344,13 +1344,13 @@ KERNEL void recordInducedFieldDipoles(GLOBAL const real* RESTRICT phid, GLOBAL r
Eyz
+=
fracToCart
[
1
][
k
]
*
EmatP
[
k
][
l
]
*
fracToCart
[
2
][
l
];
Eyz
+=
fracToCart
[
1
][
k
]
*
EmatP
[
k
][
l
]
*
fracToCart
[
2
][
l
];
}
}
}
}
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
0
],
(
mm_ulong
)
((
mm_long
)
(
-
Exx
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
0
],
(
mm_ulong
)
realToFixedPoint
(
-
Exx
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
1
],
(
mm_ulong
)
((
mm_long
)
(
-
Eyy
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
1
],
(
mm_ulong
)
realToFixedPoint
(
-
Eyy
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
2
],
(
mm_ulong
)
((
mm_long
)
(
-
Ezz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
2
],
(
mm_ulong
)
realToFixedPoint
(
-
Ezz
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
3
],
(
mm_ulong
)
((
mm_long
)
(
-
Exy
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
3
],
(
mm_ulong
)
realToFixedPoint
(
-
Exy
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
4
],
(
mm_ulong
)
((
mm_long
)
(
-
Exz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
4
],
(
mm_ulong
)
realToFixedPoint
(
-
Exz
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
5
],
(
mm_ulong
)
((
mm_long
)
(
-
Eyz
*
0x100000000
)
));
ATOMIC_ADD
(
&
fieldGradientPolar
[
6
*
i
+
5
],
(
mm_ulong
)
realToFixedPoint
(
-
Eyz
));
#endif
#endif
}
}
}
}
#endif
#endif
\ No newline at end of file
plugins/amoeba/platforms/common/src/kernels/multipoles.cc
View file @
434d7afb
...
@@ -509,22 +509,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons
...
@@ -509,22 +509,22 @@ KERNEL void mapTorqueToForce(GLOBAL mm_ulong* RESTRICT forceBuffers, GLOBAL cons
// Store results
// Store results
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Z
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Z
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Z
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Z
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Z
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
z
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Z
].
z
));
if
(
axisType
!=
4
)
{
if
(
axisType
!=
4
)
{
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
X
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
X
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
X
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
X
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
X
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
x
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
X
].
z
));
}
}
if
((
axisType
==
2
||
axisType
==
3
)
&&
particles
.
y
>
-
1
)
{
if
((
axisType
==
2
||
axisType
==
3
)
&&
particles
.
y
>
-
1
)
{
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Y
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Y
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Y
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Y
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
Y
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
particles
.
y
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
Y
].
z
));
}
}
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
I
].
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
I
].
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
I
].
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
I
].
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
forces
[
I
].
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
forces
[
I
].
z
));
}
}
}
}
}
}
...
@@ -589,4 +589,4 @@ KERNEL void computePotentialAtPoints(GLOBAL const real4* RESTRICT posq, GLOBAL c
...
@@ -589,4 +589,4 @@ KERNEL void computePotentialAtPoints(GLOBAL const real4* RESTRICT posq, GLOBAL c
if
(
point
<
numPoints
)
if
(
point
<
numPoints
)
potential
[
point
]
=
p
*
ENERGY_SCALE_FACTOR
;
potential
[
point
]
=
p
*
ENERGY_SCALE_FACTOR
;
}
}
}
}
\ No newline at end of file
plugins/amoeba/platforms/common/src/kernels/pmeMultipoleElectrostatics.cc
View file @
434d7afb
...
@@ -502,12 +502,12 @@ KERNEL void computeElectrostatics(
...
@@ -502,12 +502,12 @@ KERNEL void computeElectrostatics(
computeSelfEnergyAndTorque
(
&
data
,
&
energy
);
computeSelfEnergyAndTorque
(
&
data
,
&
energy
);
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
force
*=
-
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
data
.
torque
*=
ENERGY_SCALE_FACTOR
;
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
z
));
SYNC_WARPS
;
SYNC_WARPS
;
}
}
else
{
else
{
...
@@ -535,19 +535,19 @@ KERNEL void computeElectrostatics(
...
@@ -535,19 +535,19 @@ KERNEL void computeElectrostatics(
localData
[
LOCAL_ID
].
force
*=
-
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
force
*=
-
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
torque
*=
ENERGY_SCALE_FACTOR
;
localData
[
LOCAL_ID
].
torque
*=
ENERGY_SCALE_FACTOR
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
z
));
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
z
));
SYNC_WARPS
;
SYNC_WARPS
;
}
}
}
}
...
@@ -645,23 +645,23 @@ KERNEL void computeElectrostatics(
...
@@ -645,23 +645,23 @@ KERNEL void computeElectrostatics(
// Write results.
// Write results.
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
offset
=
x
*
TILE_SIZE
+
tgx
;
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
data
.
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
data
.
torque
.
z
));
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
offset
=
atomIndices
[
LOCAL_ID
];
offset
=
atomIndices
[
LOCAL_ID
];
#else
#else
offset
=
y
*
TILE_SIZE
+
tgx
;
offset
=
y
*
TILE_SIZE
+
tgx
;
#endif
#endif
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
x
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
y
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
force
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
forceBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
force
.
z
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
x
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
x
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
y
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
y
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
((
mm_long
)
(
localData
[
LOCAL_ID
].
torque
.
z
*
0x100000000
)
));
ATOMIC_ADD
(
&
torqueBuffers
[
offset
+
2
*
PADDED_NUM_ATOMS
],
(
mm_ulong
)
realToFixedPoint
(
localData
[
LOCAL_ID
].
torque
.
z
));
}
}
pos
++
;
pos
++
;
}
}
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment