Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1410ab35
Commit
1410ab35
authored
Oct 27, 2014
by
peastman
Browse files
Merge pull request #678 from peastman/gbshared
Reduced share memory for CustomGBForce
parents
858c90ab
a0769389
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
46 additions
and
42 deletions
+46
-42
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+22
-20
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+23
-21
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
1410ab35
...
@@ -3034,7 +3034,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
...
@@ -3034,7 +3034,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
pairEnergyDefines
[
"USE_PERIODIC"
]
=
"1"
;
pairEnergyDefines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
anyExclusions
)
if
(
anyExclusions
)
pairEnergyDefines
[
"USE_EXCLUSIONS"
]
=
"1"
;
pairEnergyDefines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
atomParamSize
%
2
=
=
0
&&
!
cu
.
getUseDoublePrecision
())
if
(
atomParamSize
%
2
!
=
0
&&
!
cu
.
getUseDoublePrecision
())
pairEnergyDefines
[
"NEED_PADDING"
]
=
"1"
;
pairEnergyDefines
[
"NEED_PADDING"
]
=
"1"
;
pairEnergyDefines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
pairEnergyDefines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
pairEnergyDefines
[
"WARPS_PER_GROUP"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
CudaContext
::
TileSize
);
pairEnergyDefines
[
"WARPS_PER_GROUP"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
CudaContext
::
TileSize
);
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
1410ab35
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
#define STORE_DERIVATIVE_2(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].deriv##INDEX*0x100000000)));
#define STORE_DERIVATIVE_2(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].deriv##INDEX*0x100000000)));
typedef
struct
{
typedef
struct
{
real
4
pos
q
;
real
3
pos
;
real3
force
;
real3
force
;
ATOM_PARAMETER_DATA
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
#ifdef NEED_PADDING
...
@@ -40,7 +40,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -40,7 +40,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3
force
=
make_real3
(
0
);
real3
force
=
make_real3
(
0
);
DECLARE_ATOM1_DERIVATIVES
DECLARE_ATOM1_DERIVATIVES
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
unsigned
int
excl
=
exclusions
[
pos
*
TILE_SIZE
+
tgx
];
unsigned
int
excl
=
exclusions
[
pos
*
TILE_SIZE
+
tgx
];
...
@@ -49,12 +49,12 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -49,12 +49,12 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// This tile is on the diagonal.
// This tile is on the diagonal.
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
localData
[
localAtomIndex
].
pos
q
=
posq1
;
localData
[
localAtomIndex
].
pos
=
make_real3
(
pos1
.
x
,
pos1
.
y
,
pos1
.
z
)
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
j
;
int
atom2
=
tbx
+
j
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
@@ -95,7 +95,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -95,7 +95,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
force
=
make_real3
(
0
);
localData
[
localAtomIndex
].
force
=
make_real3
(
0
);
CLEAR_LOCAL_DERIVATIVES
CLEAR_LOCAL_DERIVATIVES
...
@@ -105,8 +106,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -105,8 +106,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
@@ -231,7 +232,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -231,7 +232,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// Load atom data for this tile.
// Load atom data for this tile.
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
LOAD_ATOM1_PARAMETERS
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -241,7 +242,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -241,7 +242,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
#endif
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
atomIndices
[
threadIdx
.
x
]
=
j
;
if
(
j
<
PADDED_NUM_ATOMS
)
{
if
(
j
<
PADDED_NUM_ATOMS
)
{
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
force
=
make_real3
(
0
);
localData
[
localAtomIndex
].
force
=
make_real3
(
0
);
CLEAR_LOCAL_DERIVATIVES
CLEAR_LOCAL_DERIVATIVES
...
@@ -252,17 +254,17 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -252,17 +254,17 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// box, then skip having to apply periodic boundary conditions later.
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
real4
blockCenterX
=
blockCenter
[
x
];
pos
q
1
.
x
-=
floor
((
pos
q
1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos1
.
x
-=
floor
((
pos1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos
q
1
.
y
-=
floor
((
pos
q
1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos1
.
y
-=
floor
((
pos1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos
q
1
.
z
-=
floor
((
pos
q
1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
pos1
.
z
-=
floor
((
pos1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
q
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
q
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
q
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
...
@@ -301,8 +303,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -301,8 +303,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
1410ab35
typedef
struct
{
typedef
struct
{
real
4
pos
q
;
real
3
pos
;
real
value
,
temp
;
real
value
;
ATOM_PARAMETER_DATA
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
#ifdef NEED_PADDING
float
padding
;
float
padding
;
...
@@ -35,7 +35,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -35,7 +35,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
const
unsigned
int
y
=
tileIndices
.
y
;
const
unsigned
int
y
=
tileIndices
.
y
;
real
value
=
0
;
real
value
=
0
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
unsigned
int
excl
=
exclusions
[
pos
*
TILE_SIZE
+
tgx
];
unsigned
int
excl
=
exclusions
[
pos
*
TILE_SIZE
+
tgx
];
...
@@ -44,12 +44,12 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -44,12 +44,12 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// This tile is on the diagonal.
// This tile is on the diagonal.
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
localData
[
localAtomIndex
].
pos
q
=
posq1
;
localData
[
localAtomIndex
].
pos
=
make_real3
(
pos1
.
x
,
pos1
.
y
,
pos1
.
z
)
;
LOAD_LOCAL_PARAMETERS_FROM_1
LOAD_LOCAL_PARAMETERS_FROM_1
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
j
;
int
atom2
=
tbx
+
j
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
@@ -87,7 +87,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -87,7 +87,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
value
=
0
;
localData
[
localAtomIndex
].
value
=
0
;
#ifdef USE_EXCLUSIONS
#ifdef USE_EXCLUSIONS
...
@@ -96,8 +97,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -96,8 +97,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
@@ -207,7 +208,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -207,7 +208,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Load atom data for this tile.
// Load atom data for this tile.
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
LOAD_ATOM1_PARAMETERS
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
...
@@ -217,7 +218,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -217,7 +218,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#endif
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
atomIndices
[
threadIdx
.
x
]
=
j
;
if
(
j
<
PADDED_NUM_ATOMS
)
{
if
(
j
<
PADDED_NUM_ATOMS
)
{
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
value
=
0
;
localData
[
localAtomIndex
].
value
=
0
;
}
}
...
@@ -227,17 +229,17 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -227,17 +229,17 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// box, then skip having to apply periodic boundary conditions later.
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
real4
blockCenterX
=
blockCenter
[
x
];
pos
q
1
.
x
-=
floor
((
pos
q
1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos1
.
x
-=
floor
((
pos1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos
q
1
.
y
-=
floor
((
pos
q
1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos1
.
y
-=
floor
((
pos1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos
q
1
.
z
-=
floor
((
pos
q
1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
pos1
.
z
-=
floor
((
pos1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
q
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
q
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
q
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
@@ -263,8 +265,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -263,8 +265,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment