Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a0769389
Commit
a0769389
authored
Oct 27, 2014
by
peastman
Browse files
Reduced share memory for CustomGBForce
parent
5b591ab0
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
46 additions
and
42 deletions
+46
-42
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+22
-20
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+23
-21
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
a0769389
...
...
@@ -3034,7 +3034,7 @@ void CudaCalcCustomGBForceKernel::initialize(const System& system, const CustomG
pairEnergyDefines
[
"USE_PERIODIC"
]
=
"1"
;
if
(
anyExclusions
)
pairEnergyDefines
[
"USE_EXCLUSIONS"
]
=
"1"
;
if
(
atomParamSize
%
2
=
=
0
&&
!
cu
.
getUseDoublePrecision
())
if
(
atomParamSize
%
2
!
=
0
&&
!
cu
.
getUseDoublePrecision
())
pairEnergyDefines
[
"NEED_PADDING"
]
=
"1"
;
pairEnergyDefines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
pairEnergyDefines
[
"WARPS_PER_GROUP"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
()
/
CudaContext
::
TileSize
);
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
a0769389
...
...
@@ -2,7 +2,7 @@
#define STORE_DERIVATIVE_2(INDEX) atomicAdd(&derivBuffers[offset+(INDEX-1)*PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (localData[threadIdx.x].deriv##INDEX*0x100000000)));
typedef
struct
{
real
4
pos
q
;
real
3
pos
;
real3
force
;
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
...
...
@@ -40,7 +40,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
real3
force
=
make_real3
(
0
);
DECLARE_ATOM1_DERIVATIVES
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
unsigned
int
excl
=
exclusions
[
pos
*
TILE_SIZE
+
tgx
];
...
...
@@ -49,12 +49,12 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// This tile is on the diagonal.
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
localData
[
localAtomIndex
].
pos
q
=
posq1
;
localData
[
localAtomIndex
].
pos
=
make_real3
(
pos1
.
x
,
pos1
.
y
,
pos1
.
z
)
;
LOAD_LOCAL_PARAMETERS_FROM_1
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
j
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
@@ -95,7 +95,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
force
=
make_real3
(
0
);
CLEAR_LOCAL_DERIVATIVES
...
...
@@ -105,8 +106,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
@@ -231,7 +232,7 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// Load atom data for this tile.
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
#ifdef USE_CUTOFF
...
...
@@ -241,7 +242,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
if
(
j
<
PADDED_NUM_ATOMS
)
{
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
force
=
make_real3
(
0
);
CLEAR_LOCAL_DERIVATIVES
...
...
@@ -252,17 +254,17 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
pos
q
1
.
x
-=
floor
((
pos
q
1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos
q
1
.
y
-=
floor
((
pos
q
1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos
q
1
.
z
-=
floor
((
pos
q
1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
q
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
q
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
q
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
pos1
.
x
-=
floor
((
pos1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos1
.
y
-=
floor
((
pos1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos1
.
z
-=
floor
((
pos1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
r2
<
CUTOFF_SQUARED
)
{
...
...
@@ -301,8 +303,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
a0769389
typedef
struct
{
real
4
pos
q
;
real
value
,
temp
;
real
3
pos
;
real
value
;
ATOM_PARAMETER_DATA
#ifdef NEED_PADDING
float
padding
;
...
...
@@ -35,7 +35,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
const
unsigned
int
y
=
tileIndices
.
y
;
real
value
=
0
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
#ifdef USE_EXCLUSIONS
unsigned
int
excl
=
exclusions
[
pos
*
TILE_SIZE
+
tgx
];
...
...
@@ -44,12 +44,12 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// This tile is on the diagonal.
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
localData
[
localAtomIndex
].
pos
q
=
posq1
;
localData
[
localAtomIndex
].
pos
=
make_real3
(
pos1
.
x
,
pos1
.
y
,
pos1
.
z
)
;
LOAD_LOCAL_PARAMETERS_FROM_1
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
j
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
@@ -87,7 +87,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
unsigned
int
j
=
y
*
TILE_SIZE
+
tgx
;
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
value
=
0
;
#ifdef USE_EXCLUSIONS
...
...
@@ -96,8 +97,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
unsigned
int
tj
=
tgx
;
for
(
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
@@ -207,7 +208,7 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// Load atom data for this tile.
real4
pos
q
1
=
posq
[
atom1
];
real4
pos1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
const
unsigned
int
localAtomIndex
=
threadIdx
.
x
;
#ifdef USE_CUTOFF
...
...
@@ -217,7 +218,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#endif
atomIndices
[
threadIdx
.
x
]
=
j
;
if
(
j
<
PADDED_NUM_ATOMS
)
{
localData
[
localAtomIndex
].
posq
=
posq
[
j
];
real4
tempPosq
=
posq
[
j
];
localData
[
localAtomIndex
].
pos
=
make_real3
(
tempPosq
.
x
,
tempPosq
.
y
,
tempPosq
.
z
);
LOAD_LOCAL_PARAMETERS_FROM_GLOBAL
localData
[
localAtomIndex
].
value
=
0
;
}
...
...
@@ -227,17 +229,17 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
// box, then skip having to apply periodic boundary conditions later.
real4
blockCenterX
=
blockCenter
[
x
];
pos
q
1
.
x
-=
floor
((
pos
q
1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos
q
1
.
y
-=
floor
((
pos
q
1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos
q
1
.
z
-=
floor
((
pos
q
1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
q
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
q
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
q
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
q
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
pos1
.
x
-=
floor
((
pos1
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
pos1
.
y
-=
floor
((
pos1
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
pos1
.
z
-=
floor
((
pos1
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
localData
[
threadIdx
.
x
].
pos
.
x
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
x
-
blockCenterX
.
x
)
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
localData
[
threadIdx
.
x
].
pos
.
y
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
y
-
blockCenterX
.
y
)
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
localData
[
threadIdx
.
x
].
pos
.
z
-=
floor
((
localData
[
threadIdx
.
x
].
pos
.
z
-
blockCenterX
.
z
)
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
if
(
r2
<
CUTOFF_SQUARED
)
{
real
invR
=
RSQRT
(
r2
);
...
...
@@ -263,8 +265,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
unsigned
int
tj
=
tgx
;
for
(
unsigned
int
j
=
0
;
j
<
TILE_SIZE
;
j
++
)
{
int
atom2
=
tbx
+
tj
;
real
4
pos
q
2
=
localData
[
atom2
].
pos
q
;
real3
delta
=
make_real3
(
pos
q
2
.
x
-
pos
q
1
.
x
,
pos
q
2
.
y
-
pos
q
1
.
y
,
pos
q
2
.
z
-
pos
q
1
.
z
);
real
3
pos2
=
localData
[
atom2
].
pos
;
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment