Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
e04283fe
Commit
e04283fe
authored
Mar 10, 2009
by
Peter Eastman
Browse files
Implemented finer grained neighbor list. Also various minor optimizations.
parent
56d7ed02
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
816 additions
and
221 deletions
+816
-221
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+2
-2
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+8
-4
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+129
-31
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
+14
-8
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+168
-44
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
+4
-4
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+152
-49
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
+4
-5
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
+219
-73
platforms/cuda/src/kernels/kFindInteractingBlocks.h
platforms/cuda/src/kernels/kFindInteractingBlocks.h
+85
-0
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+30
-0
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
e04283fe
...
@@ -47,7 +47,7 @@ using namespace std;
...
@@ -47,7 +47,7 @@ using namespace std;
static
void
calcForces
(
OpenMMContextImpl
&
context
,
CudaPlatform
::
PlatformData
&
data
)
{
static
void
calcForces
(
OpenMMContextImpl
&
context
,
CudaPlatform
::
PlatformData
&
data
)
{
_gpuContext
*
gpu
=
data
.
gpu
;
_gpuContext
*
gpu
=
data
.
gpu
;
if
(
data
.
stepCount
%
100
==
0
)
if
(
data
.
nonbondedMethod
!=
NO_CUTOFF
&&
data
.
stepCount
%
100
==
0
)
gpuReorderAtoms
(
gpu
);
gpuReorderAtoms
(
gpu
);
data
.
stepCount
++
;
data
.
stepCount
++
;
kClearForces
(
gpu
);
kClearForces
(
gpu
);
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
e04283fe
...
@@ -3127,10 +3127,10 @@ void gpuReorderAtoms(gpuContext gpu)
...
@@ -3127,10 +3127,10 @@ void gpuReorderAtoms(gpuContext gpu)
// Select a bin for each molecule, then sort them by bin.
// Select a bin for each molecule, then sort them by bin.
bool
useHilbert
=
(
numMolecules
>
5000
);
// For small systems, a simple zigzag curve works better than a Hilbert curve.
bool
useHilbert
=
(
numMolecules
>
5000
||
atoms
.
size
()
>
8
);
// For small systems, a simple zigzag curve works better than a Hilbert curve.
float
binWidth
;
float
binWidth
;
if
(
useHilbert
)
if
(
useHilbert
)
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
;
binWidth
=
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
;
else
else
binWidth
=
0.2
*
sqrt
(
gpu
->
sim
.
nonbondedCutoffSqr
);
binWidth
=
0.2
*
sqrt
(
gpu
->
sim
.
nonbondedCutoffSqr
);
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
/
binWidth
);
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
/
binWidth
);
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
e04283fe
...
@@ -140,12 +140,14 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -140,12 +140,14 @@ void kCalculateCDLJForces(gpuContext gpu)
}
}
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateCDLJCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
LAUNCHERROR
(
"kCalculateCDLJCutoffForces"
);
LAUNCHERROR
(
"kCalculateCDLJCutoffForces"
);
break
;
break
;
case
PERIODIC
:
case
PERIODIC
:
...
@@ -162,12 +164,14 @@ void kCalculateCDLJForces(gpuContext gpu)
...
@@ -162,12 +164,14 @@ void kCalculateCDLJForces(gpuContext gpu)
}
}
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJPeriodicByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJPeriodicByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateCDLJPeriodicForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJPeriodicForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
LAUNCHERROR
(
"kCalculateCDLJPeriodicForces"
);
LAUNCHERROR
(
"kCalculateCDLJPeriodicForces"
);
}
}
}
}
\ No newline at end of file
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
e04283fe
...
@@ -42,6 +42,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -42,6 +42,9 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
int
lasty
=
-
1
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
while
(
pos
<
end
)
...
@@ -210,47 +213,142 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
...
@@ -210,47 +213,142 @@ __global__ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUni
apos
.
w
*=
cSim
.
epsfac
;
apos
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
if
(
!
bExclusionFlag
)
{
{
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
// No interactions in this block.
dy
=
psA
[
tj
].
y
-
apos
.
y
;
}
dz
=
psA
[
tj
].
z
-
apos
.
z
;
else
if
(
flags
==
0xFFFFFFFF
)
#endif
{
// Compute all interactions within this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
invR
=
1
.
0
f
/
sqrt
(
r2
);
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig
=
a
.
x
+
psA
[
tj
].
sig
;
sig2
=
invR
*
sig
;
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
sig6
=
sig2
*
sig2
*
sig2
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
eps
=
a
.
y
*
psA
[
tj
].
eps
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
#endif
dEdR
*=
invR
*
invR
;
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
{
dEdR
=
0
.
0
f
;
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
#endif
dx
*=
dEdR
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
dy
*=
dEdR
;
invR
=
1
.
0
f
/
sqrt
(
r2
);
dz
*=
dEdR
;
sig
=
a
.
x
+
psA
[
j
].
sig
;
af
.
x
-=
dx
;
sig2
=
invR
*
sig
;
af
.
y
-=
dy
;
sig2
*=
sig2
;
af
.
z
-=
dz
;
sig6
=
sig2
*
sig2
*
sig2
;
psA
[
tj
].
fx
+=
dx
;
eps
=
a
.
y
*
psA
[
j
].
eps
;
psA
[
tj
].
fy
+=
dy
;
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
psA
[
tj
].
fz
+=
dz
;
#ifdef USE_CUTOFF
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tempBuffer
[
threadIdx
.
x
].
x
=
dx
;
tempBuffer
[
threadIdx
.
x
].
y
=
dy
;
tempBuffer
[
threadIdx
.
x
].
z
=
dz
;
// Sum the forces on atom j.
if
(
tgx
%
2
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
1
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
1
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
1
].
z
;
}
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
2
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
2
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
2
].
z
;
}
if
(
tgx
%
8
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
4
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
4
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
4
].
z
;
}
if
(
tgx
%
16
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
8
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
8
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
8
].
z
;
}
if
(
tgx
==
0
)
{
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
].
x
+
tempBuffer
[
threadIdx
.
x
+
16
].
x
;
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
].
y
+
tempBuffer
[
threadIdx
.
x
+
16
].
y
;
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
].
z
+
tempBuffer
[
threadIdx
.
x
+
16
].
z
;
}
}
}
}
}
#endif
}
}
else
// bExclusion
else
// bExclusion
{
{
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
View file @
e04283fe
...
@@ -106,6 +106,8 @@ extern __global__ void kFindBlockBoundsCutoff_kernel();
...
@@ -106,6 +106,8 @@ extern __global__ void kFindBlockBoundsCutoff_kernel();
extern
__global__
void
kFindBlockBoundsPeriodic_kernel
();
extern
__global__
void
kFindBlockBoundsPeriodic_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
,
int
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
,
int
);
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
{
{
...
@@ -145,19 +147,21 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
...
@@ -145,19 +147,21 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
exit
(
-
1
);
exit
(
-
1
);
}
}
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
if
(
gpu
->
bRecalculateBornRadii
)
if
(
gpu
->
bRecalculateBornRadii
)
{
{
kCalculateObcGbsaBornSum
(
gpu
);
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
}
}
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateCDLJObcGbsaCutoffForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJObcGbsaCutoffForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
LAUNCHERROR
(
"kCalculateCDLJCutoffForces"
);
LAUNCHERROR
(
"kCalculateCDLJ
ObcGbsa
CutoffForces
1
"
);
break
;
break
;
case
PERIODIC
:
case
PERIODIC
:
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
kFindBlockBoundsPeriodic_kernel
<<<
(
gpu
->
psGridBoundingBox
->
_length
+
63
)
/
64
,
64
>>>
();
...
@@ -172,19 +176,21 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
...
@@ -172,19 +176,21 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
exit
(
-
1
);
exit
(
-
1
);
}
}
gpu
->
psInteractionCount
->
Download
();
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
if
(
gpu
->
bRecalculateBornRadii
)
if
(
gpu
->
bRecalculateBornRadii
)
{
{
kCalculateObcGbsaBornSum
(
gpu
);
kCalculateObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
kReduceObcGbsaBornSum
(
gpu
);
}
}
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateCDLJObcGbsaPeriodicForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateCDLJObcGbsaPeriodicForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
LAUNCHERROR
(
"kCalculateCDLJPeriodicForces"
);
LAUNCHERROR
(
"kCalculateCDLJ
ObcGbsa
PeriodicForces
1
"
);
break
;
break
;
}
}
}
}
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
e04283fe
...
@@ -42,6 +42,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -42,6 +42,9 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
int
lasty
=
-
1
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
while
(
pos
<
end
)
...
@@ -241,63 +244,184 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
...
@@ -241,63 +244,184 @@ __global__ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int*
apos
.
w
*=
cSim
.
epsfac
;
apos
.
w
*=
cSim
.
epsfac
;
if
(
!
bExclusionFlag
)
if
(
!
bExclusionFlag
)
{
{
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
// No interactions in this block.
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
}
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
else
if
(
flags
==
0xFFFFFFFF
)
#endif
{
// Compute all interactions within this block.
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
// CDLJ part
// CDLJ part
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
float
sig
=
a
.
x
+
psA
[
tj
].
sig
;
float
sig
=
a
.
x
+
psA
[
tj
].
sig
;
float
sig2
=
invR
*
sig
;
float
sig2
=
invR
*
sig
;
sig2
*=
sig2
;
sig2
*=
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
sig6
=
sig2
*
sig2
*
sig2
;
float
eps
=
a
.
y
*
psA
[
tj
].
eps
;
float
eps
=
a
.
y
*
psA
[
tj
].
eps
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
#else
#else
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
dEdR
+=
apos
.
w
*
psA
[
tj
].
q
*
invR
;
#endif
#endif
dEdR
*=
invR
*
invR
;
dEdR
*=
invR
*
invR
;
// ObcGbsaForce1 part
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
alpha2_ij
=
br
*
psA
[
tj
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
Gpol
=
(
q2
*
psA
[
tj
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
tj
].
br
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
psA
[
tj
].
fb
+=
dGpol_dalpha2_ij
*
br
;
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
{
dEdR
=
0
.
0
f
;
dEdR
=
0
.
0
f
;
}
#endif
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
// Add forces
// CDLJ part
dx
*=
dEdR
;
float
invR
=
1
.
0
f
/
sqrt
(
r2
);
dy
*=
dEdR
;
float
sig
=
a
.
x
+
psA
[
j
].
sig
;
dz
*=
dEdR
;
float
sig2
=
invR
*
sig
;
af
.
x
-=
dx
;
sig2
*=
sig2
;
af
.
y
-=
dy
;
float
sig6
=
sig2
*
sig2
*
sig2
;
af
.
z
-=
dz
;
float
eps
=
a
.
y
*
psA
[
j
].
eps
;
psA
[
tj
].
fx
+=
dx
;
float
dEdR
=
eps
*
(
12
.
0
f
*
sig6
-
6
.
0
f
)
*
sig6
;
psA
[
tj
].
fy
+=
dy
;
#ifdef USE_CUTOFF
psA
[
tj
].
fz
+=
dz
;
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
(
invR
-
2
.
0
f
*
cSim
.
reactionFieldK
*
r2
);
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
#else
dEdR
+=
apos
.
w
*
psA
[
j
].
q
*
invR
;
#endif
dEdR
*=
invR
*
invR
;
// ObcGbsaForce1 part
float
alpha2_ij
=
br
*
psA
[
j
].
br
;
float
D_ij
=
r2
/
(
4
.
0
f
*
alpha2_ij
);
float
expTerm
=
exp
(
-
D_ij
);
float
denominator2
=
r2
+
alpha2_ij
*
expTerm
;
float
denominator
=
sqrt
(
denominator2
);
float
Gpol
=
(
q2
*
psA
[
j
].
q
)
/
(
denominator
*
denominator2
);
float
dGpol_dalpha2_ij
=
-
0
.
5
f
*
Gpol
*
expTerm
*
(
1
.
0
f
+
D_ij
);
af
.
w
+=
dGpol_dalpha2_ij
*
psA
[
j
].
br
;
dEdR
+=
Gpol
*
(
1
.
0
f
-
0
.
25
f
*
expTerm
);
// Sum the Born forces.
tempBuffer
[
threadIdx
.
x
]
=
dGpol_dalpha2_ij
*
br
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fb
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
#ifdef USE_CUTOFF
if
(
r2
>
cSim
.
nonbondedCutoffSqr
)
{
dEdR
=
0
.
0
f
;
}
#endif
// Add forces
dx
*=
dEdR
;
dy
*=
dEdR
;
dz
*=
dEdR
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tempBuffer
[
threadIdx
.
x
]
=
dx
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
tempBuffer
[
threadIdx
.
x
]
=
dy
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
tempBuffer
[
threadIdx
.
x
]
=
dz
;
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
}
}
}
}
#endif
}
}
else
// bExclusion
else
// bExclusion
{
{
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
View file @
e04283fe
...
@@ -187,19 +187,19 @@ void kCalculateObcGbsaBornSum(gpuContext gpu)
...
@@ -187,19 +187,19 @@ void kCalculateObcGbsaBornSum(gpuContext gpu)
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaCutoffByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateObcGbsaCutoffByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateObcGbsaCutoffBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateObcGbsaCutoffBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
break
;
break
;
case
PERIODIC
:
case
PERIODIC
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaPeriodicByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateObcGbsaPeriodicByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateObcGbsaPeriodicBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
kCalculateObcGbsaPeriodicBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
break
;
break
;
}
}
LAUNCHERROR
(
"kCalculateBornSum"
);
LAUNCHERROR
(
"kCalculateBornSum"
);
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
View file @
e04283fe
...
@@ -43,6 +43,9 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
...
@@ -43,6 +43,9 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
#ifdef USE_OUTPUT_BUFFER_PER_WARP
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
#endif
#endif
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
while
(
pos
>=
0
)
while
(
pos
>=
0
)
{
{
...
@@ -107,8 +110,8 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
...
@@ -107,8 +110,8 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
(
l_ij2
-
u_ij2
);
float
rj
=
psA
[
j
].
r
;
if
(
ar
.
x
<
(
psA
[
j
].
r
-
r
))
if
(
ar
.
x
<
(
r
j
-
r
))
{
{
apos
.
w
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
apos
.
w
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
}
...
@@ -142,69 +145,169 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
...
@@ -142,69 +145,169 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sum
=
apos
.
w
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
sum
=
apos
.
w
=
0
.
0
f
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
+
(
blockIdx
.
x
*
workUnits
)
/
gridDim
.
x
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
if
(
flags
==
0xFFFFFFFF
)
#endif
{
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
// Compute all interactions within this block.
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
dx
=
psA
[
tj
].
x
-
apos
.
x
;
dy
=
psA
[
tj
].
y
-
apos
.
y
;
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
if
(
i
<
cSim
.
atoms
&&
y
+
tj
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
#elif defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#endif
#endif
{
r
=
sqrt
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
if
(
ar
.
x
<
rScaledRadiusJ
)
{
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
r
=
sqrt
(
r2
);
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
rInverse
=
1
.
0
f
/
r
;
float
l_ij2
=
l_ij
*
l_ij
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
float
u_ij2
=
u_ij
*
u_ij
;
if
(
ar
.
x
<
rScaledRadiusJ
)
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
tj
].
sr
*
psA
[
tj
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
ar
.
x
<
(
psA
[
tj
].
sr
-
r
))
{
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
tj
].
sr
*
psA
[
tj
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
srj
=
psA
[
tj
].
sr
;
if
(
ar
.
x
<
(
srj
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
apos
.
w
+=
term
;
}
float
rScaledRadiusI
=
r
+
ar
.
y
;
if
(
psA
[
tj
].
r
<
rScaledRadiusI
)
{
float
l_ij
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
ar
.
y
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
ar
.
y
*
ar
.
y
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
rj
=
psA
[
tj
].
r
;
if
(
rj
<
(
ar
.
y
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
psA
[
tj
].
r
)
-
l_ij
);
}
psA
[
tj
].
sum
+=
term
;
}
}
apos
.
w
+=
term
;
}
}
float
rScaledRadiusI
=
r
+
ar
.
y
;
tj
=
(
tj
-
1
)
&
(
GRID
-
1
);
if
(
psA
[
tj
].
r
<
rScaledRadiusI
)
}
{
}
float
l_ij
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
ar
.
y
));
#ifdef USE_CUTOFF
float
u_ij
=
1
.
0
f
/
rScaledRadiusI
;
else
float
l_ij2
=
l_ij
*
l_ij
;
{
float
u_ij2
=
u_ij
*
u_ij
;
// Compute only a subset of the interactions in this block.
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
ar
.
y
*
ar
.
y
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
if
(
psA
[
tj
].
r
<
(
ar
.
y
-
r
))
for
(
unsigned
int
j
=
0
;
j
<
GRID
;
j
++
)
{
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
tempBuffer
[
threadIdx
.
x
]
=
0
.
0
f
;
dx
=
psA
[
j
].
x
-
apos
.
x
;
dy
=
psA
[
j
].
y
-
apos
.
y
;
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
#ifdef USE_PERIODIC
if
(
i
<
cSim
.
atoms
&&
y
+
j
<
cSim
.
atoms
&&
r2
<
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
r2
<
cSim
.
nonbondedCutoffSqr
)
#endif
{
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
psA
[
tj
].
r
)
-
l_ij
);
r
=
sqrt
(
r2
);
float
rInverse
=
1
.
0
f
/
r
;
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
if
(
ar
.
x
<
rScaledRadiusJ
)
{
float
l_ij
=
1
.
0
f
/
max
(
ar
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusJ
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
psA
[
j
].
sr
*
psA
[
j
].
sr
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
srj
=
psA
[
j
].
sr
;
if
(
ar
.
x
<
(
srj
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
ar
.
x
)
-
l_ij
);
}
apos
.
w
+=
term
;
}
float
rScaledRadiusI
=
r
+
ar
.
y
;
if
(
psA
[
j
].
r
<
rScaledRadiusI
)
{
float
l_ij
=
1
.
0
f
/
max
(
psA
[
j
].
r
,
fabs
(
r
-
ar
.
y
));
float
u_ij
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2
=
l_ij
*
l_ij
;
float
u_ij2
=
u_ij
*
u_ij
;
float
ratio
=
log
(
u_ij
/
l_ij
);
float
term
=
l_ij
-
u_ij
+
0
.
25
f
*
r
*
(
u_ij2
-
l_ij2
)
+
(
0
.
50
f
*
rInverse
*
ratio
)
+
(
0
.
25
f
*
ar
.
y
*
ar
.
y
*
rInverse
)
*
(
l_ij2
-
u_ij2
);
float
rj
=
psA
[
j
].
r
;
if
(
rj
<
(
ar
.
y
-
r
))
{
term
+=
2
.
0
f
*
((
1
.
0
f
/
psA
[
j
].
r
)
-
l_ij
);
}
tempBuffer
[
threadIdx
.
x
]
=
term
;
}
}
}
psA
[
tj
].
sum
+=
term
;
// Sum the terms.
if
(
tgx
%
2
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
1
];
if
(
tgx
%
4
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
2
];
if
(
tgx
%
8
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
4
];
if
(
tgx
%
16
==
0
)
tempBuffer
[
threadIdx
.
x
]
+=
tempBuffer
[
threadIdx
.
x
+
8
];
if
(
tgx
==
0
)
psA
[
j
].
sum
+=
tempBuffer
[
threadIdx
.
x
]
+
tempBuffer
[
threadIdx
.
x
+
16
];
}
}
}
}
tj
=
(
tj
-
1
)
&
(
GRID
-
1
);
}
}
#endif
// Write results
// Write results
#ifdef USE_OUTPUT_BUFFER_PER_WARP
#ifdef USE_OUTPUT_BUFFER_PER_WARP
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
View file @
e04283fe
...
@@ -47,7 +47,6 @@ struct Atom {
...
@@ -47,7 +47,6 @@ struct Atom {
float
z
;
float
z
;
float
r
;
float
r
;
float
sr
;
float
sr
;
float
sr2
;
float
fx
;
float
fx
;
float
fy
;
float
fy
;
float
fz
;
float
fz
;
...
@@ -123,19 +122,19 @@ void kCalculateObcGbsaForces2(gpuContext gpu)
...
@@ -123,19 +122,19 @@ void kCalculateObcGbsaForces2(gpuContext gpu)
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaCutoffByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
kCalculateObcGbsaCutoffByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateObcGbsaCutoffForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
kCalculateObcGbsaCutoffForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
break
;
break
;
case
PERIODIC
:
case
PERIODIC
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaPeriodicByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
kCalculateObcGbsaPeriodicByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
else
else
kCalculateObcGbsaPeriodicForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
kCalculateObcGbsaPeriodicForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
break
;
break
;
}
}
LAUNCHERROR
(
"kCalculateObcGbsaForces2"
);
LAUNCHERROR
(
"kCalculateObcGbsaForces2"
);
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
View file @
e04283fe
...
@@ -38,10 +38,13 @@
...
@@ -38,10 +38,13 @@
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond
_blocks
*
cSim
.
nonbond
_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
bornForce2
_blocks
*
cSim
.
bornForce2
_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
#endif
int
lasty
=
-
1
;
int
lasty
=
-
1
;
while
(
pos
<
end
)
while
(
pos
<
end
)
...
@@ -72,7 +75,6 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* wor
...
@@ -72,7 +75,6 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* wor
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
z
=
apos
.
z
;
sA
[
threadIdx
.
x
].
r
=
a
.
x
;
sA
[
threadIdx
.
x
].
r
=
a
.
x
;
sA
[
threadIdx
.
x
].
sr
=
a
.
y
;
sA
[
threadIdx
.
x
].
sr
=
a
.
y
;
sA
[
threadIdx
.
x
].
sr2
=
a
.
y
*
a
.
y
;
sA
[
threadIdx
.
x
].
fb
=
fb
;
sA
[
threadIdx
.
x
].
fb
=
fb
;
for
(
unsigned
int
j
=
(
tgx
+
1
)
&
(
GRID
-
1
);
j
!=
tgx
;
j
=
(
j
+
1
)
&
(
GRID
-
1
))
for
(
unsigned
int
j
=
(
tgx
+
1
)
&
(
GRID
-
1
);
j
!=
tgx
;
j
=
(
j
+
1
)
&
(
GRID
-
1
))
...
@@ -105,7 +107,7 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* wor
...
@@ -105,7 +107,7 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* wor
// Born Forces term
// Born Forces term
float
term
=
0
.
125
f
*
float
term
=
0
.
125
f
*
(
1
.
000
f
+
psA
[
j
].
sr
2
*
r2Inverse
)
*
t3
+
(
1
.
000
f
+
psA
[
j
].
sr
*
psA
[
j
].
sr
*
r2Inverse
)
*
t3
+
0
.
250
f
*
t1
*
r2Inverse
;
0
.
250
f
*
t1
*
r2Inverse
;
float
dE
=
fb
*
term
;
float
dE
=
fb
*
term
;
...
@@ -162,98 +164,242 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* wor
...
@@ -162,98 +164,242 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* wor
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
z
=
temp
.
z
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
r
=
temp1
.
x
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sr
=
temp1
.
y
;
sA
[
threadIdx
.
x
].
sr2
=
temp1
.
y
*
temp1
.
y
;
}
}
float
sr2
=
a
.
y
*
a
.
y
;
float
sr2
=
a
.
y
*
a
.
y
;
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
];
if
(
flags
==
0
)
{
// No interactions in this block.
}
else
if
(
flags
==
0xFFFFFFFF
)
#endif
{
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
// Compute all interactions within this block.
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
float
dx
=
psA
[
tj
].
x
-
apos
.
x
;
float
dy
=
psA
[
tj
].
y
-
apos
.
y
;
float
dz
=
psA
[
tj
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
float
r
=
sqrt
(
r2
);
// Interleaved Atom I and J Born Forces and sum components
// Interleaved Atom I and J Born Forces and sum components
float
r2Inverse
=
1
.
0
f
/
r2
;
float
r2Inverse
=
1
.
0
f
/
r2
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
float
rScaledRadiusJ
=
r
+
psA
[
tj
].
sr
;
float
rScaledRadiusI
=
r
+
a
.
y
;
float
rScaledRadiusI
=
r
+
a
.
y
;
float
rInverse
=
1
.
0
f
/
r
;
float
rInverse
=
1
.
0
f
/
r
;
float
l_ijJ
=
1
.
0
f
/
max
(
a
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
l_ijJ
=
1
.
0
f
/
max
(
a
.
x
,
fabs
(
r
-
psA
[
tj
].
sr
));
float
l_ijI
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
a
.
y
));
float
l_ijI
=
1
.
0
f
/
max
(
psA
[
tj
].
r
,
fabs
(
r
-
a
.
y
));
float
u_ijJ
=
1
.
0
f
/
rScaledRadiusJ
;
float
u_ijJ
=
1
.
0
f
/
rScaledRadiusJ
;
float
u_ijI
=
1
.
0
f
/
rScaledRadiusI
;
float
u_ijI
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2J
=
l_ijJ
*
l_ijJ
;
float
l_ij2J
=
l_ijJ
*
l_ijJ
;
float
l_ij2I
=
l_ijI
*
l_ijI
;
float
l_ij2I
=
l_ijI
*
l_ijI
;
float
u_ij2J
=
u_ijJ
*
u_ijJ
;
float
u_ij2J
=
u_ijJ
*
u_ijJ
;
float
u_ij2I
=
u_ijI
*
u_ijI
;
float
u_ij2I
=
u_ijI
*
u_ijI
;
float
t1J
=
log
(
u_ijJ
/
l_ijJ
);
float
t1J
=
log
(
u_ijJ
/
l_ijJ
);
float
t1I
=
log
(
u_ijI
/
l_ijI
);
float
t1I
=
log
(
u_ijI
/
l_ijI
);
float
t2J
=
(
l_ij2J
-
u_ij2J
);
float
t2J
=
(
l_ij2J
-
u_ij2J
);
float
t2I
=
(
l_ij2I
-
u_ij2I
);
float
t2I
=
(
l_ij2I
-
u_ij2I
);
float
t3J
=
t2J
*
rInverse
;
float
t3J
=
t2J
*
rInverse
;
float
t3I
=
t2I
*
rInverse
;
float
t3I
=
t2I
*
rInverse
;
t1J
*=
rInverse
;
t1J
*=
rInverse
;
t1I
*=
rInverse
;
t1I
*=
rInverse
;
// Born Forces term
// Born Forces term
float
term
=
0
.
125
f
*
float
term
=
0
.
125
f
*
(
1
.
000
f
+
psA
[
tj
].
sr
2
*
r2Inverse
)
*
t3J
+
(
1
.
000
f
+
psA
[
tj
].
sr
*
psA
[
tj
].
sr
*
r2Inverse
)
*
t3J
+
0
.
250
f
*
t1J
*
r2Inverse
;
0
.
250
f
*
t1J
*
r2Inverse
;
float
dE
=
fb
*
term
;
float
dE
=
fb
*
term
;
#if defined USE_PERIODIC
#if defined USE_PERIODIC
if
(
a
.
x
>=
rScaledRadiusJ
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
a
.
x
>=
rScaledRadiusJ
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
#elif defined USE_CUTOFF
if
(
a
.
x
>=
rScaledRadiusJ
||
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
a
.
x
>=
rScaledRadiusJ
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
#else
if
(
a
.
x
>=
rScaledRadiusJ
)
if
(
a
.
x
>=
rScaledRadiusJ
)
#endif
#endif
{
{
dE
=
0
.
0
f
;
dE
=
0
.
0
f
;
}
}
float
d
=
dx
*
dE
;
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
af
.
x
-=
d
;
psA
[
tj
].
fx
+=
d
;
psA
[
tj
].
fx
+=
d
;
d
=
dy
*
dE
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
af
.
y
-=
d
;
psA
[
tj
].
fy
+=
d
;
psA
[
tj
].
fy
+=
d
;
d
=
dz
*
dE
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
af
.
z
-=
d
;
psA
[
tj
].
fz
+=
d
;
psA
[
tj
].
fz
+=
d
;
// Atom J Born sum term
// Atom J Born sum term
term
=
0
.
125
f
*
term
=
0
.
125
f
*
(
1
.
000
f
+
sr2
*
r2Inverse
)
*
t3I
+
(
1
.
000
f
+
sr2
*
r2Inverse
)
*
t3I
+
0
.
250
f
*
t1I
*
r2Inverse
;
0
.
250
f
*
t1I
*
r2Inverse
;
dE
=
psA
[
tj
].
fb
*
term
;
dE
=
psA
[
tj
].
fb
*
term
;
float
rj
=
psA
[
tj
].
r
;
#ifdef USE_PERIODIC
#ifdef USE_PERIODIC
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
rj
>=
rScaledRadiusI
||
i
>=
cSim
.
atoms
||
y
+
tj
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
#elif defined USE_CUTOFF
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
||
r2
>
cSim
.
nonbondedCutoffSqr
)
if
(
rj
>=
rScaledRadiusI
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
#else
if
(
psA
[
tj
].
r
>=
rScaledRadiusI
)
if
(
rj
>=
rScaledRadiusI
)
#endif
#endif
{
dE
=
0
.
0
f
;
}
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#ifdef USE_CUTOFF
else
{
// Compute only a subset of the interactions in this block.
for
(
int
j
=
0
;
j
<
GRID
;
j
++
)
{
{
dE
=
0
.
0
f
;
if
((
flags
&
(
1
<<
j
))
!=
0
)
{
float
dx
=
psA
[
j
].
x
-
apos
.
x
;
float
dy
=
psA
[
j
].
y
-
apos
.
y
;
float
dz
=
psA
[
j
].
z
-
apos
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
float
r2
=
dx
*
dx
+
dy
*
dy
+
dz
*
dz
;
float
r
=
sqrt
(
r2
);
// Interleaved Atom I and J Born Forces and sum components
float
r2Inverse
=
1
.
0
f
/
r2
;
float
rScaledRadiusJ
=
r
+
psA
[
j
].
sr
;
float
rScaledRadiusI
=
r
+
a
.
y
;
float
rInverse
=
1
.
0
f
/
r
;
float
l_ijJ
=
1
.
0
f
/
max
(
a
.
x
,
fabs
(
r
-
psA
[
j
].
sr
));
float
l_ijI
=
1
.
0
f
/
max
(
psA
[
j
].
r
,
fabs
(
r
-
a
.
y
));
float
u_ijJ
=
1
.
0
f
/
rScaledRadiusJ
;
float
u_ijI
=
1
.
0
f
/
rScaledRadiusI
;
float
l_ij2J
=
l_ijJ
*
l_ijJ
;
float
l_ij2I
=
l_ijI
*
l_ijI
;
float
u_ij2J
=
u_ijJ
*
u_ijJ
;
float
u_ij2I
=
u_ijI
*
u_ijI
;
float
t1J
=
log
(
u_ijJ
/
l_ijJ
);
float
t1I
=
log
(
u_ijI
/
l_ijI
);
float
t2J
=
(
l_ij2J
-
u_ij2J
);
float
t2I
=
(
l_ij2I
-
u_ij2I
);
float
t3J
=
t2J
*
rInverse
;
float
t3I
=
t2I
*
rInverse
;
t1J
*=
rInverse
;
t1I
*=
rInverse
;
// Born Forces term
float
term
=
0
.
125
f
*
(
1
.
000
f
+
psA
[
j
].
sr
*
psA
[
j
].
sr
*
r2Inverse
)
*
t3J
+
0
.
250
f
*
t1J
*
r2Inverse
;
float
dE
=
fb
*
term
;
#if defined USE_PERIODIC
if
(
a
.
x
>=
rScaledRadiusJ
||
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
a
.
x
>=
rScaledRadiusJ
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
a
.
x
>=
rScaledRadiusJ
)
#endif
{
dE
=
0
.
0
f
;
}
float
d
=
dx
*
dE
;
af
.
x
-=
d
;
tempBuffer
[
threadIdx
.
x
].
x
=
d
;
d
=
dy
*
dE
;
af
.
y
-=
d
;
tempBuffer
[
threadIdx
.
x
].
y
=
d
;
d
=
dz
*
dE
;
af
.
z
-=
d
;
tempBuffer
[
threadIdx
.
x
].
z
=
d
;
// Atom J Born sum term
term
=
0
.
125
f
*
(
1
.
000
f
+
sr2
*
r2Inverse
)
*
t3I
+
0
.
250
f
*
t1I
*
r2Inverse
;
dE
=
psA
[
j
].
fb
*
term
;
float
rj
=
psA
[
j
].
r
;
#ifdef USE_PERIODIC
if
(
rj
>=
rScaledRadiusI
||
i
>=
cSim
.
atoms
||
y
+
j
>=
cSim
.
atoms
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#elif defined USE_CUTOFF
if
(
rj
>=
rScaledRadiusI
||
r2
>
cSim
.
nonbondedCutoffSqr
)
#else
if
(
rj
>=
rScaledRadiusI
)
#endif
{
dE
=
0
.
0
f
;
}
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
tempBuffer
[
threadIdx
.
x
].
x
+=
dx
;
tempBuffer
[
threadIdx
.
x
].
y
+=
dy
;
tempBuffer
[
threadIdx
.
x
].
z
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
// Sum the forces on atom j.
if
(
tgx
%
2
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
1
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
1
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
1
].
z
;
}
if
(
tgx
%
4
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
2
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
2
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
2
].
z
;
}
if
(
tgx
%
8
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
4
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
4
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
4
].
z
;
}
if
(
tgx
%
16
==
0
)
{
tempBuffer
[
threadIdx
.
x
].
x
+=
tempBuffer
[
threadIdx
.
x
+
8
].
x
;
tempBuffer
[
threadIdx
.
x
].
y
+=
tempBuffer
[
threadIdx
.
x
+
8
].
y
;
tempBuffer
[
threadIdx
.
x
].
z
+=
tempBuffer
[
threadIdx
.
x
+
8
].
z
;
}
if
(
tgx
==
0
)
{
psA
[
j
].
fx
+=
tempBuffer
[
threadIdx
.
x
].
x
+
tempBuffer
[
threadIdx
.
x
+
16
].
x
;
psA
[
j
].
fy
+=
tempBuffer
[
threadIdx
.
x
].
y
+
tempBuffer
[
threadIdx
.
x
+
16
].
y
;
psA
[
j
].
fz
+=
tempBuffer
[
threadIdx
.
x
].
z
+
tempBuffer
[
threadIdx
.
x
+
16
].
z
;
}
}
}
}
dx
*=
dE
;
dy
*=
dE
;
dz
*=
dE
;
psA
[
tj
].
fx
+=
dx
;
psA
[
tj
].
fy
+=
dy
;
psA
[
tj
].
fz
+=
dz
;
af
.
x
-=
dx
;
af
.
y
-=
dy
;
af
.
z
-=
dz
;
tj
=
(
tj
+
1
)
&
(
GRID
-
1
);
}
}
#endif
// Write results
// Write results
float4
of
;
float4
of
;
...
...
platforms/cuda/src/kernels/kFindInteractingBlocks.h
View file @
e04283fe
...
@@ -112,3 +112,88 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractions, _kernel)()
...
@@ -112,3 +112,88 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractions, _kernel)()
cSim
.
pInteractionFlag
[
pos
]
=
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cSim
.
nonbondedCutoffSqr
?
0
:
1
);
cSim
.
pInteractionFlag
[
pos
]
=
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cSim
.
nonbondedCutoffSqr
?
0
:
1
);
}
}
}
}
/**
* Compare each atom in one block to the bounding box of another block, and set
* flags for which ones are interacting.
*/
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocks
,
_kernel
)(
unsigned
int
*
workUnit
,
int
numWorkUnits
)
{
extern
__shared__
unsigned
int
flags
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
index
=
threadIdx
.
x
&
(
GRID
-
1
);
int
lasty
=
-
1
;
float4
apos
;
while
(
pos
<
end
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
);
bool
bExclusionFlag
=
(
x
&
0x1
);
x
=
(
x
>>
17
);
if
(
x
==
y
||
bExclusionFlag
)
{
// Assume this block will be dense.
if
(
index
==
0
)
cSim
.
pInteractionFlag
[
pos
]
=
0xFFFFFFFF
;
}
else
{
// Load the bounding box for x and the atom positions for y.
float4
center
=
cSim
.
pGridCenter
[
x
];
float4
boxSize
=
cSim
.
pGridBoundingBox
[
x
];
if
(
y
!=
lasty
)
{
apos
=
cSim
.
pPosq
[(
y
<<
GRIDBITS
)
+
index
];
}
// Find the distance of the atom from the bounding box.
float
dx
=
apos
.
x
-
center
.
x
;
float
dy
=
apos
.
y
-
center
.
y
;
float
dz
=
apos
.
z
-
center
.
z
;
#ifdef USE_PERIODIC
dx
-=
floor
(
dx
/
cSim
.
periodicBoxSizeX
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeX
;
dy
-=
floor
(
dy
/
cSim
.
periodicBoxSizeY
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeY
;
dz
-=
floor
(
dz
/
cSim
.
periodicBoxSizeZ
+
0
.
5
f
)
*
cSim
.
periodicBoxSizeZ
;
#endif
dx
=
max
(
0
.
0
f
,
abs
(
dx
)
-
boxSize
.
x
);
dy
=
max
(
0
.
0
f
,
abs
(
dy
)
-
boxSize
.
y
);
dz
=
max
(
0
.
0
f
,
abs
(
dz
)
-
boxSize
.
z
);
flags
[
threadIdx
.
x
]
=
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cSim
.
nonbondedCutoffSqr
?
0
:
1
<<
index
);
// Sum the flags.
if
(
index
%
2
==
0
)
flags
[
threadIdx
.
x
]
+=
flags
[
threadIdx
.
x
+
1
];
if
(
index
%
4
==
0
)
flags
[
threadIdx
.
x
]
+=
flags
[
threadIdx
.
x
+
2
];
if
(
index
%
8
==
0
)
flags
[
threadIdx
.
x
]
+=
flags
[
threadIdx
.
x
+
4
];
if
(
index
%
16
==
0
)
flags
[
threadIdx
.
x
]
+=
flags
[
threadIdx
.
x
+
8
];
if
(
index
==
0
)
{
unsigned
int
allFlags
=
flags
[
threadIdx
.
x
]
+
flags
[
threadIdx
.
x
+
16
];
// Count how many flags are set, and based on that decide whether to compute all interactions
// or only a fraction of them.
unsigned
int
bits
=
(
allFlags
&
0x55555555
)
+
((
allFlags
>>
1
)
&
0x55555555
);
bits
=
(
bits
&
0x33333333
)
+
((
bits
>>
2
)
&
0x33333333
);
bits
=
(
bits
&
0x0F0F0F0F
)
+
((
bits
>>
4
)
&
0x0F0F0F0F
);
bits
=
(
bits
&
0x00FF00FF
)
+
((
bits
>>
8
)
&
0x00FF00FF
);
bits
=
(
bits
&
0x0000FFFF
)
+
((
bits
>>
16
)
&
0x0000FFFF
);
cSim
.
pInteractionFlag
[
pos
]
=
(
bits
>
12
?
0xFFFFFFFF
:
allFlags
);
}
lasty
=
y
;
}
pos
++
;
}
}
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
e04283fe
...
@@ -497,6 +497,36 @@ void testBlockInteractions(bool periodic) {
...
@@ -497,6 +497,36 @@ void testBlockInteractions(bool periodic) {
dy
=
max
(
0.0
f
,
abs
(
dy
)
-
gridSize1
.
y
-
gridSize2
.
y
);
dy
=
max
(
0.0
f
,
abs
(
dy
)
-
gridSize1
.
y
-
gridSize2
.
y
);
dz
=
max
(
0.0
f
,
abs
(
dz
)
-
gridSize1
.
z
-
gridSize2
.
z
);
dz
=
max
(
0.0
f
,
abs
(
dz
)
-
gridSize1
.
z
-
gridSize2
.
z
);
ASSERT
(
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
)
<
cutoff
+
TOL
);
ASSERT
(
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
)
<
cutoff
+
TOL
);
// Check the interaction flags.
unsigned
int
flags
=
data
.
gpu
->
psInteractionFlag
->
_pSysData
[
i
];
for
(
int
atom2
=
0
;
atom2
<
32
;
atom2
++
)
{
if
((
flags
&
1
)
==
0
)
{
float4
pos2
=
data
.
gpu
->
psPosq4
->
_pSysData
[
y
*
blockSize
+
atom2
];
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
float4
pos1
=
data
.
gpu
->
psPosq4
->
_pSysData
[
x
*
blockSize
+
atom1
];
float
dx
=
pos2
.
x
-
pos1
.
x
;
float
dy
=
pos2
.
y
-
pos1
.
y
;
float
dz
=
pos2
.
z
-
pos1
.
z
;
if
(
periodic
)
{
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
if
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
<
cutoff
*
cutoff
)
{
dx
=
pos2
.
x
-
center1
.
x
;
dy
=
pos2
.
y
-
center1
.
y
;
dz
=
pos2
.
z
-
center1
.
z
;
dx
=
max
(
0.0
f
,
abs
(
dx
)
-
gridSize1
.
x
);
dy
=
max
(
0.0
f
,
abs
(
dy
)
-
gridSize1
.
y
);
dz
=
max
(
0.0
f
,
abs
(
dz
)
-
gridSize1
.
z
);
}
ASSERT
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
);
}
}
flags
>>=
1
;
}
}
}
// Check the tiles that did not have interactions to make sure all atoms are beyond the cutoff.
// Check the tiles that did not have interactions to make sure all atoms are beyond the cutoff.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment