Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
c97d5ee1
Commit
c97d5ee1
authored
Apr 24, 2009
by
Peter Eastman
Browse files
Optimization: eliminated an unnecessary gpu->cpu data transfer on each time step
parent
d63d3fc6
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
48 additions
and
59 deletions
+48
-59
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+2
-0
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
+11
-18
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+2
-1
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
+10
-15
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+2
-1
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
+6
-9
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+5
-4
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
+6
-9
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
+2
-1
platforms/cuda/src/kernels/kFindInteractingBlocks.h
platforms/cuda/src/kernels/kFindInteractingBlocks.h
+2
-1
No files found.
platforms/cuda/src/kernels/gpu.cpp
View file @
c97d5ee1
...
...
@@ -1475,7 +1475,9 @@ int gpuBuildThreadBlockWorkList(gpuContext gpu)
count
++
;
}
}
(
*
gpu
->
psInteractionCount
)[
0
]
=
gpu
->
sim
.
workUnits
;
gpu
->
psInteractionCount
->
Upload
();
psWorkUnit
->
Upload
();
gpuSetConstants
(
gpu
);
return
cells
;
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.cu
View file @
c97d5ee1
...
...
@@ -132,16 +132,15 @@ void kCalculateCDLJForces(gpuContext gpu)
{
// printf("kCalculateCDLJCutoffForces\n");
CUDPPResult
result
;
size_t
numWithInteractions
;
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJN2ByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCDLJN2Forces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJN2Forces"
);
break
;
case
CUTOFF
:
...
...
@@ -156,16 +155,14 @@ void kCalculateCDLJForces(gpuContext gpu)
printf
(
"Error in cudppCompact: %d
\n
"
,
result
);
exit
(
-
1
);
}
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJCutoffByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJCutoffForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJCutoffForces"
);
break
;
case
PERIODIC
:
...
...
@@ -180,16 +177,14 @@ void kCalculateCDLJForces(gpuContext gpu)
printf
(
"Error in cudppCompact: %d
\n
"
,
result
);
exit
(
-
1
);
}
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJPeriodicByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJPeriodicForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJPeriodicForces"
);
break
;
case
EWALD
:
...
...
@@ -204,16 +199,14 @@ void kCalculateCDLJForces(gpuContext gpu)
printf
(
"Error in cudppCompact: %d
\n
"
,
result
);
exit
(
-
1
);
}
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksEwaldDirect_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJEwaldDirectByWarpForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJEwaldDirectForces_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJEwaldDirectForces"
);
}
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
c97d5ee1
...
...
@@ -35,11 +35,12 @@
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
__global__
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.cu
View file @
c97d5ee1
...
...
@@ -106,8 +106,8 @@ extern __global__ void kFindBlockBoundsCutoff_kernel();
extern
__global__
void
kFindBlockBoundsPeriodic_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsCutoff_kernel
();
extern
__global__
void
kFindBlocksWithInteractionsPeriodic_kernel
();
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
,
unsigned
int
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
,
unsigned
int
);
extern
__global__
void
kFindInteractionsWithinBlocksCutoff_kernel
(
unsigned
int
*
);
extern
__global__
void
kFindInteractionsWithinBlocksPeriodic_kernel
(
unsigned
int
*
);
void
kCalculateCDLJObcGbsaForces1
(
gpuContext
gpu
)
{
...
...
@@ -117,7 +117,6 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
kClearBornForces
(
gpu
);
CUDPPResult
result
;
size_t
numWithInteractions
;
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
...
...
@@ -128,10 +127,10 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaN2ByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateCDLJObcGbsaN2Forces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaN2Forces1"
);
break
;
case
CUTOFF
:
...
...
@@ -146,10 +145,8 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
printf
(
"Error in cudppCompact: %d
\n
"
,
result
);
exit
(
-
1
);
}
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksCutoff_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bRecalculateBornRadii
)
{
kCalculateObcGbsaBornSum
(
gpu
);
...
...
@@ -157,10 +154,10 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaCutoffByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaCutoffForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaCutoffForces1"
);
break
;
case
PERIODIC
:
...
...
@@ -175,10 +172,8 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
printf
(
"Error in cudppCompact: %d
\n
"
,
result
);
exit
(
-
1
);
}
gpu
->
psInteractionCount
->
Download
();
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
kFindInteractionsWithinBlocksPeriodic_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
sizeof
(
unsigned
int
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
if
(
gpu
->
bRecalculateBornRadii
)
{
kCalculateObcGbsaBornSum
(
gpu
);
...
...
@@ -186,10 +181,10 @@ void kCalculateCDLJObcGbsaForces1(gpuContext gpu)
}
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateCDLJObcGbsaPeriodicByWarpForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateCDLJObcGbsaPeriodicForces1_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
LAUNCHERROR
(
"kCalculateCDLJObcGbsaPeriodicForces1"
);
break
;
}
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
c97d5ee1
...
...
@@ -35,11 +35,12 @@
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
__global__
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.cu
View file @
c97d5ee1
...
...
@@ -160,34 +160,31 @@ void kCalculateObcGbsaBornSum(gpuContext gpu)
// printf("kCalculateObcgbsaBornSum\n");
kClearObcGbsaBornSum_kernel
<<<
gpu
->
sim
.
blocks
,
384
>>>
();
LAUNCHERROR
(
"kClearBornSum"
);
size_t
numWithInteractions
;
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaN2ByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateObcGbsaN2BornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
break
;
case
CUTOFF
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaCutoffByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateObcGbsaCutoffBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
PERIODIC
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaPeriodicByWarpBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateObcGbsaPeriodicBornSum_kernel
<<<
gpu
->
sim
.
nonbond_blocks
,
gpu
->
sim
.
nonbond_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float
))
*
gpu
->
sim
.
nonbond_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
}
LAUNCHERROR
(
"kCalculateBornSum"
);
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
View file @
c97d5ee1
...
...
@@ -35,10 +35,11 @@
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
workUnits
)
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
int
end
=
workUnits
/
gridDim
.
x
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
int
end
=
numWorkUnits
/
gridDim
.
x
;
int
pos
=
end
-
(
threadIdx
.
x
>>
GRIDBITS
)
-
1
;
#ifdef USE_OUTPUT_BUFFER_PER_WARP
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
...
...
@@ -50,7 +51,7 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
while
(
pos
>=
0
)
{
// Extract cell coordinates from appropriate work unit
unsigned
int
x
=
workUnit
[
pos
+
(
blockIdx
.
x
*
w
orkUnits
)
/
gridDim
.
x
];
unsigned
int
x
=
workUnit
[
pos
+
(
blockIdx
.
x
*
numW
orkUnits
)
/
gridDim
.
x
];
unsigned
int
y
=
((
x
>>
2
)
&
0x7fff
)
<<
GRIDBITS
;
x
=
(
x
>>
17
)
<<
GRIDBITS
;
float
dx
;
...
...
@@ -146,7 +147,7 @@ __global__ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* wor
sA
[
threadIdx
.
x
].
sum
=
apos
.
w
=
0
.
0
f
;
#ifdef USE_CUTOFF
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
+
(
blockIdx
.
x
*
w
orkUnits
)
/
gridDim
.
x
];
unsigned
int
flags
=
cSim
.
pInteractionFlag
[
pos
+
(
blockIdx
.
x
*
numW
orkUnits
)
/
gridDim
.
x
];
if
(
flags
==
0
)
{
// No interactions in this block.
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.cu
View file @
c97d5ee1
...
...
@@ -107,34 +107,31 @@ void GetCalculateObcGbsaForces2Sim(gpuContext gpu)
void
kCalculateObcGbsaForces2
(
gpuContext
gpu
)
{
//printf("kCalculateObcGbsaForces2\n");
size_t
numWithInteractions
;
switch
(
gpu
->
sim
.
nonbondedMethod
)
{
case
NO_CUTOFF
:
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaN2ByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
else
kCalculateObcGbsaN2Forces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
,
gpu
->
sim
.
workUnits
);
sizeof
(
Atom
)
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pWorkUnit
);
break
;
case
CUTOFF
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaCutoffByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateObcGbsaCutoffForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
case
PERIODIC
:
numWithInteractions
=
gpu
->
psInteractionCount
->
_pSysData
[
0
];
if
(
gpu
->
bOutputBufferPerWarp
)
kCalculateObcGbsaPeriodicByWarpForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
else
kCalculateObcGbsaPeriodicForces2_kernel
<<<
gpu
->
sim
.
bornForce2_blocks
,
gpu
->
sim
.
bornForce2_threads_per_block
,
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
,
numWithInteractions
);
(
sizeof
(
Atom
)
+
sizeof
(
float3
))
*
gpu
->
sim
.
bornForce2_threads_per_block
>>>
(
gpu
->
sim
.
pInteractingWorkUnit
);
break
;
}
LAUNCHERROR
(
"kCalculateObcGbsaForces2"
);
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
View file @
c97d5ee1
...
...
@@ -35,11 +35,12 @@
* different versions of the kernels.
*/
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
__global__
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
...
...
platforms/cuda/src/kernels/kFindInteractingBlocks.h
View file @
c97d5ee1
...
...
@@ -117,11 +117,12 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractions, _kernel)()
* Compare each atom in one block to the bounding box of another block, and set
* flags for which ones are interacting.
*/
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocks
,
_kernel
)(
unsigned
int
*
workUnit
,
unsigned
int
numWorkUnits
)
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocks
,
_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
unsigned
int
flags
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
index
=
threadIdx
.
x
&
(
GRID
-
1
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment