Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ad2d5c0a
Commit
ad2d5c0a
authored
Nov 03, 2011
by
Peter Eastman
Browse files
Added volatile keywords to work correctly with CUDA 4.1
parent
0abe528f
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
33 additions
and
33 deletions
+33
-33
platforms/cuda/src/kernels/cudaCompact.cu
platforms/cuda/src/kernels/cudaCompact.cu
+2
-2
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+3
-3
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+3
-3
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
+3
-3
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
+3
-3
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+3
-3
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
+3
-3
platforms/cuda/src/kernels/kFindInteractingBlocks.h
platforms/cuda/src/kernels/kFindInteractingBlocks.h
+1
-1
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+1
-1
platforms/opencl/src/kernels/removeCM.cl
platforms/opencl/src/kernels/removeCM.cl
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/amoebaScaleFactors.h
...ns/amoeba/platforms/cuda/src/kernels/amoebaScaleFactors.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic_b.h
...ms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic_b.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
...ba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
+1
-1
No files found.
platforms/cuda/src/kernels/cudaCompact.cu
View file @
ad2d5c0a
...
@@ -29,7 +29,7 @@ typedef unsigned int T;
...
@@ -29,7 +29,7 @@ typedef unsigned int T;
// Phase 1: Count valid elements per thread block
// Phase 1: Count valid elements per thread block
// Hard-code 128 thd/blk
// Hard-code 128 thd/blk
__device__
unsigned
int
sumReduce128
(
unsigned
int
*
arr
)
{
__device__
unsigned
int
sumReduce128
(
volatile
unsigned
int
*
arr
)
{
// Parallel reduce element counts
// Parallel reduce element counts
// Assumes 128 thd/block
// Assumes 128 thd/block
if
(
threadIdx
.
x
<
64
)
arr
[
threadIdx
.
x
]
+=
arr
[
threadIdx
.
x
+
64
];
if
(
threadIdx
.
x
<
64
)
arr
[
threadIdx
.
x
]
+=
arr
[
threadIdx
.
x
+
64
];
...
@@ -47,7 +47,7 @@ __device__ unsigned int sumReduce128(unsigned int* arr) {
...
@@ -47,7 +47,7 @@ __device__ unsigned int sumReduce128(unsigned int* arr) {
}
}
__global__
void
countElts
(
unsigned
int
*
dgBlockCounts
,
const
unsigned
int
*
dgValid
,
const
size_t
eltsPerBlock
,
const
size_t
len
)
{
__global__
void
countElts
(
unsigned
int
*
dgBlockCounts
,
const
unsigned
int
*
dgValid
,
const
size_t
eltsPerBlock
,
const
size_t
len
)
{
__shared__
unsigned
int
dsCount
[
128
];
__shared__
volatile
unsigned
int
dsCount
[
128
];
dsCount
[
threadIdx
.
x
]
=
0
;
dsCount
[
threadIdx
.
x
]
=
0
;
size_t
ub
;
size_t
ub
;
ub
=
(
len
<
(
blockIdx
.
x
+
1
)
*
eltsPerBlock
)
?
len
:
((
blockIdx
.
x
+
1
)
*
eltsPerBlock
);
ub
=
(
len
<
(
blockIdx
.
x
+
1
)
*
eltsPerBlock
)
?
len
:
((
blockIdx
.
x
+
1
)
*
eltsPerBlock
);
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
ad2d5c0a
...
@@ -43,7 +43,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...
@@ -43,7 +43,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
@@ -52,7 +52,7 @@ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
...
@@ -52,7 +52,7 @@ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
float
CDLJ_energy
;
float
CDLJ_energy
;
float
energy
=
0
.
0
f
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float3
*
tempBuffer
=
(
volatile
float3
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
#endif
#ifdef USE_EWALD
#ifdef USE_EWALD
...
@@ -83,7 +83,7 @@ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
...
@@ -83,7 +83,7 @@ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
unsigned
int
i
=
x
+
tgx
;
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
ad2d5c0a
...
@@ -40,7 +40,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...
@@ -40,7 +40,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
@@ -49,7 +49,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit )
...
@@ -49,7 +49,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit )
float
CDLJObcGbsa_energy
;
float
CDLJObcGbsa_energy
;
float
energy
=
0
.
0
f
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float
*
tempBuffer
=
(
volatile
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
@@ -68,7 +68,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit )
...
@@ -68,7 +68,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit )
float
br
=
cSim
.
pBornRadii
[
i
];
float
br
=
cSim
.
pBornRadii
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
float4
af
;
float4
af
;
af
.
x
=
0
.
0
f
;
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
...
...
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
View file @
ad2d5c0a
...
@@ -47,7 +47,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...
@@ -47,7 +47,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateGBVI
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
void
METHOD_NAME
(
kCalculateGBVI
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
...
@@ -58,7 +58,7 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
...
@@ -58,7 +58,7 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
// int end = workUnits / gridDim.x;
// int end = workUnits / gridDim.x;
// int pos = end - (threadIdx.x >> GRIDBITS) - 1;
// int pos = end - (threadIdx.x >> GRIDBITS) - 1;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float
*
tempBuffer
=
(
volatile
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
#endif
while
(
pos
<
end
)
while
(
pos
<
end
)
...
@@ -79,7 +79,7 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
...
@@ -79,7 +79,7 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
{
...
...
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
View file @
ad2d5c0a
...
@@ -47,14 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
...
@@ -47,14 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif
#endif
METHOD_NAME
(
kCalculateGBVI
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
METHOD_NAME
(
kCalculateGBVI
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
volatile
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
#endif
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
@@ -72,7 +72,7 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
...
@@ -72,7 +72,7 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
float
fb
=
cSim
.
pBornForce
[
i
];
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
float3
af
;
float3
af
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
View file @
ad2d5c0a
...
@@ -40,7 +40,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
...
@@ -40,7 +40,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
@@ -48,7 +48,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
...
@@ -48,7 +48,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float
*
tempBuffer
=
(
volatile
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
#endif
while
(
pos
<
end
)
while
(
pos
<
end
)
...
@@ -67,7 +67,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
...
@@ -67,7 +67,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
{
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
View file @
ad2d5c0a
...
@@ -40,14 +40,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
...
@@ -40,14 +40,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif
#endif
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
volatile
float3
*
tempBuffer
=
(
volatile
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
#endif
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
@@ -65,7 +65,7 @@ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* workUnit)
...
@@ -65,7 +65,7 @@ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* workUnit)
float
fb
=
cSim
.
pBornForce
[
i
];
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
float3
af
;
float3
af
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
...
...
platforms/cuda/src/kernels/kFindInteractingBlocks.h
View file @
ad2d5c0a
...
@@ -115,7 +115,7 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractions, _kernel)()
...
@@ -115,7 +115,7 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractions, _kernel)()
*/
*/
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocks
,
_kernel
)(
unsigned
int
*
workUnit
)
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocks
,
_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
unsigned
int
flags
[];
extern
__shared__
volatile
unsigned
int
flags
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
ad2d5c0a
...
@@ -221,7 +221,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
...
@@ -221,7 +221,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
*
flags
for
which
ones
are
interacting.
*
flags
for
which
ones
are
interacting.
*/
*/
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
const
float4*
restrict
posq,
__global
const
ushort2*
restrict
tiles,
__global
const
float4*
restrict
blockCenter,
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
const
float4*
restrict
posq,
__global
const
ushort2*
restrict
tiles,
__global
const
float4*
restrict
blockCenter,
__global
const
float4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionFlags,
__global
const
unsigned
int*
restrict
interactionCount,
__local
unsigned
int*
restrict
flags,
unsigned
int
maxTiles
)
{
__global
const
float4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionFlags,
__global
const
unsigned
int*
restrict
interactionCount,
__local
volatile
unsigned
int*
restrict
flags,
unsigned
int
maxTiles
)
{
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
numTiles
=
interactionCount[0]
;
unsigned
int
numTiles
=
interactionCount[0]
;
...
...
platforms/opencl/src/kernels/removeCM.cl
View file @
ad2d5c0a
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
*
Calculate
the
center
of
mass
momentum.
*
Calculate
the
center
of
mass
momentum.
*/
*/
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
const
float4*
restrict
velm,
__global
float4*
restrict
cmMomentum,
__local
float4*
restrict
temp
)
{
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
const
float4*
restrict
velm,
__global
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
int
index
=
get_global_id
(
0
)
;
int
index
=
get_global_id
(
0
)
;
float4
cm
=
0.0f
;
float4
cm
=
0.0f
;
while
(
index
<
numAtoms
)
{
while
(
index
<
numAtoms
)
{
...
@@ -53,7 +53,7 @@ __kernel void calcCenterOfMassMomentum(int numAtoms, __global const float4* rest
...
@@ -53,7 +53,7 @@ __kernel void calcCenterOfMassMomentum(int numAtoms, __global const float4* rest
*
Remove
center
of
mass
motion.
*
Remove
center
of
mass
motion.
*/
*/
__kernel
void
removeCenterOfMassMomentum
(
int
numAtoms,
__global
float4*
restrict
velm,
__global
const
float4*
restrict
cmMomentum,
__local
float4*
restrict
temp
)
{
__kernel
void
removeCenterOfMassMomentum
(
int
numAtoms,
__global
float4*
restrict
velm,
__global
const
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
//
First
sum
all
of
the
momenta
that
were
calculated
by
individual
groups.
//
First
sum
all
of
the
momenta
that
were
calculated
by
individual
groups.
int
index
=
get_local_id
(
0
)
;
int
index
=
get_local_id
(
0
)
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaScaleFactors.h
View file @
ad2d5c0a
...
@@ -101,7 +101,7 @@ __device__ static void load3dArrayBufferPerWarp( unsigned int offset, float* for
...
@@ -101,7 +101,7 @@ __device__ static void load3dArrayBufferPerWarp( unsigned int offset, float* for
}
}
__device__
static
void
add3dArrayToFloat4
(
unsigned
int
offset
,
float
*
forceSum
,
float4
*
outputForce
)
__device__
static
void
add3dArrayToFloat4
(
unsigned
int
offset
,
volatile
float
*
forceSum
,
float4
*
outputForce
)
{
{
float4
of
;
float4
of
;
...
@@ -125,7 +125,7 @@ __device__ static void load3dArrayToFloat4( unsigned int offset, float* forceSum
...
@@ -125,7 +125,7 @@ __device__ static void load3dArrayToFloat4( unsigned int offset, float* forceSum
}
}
__device__
static
void
load3dArray
(
unsigned
int
offset
,
float
*
forceSum
,
float
*
outputForce
)
__device__
static
void
load3dArray
(
unsigned
int
offset
,
volatile
float
*
forceSum
,
float
*
outputForce
)
{
{
outputForce
[
offset
]
=
forceSum
[
0
];
outputForce
[
offset
]
=
forceSum
[
0
];
...
@@ -134,7 +134,7 @@ __device__ static void load3dArray( unsigned int offset, float* forceSum, float*
...
@@ -134,7 +134,7 @@ __device__ static void load3dArray( unsigned int offset, float* forceSum, float*
}
}
__device__
static
void
add3dArray
(
unsigned
int
offset
,
float
*
forceSum
,
float
*
outputForce
)
__device__
static
void
add3dArray
(
unsigned
int
offset
,
volatile
float
*
forceSum
,
float
*
outputForce
)
{
{
outputForce
[
offset
]
+=
forceSum
[
0
];
outputForce
[
offset
]
+=
forceSum
[
0
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
ad2d5c0a
...
@@ -468,7 +468,7 @@ __device__ void calculateElectrostaticPairIxnOrig_kernel( ElectrostaticParticle&
...
@@ -468,7 +468,7 @@ __device__ void calculateElectrostaticPairIxnOrig_kernel( ElectrostaticParticle&
}
}
#endif
#endif
static
__device__
void
loadElectrostaticParticle
(
struct
ElectrostaticParticle
*
sA
,
unsigned
int
atomI
){
static
__device__
void
loadElectrostaticParticle
(
volatile
struct
ElectrostaticParticle
*
sA
,
unsigned
int
atomI
){
// coordinates & charge
// coordinates & charge
...
@@ -512,7 +512,7 @@ static __device__ void loadElectrostaticParticle( struct ElectrostaticParticle*
...
@@ -512,7 +512,7 @@ static __device__ void loadElectrostaticParticle( struct ElectrostaticParticle*
}
}
static
__device__
void
zeroElectrostaticParticle
(
struct
ElectrostaticParticle
*
sA
){
static
__device__
void
zeroElectrostaticParticle
(
volatile
struct
ElectrostaticParticle
*
sA
){
sA
->
force
[
0
]
=
0.0
f
;
sA
->
force
[
0
]
=
0.0
f
;
sA
->
force
[
1
]
=
0.0
f
;
sA
->
force
[
1
]
=
0.0
f
;
sA
->
force
[
2
]
=
0.0
f
;
sA
->
force
[
2
]
=
0.0
f
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
View file @
ad2d5c0a
...
@@ -42,7 +42,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
...
@@ -42,7 +42,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif
#endif
){
){
extern
__shared__
ElectrostaticParticle
sA
[];
extern
__shared__
volatile
ElectrostaticParticle
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
...
@@ -69,7 +69,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
...
@@ -69,7 +69,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
unsigned
int
tj
=
tgx
;
ElectrostaticParticle
*
psA
=
&
sA
[
tbx
];
volatile
ElectrostaticParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
unsigned
int
atomI
=
x
+
tgx
;
ElectrostaticParticle
localParticle
;
ElectrostaticParticle
localParticle
;
loadElectrostaticParticle
(
&
localParticle
,
atomI
);
loadElectrostaticParticle
(
&
localParticle
,
atomI
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic_b.h
View file @
ad2d5c0a
__device__
void
SUB_METHOD_NAME
(
calculateElectrostaticPairIxn
,
_kernel
)(
ElectrostaticParticle
&
atomI
,
ElectrostaticParticle
&
atomJ
,
__device__
void
SUB_METHOD_NAME
(
calculateElectrostaticPairIxn
,
_kernel
)(
ElectrostaticParticle
&
atomI
,
volatile
ElectrostaticParticle
&
atomJ
,
float
*
scalingFactors
,
float
*
scalingFactors
,
#ifdef F1
#ifdef F1
float
*
energy
,
float
*
energy
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
View file @
ad2d5c0a
...
@@ -117,7 +117,7 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractionsVdw, _kernel)()
...
@@ -117,7 +117,7 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractionsVdw, _kernel)()
*/
*/
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocksVdw
,
_kernel
)(
unsigned
int
*
workUnit
)
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocksVdw
,
_kernel
)(
unsigned
int
*
workUnit
)
{
{
extern
__shared__
unsigned
int
flags
[];
extern
__shared__
volatile
unsigned
int
flags
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment