Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
ad2d5c0a
Commit
ad2d5c0a
authored
Nov 03, 2011
by
Peter Eastman
Browse files
Added volatile keywords to work correctly with CUDA 4.1
parent
0abe528f
Changes
15
Show whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
33 additions
and
33 deletions
+33
-33
platforms/cuda/src/kernels/cudaCompact.cu
platforms/cuda/src/kernels/cudaCompact.cu
+2
-2
platforms/cuda/src/kernels/kCalculateCDLJForces.h
platforms/cuda/src/kernels/kCalculateCDLJForces.h
+3
-3
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
+3
-3
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
+3
-3
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
+3
-3
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
+3
-3
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
+3
-3
platforms/cuda/src/kernels/kFindInteractingBlocks.h
platforms/cuda/src/kernels/kFindInteractingBlocks.h
+1
-1
platforms/opencl/src/kernels/findInteractingBlocks.cl
platforms/opencl/src/kernels/findInteractingBlocks.cl
+1
-1
platforms/opencl/src/kernels/removeCM.cl
platforms/opencl/src/kernels/removeCM.cl
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/amoebaScaleFactors.h
...ns/amoeba/platforms/cuda/src/kernels/amoebaScaleFactors.h
+3
-3
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
...rms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
...orms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
+2
-2
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic_b.h
...ms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic_b.h
+1
-1
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
...ba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
+1
-1
No files found.
platforms/cuda/src/kernels/cudaCompact.cu
View file @
ad2d5c0a
...
...
@@ -29,7 +29,7 @@ typedef unsigned int T;
// Phase 1: Count valid elements per thread block
// Hard-code 128 thd/blk
__device__
unsigned
int
sumReduce128
(
unsigned
int
*
arr
)
{
__device__
unsigned
int
sumReduce128
(
volatile
unsigned
int
*
arr
)
{
// Parallel reduce element counts
// Assumes 128 thd/block
if
(
threadIdx
.
x
<
64
)
arr
[
threadIdx
.
x
]
+=
arr
[
threadIdx
.
x
+
64
];
...
...
@@ -47,7 +47,7 @@ __device__ unsigned int sumReduce128(unsigned int* arr) {
}
__global__
void
countElts
(
unsigned
int
*
dgBlockCounts
,
const
unsigned
int
*
dgValid
,
const
size_t
eltsPerBlock
,
const
size_t
len
)
{
__shared__
unsigned
int
dsCount
[
128
];
__shared__
volatile
unsigned
int
dsCount
[
128
];
dsCount
[
threadIdx
.
x
]
=
0
;
size_t
ub
;
ub
=
(
len
<
(
blockIdx
.
x
+
1
)
*
eltsPerBlock
)
?
len
:
((
blockIdx
.
x
+
1
)
*
eltsPerBlock
);
...
...
platforms/cuda/src/kernels/kCalculateCDLJForces.h
View file @
ad2d5c0a
...
...
@@ -43,7 +43,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateCDLJ
,
Forces_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
...
@@ -52,7 +52,7 @@ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
float
CDLJ_energy
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float3
*
tempBuffer
=
(
volatile
float3
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
#ifdef USE_EWALD
...
...
@@ -83,7 +83,7 @@ void METHOD_NAME(kCalculateCDLJ, Forces_kernel)(unsigned int* workUnit)
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
unsigned
int
i
=
x
+
tgx
;
apos
=
cSim
.
pPosq
[
i
];
float2
a
=
cSim
.
pAttr
[
i
];
...
...
platforms/cuda/src/kernels/kCalculateCDLJObcGbsaForces1.h
View file @
ad2d5c0a
...
...
@@ -40,7 +40,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateCDLJObcGbsa
,
Forces1_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
...
@@ -49,7 +49,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit )
float
CDLJObcGbsa_energy
;
float
energy
=
0
.
0
f
;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float
*
tempBuffer
=
(
volatile
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
...
@@ -68,7 +68,7 @@ void METHOD_NAME(kCalculateCDLJObcGbsa, Forces1_kernel)(unsigned int* workUnit )
float
br
=
cSim
.
pBornRadii
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
float4
af
;
af
.
x
=
0
.
0
f
;
af
.
y
=
0
.
0
f
;
...
...
platforms/cuda/src/kernels/kCalculateGBVIBornSum.h
View file @
ad2d5c0a
...
...
@@ -47,7 +47,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateGBVI
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
...
...
@@ -58,7 +58,7 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
// int end = workUnits / gridDim.x;
// int pos = end - (threadIdx.x >> GRIDBITS) - 1;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float
*
tempBuffer
=
(
volatile
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
while
(
pos
<
end
)
...
...
@@ -79,7 +79,7 @@ void METHOD_NAME(kCalculateGBVI, BornSum_kernel)(unsigned int* workUnit)
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
...
...
platforms/cuda/src/kernels/kCalculateGBVIForces2.h
View file @
ad2d5c0a
...
...
@@ -47,14 +47,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif
METHOD_NAME
(
kCalculateGBVI
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
volatile
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
...
@@ -72,7 +72,7 @@ METHOD_NAME(kCalculateGBVI, Forces2_kernel)(unsigned int* workUnit )
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
float3
af
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaBornSum.h
View file @
ad2d5c0a
...
...
@@ -40,7 +40,7 @@ __launch_bounds__(G8X_NONBOND_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateObcGbsa
,
BornSum_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
...
@@ -48,7 +48,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float
*
tempBuffer
=
(
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
volatile
float
*
tempBuffer
=
(
volatile
float
*
)
&
sA
[
cSim
.
nonbond_threads_per_block
];
#endif
while
(
pos
<
end
)
...
...
@@ -67,7 +67,7 @@ void METHOD_NAME(kCalculateObcGbsa, BornSum_kernel)(unsigned int* workUnit)
unsigned
int
tgx
=
threadIdx
.
x
&
(
GRID
-
1
);
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
if
(
x
==
y
)
// Handle diagonals uniquely at 50% efficiency
{
...
...
platforms/cuda/src/kernels/kCalculateObcGbsaForces2.h
View file @
ad2d5c0a
...
...
@@ -40,14 +40,14 @@ __launch_bounds__(G8X_BORNFORCE2_THREADS_PER_BLOCK, 1)
#endif
void
METHOD_NAME
(
kCalculateObcGbsa
,
Forces2_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
Atom
sA
[];
extern
__shared__
volatile
Atom
sA
[];
unsigned
int
totalWarps
=
cSim
.
bornForce2_blocks
*
cSim
.
bornForce2_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
unsigned
int
pos
=
warp
*
numWorkUnits
/
totalWarps
;
unsigned
int
end
=
(
warp
+
1
)
*
numWorkUnits
/
totalWarps
;
#ifdef USE_CUTOFF
float3
*
tempBuffer
=
(
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
volatile
float3
*
tempBuffer
=
(
volatile
float3
*
)
&
sA
[
cSim
.
bornForce2_threads_per_block
];
#endif
unsigned
int
lasty
=
-
0xFFFFFFFF
;
...
...
@@ -65,7 +65,7 @@ void METHOD_NAME(kCalculateObcGbsa, Forces2_kernel)(unsigned int* workUnit)
float
fb
=
cSim
.
pBornForce
[
i
];
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
Atom
*
psA
=
&
sA
[
tbx
];
volatile
Atom
*
psA
=
&
sA
[
tbx
];
float3
af
;
sA
[
threadIdx
.
x
].
fx
=
af
.
x
=
0
.
0
f
;
sA
[
threadIdx
.
x
].
fy
=
af
.
y
=
0
.
0
f
;
...
...
platforms/cuda/src/kernels/kFindInteractingBlocks.h
View file @
ad2d5c0a
...
...
@@ -115,7 +115,7 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractions, _kernel)()
*/
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocks
,
_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
unsigned
int
flags
[];
extern
__shared__
volatile
unsigned
int
flags
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
...
platforms/opencl/src/kernels/findInteractingBlocks.cl
View file @
ad2d5c0a
...
...
@@ -221,7 +221,7 @@ __kernel void findBlocksWithInteractions(float cutoffSquared, float4 periodicBox
*
flags
for
which
ones
are
interacting.
*/
__kernel
void
findInteractionsWithinBlocks
(
float
cutoffSquared,
float4
periodicBoxSize,
float4
invPeriodicBoxSize,
__global
const
float4*
restrict
posq,
__global
const
ushort2*
restrict
tiles,
__global
const
float4*
restrict
blockCenter,
__global
const
float4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionFlags,
__global
const
unsigned
int*
restrict
interactionCount,
__local
unsigned
int*
restrict
flags,
unsigned
int
maxTiles
)
{
__global
const
float4*
restrict
blockBoundingBox,
__global
unsigned
int*
restrict
interactionFlags,
__global
const
unsigned
int*
restrict
interactionCount,
__local
volatile
unsigned
int*
restrict
flags,
unsigned
int
maxTiles
)
{
unsigned
int
totalWarps
=
get_global_size
(
0
)
/TILE_SIZE
;
unsigned
int
warp
=
get_global_id
(
0
)
/TILE_SIZE
;
unsigned
int
numTiles
=
interactionCount[0]
;
...
...
platforms/opencl/src/kernels/removeCM.cl
View file @
ad2d5c0a
...
...
@@ -2,7 +2,7 @@
*
Calculate
the
center
of
mass
momentum.
*/
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
const
float4*
restrict
velm,
__global
float4*
restrict
cmMomentum,
__local
float4*
restrict
temp
)
{
__kernel
void
calcCenterOfMassMomentum
(
int
numAtoms,
__global
const
float4*
restrict
velm,
__global
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
int
index
=
get_global_id
(
0
)
;
float4
cm
=
0.0f
;
while
(
index
<
numAtoms
)
{
...
...
@@ -53,7 +53,7 @@ __kernel void calcCenterOfMassMomentum(int numAtoms, __global const float4* rest
*
Remove
center
of
mass
motion.
*/
__kernel
void
removeCenterOfMassMomentum
(
int
numAtoms,
__global
float4*
restrict
velm,
__global
const
float4*
restrict
cmMomentum,
__local
float4*
restrict
temp
)
{
__kernel
void
removeCenterOfMassMomentum
(
int
numAtoms,
__global
float4*
restrict
velm,
__global
const
float4*
restrict
cmMomentum,
__local
volatile
float4*
restrict
temp
)
{
//
First
sum
all
of
the
momenta
that
were
calculated
by
individual
groups.
int
index
=
get_local_id
(
0
)
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/amoebaScaleFactors.h
View file @
ad2d5c0a
...
...
@@ -101,7 +101,7 @@ __device__ static void load3dArrayBufferPerWarp( unsigned int offset, float* for
}
__device__
static
void
add3dArrayToFloat4
(
unsigned
int
offset
,
float
*
forceSum
,
float4
*
outputForce
)
__device__
static
void
add3dArrayToFloat4
(
unsigned
int
offset
,
volatile
float
*
forceSum
,
float4
*
outputForce
)
{
float4
of
;
...
...
@@ -125,7 +125,7 @@ __device__ static void load3dArrayToFloat4( unsigned int offset, float* forceSum
}
__device__
static
void
load3dArray
(
unsigned
int
offset
,
float
*
forceSum
,
float
*
outputForce
)
__device__
static
void
load3dArray
(
unsigned
int
offset
,
volatile
float
*
forceSum
,
float
*
outputForce
)
{
outputForce
[
offset
]
=
forceSum
[
0
];
...
...
@@ -134,7 +134,7 @@ __device__ static void load3dArray( unsigned int offset, float* forceSum, float*
}
__device__
static
void
add3dArray
(
unsigned
int
offset
,
float
*
forceSum
,
float
*
outputForce
)
__device__
static
void
add3dArray
(
unsigned
int
offset
,
volatile
float
*
forceSum
,
float
*
outputForce
)
{
outputForce
[
offset
]
+=
forceSum
[
0
];
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.cu
View file @
ad2d5c0a
...
...
@@ -468,7 +468,7 @@ __device__ void calculateElectrostaticPairIxnOrig_kernel( ElectrostaticParticle&
}
#endif
static
__device__
void
loadElectrostaticParticle
(
struct
ElectrostaticParticle
*
sA
,
unsigned
int
atomI
){
static
__device__
void
loadElectrostaticParticle
(
volatile
struct
ElectrostaticParticle
*
sA
,
unsigned
int
atomI
){
// coordinates & charge
...
...
@@ -512,7 +512,7 @@ static __device__ void loadElectrostaticParticle( struct ElectrostaticParticle*
}
static
__device__
void
zeroElectrostaticParticle
(
struct
ElectrostaticParticle
*
sA
){
static
__device__
void
zeroElectrostaticParticle
(
volatile
struct
ElectrostaticParticle
*
sA
){
sA
->
force
[
0
]
=
0.0
f
;
sA
->
force
[
1
]
=
0.0
f
;
sA
->
force
[
2
]
=
0.0
f
;
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic.h
View file @
ad2d5c0a
...
...
@@ -42,7 +42,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
#endif
){
extern
__shared__
ElectrostaticParticle
sA
[];
extern
__shared__
volatile
ElectrostaticParticle
sA
[];
unsigned
int
totalWarps
=
gridDim
.
x
*
blockDim
.
x
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
...
...
@@ -69,7 +69,7 @@ void METHOD_NAME(kCalculateAmoebaCudaElectrostatic, Forces_kernel)(
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
unsigned
int
tj
=
tgx
;
ElectrostaticParticle
*
psA
=
&
sA
[
tbx
];
volatile
ElectrostaticParticle
*
psA
=
&
sA
[
tbx
];
unsigned
int
atomI
=
x
+
tgx
;
ElectrostaticParticle
localParticle
;
loadElectrostaticParticle
(
&
localParticle
,
atomI
);
...
...
plugins/amoeba/platforms/cuda/src/kernels/kCalculateAmoebaCudaElectrostatic_b.h
View file @
ad2d5c0a
__device__
void
SUB_METHOD_NAME
(
calculateElectrostaticPairIxn
,
_kernel
)(
ElectrostaticParticle
&
atomI
,
ElectrostaticParticle
&
atomJ
,
__device__
void
SUB_METHOD_NAME
(
calculateElectrostaticPairIxn
,
_kernel
)(
ElectrostaticParticle
&
atomI
,
volatile
ElectrostaticParticle
&
atomJ
,
float
*
scalingFactors
,
#ifdef F1
float
*
energy
,
...
...
plugins/amoeba/platforms/cuda/src/kernels/kFindInteractingBlocksVdw.h
View file @
ad2d5c0a
...
...
@@ -117,7 +117,7 @@ __global__ void METHOD_NAME(kFindBlocksWithInteractionsVdw, _kernel)()
*/
__global__
void
METHOD_NAME
(
kFindInteractionsWithinBlocksVdw
,
_kernel
)(
unsigned
int
*
workUnit
)
{
extern
__shared__
unsigned
int
flags
[];
extern
__shared__
volatile
unsigned
int
flags
[];
unsigned
int
totalWarps
=
cSim
.
nonbond_blocks
*
cSim
.
nonbond_threads_per_block
/
GRID
;
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
GRID
;
unsigned
int
numWorkUnits
=
cSim
.
pInteractionCount
[
0
];
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment