Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bddaf4e7
Commit
bddaf4e7
authored
Aug 21, 2014
by
peastman
Browse files
CUDA version of CustomManyParticleForce uses neighbor list
parent
e3b631f6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
22 additions
and
50 deletions
+22
-50
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+13
-6
platforms/cuda/src/kernels/customManyParticle.cu
platforms/cuda/src/kernels/customManyParticle.cu
+8
-43
No files found.
platforms/cuda/include/CudaKernels.h
View file @
bddaf4e7
...
@@ -964,7 +964,7 @@ private:
...
@@ -964,7 +964,7 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
;
int
maxNeighborPairs
,
forceWorkgroupSize
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
CudaArray
*
particleTypes
;
CudaArray
*
particleTypes
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
bddaf4e7
...
@@ -4475,6 +4475,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -4475,6 +4475,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
int
numParticles
=
force
.
getNumParticles
();
int
numParticles
=
force
.
getNumParticles
();
int
particlesPerSet
=
force
.
getNumParticlesPerSet
();
int
particlesPerSet
=
force
.
getNumParticlesPerSet
();
nonbondedMethod
=
CalcCustomManyParticleForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
nonbondedMethod
=
CalcCustomManyParticleForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
forceWorkgroupSize
=
128
;
// Record parameter values.
// Record parameter values.
...
@@ -4804,19 +4805,21 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -4804,19 +4805,21 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
if
(
i
>
1
)
if
(
i
>
1
)
numCombinations
<<
"*"
;
numCombinations
<<
"*"
;
numCombinations
<<
"numNeighbors"
;
numCombinations
<<
"numNeighbors"
;
atomsForCombination
<<
"int p"
<<
(
i
+
1
)
<<
" = p1+1+tempIndex%numNeighbors;
\n
"
;
if
(
nonbondedMethod
==
NoCutoff
)
atomsForCombination
<<
"int p"
<<
(
i
+
1
)
<<
" = p1+1+tempIndex%numNeighbors;
\n
"
;
else
atomsForCombination
<<
"int p"
<<
(
i
+
1
)
<<
" = neighbors[firstNeighbor+tempIndex%numNeighbors];
\n
"
;
atomsForCombination
<<
"tempIndex /= numNeighbors;
\n
"
;
atomsForCombination
<<
"tempIndex /= numNeighbors;
\n
"
;
}
}
if
(
nonbondedMethod
!=
NoCutoff
)
{
if
(
nonbondedMethod
!=
NoCutoff
)
{
int
startCheckFrom
=
0
;
for
(
int
i
=
1
;
i
<
particlesPerSet
;
i
++
)
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
verifyCutoff
<<
"real4 pos"
<<
(
i
+
1
)
<<
" = posq[p"
<<
(
i
+
1
)
<<
"];
\n
"
;
verifyCutoff
<<
"real4 pos"
<<
(
i
+
1
)
<<
" = posq[p"
<<
(
i
+
1
)
<<
"];
\n
"
;
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
for
(
int
i
=
1
;
i
<
particlesPerSet
;
i
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
verifyCutoff
<<
"includeInteraction &= (delta(pos"
<<
(
i
+
1
)
<<
", pos"
<<
(
j
+
1
)
<<
", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);
\n
"
;
verifyCutoff
<<
"includeInteraction &= (delta(pos"
<<
(
i
+
1
)
<<
", pos"
<<
(
j
+
1
)
<<
", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);
\n
"
;
}
}
if
(
force
.
getNumExclusions
()
>
0
)
{
if
(
force
.
getNumExclusions
()
>
0
)
{
int
startCheckFrom
=
0
;
int
startCheckFrom
=
(
nonbondedMethod
==
NoCutoff
?
0
:
1
)
;
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
verifyExclusions
<<
"includeInteraction &= !isInteractionExcluded(p"
<<
(
i
+
1
)
<<
", p"
<<
(
j
+
1
)
<<
", exclusions, exclusionStartIndex);
\n
"
;
verifyExclusions
<<
"includeInteraction &= !isInteractionExcluded(p"
<<
(
i
+
1
)
<<
", p"
<<
(
j
+
1
)
<<
", exclusions, exclusionStartIndex);
\n
"
;
...
@@ -4883,6 +4886,10 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -4883,6 +4886,10 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
if
(
nonbondedMethod
!=
NoCutoff
)
{
forceArgs
.
push_back
(
&
neighbors
->
getDevicePointer
());
forceArgs
.
push_back
(
&
neighborStartIndex
->
getDevicePointer
());
}
if
(
particleTypes
!=
NULL
)
{
if
(
particleTypes
!=
NULL
)
{
forceArgs
.
push_back
(
&
particleTypes
->
getDevicePointer
());
forceArgs
.
push_back
(
&
particleTypes
->
getDevicePointer
());
forceArgs
.
push_back
(
&
orderIndex
->
getDevicePointer
());
forceArgs
.
push_back
(
&
orderIndex
->
getDevicePointer
());
...
@@ -4967,7 +4974,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -4967,7 +4974,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
cu
.
executeKernel
(
startIndicesKernel
,
&
startIndicesArgs
[
0
],
256
,
256
,
256
*
sizeof
(
int
));
cu
.
executeKernel
(
startIndicesKernel
,
&
startIndicesArgs
[
0
],
256
,
256
,
256
*
sizeof
(
int
));
cu
.
executeKernel
(
copyPairsKernel
,
&
copyPairsArgs
[
0
],
maxNeighborPairs
);
cu
.
executeKernel
(
copyPairsKernel
,
&
copyPairsArgs
[
0
],
maxNeighborPairs
);
}
}
cu
.
executeKernel
(
forceKernel
,
&
forceArgs
[
0
],
cu
.
getNumAtoms
()
*
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlock
Size
);
cu
.
executeKernel
(
forceKernel
,
&
forceArgs
[
0
],
cu
.
getNumAtoms
()
*
forceWorkgroupSize
,
forceWorkgroup
Size
);
if
(
nonbondedMethod
!=
NoCutoff
)
{
if
(
nonbondedMethod
!=
NoCutoff
)
{
// Make sure there was enough memory for the neighbor list.
// Make sure there was enough memory for the neighbor list.
...
...
platforms/cuda/src/kernels/customManyParticle.cu
View file @
bddaf4e7
...
@@ -74,55 +74,15 @@ inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restri
...
@@ -74,55 +74,15 @@ inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restri
return
false
;
return
false
;
}
}
#define WARP_SIZE 32
/**
* Perform a parallel prefix sum of boolean values over an array. This is done as the first stage of compacting an array.
*/
__device__
void
prefixSum
(
bool
value
,
short
*
sum
,
ushort2
*
temp
)
{
#if __CUDA_ARCH__ >= 300
const
int
indexInWarp
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
warpMask
=
(
2
<<
indexInWarp
)
-
1
;
temp
[
threadIdx
.
x
].
x
=
__popc
(
__ballot
(
value
)
&
warpMask
);
__syncthreads
();
if
(
threadIdx
.
x
<
WARP_SIZE
)
{
int
multiWarpSum
=
temp
[(
threadIdx
.
x
+
1
)
*
WARP_SIZE
-
1
].
x
;
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
/
WARP_SIZE
;
offset
*=
2
)
{
short
n
=
__shfl_up
(
multiWarpSum
,
offset
,
WARP_SIZE
);
if
(
indexInWarp
>=
offset
)
multiWarpSum
+=
n
;
}
temp
[
threadIdx
.
x
].
y
=
multiWarpSum
;
}
__syncthreads
();
sum
[
threadIdx
.
x
]
=
temp
[
threadIdx
.
x
].
x
+
(
threadIdx
.
x
<
WARP_SIZE
?
0
:
temp
[
threadIdx
.
x
/
WARP_SIZE
-
1
].
y
);
__syncthreads
();
#else
temp
[
threadIdx
.
x
].
x
=
value
;
__syncthreads
();
int
whichBuffer
=
0
;
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
whichBuffer
==
0
)
temp
[
threadIdx
.
x
].
y
=
(
threadIdx
.
x
<
offset
?
temp
[
threadIdx
.
x
].
x
:
temp
[
threadIdx
.
x
].
x
+
temp
[
threadIdx
.
x
-
offset
].
x
);
else
temp
[
threadIdx
.
x
].
x
=
(
threadIdx
.
x
<
offset
?
temp
[
threadIdx
.
x
].
y
:
temp
[
threadIdx
.
x
].
y
+
temp
[
threadIdx
.
x
-
offset
].
y
);
whichBuffer
=
1
-
whichBuffer
;
__syncthreads
();
}
if
(
whichBuffer
==
0
)
sum
[
threadIdx
.
x
]
=
temp
[
threadIdx
.
x
].
x
;
else
sum
[
threadIdx
.
x
]
=
temp
[
threadIdx
.
x
].
y
;
__syncthreads
();
#endif
}
/**
/**
* Compute the interaction.
* Compute the interaction.
*/
*/
extern
"C"
__global__
void
computeInteraction
(
extern
"C"
__global__
void
computeInteraction
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
neighbors
,
const
int
*
__restrict__
neighborStartIndex
#endif
#ifdef USE_FILTERS
#ifdef USE_FILTERS
,
int
*
__restrict__
particleTypes
,
int
*
__restrict__
orderIndex
,
int
*
__restrict__
particleOrder
,
int
*
__restrict__
particleTypes
,
int
*
__restrict__
orderIndex
,
int
*
__restrict__
particleOrder
#endif
#endif
...
@@ -135,7 +95,12 @@ extern "C" __global__ void computeInteraction(
...
@@ -135,7 +95,12 @@ extern "C" __global__ void computeInteraction(
// Loop over particles to be the first one in the set.
// Loop over particles to be the first one in the set.
for
(
int
p1
=
blockIdx
.
x
;
p1
<
NUM_ATOMS
;
p1
+=
gridDim
.
x
)
{
for
(
int
p1
=
blockIdx
.
x
;
p1
<
NUM_ATOMS
;
p1
+=
gridDim
.
x
)
{
#ifdef USE_CUTOFF
int
firstNeighbor
=
neighborStartIndex
[
p1
];
int
numNeighbors
=
neighborStartIndex
[
p1
+
1
]
-
firstNeighbor
;
#else
int
numNeighbors
=
NUM_ATOMS
-
p1
-
1
;
int
numNeighbors
=
NUM_ATOMS
-
p1
-
1
;
#endif
int
numCombinations
=
NUM_CANDIDATE_COMBINATIONS
;
int
numCombinations
=
NUM_CANDIDATE_COMBINATIONS
;
for
(
int
index
=
threadIdx
.
x
;
index
<
numCombinations
;
index
+=
blockDim
.
x
)
{
for
(
int
index
=
threadIdx
.
x
;
index
<
numCombinations
;
index
+=
blockDim
.
x
)
{
FIND_ATOMS_FOR_COMBINATION_INDEX
;
FIND_ATOMS_FOR_COMBINATION_INDEX
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment