Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
bddaf4e7
"...ssh:/git@developer.sourcefind.cn:2222/tsoc/openmm.git" did not exist on "0665e20bd4e8f22927377373b84ad366e7f33829"
Commit
bddaf4e7
authored
Aug 21, 2014
by
peastman
Browse files
CUDA version of CustomManyParticleForce uses neighbor list
parent
e3b631f6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
22 additions
and
50 deletions
+22
-50
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+1
-1
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+13
-6
platforms/cuda/src/kernels/customManyParticle.cu
platforms/cuda/src/kernels/customManyParticle.cu
+8
-43
No files found.
platforms/cuda/include/CudaKernels.h
View file @
bddaf4e7
...
@@ -964,7 +964,7 @@ private:
...
@@ -964,7 +964,7 @@ private:
CudaContext
&
cu
;
CudaContext
&
cu
;
bool
hasInitializedKernel
;
bool
hasInitializedKernel
;
NonbondedMethod
nonbondedMethod
;
NonbondedMethod
nonbondedMethod
;
int
maxNeighborPairs
;
int
maxNeighborPairs
,
forceWorkgroupSize
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
CudaArray
*
particleTypes
;
CudaArray
*
particleTypes
;
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
bddaf4e7
...
@@ -4475,6 +4475,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -4475,6 +4475,7 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
int
numParticles
=
force
.
getNumParticles
();
int
numParticles
=
force
.
getNumParticles
();
int
particlesPerSet
=
force
.
getNumParticlesPerSet
();
int
particlesPerSet
=
force
.
getNumParticlesPerSet
();
nonbondedMethod
=
CalcCustomManyParticleForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
nonbondedMethod
=
CalcCustomManyParticleForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
forceWorkgroupSize
=
128
;
// Record parameter values.
// Record parameter values.
...
@@ -4804,19 +4805,21 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
...
@@ -4804,19 +4805,21 @@ void CudaCalcCustomManyParticleForceKernel::initialize(const System& system, con
if
(
i
>
1
)
if
(
i
>
1
)
numCombinations
<<
"*"
;
numCombinations
<<
"*"
;
numCombinations
<<
"numNeighbors"
;
numCombinations
<<
"numNeighbors"
;
atomsForCombination
<<
"int p"
<<
(
i
+
1
)
<<
" = p1+1+tempIndex%numNeighbors;
\n
"
;
if
(
nonbondedMethod
==
NoCutoff
)
atomsForCombination
<<
"int p"
<<
(
i
+
1
)
<<
" = p1+1+tempIndex%numNeighbors;
\n
"
;
else
atomsForCombination
<<
"int p"
<<
(
i
+
1
)
<<
" = neighbors[firstNeighbor+tempIndex%numNeighbors];
\n
"
;
atomsForCombination
<<
"tempIndex /= numNeighbors;
\n
"
;
atomsForCombination
<<
"tempIndex /= numNeighbors;
\n
"
;
}
}
if
(
nonbondedMethod
!=
NoCutoff
)
{
if
(
nonbondedMethod
!=
NoCutoff
)
{
int
startCheckFrom
=
0
;
for
(
int
i
=
1
;
i
<
particlesPerSet
;
i
++
)
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
verifyCutoff
<<
"real4 pos"
<<
(
i
+
1
)
<<
" = posq[p"
<<
(
i
+
1
)
<<
"];
\n
"
;
verifyCutoff
<<
"real4 pos"
<<
(
i
+
1
)
<<
" = posq[p"
<<
(
i
+
1
)
<<
"];
\n
"
;
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
for
(
int
i
=
1
;
i
<
particlesPerSet
;
i
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
verifyCutoff
<<
"includeInteraction &= (delta(pos"
<<
(
i
+
1
)
<<
", pos"
<<
(
j
+
1
)
<<
", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);
\n
"
;
verifyCutoff
<<
"includeInteraction &= (delta(pos"
<<
(
i
+
1
)
<<
", pos"
<<
(
j
+
1
)
<<
", periodicBoxSize, invPeriodicBoxSize).w < CUTOFF_SQUARED);
\n
"
;
}
}
if
(
force
.
getNumExclusions
()
>
0
)
{
if
(
force
.
getNumExclusions
()
>
0
)
{
int
startCheckFrom
=
0
;
int
startCheckFrom
=
(
nonbondedMethod
==
NoCutoff
?
0
:
1
)
;
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
for
(
int
i
=
startCheckFrom
;
i
<
particlesPerSet
;
i
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
for
(
int
j
=
i
+
1
;
j
<
particlesPerSet
;
j
++
)
verifyExclusions
<<
"includeInteraction &= !isInteractionExcluded(p"
<<
(
i
+
1
)
<<
", p"
<<
(
j
+
1
)
<<
", exclusions, exclusionStartIndex);
\n
"
;
verifyExclusions
<<
"includeInteraction &= !isInteractionExcluded(p"
<<
(
i
+
1
)
<<
", p"
<<
(
j
+
1
)
<<
", exclusions, exclusionStartIndex);
\n
"
;
...
@@ -4883,6 +4886,10 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -4883,6 +4886,10 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
forceArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
forceArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
forceArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
if
(
nonbondedMethod
!=
NoCutoff
)
{
forceArgs
.
push_back
(
&
neighbors
->
getDevicePointer
());
forceArgs
.
push_back
(
&
neighborStartIndex
->
getDevicePointer
());
}
if
(
particleTypes
!=
NULL
)
{
if
(
particleTypes
!=
NULL
)
{
forceArgs
.
push_back
(
&
particleTypes
->
getDevicePointer
());
forceArgs
.
push_back
(
&
particleTypes
->
getDevicePointer
());
forceArgs
.
push_back
(
&
orderIndex
->
getDevicePointer
());
forceArgs
.
push_back
(
&
orderIndex
->
getDevicePointer
());
...
@@ -4967,7 +4974,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
...
@@ -4967,7 +4974,7 @@ double CudaCalcCustomManyParticleForceKernel::execute(ContextImpl& context, bool
cu
.
executeKernel
(
startIndicesKernel
,
&
startIndicesArgs
[
0
],
256
,
256
,
256
*
sizeof
(
int
));
cu
.
executeKernel
(
startIndicesKernel
,
&
startIndicesArgs
[
0
],
256
,
256
,
256
*
sizeof
(
int
));
cu
.
executeKernel
(
copyPairsKernel
,
&
copyPairsArgs
[
0
],
maxNeighborPairs
);
cu
.
executeKernel
(
copyPairsKernel
,
&
copyPairsArgs
[
0
],
maxNeighborPairs
);
}
}
cu
.
executeKernel
(
forceKernel
,
&
forceArgs
[
0
],
cu
.
getNumAtoms
()
*
CudaContext
::
ThreadBlockSize
,
CudaContext
::
ThreadBlock
Size
);
cu
.
executeKernel
(
forceKernel
,
&
forceArgs
[
0
],
cu
.
getNumAtoms
()
*
forceWorkgroupSize
,
forceWorkgroup
Size
);
if
(
nonbondedMethod
!=
NoCutoff
)
{
if
(
nonbondedMethod
!=
NoCutoff
)
{
// Make sure there was enough memory for the neighbor list.
// Make sure there was enough memory for the neighbor list.
...
...
platforms/cuda/src/kernels/customManyParticle.cu
View file @
bddaf4e7
...
@@ -74,55 +74,15 @@ inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restri
...
@@ -74,55 +74,15 @@ inline __device__ bool isInteractionExcluded(int atom1, int atom2, int* __restri
return
false
;
return
false
;
}
}
#define WARP_SIZE 32
/**
* Perform a parallel prefix sum of boolean values over an array. This is done as the first stage of compacting an array.
*/
__device__
void
prefixSum
(
bool
value
,
short
*
sum
,
ushort2
*
temp
)
{
#if __CUDA_ARCH__ >= 300
const
int
indexInWarp
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
warpMask
=
(
2
<<
indexInWarp
)
-
1
;
temp
[
threadIdx
.
x
].
x
=
__popc
(
__ballot
(
value
)
&
warpMask
);
__syncthreads
();
if
(
threadIdx
.
x
<
WARP_SIZE
)
{
int
multiWarpSum
=
temp
[(
threadIdx
.
x
+
1
)
*
WARP_SIZE
-
1
].
x
;
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
/
WARP_SIZE
;
offset
*=
2
)
{
short
n
=
__shfl_up
(
multiWarpSum
,
offset
,
WARP_SIZE
);
if
(
indexInWarp
>=
offset
)
multiWarpSum
+=
n
;
}
temp
[
threadIdx
.
x
].
y
=
multiWarpSum
;
}
__syncthreads
();
sum
[
threadIdx
.
x
]
=
temp
[
threadIdx
.
x
].
x
+
(
threadIdx
.
x
<
WARP_SIZE
?
0
:
temp
[
threadIdx
.
x
/
WARP_SIZE
-
1
].
y
);
__syncthreads
();
#else
temp
[
threadIdx
.
x
].
x
=
value
;
__syncthreads
();
int
whichBuffer
=
0
;
for
(
int
offset
=
1
;
offset
<
blockDim
.
x
;
offset
*=
2
)
{
if
(
whichBuffer
==
0
)
temp
[
threadIdx
.
x
].
y
=
(
threadIdx
.
x
<
offset
?
temp
[
threadIdx
.
x
].
x
:
temp
[
threadIdx
.
x
].
x
+
temp
[
threadIdx
.
x
-
offset
].
x
);
else
temp
[
threadIdx
.
x
].
x
=
(
threadIdx
.
x
<
offset
?
temp
[
threadIdx
.
x
].
y
:
temp
[
threadIdx
.
x
].
y
+
temp
[
threadIdx
.
x
-
offset
].
y
);
whichBuffer
=
1
-
whichBuffer
;
__syncthreads
();
}
if
(
whichBuffer
==
0
)
sum
[
threadIdx
.
x
]
=
temp
[
threadIdx
.
x
].
x
;
else
sum
[
threadIdx
.
x
]
=
temp
[
threadIdx
.
x
].
y
;
__syncthreads
();
#endif
}
/**
/**
* Compute the interaction.
* Compute the interaction.
*/
*/
extern
"C"
__global__
void
computeInteraction
(
extern
"C"
__global__
void
computeInteraction
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
#ifdef USE_CUTOFF
,
const
int
*
__restrict__
neighbors
,
const
int
*
__restrict__
neighborStartIndex
#endif
#ifdef USE_FILTERS
#ifdef USE_FILTERS
,
int
*
__restrict__
particleTypes
,
int
*
__restrict__
orderIndex
,
int
*
__restrict__
particleOrder
,
int
*
__restrict__
particleTypes
,
int
*
__restrict__
orderIndex
,
int
*
__restrict__
particleOrder
#endif
#endif
...
@@ -135,7 +95,12 @@ extern "C" __global__ void computeInteraction(
...
@@ -135,7 +95,12 @@ extern "C" __global__ void computeInteraction(
// Loop over particles to be the first one in the set.
// Loop over particles to be the first one in the set.
for
(
int
p1
=
blockIdx
.
x
;
p1
<
NUM_ATOMS
;
p1
+=
gridDim
.
x
)
{
for
(
int
p1
=
blockIdx
.
x
;
p1
<
NUM_ATOMS
;
p1
+=
gridDim
.
x
)
{
#ifdef USE_CUTOFF
int
firstNeighbor
=
neighborStartIndex
[
p1
];
int
numNeighbors
=
neighborStartIndex
[
p1
+
1
]
-
firstNeighbor
;
#else
int
numNeighbors
=
NUM_ATOMS
-
p1
-
1
;
int
numNeighbors
=
NUM_ATOMS
-
p1
-
1
;
#endif
int
numCombinations
=
NUM_CANDIDATE_COMBINATIONS
;
int
numCombinations
=
NUM_CANDIDATE_COMBINATIONS
;
for
(
int
index
=
threadIdx
.
x
;
index
<
numCombinations
;
index
+=
blockDim
.
x
)
{
for
(
int
index
=
threadIdx
.
x
;
index
<
numCombinations
;
index
+=
blockDim
.
x
)
{
FIND_ATOMS_FOR_COMBINATION_INDEX
;
FIND_ATOMS_FOR_COMBINATION_INDEX
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment