Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
09611266
Commit
09611266
authored
Aug 07, 2013
by
peastman
Browse files
Created CUDA implementation of restricting CustomNonbondedForce to selected interaction groups
parent
9840ca70
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
447 additions
and
11 deletions
+447
-11
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+7
-2
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+253
-9
platforms/cuda/src/kernels/customNonbondedGroups.cu
platforms/cuda/src/kernels/customNonbondedGroups.cu
+82
-0
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
+105
-0
No files found.
platforms/cuda/include/CudaKernels.h
View file @
09611266
...
...
@@ -638,7 +638,7 @@ private:
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
{
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
,
hasInitializedKernel
(
false
)
{
}
~
CudaCalcCustomNonbondedForceKernel
();
/**
...
...
@@ -665,15 +665,20 @@ public:
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
);
CudaContext
&
cu
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
tabulatedFunctionParams
;
CudaArray
*
interactionGroupData
;
CUfunction
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*>
tabulatedFunctions
;
double
longRangeCoefficient
;
bool
hasInitializedLongRangeCorrection
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
const
System
&
system
;
};
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
09611266
...
...
@@ -1862,6 +1862,17 @@ void CudaCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& context,
class
CudaCustomNonbondedForceInfo
:
public
CudaForceInfo
{
public:
CudaCustomNonbondedForceInfo
(
const
CustomNonbondedForce
&
force
)
:
force
(
force
)
{
if
(
force
.
getNumInteractionGroups
()
>
0
)
{
groupsForParticle
.
resize
(
force
.
getNumParticles
());
for
(
int
i
=
0
;
i
<
force
.
getNumInteractionGroups
();
i
++
)
{
set
<
int
>
set1
,
set2
;
force
.
getInteractionGroupParameters
(
i
,
set1
,
set2
);
for
(
set
<
int
>::
const_iterator
iter
=
set1
.
begin
();
iter
!=
set1
.
end
();
++
iter
)
groupsForParticle
[
*
iter
].
insert
(
2
*
i
);
for
(
set
<
int
>::
const_iterator
iter
=
set2
.
begin
();
iter
!=
set2
.
end
();
++
iter
)
groupsForParticle
[
*
iter
].
insert
(
2
*
i
+
1
);
}
}
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
vector
<
double
>
params1
;
...
...
@@ -1871,6 +1882,8 @@ public:
for
(
int
i
=
0
;
i
<
(
int
)
params1
.
size
();
i
++
)
if
(
params1
[
i
]
!=
params2
[
i
])
return
false
;
if
(
groupsForParticle
.
size
()
>
0
&&
groupsForParticle
[
particle1
]
!=
groupsForParticle
[
particle2
])
return
false
;
return
true
;
}
int
getNumParticleGroups
()
{
...
...
@@ -1888,6 +1901,7 @@ public:
}
private:
const
CustomNonbondedForce
&
force
;
vector
<
set
<
int
>
>
groupsForParticle
;
};
CudaCalcCustomNonbondedForceKernel
::~
CudaCalcCustomNonbondedForceKernel
()
{
...
...
@@ -1898,6 +1912,8 @@ CudaCalcCustomNonbondedForceKernel::~CudaCalcCustomNonbondedForceKernel() {
delete
globals
;
if
(
tabulatedFunctionParams
!=
NULL
)
delete
tabulatedFunctionParams
;
if
(
interactionGroupData
!=
NULL
)
delete
interactionGroupData
;
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
delete
tabulatedFunctions
[
i
];
if
(
forceCopy
!=
NULL
)
...
...
@@ -1909,7 +1925,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
string
prefix
=
"custom"
+
cu
.
intToString
(
forceIndex
)
+
"_"
;
string
prefix
=
(
force
.
getNumInteractionGroups
()
==
0
?
"custom"
+
cu
.
intToString
(
forceIndex
)
+
"_"
:
""
)
;
// Record parameters and exclusions.
...
...
@@ -2010,14 +2026,18 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
replacements
[
"SWITCH_C5"
]
=
cu
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
(),
5.0
));
}
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbonded
,
replacements
);
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cu
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
float
),
globals
->
getDevicePointer
()));
if
(
force
.
getNumInteractionGroups
()
>
0
)
initInteractionGroups
(
force
,
source
);
else
{
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
CudaNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
cu
.
getNonbondedUtilities
().
addParameter
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cu
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
}
if
(
globals
!=
NULL
)
{
globals
->
upload
(
globalParamValues
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
float
),
globals
->
getDevicePointer
()));
}
}
cu
.
addForce
(
new
CudaCustomNonbondedForceInfo
(
force
));
...
...
@@ -2033,6 +2053,215 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
}
}
void
CudaCalcCustomNonbondedForceKernel
::
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
string
&
interactionSource
)
{
// Process groups to form tiles.
vector
<
vector
<
int
>
>
atomLists
;
vector
<
pair
<
int
,
int
>
>
tiles
;
for
(
int
group
=
0
;
group
<
force
.
getNumInteractionGroups
();
group
++
)
{
// Get the list of atoms in this group and sort them.
set
<
int
>
set1
,
set2
;
force
.
getInteractionGroupParameters
(
group
,
set1
,
set2
);
vector
<
int
>
atoms1
,
atoms2
;
atoms1
.
insert
(
atoms1
.
begin
(),
set1
.
begin
(),
set1
.
end
());
atoms2
.
insert
(
atoms2
.
begin
(),
set2
.
begin
(),
set2
.
end
());
sort
(
atoms1
.
begin
(),
atoms1
.
end
());
sort
(
atoms2
.
begin
(),
atoms2
.
end
());
// Find how many tiles we will create for this group.
int
tileWidth
=
min
(
32
,
(
int
)
atoms2
.
size
());
int
numBlocks1
=
(
atoms1
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
int
numBlocks2
=
(
atoms2
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
// Add the tiles.
for
(
int
i
=
0
;
i
<
numBlocks1
;
i
++
)
for
(
int
j
=
0
;
j
<
numBlocks2
;
j
++
)
tiles
.
push_back
(
make_pair
(
atomLists
.
size
()
+
i
,
atomLists
.
size
()
+
numBlocks1
+
j
));
// Add the atom lists.
for
(
int
i
=
0
;
i
<
numBlocks1
;
i
++
)
{
vector
<
int
>
atoms
;
int
first
=
i
*
tileWidth
;
int
last
=
min
((
i
+
1
)
*
tileWidth
,
(
int
)
atoms1
.
size
());
for
(
int
j
=
first
;
j
<
last
;
j
++
)
atoms
.
push_back
(
atoms1
[
j
]);
atomLists
.
push_back
(
atoms
);
}
for
(
int
i
=
0
;
i
<
numBlocks2
;
i
++
)
{
vector
<
int
>
atoms
;
int
first
=
i
*
tileWidth
;
int
last
=
min
((
i
+
1
)
*
tileWidth
,
(
int
)
atoms2
.
size
());
for
(
int
j
=
first
;
j
<
last
;
j
++
)
atoms
.
push_back
(
atoms2
[
j
]);
atomLists
.
push_back
(
atoms
);
}
}
// Build a lookup table for quickly identifying excluded interactions.
set
<
pair
<
int
,
int
>
>
exclusions
;
for
(
int
i
=
0
;
i
<
force
.
getNumExclusions
();
i
++
)
{
int
p1
,
p2
;
force
.
getExclusionParticles
(
i
,
p1
,
p2
);
exclusions
.
insert
(
make_pair
(
p1
,
p2
));
}
// Build the exclusion flags for each tile. While we're at it, filter out tiles
// where all interactions are excluded, and sort the tiles by size.
vector
<
vector
<
int
>
>
exclusionFlags
(
tiles
.
size
());
vector
<
pair
<
int
,
int
>
>
tileOrder
;
for
(
int
tile
=
0
;
tile
<
tiles
.
size
();
tile
++
)
{
if
(
atomLists
[
tiles
[
tile
].
first
].
size
()
<
atomLists
[
tiles
[
tile
].
second
].
size
())
{
// For efficiency, we want the first axis to be the larger one.
int
swap
=
tiles
[
tile
].
first
;
tiles
[
tile
].
first
=
tiles
[
tile
].
second
;
tiles
[
tile
].
second
=
swap
;
}
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
vector
<
int
>
flags
(
atoms1
.
size
(),
(
1
<<
atoms2
.
size
())
-
1
);
int
numExcluded
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
atoms1
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
atoms2
.
size
();
j
++
)
{
int
a1
=
atoms1
[
i
];
int
a2
=
atoms2
[
j
];
if
(
a1
==
a2
||
exclusions
.
find
(
make_pair
(
a1
,
a2
))
!=
exclusions
.
end
()
||
exclusions
.
find
(
make_pair
(
a2
,
a1
))
!=
exclusions
.
end
())
{
flags
[
i
]
&=
-
1
-
(
1
<<
j
);
numExcluded
++
;
}
}
if
(
numExcluded
==
atoms1
.
size
()
*
atoms2
.
size
())
continue
;
// All interactions are excluded.
tileOrder
.
push_back
(
make_pair
((
int
)
-
atoms2
.
size
(),
tile
));
if
(
numExcluded
>
0
)
exclusionFlags
[
tile
]
=
flags
;
}
sort
(
tileOrder
.
begin
(),
tileOrder
.
end
());
// Merge tiles to get as close as possible to 32 along the first axis of each one.
vector
<
int
>
tileSetStart
;
tileSetStart
.
push_back
(
0
);
int
tileSetSize
=
0
;
for
(
int
i
=
0
;
i
<
tileOrder
.
size
();
i
++
)
{
int
tile
=
tileOrder
[
i
].
second
;
int
size
=
atomLists
[
tiles
[
tile
].
first
].
size
();
if
(
tileSetSize
+
size
>
32
)
{
tileSetStart
.
push_back
(
i
);
tileSetSize
=
0
;
}
tileSetSize
+=
size
;
}
tileSetStart
.
push_back
(
tileOrder
.
size
());
// Build the data structures.
int
numTileSets
=
tileSetStart
.
size
()
-
1
;
vector
<
int4
>
groupData
;
for
(
int
tileSet
=
0
;
tileSet
<
numTileSets
;
tileSet
++
)
{
int
indexInTileSet
=
0
;
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
{
int
tile
=
tileOrder
[
i
].
second
;
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
int
range
=
indexInTileSet
+
((
indexInTileSet
+
atoms1
.
size
())
<<
16
);
int
allFlags
=
(
1
<<
atoms2
.
size
())
-
1
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms1
.
size
();
j
++
)
{
int
a1
=
atoms1
[
j
];
int
a2
=
(
j
<
atoms2
.
size
()
?
atoms2
[
j
]
:
0
);
int
flags
=
(
exclusionFlags
[
tile
].
size
()
>
0
?
exclusionFlags
[
tile
][
j
]
:
allFlags
);
groupData
.
push_back
(
make_int4
(
a1
,
a2
,
range
,
flags
<<
indexInTileSet
));
}
indexInTileSet
+=
atoms1
.
size
();
}
for
(;
indexInTileSet
<
32
;
indexInTileSet
++
)
groupData
.
push_back
(
make_int4
(
0
,
0
,
0
,
0
));
}
interactionGroupData
=
CudaArray
::
create
<
int4
>
(
cu
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
->
upload
(
groupData
);
// Create the kernel.
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
interactionSource
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
stringstream
localData
;
int
localDataSize
=
0
;
vector
<
CudaNonbondedUtilities
::
ParameterInfo
>&
buffers
=
params
->
getBuffers
();
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
localData
<<
buffers
[
i
].
getComponentType
()
<<
" params"
<<
(
i
+
1
)
<<
";
\n
"
;
else
{
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
localData
<<
buffers
[
i
].
getComponentType
()
<<
" params"
<<
(
i
+
1
)
<<
"_"
<<
suffixes
[
j
]
<<
";
\n
"
;
}
localDataSize
+=
buffers
[
i
].
getSize
();
}
replacements
[
"ATOM_PARAMETER_DATA"
]
=
localData
.
str
();
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
args
<<
", const "
<<
buffers
[
i
].
getType
()
<<
"* __restrict__ global_params"
<<
(
i
+
1
);
if
(
globals
!=
NULL
)
args
<<
", const float* __restrict__ globals"
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
stringstream
load1
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
load1
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"1 = global_params"
<<
(
i
+
1
)
<<
"[atom1];
\n
"
;
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
stringstream
loadLocal2
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
loadLocal2
<<
"localData[threadIdx.x].params"
<<
(
i
+
1
)
<<
" = global_params"
<<
(
i
+
1
)
<<
"[atom2];
\n
"
;
else
{
loadLocal2
<<
buffers
[
i
].
getType
()
<<
" temp_params"
<<
(
i
+
1
)
<<
" = global_params"
<<
(
i
+
1
)
<<
"[atom2];
\n
"
;
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
loadLocal2
<<
"localData[threadIdx.x].params"
<<
(
i
+
1
)
<<
"_"
<<
suffixes
[
j
]
<<
" = temp_params"
<<
(
i
+
1
)
<<
"."
<<
suffixes
[
j
]
<<
";
\n
"
;
}
}
replacements
[
"LOAD_LOCAL_PARAMETERS"
]
=
loadLocal2
.
str
();
stringstream
load2
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 = localData[localIndex].params"
<<
(
i
+
1
)
<<
";
\n
"
;
else
{
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 = make_"
<<
buffers
[
i
].
getType
()
<<
"("
;
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
{
if
(
j
>
0
)
load2
<<
", "
;
load2
<<
"localData[localIndex].params"
<<
(
i
+
1
)
<<
"_"
<<
suffixes
[
j
];
}
load2
<<
");
\n
"
;
}
}
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2
.
str
();
map
<
string
,
string
>
defines
;
if
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
force
.
getNonbondedMethod
()
==
CustomNonbondedForce
::
CutoffPeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"THREAD_BLOCK_SIZE"
]
=
cu
.
intToString
(
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
double
cutoff
=
force
.
getCutoffDistance
();
defines
[
"CUTOFF_SQUARED"
]
=
cu
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cu
.
intToString
(
cu
.
getPaddedNumAtoms
());
defines
[
"TILE_SIZE"
]
=
"32"
;
int
numContexts
=
cu
.
getPlatformData
().
contexts
.
size
();
int
startIndex
=
cu
.
getContextIndex
()
*
numTileSets
/
numContexts
;
int
endIndex
=
(
cu
.
getContextIndex
()
+
1
)
*
numTileSets
/
numContexts
;
defines
[
"FIRST_TILE"
]
=
cu
.
intToString
(
startIndex
);
defines
[
"LAST_TILE"
]
=
cu
.
intToString
(
endIndex
);
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
cu
.
getUseDoublePrecision
())
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
CUmodule
program
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbondedGroups
,
replacements
),
defines
);
interactionGroupKernel
=
cu
.
getKernel
(
program
,
"computeInteractionGroups"
);
numGroupThreadBlocks
=
cu
.
getNonbondedUtilities
().
getNumForceThreadBlocks
();
}
double
CudaCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
if
(
globals
!=
NULL
)
{
bool
changed
=
false
;
...
...
@@ -2054,6 +2283,21 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
longRangeCoefficient
=
CustomNonbondedForceImpl
::
calcLongRangeCorrection
(
*
forceCopy
,
context
.
getOwner
());
hasInitializedLongRangeCorrection
=
true
;
}
if
(
interactionGroupData
!=
NULL
)
{
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
interactionGroupArgs
.
push_back
(
&
cu
.
getForce
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getEnergyBuffer
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
cu
.
getPosq
().
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
interactionGroupData
->
getDevicePointer
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
interactionGroupArgs
.
push_back
(
&
params
->
getBuffers
()[
i
].
getMemory
());
if
(
globals
!=
NULL
)
interactionGroupArgs
.
push_back
(
&
globals
->
getDevicePointer
());
}
int
forceThreadBlockSize
=
cu
.
getNonbondedUtilities
().
getForceThreadBlockSize
();
cu
.
executeKernel
(
interactionGroupKernel
,
&
interactionGroupArgs
[
0
],
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
double4
boxSize
=
cu
.
getPeriodicBoxSize
();
return
longRangeCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
}
...
...
platforms/cuda/src/kernels/customNonbondedGroups.cu
0 → 100644
View file @
09611266
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real
x
,
y
,
z
;
real
q
;
real
fx
,
fy
,
fz
;
ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
real
padding
;
#endif
}
AtomData
;
extern
"C"
__global__
void
computeInteractionGroups
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0.0
f
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
const
unsigned
int
startTile
=
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
const
unsigned
int
endTile
=
FIRST_TILE
+
(
warp
+
1
)
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
for
(
int
tile
=
startTile
;
tile
<
endTile
;
tile
++
)
{
const
int4
atomData
=
groupData
[
TILE_SIZE
*
tile
+
tgx
];
const
int
atom1
=
atomData
.
x
;
const
int
atom2
=
atomData
.
y
;
const
int
rangeStart
=
atomData
.
z
&
0xFFFF
;
const
int
rangeEnd
=
(
atomData
.
z
>>
16
)
&
0xFFFF
;
const
int
exclusions
=
atomData
.
w
;
real4
posq1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
real3
force
=
make_real3
(
0
);
real4
posq2
=
posq
[
atom2
];
localData
[
threadIdx
.
x
].
x
=
posq2
.
x
;
localData
[
threadIdx
.
x
].
y
=
posq2
.
y
;
localData
[
threadIdx
.
x
].
z
=
posq2
.
z
;
localData
[
threadIdx
.
x
].
q
=
posq2
.
w
;
LOAD_LOCAL_PARAMETERS
localData
[
threadIdx
.
x
].
fx
=
0.0
f
;
localData
[
threadIdx
.
x
].
fy
=
0.0
f
;
localData
[
threadIdx
.
x
].
fz
=
0.0
f
;
int
tj
=
tgx
;
for
(
int
j
=
rangeStart
;
j
<
rangeEnd
;
j
++
)
{
bool
isExcluded
=
(((
exclusions
>>
tj
)
&
1
)
==
0
);
int
localIndex
=
tbx
+
tj
;
posq2
=
make_real4
(
localData
[
localIndex
].
x
,
localData
[
localIndex
].
y
,
localData
[
localIndex
].
z
,
localData
[
localIndex
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
);
LOAD_ATOM2_PARAMETERS
real
dEdR
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
localData
[
localIndex
].
fx
+=
delta
.
x
;
localData
[
localIndex
].
fy
+=
delta
.
y
;
localData
[
localIndex
].
fz
+=
delta
.
z
;
tj
=
(
tj
==
rangeEnd
-
1
?
rangeStart
:
tj
+
1
);
}
if
(
exclusions
!=
0
)
{
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
}
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
\ No newline at end of file
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
View file @
09611266
...
...
@@ -538,6 +538,109 @@ void testLongRangeCorrection() {
ASSERT_EQUAL_TOL
(
standardEnergy1
-
standardEnergy2
,
customEnergy1
-
customEnergy2
,
1e-4
);
}
void
testInteractionGroups
()
{
const
int
numParticles
=
6
;
System
system
;
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"v1+v2"
);
nonbonded
->
addPerParticleParameter
(
"v"
);
vector
<
double
>
params
(
1
,
0.001
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
params
);
params
[
0
]
*=
10
;
}
set
<
int
>
set1
,
set2
,
set3
,
set4
;
set1
.
insert
(
2
);
set2
.
insert
(
0
);
set2
.
insert
(
1
);
set2
.
insert
(
2
);
set2
.
insert
(
3
);
set2
.
insert
(
4
);
set2
.
insert
(
5
);
nonbonded
->
addInteractionGroup
(
set1
,
set2
);
// Particle 2 interacts with every other particle.
set3
.
insert
(
0
);
set3
.
insert
(
1
);
set4
.
insert
(
4
);
set4
.
insert
(
5
);
nonbonded
->
addInteractionGroup
(
set3
,
set4
);
// Particles 0 and 1 interact with 4 and 5.
nonbonded
->
addExclusion
(
1
,
2
);
// Add an exclusion to make sure it gets skipped.
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Energy
);
double
expectedEnergy
=
331.423
;
// Each digit is the number of interactions a particle particle is involved in.
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testLargeInteractionGroup
()
{
const
int
numMolecules
=
300
;
const
int
numParticles
=
numMolecules
*
2
;
const
double
boxSize
=
20.0
;
// Create a large system.
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)"
);
nonbonded
->
addPerParticleParameter
(
"q"
);
nonbonded
->
addPerParticleParameter
(
"sigma"
);
nonbonded
->
addPerParticleParameter
(
"eps"
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
double
>
params
(
3
);
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
if
(
i
<
numMolecules
/
2
)
{
params
[
0
]
=
1.0
;
params
[
1
]
=
0.2
;
params
[
2
]
=
0.1
;
nonbonded
->
addParticle
(
params
);
params
[
0
]
=
-
1.0
;
params
[
1
]
=
0.1
;
nonbonded
->
addParticle
(
params
);
}
else
{
params
[
0
]
=
1.0
;
params
[
1
]
=
0.2
;
params
[
2
]
=
0.2
;
nonbonded
->
addParticle
(
params
);
params
[
0
]
=
-
1.0
;
params
[
1
]
=
0.1
;
nonbonded
->
addParticle
(
params
);
}
positions
[
2
*
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
positions
[
2
*
i
+
1
]
=
Vec3
(
positions
[
2
*
i
][
0
]
+
1.0
,
positions
[
2
*
i
][
1
],
positions
[
2
*
i
][
2
]);
nonbonded
->
addExclusion
(
2
*
i
,
2
*
i
+
1
);
}
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
NoCutoff
);
system
.
addForce
(
nonbonded
);
// Compute the forces.
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
);
// Modify the force so only one particle interacts with everything else.
set
<
int
>
set1
,
set2
;
set1
.
insert
(
151
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
set2
.
insert
(
i
);
nonbonded
->
addInteractionGroup
(
set1
,
set2
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state2
=
context
.
getState
(
State
::
Forces
);
// The force on that one particle should be the same.
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
151
],
state2
.
getForces
()[
151
],
1e-4
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
if
(
argc
>
1
)
...
...
@@ -553,6 +656,8 @@ int main(int argc, char* argv[]) {
testParallelComputation
();
testSwitchingFunction
();
testLongRangeCorrection
();
testInteractionGroups
();
testLargeInteractionGroup
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment