Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
54c0ca3f
Commit
54c0ca3f
authored
Aug 08, 2013
by
peastman
Browse files
Began creating OpenCL implementation of CustomNonbondedForce interaction groups
parent
aba74fd8
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
469 additions
and
13 deletions
+469
-13
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+1
-1
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
+1
-1
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+7
-2
platforms/opencl/include/OpenCLParameterSet.h
platforms/opencl/include/OpenCLParameterSet.h
+7
-0
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+257
-9
platforms/opencl/src/kernels/customNonbondedGroups.cl
platforms/opencl/src/kernels/customNonbondedGroups.cl
+91
-0
platforms/opencl/tests/TestOpenCLCustomNonbondedForce.cpp
platforms/opencl/tests/TestOpenCLCustomNonbondedForce.cpp
+105
-0
No files found.
platforms/cuda/src/CudaKernels.cpp
View file @
54c0ca3f
...
@@ -2072,7 +2072,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2072,7 +2072,7 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
// Find how many tiles we will create for this group.
// Find how many tiles we will create for this group.
int
tileWidth
=
min
(
32
,
(
int
)
atoms2
.
size
());
int
tileWidth
=
min
(
min
(
32
,
(
int
)
atoms1
.
size
())
,
(
int
)
atoms2
.
size
());
int
numBlocks1
=
(
atoms1
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
int
numBlocks1
=
(
atoms1
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
int
numBlocks2
=
(
atoms2
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
int
numBlocks2
=
(
atoms2
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
...
...
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
View file @
54c0ca3f
...
@@ -615,7 +615,7 @@ void testLargeInteractionGroup() {
...
@@ -615,7 +615,7 @@ void testLargeInteractionGroup() {
positions
[
2
*
i
+
1
]
=
Vec3
(
positions
[
2
*
i
][
0
]
+
1.0
,
positions
[
2
*
i
][
1
],
positions
[
2
*
i
][
2
]);
positions
[
2
*
i
+
1
]
=
Vec3
(
positions
[
2
*
i
][
0
]
+
1.0
,
positions
[
2
*
i
][
1
],
positions
[
2
*
i
][
2
]);
nonbonded
->
addExclusion
(
2
*
i
,
2
*
i
+
1
);
nonbonded
->
addExclusion
(
2
*
i
,
2
*
i
+
1
);
}
}
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
No
Cutoff
);
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
Cutoff
Periodic
);
system
.
addForce
(
nonbonded
);
system
.
addForce
(
nonbonded
);
// Compute the forces.
// Compute the forces.
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
54c0ca3f
...
@@ -639,7 +639,7 @@ private:
...
@@ -639,7 +639,7 @@ private:
class
OpenCLCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
class
OpenCLCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
public:
OpenCLCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
{
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
,
hasInitializedKernel
(
false
)
{
}
}
~
OpenCLCalcCustomNonbondedForceKernel
();
~
OpenCLCalcCustomNonbondedForceKernel
();
/**
/**
...
@@ -666,15 +666,20 @@ public:
...
@@ -666,15 +666,20 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
private:
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
);
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
OpenCLArray
*
tabulatedFunctionParams
;
OpenCLArray
*
tabulatedFunctionParams
;
OpenCLArray
*
interactionGroupData
;
cl
::
Kernel
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
double
longRangeCoefficient
;
double
longRangeCoefficient
;
bool
hasInitializedLongRangeCorrection
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
CustomNonbondedForce
*
forceCopy
;
const
System
&
system
;
const
System
&
system
;
};
};
...
...
platforms/opencl/include/OpenCLParameterSet.h
View file @
54c0ca3f
...
@@ -81,6 +81,13 @@ public:
...
@@ -81,6 +81,13 @@ public:
*/
*/
template
<
class
T
>
template
<
class
T
>
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
/**
 * Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
 * containing the data.
 *
 * This accessor is not const and hands back a non-const reference to the internal
 * "buffers" member, so the caller is able to modify the returned vector.
 *
 * @return a reference to the vector of ParameterInfo objects held by this object
 */
std::vector<OpenCLNonbondedUtilities::ParameterInfo>& getBuffers() {
    return buffers;
}
/**
/**
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data.
* containing the data.
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
54c0ca3f
...
@@ -46,6 +46,7 @@
...
@@ -46,6 +46,7 @@
#include "lepton/ParsedExpression.h"
#include "lepton/ParsedExpression.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
#include <cmath>
#include <cmath>
#include <set>
#include <set>
...
@@ -1875,6 +1876,17 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1875,6 +1876,17 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
class OpenCLCustomNonbondedForceInfo : public OpenCLForceInfo {
public:
public:
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
OpenCLCustomNonbondedForceInfo(int requiredBuffers, const CustomNonbondedForce& force) : OpenCLForceInfo(requiredBuffers), force(force) {
if (force.getNumInteractionGroups() > 0) {
groupsForParticle.resize(force.getNumParticles());
for (int i = 0; i < force.getNumInteractionGroups(); i++) {
set<int> set1, set2;
force.getInteractionGroupParameters(i, set1, set2);
for (set<int>::const_iterator iter = set1.begin(); iter != set1.end(); ++iter)
groupsForParticle[*iter].insert(2*i);
for (set<int>::const_iterator iter = set2.begin(); iter != set2.end(); ++iter)
groupsForParticle[*iter].insert(2*i+1);
}
}
}
}
bool areParticlesIdentical(int particle1, int particle2) {
bool areParticlesIdentical(int particle1, int particle2) {
vector<double> params1;
vector<double> params1;
...
@@ -1884,6 +1896,8 @@ public:
...
@@ -1884,6 +1896,8 @@ public:
for (int i = 0; i < (int) params1.size(); i++)
for (int i = 0; i < (int) params1.size(); i++)
if (params1[i] != params2[i])
if (params1[i] != params2[i])
return false;
return false;
if (groupsForParticle.size() > 0 && groupsForParticle[particle1] != groupsForParticle[particle2])
return false;
return true;
return true;
}
}
int getNumParticleGroups() {
int getNumParticleGroups() {
...
@@ -1901,6 +1915,7 @@ public:
...
@@ -1901,6 +1915,7 @@ public:
}
}
private:
private:
const CustomNonbondedForce& force;
const CustomNonbondedForce& force;
vector<set<int> > groupsForParticle;
};
};
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
...
@@ -1910,6 +1925,8 @@ OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
...
@@ -1910,6 +1925,8 @@ OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
delete globals;
delete globals;
if (tabulatedFunctionParams != NULL)
if (tabulatedFunctionParams != NULL)
delete tabulatedFunctionParams;
delete tabulatedFunctionParams;
if (interactionGroupData != NULL)
delete interactionGroupData;
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
for (int i = 0; i < (int) tabulatedFunctions.size(); i++)
delete tabulatedFunctions[i];
delete tabulatedFunctions[i];
if (forceCopy != NULL)
if (forceCopy != NULL)
...
@@ -1920,7 +1937,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -1920,7 +1937,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int forceIndex;
int forceIndex;
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
for (forceIndex = 0; forceIndex < system.getNumForces() && &system.getForce(forceIndex) != &force; ++forceIndex)
;
;
string
prefix
=
"custom"
+
cl
.
intToString
(
forceIndex
)
+
"_"
;
string prefix =
(force.getNumInteractionGroups() == 0 ?
"custom"+cl.intToString(forceIndex)+"_"
: "")
;
// Record parameters and exclusions.
// Record parameters and exclusions.
...
@@ -2021,14 +2038,18 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2021,14 +2038,18 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
replacements["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
replacements["SWITCH_C5"] = cl.doubleToString(6/pow(force.getSwitchingDistance()-force.getCutoffDistance(), 5.0));
}
}
string source = cl.replaceStrings(OpenCLKernelSources::customNonbonded, replacements);
string source = cl.replaceStrings(OpenCLKernelSources::customNonbonded, replacements);
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
if (force.getNumInteractionGroups() > 0)
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
initInteractionGroups(force, source);
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
else {
cl
.
getNonbondedUtilities
().
addParameter
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"params"
+
cl
.
intToString
(
i
+
1
),
buffer
.
getComponentType
(),
buffer
.
getNumComponents
(),
buffer
.
getSize
(),
buffer
.
getMemory
()));
cl.getNonbondedUtilities().addInteraction(useCutoff, usePeriodic, true, force.getCutoffDistance(), exclusionList, source, force.getForceGroup());
}
for (int i = 0; i < (int) params->getBuffers().size(); i++) {
if
(
globals
!=
NULL
)
{
const OpenCLNonbondedUtilities::ParameterInfo& buffer = params->getBuffers()[i];
globals
->
upload
(
globalParamValues
);
cl.getNonbondedUtilities().addParameter(OpenCLNonbondedUtilities::ParameterInfo(prefix+"params"+cl.intToString(i+1), buffer.getComponentType(), buffer.getNumComponents(), buffer.getSize(), buffer.getMemory()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
cl_float
),
globals
->
getDeviceBuffer
()));
}
if (globals != NULL) {
globals->upload(globalParamValues);
cl.getNonbondedUtilities().addArgument(OpenCLNonbondedUtilities::ParameterInfo(prefix+"globals", "float", 1, sizeof(cl_float), globals->getDeviceBuffer()));
}
}
}
cl.addForce(new OpenCLCustomNonbondedForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
cl.addForce(new OpenCLCustomNonbondedForceInfo(cl.getNonbondedUtilities().getNumForceBuffers(), force));
...
@@ -2044,6 +2065,215 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2044,6 +2065,215 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
}
}
}
}
/**
 * Build the data structures and the kernel used to evaluate the interaction groups of a
 * CustomNonbondedForce.
 *
 * The atoms of every interaction group are split into lists of at most 32 atoms, and every
 * pairing of a list from the first set with a list from the second set becomes a "tile".
 * Tiles in which every pair is excluded are dropped, the remaining tiles are packed into
 * "tile sets" of at most 32 entries along their first axis, and one mm_int4 per entry is
 * uploaded to interactionGroupData:
 *   x = atom from the first list of the tile
 *   y = atom from the second list of the tile (0 if the second list is shorter)
 *   z = start of the tile's range in the tile set (low 16 bits) and end of it (high 16 bits)
 *   w = interaction flags, one bit per atom of the second list, shifted to the tile's position
 *       in the tile set (a set bit means the pair is computed, a cleared bit means it is skipped)
 *
 * On return interactionGroupData, interactionGroupKernel and numGroupThreadBlocks have been set.
 *
 * @param force              the force whose interaction groups, exclusions and cutoff settings are read
 * @param interactionSource  source code of the pairwise interaction, substituted for
 *                           COMPUTE_INTERACTION in the customNonbondedGroups kernel
 */
void OpenCLCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbondedForce& force, const string& interactionSource) {
    // Process groups to form tiles.

    vector<vector<int> > atomLists;
    vector<pair<int, int> > tiles;
    for (int group = 0; group < force.getNumInteractionGroups(); group++) {
        // Get the list of atoms in this group.  A std::set is iterated in ascending order,
        // so the vectors built from it are already sorted.

        set<int> set1, set2;
        force.getInteractionGroupParameters(group, set1, set2);
        vector<int> atoms1(set1.begin(), set1.end());
        vector<int> atoms2(set2.begin(), set2.end());

        // Find how many tiles we will create for this group.  A group in which either set is
        // empty defines no interactions; skipping it also avoids dividing by a tile width of zero.

        int tileWidth = min(min(32, (int) atoms1.size()), (int) atoms2.size());
        if (tileWidth == 0)
            continue;
        int numBlocks1 = ((int) atoms1.size()+tileWidth-1)/tileWidth;
        int numBlocks2 = ((int) atoms2.size()+tileWidth-1)/tileWidth;

        // Add the tiles.  The lists for the first set are stored first, followed by the
        // lists for the second set.

        int firstList = (int) atomLists.size();
        for (int i = 0; i < numBlocks1; i++)
            for (int j = 0; j < numBlocks2; j++)
                tiles.push_back(make_pair(firstList+i, firstList+numBlocks1+j));

        // Add the atom lists.

        for (int i = 0; i < numBlocks1; i++) {
            int first = i*tileWidth;
            int last = min((i+1)*tileWidth, (int) atoms1.size());
            atomLists.push_back(vector<int>(atoms1.begin()+first, atoms1.begin()+last));
        }
        for (int i = 0; i < numBlocks2; i++) {
            int first = i*tileWidth;
            int last = min((i+1)*tileWidth, (int) atoms2.size());
            atomLists.push_back(vector<int>(atoms2.begin()+first, atoms2.begin()+last));
        }
    }

    // Build a lookup table for quickly identifying excluded interactions.

    set<pair<int, int> > exclusions;
    for (int i = 0; i < force.getNumExclusions(); i++) {
        int p1, p2;
        force.getExclusionParticles(i, p1, p2);
        exclusions.insert(make_pair(p1, p2));
    }

    // Build the exclusion flags for each tile.  While we're at it, filter out tiles
    // where all interactions are excluded, and sort the tiles by size.

    vector<vector<int> > exclusionFlags(tiles.size());
    vector<pair<int, int> > tileOrder;
    for (int tile = 0; tile < (int) tiles.size(); tile++) {
        if (atomLists[tiles[tile].first].size() < atomLists[tiles[tile].second].size()) {
            // For efficiency, we want the first axis to be the larger one.

            int swap = tiles[tile].first;
            tiles[tile].first = tiles[tile].second;
            tiles[tile].second = swap;
        }
        vector<int>& atoms1 = atomLists[tiles[tile].first];
        vector<int>& atoms2 = atomLists[tiles[tile].second];

        // One bit per atom of the second list.  A list can hold 32 atoms, and shifting a
        // 32 bit int by 32 is undefined, so that case is handled explicitly.

        int allFlags = (atoms2.size() >= 32 ? -1 : (1<<atoms2.size())-1);
        vector<int> flags(atoms1.size(), allFlags);
        int numExcluded = 0;
        for (int i = 0; i < (int) atoms1.size(); i++)
            for (int j = 0; j < (int) atoms2.size(); j++) {
                int a1 = atoms1[i];
                int a2 = atoms2[j];
                if (a1 == a2 || exclusions.find(make_pair(a1, a2)) != exclusions.end() || exclusions.find(make_pair(a2, a1)) != exclusions.end()) {
                    flags[i] &= -1-(1<<j);
                    numExcluded++;
                }
            }
        if (numExcluded == (int) (atoms1.size()*atoms2.size()))
            continue; // All interactions are excluded.

        // The negated size makes the ascending sort below put the largest tiles first.

        tileOrder.push_back(make_pair(-(int) atoms2.size(), tile));
        if (numExcluded > 0)
            exclusionFlags[tile] = flags;
    }
    sort(tileOrder.begin(), tileOrder.end());

    // Merge tiles to get as close as possible to 32 along the first axis of each one.

    vector<int> tileSetStart;
    tileSetStart.push_back(0);
    int tileSetSize = 0;
    for (int i = 0; i < (int) tileOrder.size(); i++) {
        int tile = tileOrder[i].second;
        int size = (int) atomLists[tiles[tile].first].size();
        if (tileSetSize+size > 32) {
            tileSetStart.push_back(i);
            tileSetSize = 0;
        }
        tileSetSize += size;
    }
    tileSetStart.push_back((int) tileOrder.size());

    // Build the data structures.  Every tile set occupies exactly 32 entries; unused entries
    // are zero, which gives them an empty range and no enabled interactions.

    int numTileSets = (int) tileSetStart.size()-1;
    vector<mm_int4> groupData;
    for (int tileSet = 0; tileSet < numTileSets; tileSet++) {
        int indexInTileSet = 0;
        for (int i = tileSetStart[tileSet]; i < tileSetStart[tileSet+1]; i++) {
            int tile = tileOrder[i].second;
            vector<int>& atoms1 = atomLists[tiles[tile].first];
            vector<int>& atoms2 = atomLists[tiles[tile].second];
            int range = indexInTileSet + ((indexInTileSet+(int) atoms1.size())<<16);
            int allFlags = (atoms2.size() >= 32 ? -1 : (1<<atoms2.size())-1);
            for (int j = 0; j < (int) atoms1.size(); j++) {
                int a1 = atoms1[j];
                int a2 = (j < (int) atoms2.size() ? atoms2[j] : 0);
                int flags = (exclusionFlags[tile].size() > 0 ? exclusionFlags[tile][j] : allFlags);

                // Shift as unsigned: the flags may have the top bit set, and left shifting a
                // negative int is not well defined.

                groupData.push_back(mm_int4(a1, a2, range, (int) ((unsigned int) flags<<indexInTileSet)));
            }
            indexInTileSet += (int) atoms1.size();
        }
        for (; indexInTileSet < 32; indexInTileSet++)
            groupData.push_back(mm_int4(0, 0, 0, 0));
    }
    interactionGroupData = OpenCLArray::create<mm_int4>(cl, groupData.size(), "interactionGroupData");
    interactionGroupData->upload(groupData);

    // Create the kernel.

    map<string, string> replacements;
    replacements["COMPUTE_INTERACTION"] = interactionSource;
    const string suffixes[] = {"x", "y", "z", "w"};

    // Fields added to the per-atom struct kept in local memory: one scalar field per
    // component of every parameter buffer.

    stringstream localData;
    int localDataSize = 0;
    vector<OpenCLNonbondedUtilities::ParameterInfo>& buffers = params->getBuffers();
    for (int i = 0; i < (int) buffers.size(); i++) {
        if (buffers[i].getNumComponents() == 1)
            localData<<buffers[i].getComponentType()<<" params"<<(i+1)<<";\n";
        else {
            for (int j = 0; j < buffers[i].getNumComponents(); ++j)
                localData<<buffers[i].getComponentType()<<" params"<<(i+1)<<"_"<<suffixes[j]<<";\n";
        }
        localDataSize += buffers[i].getSize();
    }
    replacements["ATOM_PARAMETER_DATA"] = localData.str();

    // Extra kernel arguments: one per parameter buffer, plus the global parameters if any.

    stringstream args;
    for (int i = 0; i < (int) buffers.size(); i++)
        args<<", __global const "<<buffers[i].getType()<<"* restrict global_params"<<(i+1);
    if (globals != NULL)
        args<<", __global const float* restrict globals";
    replacements["PARAMETER_ARGUMENTS"] = args.str();

    // Code to load the parameters of the first atom of a pair.

    stringstream load1;
    for (int i = 0; i < (int) buffers.size(); i++)
        load1<<buffers[i].getType()<<" params"<<(i+1)<<"1 = global_params"<<(i+1)<<"[atom1];\n";
    replacements["LOAD_ATOM1_PARAMETERS"] = load1.str();

    // Code to copy the parameters of the second atom into local memory.

    stringstream loadLocal2;
    for (int i = 0; i < (int) buffers.size(); i++) {
        if (buffers[i].getNumComponents() == 1)
            loadLocal2<<"localData[get_local_id(0)].params"<<(i+1)<<" = global_params"<<(i+1)<<"[atom2];\n";
        else {
            loadLocal2<<buffers[i].getType()<<" temp_params"<<(i+1)<<" = global_params"<<(i+1)<<"[atom2];\n";
            for (int j = 0; j < buffers[i].getNumComponents(); ++j)
                loadLocal2<<"localData[get_local_id(0)].params"<<(i+1)<<"_"<<suffixes[j]<<" = temp_params"<<(i+1)<<"."<<suffixes[j]<<";\n";
        }
    }
    replacements["LOAD_LOCAL_PARAMETERS"] = loadLocal2.str();

    // Code to read the parameters of the second atom back from local memory.

    stringstream load2;
    for (int i = 0; i < (int) buffers.size(); i++) {
        if (buffers[i].getNumComponents() == 1)
            load2<<buffers[i].getType()<<" params"<<(i+1)<<"2 = localData[localIndex].params"<<(i+1)<<";\n";
        else {
            // OpenCL C builds vectors with the literal syntax "(type) (a, b, ...)"; the
            // make_type(...) form is CUDA syntax.

            load2<<buffers[i].getType()<<" params"<<(i+1)<<"2 = ("<<buffers[i].getType()<<") (";
            for (int j = 0; j < buffers[i].getNumComponents(); ++j) {
                if (j > 0)
                    load2<<", ";
                load2<<"localData[localIndex].params"<<(i+1)<<"_"<<suffixes[j];
            }
            load2<<");\n";
        }
    }
    replacements["LOAD_ATOM2_PARAMETERS"] = load2.str();

    // Preprocessor definitions for the kernel.

    map<string, string> defines;
    if (force.getNonbondedMethod() != CustomNonbondedForce::NoCutoff)
        defines["USE_CUTOFF"] = "1";
    if (force.getNonbondedMethod() == CustomNonbondedForce::CutoffPeriodic)
        defines["USE_PERIODIC"] = "1";
    defines["THREAD_BLOCK_SIZE"] = cl.intToString(cl.getNonbondedUtilities().getForceThreadBlockSize());
    double cutoff = force.getCutoffDistance();
    defines["CUTOFF_SQUARED"] = cl.doubleToString(cutoff*cutoff);
    defines["PADDED_NUM_ATOMS"] = cl.intToString(cl.getPaddedNumAtoms());
    defines["TILE_SIZE"] = "32";

    // Give this context its share of the tile sets.

    int numContexts = cl.getPlatformData().contexts.size();
    int startIndex = cl.getContextIndex()*numTileSets/numContexts;
    int endIndex = (cl.getContextIndex()+1)*numTileSets/numContexts;
    defines["FIRST_TILE"] = cl.intToString(startIndex);
    defines["LAST_TILE"] = cl.intToString(endIndex);
    if ((localDataSize/4)%2 == 0 && !cl.getUseDoublePrecision())
        defines["PARAMETER_SIZE_IS_EVEN"] = "1";
    cl::Program program = cl.createProgram(cl.replaceStrings(OpenCLKernelSources::customNonbondedGroups, replacements), defines);
    interactionGroupKernel = cl::Kernel(program, "computeInteractionGroups");
    numGroupThreadBlocks = cl.getNonbondedUtilities().getNumForceThreadBlocks();
}
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool includeForces, bool includeEnergy) {
if (globals != NULL) {
if (globals != NULL) {
bool changed = false;
bool changed = false;
...
@@ -2065,6 +2295,24 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2065,6 +2295,24 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner());
longRangeCoefficient = CustomNonbondedForceImpl::calcLongRangeCorrection(*forceCopy, context.getOwner());
hasInitializedLongRangeCorrection = true;
hasInitializedLongRangeCorrection = true;
}
}
if (interactionGroupData != NULL) {
if (!hasInitializedKernel) {
hasInitializedKernel = true;
int index = 0;
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getLongForceBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getEnergyBuffer().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, cl.getPosq().getDeviceBuffer());
interactionGroupKernel.setArg<cl::Buffer>(index++, interactionGroupData->getDeviceBuffer());
setPeriodicBoxSizeArg(cl, interactionGroupKernel, index++);
setInvPeriodicBoxSizeArg(cl, interactionGroupKernel, index++);
for (int i = 0; i < (int) params->getBuffers().size(); i++)
interactionGroupKernel.setArg<cl::Memory>(index++, params->getBuffers()[i].getMemory());
if (globals != NULL)
interactionGroupKernel.setArg<cl::Buffer>(index++, globals->getDeviceBuffer());
}
int forceThreadBlockSize = cl.getNonbondedUtilities().getForceThreadBlockSize();
cl.executeKernel(interactionGroupKernel, numGroupThreadBlocks*forceThreadBlockSize, forceThreadBlockSize);
}
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
mm_double4 boxSize = cl.getPeriodicBoxSizeDouble();
return longRangeCoefficient/(boxSize.x*boxSize.y*boxSize.z);
return longRangeCoefficient/(boxSize.x*boxSize.y*boxSize.z);
}
}
...
...
platforms/opencl/src/kernels/customNonbondedGroups.cl
0 → 100644
View file @
54c0ca3f
#
ifdef
SUPPORTS_64_BIT_ATOMICS
#
pragma
OPENCL
EXTENSION
cl_khr_int64_base_atomics
:
enable
#
endif
#
define
WARPS_PER_GROUP
(
THREAD_BLOCK_SIZE/TILE_SIZE
)
/**
 * Per-atom record kept in local memory by the interaction group kernel below.
 * Note: the host code textually substitutes the parameter placeholders in this file, so
 * comments here deliberately avoid spelling those placeholder names.
 */
typedef struct {
    real x, y, z;     // position, copied from the xyz components of the atom's posq entry
    real q;           // copied from the w component of the atom's posq entry
    real fx, fy, fz;  // force accumulated on this atom while a tile is processed
    // The host-generated per-atom parameter fields are inserted by the next line.
    ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
    // Extra element present only when the host did not define PARAMETER_SIZE_IS_EVEN.
    real padding;
#endif
} AtomData;
/**
 * Compute the interactions defined by the interaction groups of a custom nonbonded force.
 *
 * The work is organized in "warps" of TILE_SIZE work-items.  Each warp processes a contiguous
 * range of tile sets, and each work-item reads one int4 entry of a tile set:
 *   x = index of the atom this work-item accumulates forces for (atom1)
 *   y = index of the atom this work-item stages in local memory (atom2)
 *   z = range of local slots to loop over: start in the low 16 bits, end in the high 16 bits
 *   w = interaction flags; bit k is set when the interaction with local slot k is computed
 *
 * Note: the host code textually substitutes the parameter and interaction placeholders in
 * this file, so comments here deliberately avoid spelling those placeholder names.
 *
 * @param forceBuffers        64 bit fixed point force accumulators (value*2^32); the x, y and z
 *                            components of an atom are PADDED_NUM_ATOMS elements apart
 * @param energyBuffer        per work-item energy accumulators
 * @param posq                atom positions (xyz) plus a fourth component (w)
 * @param groupData           the tile set entries described above
 * @param periodicBoxSize     size of the periodic box (used only with USE_PERIODIC)
 * @param invPeriodicBoxSize  inverse of the periodic box size (used only with USE_PERIODIC)
 */
__kernel void computeInteractionGroups(
        __global long* restrict forceBuffers, __global real* restrict energyBuffer, __global const real4* restrict posq,
        __global const int4* restrict groupData, real4 periodicBoxSize, real4 invPeriodicBoxSize
        PARAMETER_ARGUMENTS) {
    const unsigned int totalWarps = get_global_size(0)/TILE_SIZE;
    const unsigned int warp = get_global_id(0)/TILE_SIZE; // global warpIndex
    const unsigned int tgx = get_local_id(0) & (TILE_SIZE-1); // index within the warp
    const unsigned int tbx = get_local_id(0) - tgx; // block warpIndex
    real energy = 0.0f;
    __local AtomData localData[THREAD_BLOCK_SIZE];

    // Split the tile sets between FIRST_TILE and LAST_TILE evenly among the warps.

    const unsigned int startTile = FIRST_TILE+warp*(LAST_TILE-FIRST_TILE)/totalWarps;
    const unsigned int endTile = FIRST_TILE+(warp+1)*(LAST_TILE-FIRST_TILE)/totalWarps;
    for (int tile = startTile; tile < endTile; tile++) {
        const int4 atomData = groupData[TILE_SIZE*tile+tgx];
        const int atom1 = atomData.x;
        const int atom2 = atomData.y;
        const int rangeStart = atomData.z&0xFFFF;
        const int rangeEnd = (atomData.z>>16)&0xFFFF;
        const int exclusions = atomData.w;
        real4 posq1 = posq[atom1];
        LOAD_ATOM1_PARAMETERS
        real4 force = (real4) (0);

        // Stage the second atom in local memory so the other work-items can read it.

        real4 posq2 = posq[atom2];
        localData[get_local_id(0)].x = posq2.x;
        localData[get_local_id(0)].y = posq2.y;
        localData[get_local_id(0)].z = posq2.z;
        localData[get_local_id(0)].q = posq2.w;
        LOAD_LOCAL_PARAMETERS
        localData[get_local_id(0)].fx = 0.0f;
        localData[get_local_id(0)].fy = 0.0f;
        localData[get_local_id(0)].fz = 0.0f;
        int tj = tgx;
        SYNC_WARPS;
        for (int j = rangeStart; j < rangeEnd; j++) {
            bool isExcluded = (((exclusions>>tj)&1) == 0);
            int localIndex = tbx+tj;
            posq2 = (real4) (localData[localIndex].x, localData[localIndex].y, localData[localIndex].z, localData[localIndex].q);
            real4 delta = (real4) (posq2.xyz - posq1.xyz, 0);
#ifdef USE_PERIODIC
            delta.xyz -= floor(delta.xyz*invPeriodicBoxSize.xyz+0.5f)*periodicBoxSize.xyz;
#endif
            real r2 = delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;

            // The interaction flags must be honored whether or not a cutoff is in use:
            // otherwise excluded pairs, self pairs and padding slots would be evaluated.

#ifdef USE_CUTOFF
            if (!isExcluded && r2 < CUTOFF_SQUARED) {
#else
            if (!isExcluded) {
#endif
                real invR = RSQRT(r2);
                real r = RECIP(invR);
                LOAD_ATOM2_PARAMETERS
                real dEdR = 0.0f;
                real tempEnergy = 0.0f;
                COMPUTE_INTERACTION
                energy += tempEnergy;
                delta *= dEdR;
                force.xyz -= delta.xyz;
                localData[localIndex].fx += delta.x;
                localData[localIndex].fy += delta.y;
                localData[localIndex].fz += delta.z;
            }

            // Advance to the next local slot, wrapping around inside [rangeStart, rangeEnd).

            tj = (tj == rangeEnd-1 ? rangeStart : tj+1);
            SYNC_WARPS;
        }

        // Entries with no enabled interactions (including the zero padding entries) have
        // nothing to add.

        if (exclusions != 0) {
            atom_add(&forceBuffers[atom1], (long) (force.x*0x100000000));
            atom_add(&forceBuffers[atom1+PADDED_NUM_ATOMS], (long) (force.y*0x100000000));
            atom_add(&forceBuffers[atom1+2*PADDED_NUM_ATOMS], (long) (force.z*0x100000000));
            atom_add(&forceBuffers[atom2], (long) (localData[get_local_id(0)].fx*0x100000000));
            atom_add(&forceBuffers[atom2+PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fy*0x100000000));
            atom_add(&forceBuffers[atom2+2*PADDED_NUM_ATOMS], (long) (localData[get_local_id(0)].fz*0x100000000));
        }
    }
    energyBuffer[get_global_id(0)] += energy;
}
\ No newline at end of file
platforms/opencl/tests/TestOpenCLCustomNonbondedForce.cpp
View file @
54c0ca3f
...
@@ -538,6 +538,109 @@ void testLongRangeCorrection() {
...
@@ -538,6 +538,109 @@ void testLongRangeCorrection() {
ASSERT_EQUAL_TOL
(
standardEnergy1
-
standardEnergy2
,
customEnergy1
-
customEnergy2
,
1e-4
);
ASSERT_EQUAL_TOL
(
standardEnergy1
-
standardEnergy2
,
customEnergy1
-
customEnergy2
,
1e-4
);
}
}
void
testInteractionGroups
()
{
const
int
numParticles
=
6
;
System
system
;
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"v1+v2"
);
nonbonded
->
addPerParticleParameter
(
"v"
);
vector
<
double
>
params
(
1
,
0.001
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
params
);
params
[
0
]
*=
10
;
}
set
<
int
>
set1
,
set2
,
set3
,
set4
;
set1
.
insert
(
2
);
set2
.
insert
(
0
);
set2
.
insert
(
1
);
set2
.
insert
(
2
);
set2
.
insert
(
3
);
set2
.
insert
(
4
);
set2
.
insert
(
5
);
nonbonded
->
addInteractionGroup
(
set1
,
set2
);
// Particle 2 interacts with every other particle.
set3
.
insert
(
0
);
set3
.
insert
(
1
);
set4
.
insert
(
4
);
set4
.
insert
(
5
);
nonbonded
->
addInteractionGroup
(
set3
,
set4
);
// Particles 0 and 1 interact with 4 and 5.
nonbonded
->
addExclusion
(
1
,
2
);
// Add an exclusion to make sure it gets skipped.
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Energy
);
double
expectedEnergy
=
331.423
;
// Each digit is the number of interactions a particle particle is involved in.
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
TOL
);
}
void
testLargeInteractionGroup
()
{
const
int
numMolecules
=
300
;
const
int
numParticles
=
numMolecules
*
2
;
const
double
boxSize
=
20.0
;
// Create a large system.
System
system
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)"
);
nonbonded
->
addPerParticleParameter
(
"q"
);
nonbonded
->
addPerParticleParameter
(
"sigma"
);
nonbonded
->
addPerParticleParameter
(
"eps"
);
vector
<
Vec3
>
positions
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
double
>
params
(
3
);
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
if
(
i
<
numMolecules
/
2
)
{
params
[
0
]
=
1.0
;
params
[
1
]
=
0.2
;
params
[
2
]
=
0.1
;
nonbonded
->
addParticle
(
params
);
params
[
0
]
=
-
1.0
;
params
[
1
]
=
0.1
;
nonbonded
->
addParticle
(
params
);
}
else
{
params
[
0
]
=
1.0
;
params
[
1
]
=
0.2
;
params
[
2
]
=
0.2
;
nonbonded
->
addParticle
(
params
);
params
[
0
]
=
-
1.0
;
params
[
1
]
=
0.1
;
nonbonded
->
addParticle
(
params
);
}
positions
[
2
*
i
]
=
Vec3
(
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
),
boxSize
*
genrand_real2
(
sfmt
));
positions
[
2
*
i
+
1
]
=
Vec3
(
positions
[
2
*
i
][
0
]
+
1.0
,
positions
[
2
*
i
][
1
],
positions
[
2
*
i
][
2
]);
nonbonded
->
addExclusion
(
2
*
i
,
2
*
i
+
1
);
}
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
system
.
addForce
(
nonbonded
);
// Compute the forces.
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
);
// Modify the force so only one particle interacts with everything else.
set
<
int
>
set1
,
set2
;
set1
.
insert
(
151
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
set2
.
insert
(
i
);
nonbonded
->
addInteractionGroup
(
set1
,
set2
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
State
state2
=
context
.
getState
(
State
::
Forces
);
// The force on that one particle should be the same.
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
151
],
state2
.
getForces
()[
151
],
1e-4
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
...
@@ -553,6 +656,8 @@ int main(int argc, char* argv[]) {
...
@@ -553,6 +656,8 @@ int main(int argc, char* argv[]) {
testParallelComputation
();
testParallelComputation
();
testSwitchingFunction
();
testSwitchingFunction
();
testLongRangeCorrection
();
testLongRangeCorrection
();
testInteractionGroups
();
testLargeInteractionGroup
();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment