Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
3cb25ad8
Commit
3cb25ad8
authored
Aug 25, 2013
by
Lee-Ping Wang
Browse files
Merge branch 'master' of github.com:leeping/openmm
parents
7bfb75c7
24608623
Changes
125
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1025 additions
and
185 deletions
+1025
-185
openmmapi/src/MonteCarloAnisotropicBarostat.cpp
openmmapi/src/MonteCarloAnisotropicBarostat.cpp
+2
-2
platforms/cuda/include/CudaContext.h
platforms/cuda/include/CudaContext.h
+2
-3
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+7
-2
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+45
-22
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+304
-35
platforms/cuda/src/kernels/andersenThermostat.cu
platforms/cuda/src/kernels/andersenThermostat.cu
+2
-2
platforms/cuda/src/kernels/brownian.cu
platforms/cuda/src/kernels/brownian.cu
+6
-6
platforms/cuda/src/kernels/constraints.cu
platforms/cuda/src/kernels/constraints.cu
+2
-2
platforms/cuda/src/kernels/coulombLennardJones.cu
platforms/cuda/src/kernels/coulombLennardJones.cu
+5
-2
platforms/cuda/src/kernels/customNonbondedGroups.cu
platforms/cuda/src/kernels/customNonbondedGroups.cu
+89
-0
platforms/cuda/src/kernels/langevin.cu
platforms/cuda/src/kernels/langevin.cu
+10
-10
platforms/cuda/src/kernels/verlet.cu
platforms/cuda/src/kernels/verlet.cu
+10
-10
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
+180
-0
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
...orms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
+4
-43
platforms/opencl/include/OpenCLContext.h
platforms/opencl/include/OpenCLContext.h
+8
-2
platforms/opencl/include/OpenCLKernels.h
platforms/opencl/include/OpenCLKernels.h
+7
-2
platforms/opencl/include/OpenCLParameterSet.h
platforms/opencl/include/OpenCLParameterSet.h
+7
-0
platforms/opencl/sharedTarget/CMakeLists.txt
platforms/opencl/sharedTarget/CMakeLists.txt
+1
-1
platforms/opencl/src/OpenCLContext.cpp
platforms/opencl/src/OpenCLContext.cpp
+41
-32
platforms/opencl/src/OpenCLKernels.cpp
platforms/opencl/src/OpenCLKernels.cpp
+293
-9
No files found.
openmmapi/src/MonteCarloAnisotropicBarostat.cpp
View file @
3cb25ad8
...
@@ -35,8 +35,8 @@
...
@@ -35,8 +35,8 @@
using
namespace
OpenMM
;
using
namespace
OpenMM
;
MonteCarloAnisotropicBarostat
::
MonteCarloAnisotropicBarostat
(
const
Vec3
&
defaultPressure
,
double
temperature
,
int
frequency
,
bool
scaleX
,
bool
scaleY
,
bool
scaleZ
)
:
MonteCarloAnisotropicBarostat
::
MonteCarloAnisotropicBarostat
(
const
Vec3
&
defaultPressure
,
double
temperature
,
bool
scaleX
,
bool
scaleY
,
bool
scaleZ
,
int
frequency
)
:
defaultPressure
(
defaultPressure
),
temperature
(
temperature
),
frequency
(
frequency
),
scaleX
(
scaleX
),
scaleY
(
scaleY
),
scaleZ
(
scaleZ
)
{
defaultPressure
(
defaultPressure
),
temperature
(
temperature
),
scaleX
(
scaleX
),
scaleY
(
scaleY
),
scaleZ
(
scaleZ
)
,
frequency
(
frequency
)
{
setRandomNumberSeed
((
int
)
time
(
NULL
));
setRandomNumberSeed
((
int
)
time
(
NULL
));
}
}
...
...
platforms/cuda/include/CudaContext.h
View file @
3cb25ad8
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -489,7 +489,6 @@ private:
...
@@ -489,7 +489,6 @@ private:
struct
MoleculeGroup
;
struct
MoleculeGroup
;
class
VirtualSiteInfo
;
class
VirtualSiteInfo
;
void
findMoleculeGroups
();
void
findMoleculeGroups
();
static
void
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
std
::
vector
<
int
>&
atomMolecule
,
std
::
vector
<
std
::
vector
<
int
>
>&
atomBonds
);
/**
/**
* Ensure that all molecules marked as "identical" really are identical. This should be
* Ensure that all molecules marked as "identical" really are identical. This should be
* called whenever force field parameters change. If necessary, it will rebuild the list
* called whenever force field parameters change. If necessary, it will rebuild the list
...
@@ -515,7 +514,7 @@ private:
...
@@ -515,7 +514,7 @@ private:
int
numAtomBlocks
;
int
numAtomBlocks
;
int
numThreadBlocks
;
int
numThreadBlocks
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
;
bool
useBlockingSync
,
useDoublePrecision
,
useMixedPrecision
,
contextIsValid
,
atomsWereReordered
;
std
::
string
compiler
,
tempDir
,
gpuArchitecture
;
std
::
string
compiler
,
tempDir
,
cacheDir
,
gpuArchitecture
;
float4
periodicBoxSizeFloat
,
invPeriodicBoxSizeFloat
;
float4
periodicBoxSizeFloat
,
invPeriodicBoxSizeFloat
;
double4
periodicBoxSize
,
invPeriodicBoxSize
;
double4
periodicBoxSize
,
invPeriodicBoxSize
;
std
::
string
defaultOptimizationOptions
;
std
::
string
defaultOptimizationOptions
;
...
...
platforms/cuda/include/CudaKernels.h
View file @
3cb25ad8
...
@@ -638,7 +638,7 @@ private:
...
@@ -638,7 +638,7 @@ private:
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
class
CudaCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
public:
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
CudaCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
{
cu
(
cu
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
,
hasInitializedKernel
(
false
)
{
}
}
~
CudaCalcCustomNonbondedForceKernel
();
~
CudaCalcCustomNonbondedForceKernel
();
/**
/**
...
@@ -665,15 +665,20 @@ public:
...
@@ -665,15 +665,20 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
private:
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
);
CudaContext
&
cu
;
CudaContext
&
cu
;
CudaParameterSet
*
params
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
CudaArray
*
globals
;
CudaArray
*
tabulatedFunctionParams
;
CudaArray
*
tabulatedFunctionParams
;
CudaArray
*
interactionGroupData
;
CUfunction
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
float
>
globalParamValues
;
std
::
vector
<
CudaArray
*>
tabulatedFunctions
;
std
::
vector
<
CudaArray
*>
tabulatedFunctions
;
double
longRangeCoefficient
;
double
longRangeCoefficient
;
bool
hasInitializedLongRangeCorrection
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
CustomNonbondedForce
*
forceCopy
;
const
System
&
system
;
const
System
&
system
;
};
};
...
...
platforms/cuda/src/CudaContext.cpp
View file @
3cb25ad8
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -35,19 +35,25 @@
...
@@ -35,19 +35,25 @@
#include "CudaIntegrationUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include "SHA1.h"
#include "hilbert.h"
#include "hilbert.h"
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "openmm/Platform.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "openmm/VirtualSite.h"
#include "openmm/VirtualSite.h"
#include "CudaExpressionUtilities.h"
#include "CudaExpressionUtilities.h"
#include "openmm/internal/ContextImpl.h"
#include <algorithm>
#include <algorithm>
#include <cstdlib>
#include <cstdlib>
#include <fstream>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <typeinfo>
#include <typeinfo>
#include <cudaProfiler.h>
#include <cudaProfiler.h>
#ifndef WIN32
#include <unistd.h>
#endif
#define CHECK_RESULT(result) CHECK_RESULT2(result, errorMessage);
#define CHECK_RESULT(result) CHECK_RESULT2(result, errorMessage);
...
@@ -87,10 +93,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -87,10 +93,14 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
}
}
else
else
throw
OpenMMException
(
"Illegal value for CudaPrecision: "
+
precision
);
throw
OpenMMException
(
"Illegal value for CudaPrecision: "
+
precision
);
char
*
cacheVariable
=
getenv
(
"OPENMM_CACHE_DIR"
);
cacheDir
=
(
cacheVariable
==
NULL
?
tempDir
:
string
(
cacheVariable
));
#ifdef WIN32
#ifdef WIN32
this
->
tempDir
=
tempDir
+
"
\\
"
;
this
->
tempDir
=
tempDir
+
"
\\
"
;
cacheDir
=
cacheDir
+
"
\\
"
;
#else
#else
this
->
tempDir
=
tempDir
+
"/"
;
this
->
tempDir
=
tempDir
+
"/"
;
cacheDir
=
cacheDir
+
"/"
;
#endif
#endif
contextIndex
=
platformData
.
contexts
.
size
();
contextIndex
=
platformData
.
contexts
.
size
();
int
numDevices
;
int
numDevices
;
...
@@ -214,6 +224,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -214,6 +224,8 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"ACOS"
]
=
useDoublePrecision
?
"acos"
:
"acosf"
;
compilationDefines
[
"ACOS"
]
=
useDoublePrecision
?
"acos"
:
"acosf"
;
compilationDefines
[
"ASIN"
]
=
useDoublePrecision
?
"asin"
:
"asinf"
;
compilationDefines
[
"ASIN"
]
=
useDoublePrecision
?
"asin"
:
"asinf"
;
compilationDefines
[
"ATAN"
]
=
useDoublePrecision
?
"atan"
:
"atanf"
;
compilationDefines
[
"ATAN"
]
=
useDoublePrecision
?
"atan"
:
"atanf"
;
compilationDefines
[
"ERF"
]
=
useDoublePrecision
?
"erf"
:
"erff"
;
compilationDefines
[
"ERFC"
]
=
useDoublePrecision
?
"erfc"
:
"erfcf"
;
// Create the work thread used for parallelization when running on multiple devices.
// Create the work thread used for parallelization when running on multiple devices.
...
@@ -347,6 +359,7 @@ static bool compileInWindows(const string &command) {
...
@@ -347,6 +359,7 @@ static bool compileInWindows(const string &command) {
#endif
#endif
CUmodule
CudaContext
::
createModule
(
const
string
source
,
const
map
<
string
,
string
>&
defines
,
const
char
*
optimizationFlags
)
{
CUmodule
CudaContext
::
createModule
(
const
string
source
,
const
map
<
string
,
string
>&
defines
,
const
char
*
optimizationFlags
)
{
string
bits
=
intToString
(
8
*
sizeof
(
void
*
));
string
options
=
(
optimizationFlags
==
NULL
?
defaultOptimizationOptions
:
string
(
optimizationFlags
));
string
options
=
(
optimizationFlags
==
NULL
?
defaultOptimizationOptions
:
string
(
optimizationFlags
));
stringstream
src
;
stringstream
src
;
if
(
!
options
.
empty
())
if
(
!
options
.
empty
())
...
@@ -394,17 +407,38 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
...
@@ -394,17 +407,38 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
src
<<
endl
;
src
<<
endl
;
src
<<
source
<<
endl
;
src
<<
source
<<
endl
;
// See whether we already have PTX for this kernel cached.
CSHA1
sha1
;
sha1
.
Update
((
const
UINT_8
*
)
src
.
str
().
c_str
(),
src
.
str
().
size
());
sha1
.
Final
();
UINT_8
hash
[
20
];
sha1
.
GetHash
(
hash
);
stringstream
cacheFile
;
cacheFile
<<
cacheDir
;
cacheFile
.
flags
(
ios
::
hex
);
for
(
int
i
=
0
;
i
<
20
;
i
++
)
cacheFile
<<
setw
(
2
)
<<
setfill
(
'0'
)
<<
(
int
)
hash
[
i
];
cacheFile
<<
'_'
<<
gpuArchitecture
<<
'_'
<<
bits
;
CUmodule
module
;
if
(
cuModuleLoad
(
&
module
,
cacheFile
.
str
().
c_str
())
==
CUDA_SUCCESS
)
return
module
;
// Write out the source to a temporary file.
// Write out the source to a temporary file.
stringstream
tempFileName
;
stringstream
tempFileName
;
tempFileName
<<
"openmmTempKernel"
<<
this
;
// Include a pointer to this context as part of the filename to avoid collisions.
tempFileName
<<
"openmmTempKernel"
<<
this
;
// Include a pointer to this context as part of the filename to avoid collisions.
#ifdef WIN32
tempFileName
<<
"_"
<<
GetCurrentProcessId
();
#else
tempFileName
<<
"_"
<<
getpid
();
#endif
string
inputFile
=
(
tempDir
+
tempFileName
.
str
()
+
".cu"
);
string
inputFile
=
(
tempDir
+
tempFileName
.
str
()
+
".cu"
);
string
outputFile
=
(
tempDir
+
tempFileName
.
str
()
+
".ptx"
);
string
outputFile
=
(
tempDir
+
tempFileName
.
str
()
+
".ptx"
);
string
logFile
=
(
tempDir
+
tempFileName
.
str
()
+
".log"
);
string
logFile
=
(
tempDir
+
tempFileName
.
str
()
+
".log"
);
ofstream
out
(
inputFile
.
c_str
());
ofstream
out
(
inputFile
.
c_str
());
out
<<
src
.
str
();
out
<<
src
.
str
();
out
.
close
();
out
.
close
();
string
bits
=
intToString
(
8
*
sizeof
(
void
*
));
#ifdef WIN32
#ifdef WIN32
#ifdef _DEBUG
#ifdef _DEBUG
string
command
=
"
\"
"
+
compiler
+
"
\"
--ptx -G -g --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
string
command
=
"
\"
"
+
compiler
+
"
\"
--ptx -G -g --machine "
+
bits
+
" -arch=sm_"
+
gpuArchitecture
+
" -o "
+
outputFile
+
" "
+
options
+
" "
+
inputFile
+
" 2> "
+
logFile
;
...
@@ -433,7 +467,6 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
...
@@ -433,7 +467,6 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
}
}
throw
OpenMMException
(
error
.
str
());
throw
OpenMMException
(
error
.
str
());
}
}
CUmodule
module
;
CUresult
result
=
cuModuleLoad
(
&
module
,
outputFile
.
c_str
());
CUresult
result
=
cuModuleLoad
(
&
module
,
outputFile
.
c_str
());
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
m
;
std
::
stringstream
m
;
...
@@ -441,6 +474,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
...
@@ -441,6 +474,7 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
throw
OpenMMException
(
m
.
str
());
throw
OpenMMException
(
m
.
str
());
}
}
remove
(
inputFile
.
c_str
());
remove
(
inputFile
.
c_str
());
if
(
rename
(
outputFile
.
c_str
(),
cacheFile
.
str
().
c_str
())
!=
0
)
remove
(
outputFile
.
c_str
());
remove
(
outputFile
.
c_str
());
remove
(
logFile
.
c_str
());
remove
(
logFile
.
c_str
());
return
module
;
return
module
;
...
@@ -616,15 +650,6 @@ void CudaContext::clearAutoclearBuffers() {
...
@@ -616,15 +650,6 @@ void CudaContext::clearAutoclearBuffers() {
}
}
}
}
void
CudaContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
// Recursively tag atoms as belonging to a particular molecule.
atomMolecule
[
atom
]
=
molecule
;
for
(
int
i
=
0
;
i
<
(
int
)
atomBonds
[
atom
].
size
();
i
++
)
if
(
atomMolecule
[
atomBonds
[
atom
][
i
]]
==
-
1
)
tagAtomsInMolecule
(
atomBonds
[
atom
][
i
],
molecule
,
atomMolecule
,
atomBonds
);
}
/**
/**
* This class ensures that atom reordering doesn't break virtual sites.
* This class ensures that atom reordering doesn't break virtual sites.
*/
*/
...
@@ -719,16 +744,14 @@ void CudaContext::findMoleculeGroups() {
...
@@ -719,16 +744,14 @@ void CudaContext::findMoleculeGroups() {
}
}
}
}
// Now
tag
atoms by which molecule they belong to.
// Now
identify
atoms by which molecule they belong to.
vector
<
int
>
atomMolecule
(
numAtoms
,
-
1
);
vector
<
vector
<
int
>
>
atomIndices
=
ContextImpl
::
findMolecules
(
numAtoms
,
atomBonds
);
int
numMolecules
=
0
;
int
numMolecules
=
atomIndices
.
size
();
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
vector
<
int
>
atomMolecule
(
numAtoms
);
if
(
atomMolecule
[
i
]
==
-
1
)
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
tagAtomsInMolecule
(
i
,
numMolecules
++
,
atomMolecule
,
atomBonds
);
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
vector
<
vector
<
int
>
>
atomIndices
(
numMolecules
);
atomMolecule
[
atomIndices
[
i
][
j
]]
=
i
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
atomIndices
[
atomMolecule
[
i
]].
push_back
(
i
);
// Construct a description of each molecule.
// Construct a description of each molecule.
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
3cb25ad8
This diff is collapsed.
Click to expand it.
platforms/cuda/src/kernels/andersenThermostat.cu
View file @
3cb25ad8
...
@@ -2,11 +2,11 @@
...
@@ -2,11 +2,11 @@
* Apply the Andersen thermostat to adjust particle velocities.
* Apply the Andersen thermostat to adjust particle velocities.
*/
*/
extern
"C"
__global__
void
applyAndersenThermostat
(
float
collisionFrequency
,
float
kT
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
stepSize
,
const
float4
*
__restrict__
random
,
extern
"C"
__global__
void
applyAndersenThermostat
(
int
numAtoms
,
float
collisionFrequency
,
float
kT
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
stepSize
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
,
const
int
*
__restrict__
atomGroups
)
{
unsigned
int
randomIndex
,
const
int
*
__restrict__
atomGroups
)
{
float
collisionProbability
=
1.0
f
-
expf
(
-
(
float
)
(
collisionFrequency
*
stepSize
[
0
].
y
));
float
collisionProbability
=
1.0
f
-
expf
(
-
(
float
)
(
collisionFrequency
*
stepSize
[
0
].
y
));
float
randomRange
=
erff
(
collisionProbability
/
sqrtf
(
2.0
f
));
float
randomRange
=
erff
(
collisionProbability
/
sqrtf
(
2.0
f
));
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
float4
selectRand
=
random
[
randomIndex
+
atomGroups
[
index
]];
float4
selectRand
=
random
[
randomIndex
+
atomGroups
[
index
]];
float4
velRand
=
random
[
randomIndex
+
index
];
float4
velRand
=
random
[
randomIndex
+
index
];
...
...
platforms/cuda/src/kernels/brownian.cu
View file @
3cb25ad8
...
@@ -2,16 +2,16 @@
...
@@ -2,16 +2,16 @@
* Perform the first step of Brownian integration.
* Perform the first step of Brownian integration.
*/
*/
extern
"C"
__global__
void
integrateBrownianPart1
(
mixed
tauDeltaT
,
mixed
noiseAmplitude
,
const
long
long
*
__restrict__
force
,
extern
"C"
__global__
void
integrateBrownianPart1
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed
tauDeltaT
,
mixed
noiseAmplitude
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
const
mixed4
*
__restrict__
velm
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
mixed4
*
__restrict__
posDelta
,
const
mixed4
*
__restrict__
velm
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
randomIndex
+=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
const
mixed
fscale
=
tauDeltaT
/
(
mixed
)
0x100000000
;
const
mixed
fscale
=
tauDeltaT
/
(
mixed
)
0x100000000
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed
invMass
=
velm
[
index
].
w
;
mixed
invMass
=
velm
[
index
].
w
;
if
(
invMass
!=
0
)
{
if
(
invMass
!=
0
)
{
posDelta
[
index
].
x
=
fscale
*
invMass
*
force
[
index
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
x
;
posDelta
[
index
].
x
=
fscale
*
invMass
*
force
[
index
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
x
;
posDelta
[
index
].
y
=
fscale
*
invMass
*
force
[
index
+
PADDED_NUM_ATOMS
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
y
;
posDelta
[
index
].
y
=
fscale
*
invMass
*
force
[
index
+
paddedNumAtoms
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
y
;
posDelta
[
index
].
z
=
fscale
*
invMass
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
z
;
posDelta
[
index
].
z
=
fscale
*
invMass
*
force
[
index
+
paddedNumAtoms
*
2
]
+
noiseAmplitude
*
SQRT
(
invMass
)
*
random
[
randomIndex
].
z
;
}
}
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
randomIndex
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
...
@@ -21,9 +21,9 @@ extern "C" __global__ void integrateBrownianPart1(mixed tauDeltaT, mixed noiseAm
...
@@ -21,9 +21,9 @@ extern "C" __global__ void integrateBrownianPart1(mixed tauDeltaT, mixed noiseAm
* Perform the second step of Brownian integration.
* Perform the second step of Brownian integration.
*/
*/
extern
"C"
__global__
void
integrateBrownianPart2
(
mixed
deltaT
,
real4
*
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
integrateBrownianPart2
(
int
numAtoms
,
mixed
deltaT
,
real4
*
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
const
mixed
oneOverDeltaT
=
RECIP
(
deltaT
);
const
mixed
oneOverDeltaT
=
RECIP
(
deltaT
);
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
if
(
velm
[
index
].
w
!=
0
)
{
if
(
velm
[
index
].
w
!=
0
)
{
mixed4
delta
=
posDelta
[
index
];
mixed4
delta
=
posDelta
[
index
];
velm
[
index
].
x
=
oneOverDeltaT
*
delta
.
x
;
velm
[
index
].
x
=
oneOverDeltaT
*
delta
.
x
;
...
...
platforms/cuda/src/kernels/constraints.cu
View file @
3cb25ad8
extern
"C"
__global__
void
applyPositionDeltas
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
extern
"C"
__global__
void
applyPositionDeltas
(
int
numAtoms
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
posDelta
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
unsigned
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
real4
pos1
=
posq
[
index
];
real4
pos1
=
posq
[
index
];
real4
pos2
=
posqCorrection
[
index
];
real4
pos2
=
posqCorrection
[
index
];
...
...
platforms/cuda/src/kernels/coulombLennardJones.cu
View file @
3cb25ad8
...
@@ -22,8 +22,11 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
...
@@ -22,8 +22,11 @@ if ((!isExcluded && r2 < CUTOFF_SQUARED) || needCorrection) {
if
(
needCorrection
)
{
if
(
needCorrection
)
{
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
// Subtract off the part of this interaction that was included in the reciprocal space contribution.
tempForce
=
-
prefactor
*
((
1.0
f
-
erfcAlphaR
)
-
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
if
(
1
-
erfcAlphaR
>
1e-6
)
{
tempEnergy
+=
-
prefactor
*
(
1.0
f
-
erfcAlphaR
);
real
erfAlphaR
=
ERF
(
alphaR
);
// Our erfc approximation is not accurate enough when r is very small, which happens with Drude particles.
tempForce
=
-
prefactor
*
(
erfAlphaR
-
alphaR
*
expAlphaRSqr
*
TWO_OVER_SQRT_PI
);
tempEnergy
+=
-
prefactor
*
erfAlphaR
;
}
}
}
else
{
else
{
#if HAS_LENNARD_JONES
#if HAS_LENNARD_JONES
...
...
platforms/cuda/src/kernels/customNonbondedGroups.cu
0 → 100644
View file @
3cb25ad8
#define WARPS_PER_GROUP (THREAD_BLOCK_SIZE/TILE_SIZE)
typedef
struct
{
real
x
,
y
,
z
;
real
q
;
real
fx
,
fy
,
fz
;
ATOM_PARAMETER_DATA
#ifndef PARAMETER_SIZE_IS_EVEN
real
padding
;
#endif
}
AtomData
;
extern
"C"
__global__
void
computeInteractionGroups
(
unsigned
long
long
*
__restrict__
forceBuffers
,
real
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
posq
,
const
int4
*
__restrict__
groupData
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
PARAMETER_ARGUMENTS
)
{
const
unsigned
int
totalWarps
=
(
blockDim
.
x
*
gridDim
.
x
)
/
TILE_SIZE
;
const
unsigned
int
warp
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
TILE_SIZE
;
// global warpIndex
const
unsigned
int
tgx
=
threadIdx
.
x
&
(
TILE_SIZE
-
1
);
// index within the warp
const
unsigned
int
tbx
=
threadIdx
.
x
-
tgx
;
// block warpIndex
real
energy
=
0.0
f
;
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
const
unsigned
int
startTile
=
FIRST_TILE
+
warp
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
const
unsigned
int
endTile
=
FIRST_TILE
+
(
warp
+
1
)
*
(
LAST_TILE
-
FIRST_TILE
)
/
totalWarps
;
for
(
int
tile
=
startTile
;
tile
<
endTile
;
tile
++
)
{
const
int4
atomData
=
groupData
[
TILE_SIZE
*
tile
+
tgx
];
const
int
atom1
=
atomData
.
x
;
const
int
atom2
=
atomData
.
y
;
const
int
rangeStart
=
atomData
.
z
&
0xFFFF
;
const
int
rangeEnd
=
(
atomData
.
z
>>
16
)
&
0xFFFF
;
const
int
exclusions
=
atomData
.
w
;
real4
posq1
=
posq
[
atom1
];
LOAD_ATOM1_PARAMETERS
real3
force
=
make_real3
(
0
);
real4
posq2
=
posq
[
atom2
];
localData
[
threadIdx
.
x
].
x
=
posq2
.
x
;
localData
[
threadIdx
.
x
].
y
=
posq2
.
y
;
localData
[
threadIdx
.
x
].
z
=
posq2
.
z
;
localData
[
threadIdx
.
x
].
q
=
posq2
.
w
;
LOAD_LOCAL_PARAMETERS
localData
[
threadIdx
.
x
].
fx
=
0.0
f
;
localData
[
threadIdx
.
x
].
fy
=
0.0
f
;
localData
[
threadIdx
.
x
].
fz
=
0.0
f
;
int
tj
=
tgx
;
for
(
int
j
=
rangeStart
;
j
<
rangeEnd
;
j
++
)
{
bool
isExcluded
=
(((
exclusions
>>
tj
)
&
1
)
==
0
);
int
localIndex
=
tbx
+
tj
;
posq2
=
make_real4
(
localData
[
localIndex
].
x
,
localData
[
localIndex
].
y
,
localData
[
localIndex
].
z
,
localData
[
localIndex
].
q
);
real3
delta
=
make_real3
(
posq2
.
x
-
posq1
.
x
,
posq2
.
y
-
posq1
.
y
,
posq2
.
z
-
posq1
.
z
);
#ifdef USE_PERIODIC
delta
.
x
-=
floor
(
delta
.
x
*
invPeriodicBoxSize
.
x
+
0.5
f
)
*
periodicBoxSize
.
x
;
delta
.
y
-=
floor
(
delta
.
y
*
invPeriodicBoxSize
.
y
+
0.5
f
)
*
periodicBoxSize
.
y
;
delta
.
z
-=
floor
(
delta
.
z
*
invPeriodicBoxSize
.
z
+
0.5
f
)
*
periodicBoxSize
.
z
;
#endif
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
#ifdef USE_CUTOFF
if
(
!
isExcluded
&&
r2
<
CUTOFF_SQUARED
)
{
#endif
real
invR
=
RSQRT
(
r2
);
real
r
=
RECIP
(
invR
);
LOAD_ATOM2_PARAMETERS
real
dEdR
=
0.0
f
;
real
tempEnergy
=
0.0
f
;
COMPUTE_INTERACTION
energy
+=
tempEnergy
;
delta
*=
dEdR
;
force
.
x
-=
delta
.
x
;
force
.
y
-=
delta
.
y
;
force
.
z
-=
delta
.
z
;
localData
[
localIndex
].
fx
+=
delta
.
x
;
localData
[
localIndex
].
fy
+=
delta
.
y
;
localData
[
localIndex
].
fz
+=
delta
.
z
;
#ifdef USE_CUTOFF
}
#endif
tj
=
(
tj
==
rangeEnd
-
1
?
rangeStart
:
tj
+
1
);
}
if
(
exclusions
!=
0
)
{
atomicAdd
(
&
forceBuffers
[
atom1
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
x
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
y
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom1
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
force
.
z
*
0x100000000
)));
}
atomicAdd
(
&
forceBuffers
[
atom2
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fx
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fy
*
0x100000000
)));
atomicAdd
(
&
forceBuffers
[
atom2
+
2
*
PADDED_NUM_ATOMS
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
localData
[
threadIdx
.
x
].
fz
*
0x100000000
)));
}
energyBuffer
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
+=
energy
;
}
\ No newline at end of file
platforms/cuda/src/kernels/langevin.cu
View file @
3cb25ad8
...
@@ -4,7 +4,7 @@ enum {VelScale, ForceScale, NoiseScale, MaxParams};
...
@@ -4,7 +4,7 @@ enum {VelScale, ForceScale, NoiseScale, MaxParams};
* Perform the first step of Langevin integration.
* Perform the first step of Langevin integration.
*/
*/
extern
"C"
__global__
void
integrateLangevinPart1
(
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
extern
"C"
__global__
void
integrateLangevinPart1
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
,
const
mixed
*
__restrict__
paramBuffer
,
const
mixed2
*
__restrict__
dt
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
const
mixed
*
__restrict__
paramBuffer
,
const
mixed2
*
__restrict__
dt
,
const
float4
*
__restrict__
random
,
unsigned
int
randomIndex
)
{
mixed
vscale
=
paramBuffer
[
VelScale
];
mixed
vscale
=
paramBuffer
[
VelScale
];
mixed
fscale
=
paramBuffer
[
ForceScale
]
/
(
mixed
)
0x100000000
;
mixed
fscale
=
paramBuffer
[
ForceScale
]
/
(
mixed
)
0x100000000
;
...
@@ -12,13 +12,13 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
...
@@ -12,13 +12,13 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
mixed
stepSize
=
dt
[
0
].
y
;
mixed
stepSize
=
dt
[
0
].
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
randomIndex
+=
index
;
randomIndex
+=
index
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
numAtoms
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0
)
{
if
(
velocity
.
w
!=
0
)
{
mixed
sqrtInvMass
=
SQRT
(
velocity
.
w
);
mixed
sqrtInvMass
=
SQRT
(
velocity
.
w
);
velocity
.
x
=
vscale
*
velocity
.
x
+
fscale
*
velocity
.
w
*
force
[
index
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
x
;
velocity
.
x
=
vscale
*
velocity
.
x
+
fscale
*
velocity
.
w
*
force
[
index
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
x
;
velocity
.
y
=
vscale
*
velocity
.
y
+
fscale
*
velocity
.
w
*
force
[
index
+
PADDED_NUM_ATOMS
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
y
;
velocity
.
y
=
vscale
*
velocity
.
y
+
fscale
*
velocity
.
w
*
force
[
index
+
paddedNumAtoms
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
y
;
velocity
.
z
=
vscale
*
velocity
.
z
+
fscale
*
velocity
.
w
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
z
;
velocity
.
z
=
vscale
*
velocity
.
z
+
fscale
*
velocity
.
w
*
force
[
index
+
paddedNumAtoms
*
2
]
+
noisescale
*
sqrtInvMass
*
random
[
randomIndex
].
z
;
velm
[
index
]
=
velocity
;
velm
[
index
]
=
velocity
;
posDelta
[
index
]
=
make_mixed4
(
stepSize
*
velocity
.
x
,
stepSize
*
velocity
.
y
,
stepSize
*
velocity
.
z
,
0
);
posDelta
[
index
]
=
make_mixed4
(
stepSize
*
velocity
.
x
,
stepSize
*
velocity
.
y
,
stepSize
*
velocity
.
z
,
0
);
}
}
...
@@ -31,7 +31,7 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
...
@@ -31,7 +31,7 @@ extern "C" __global__ void integrateLangevinPart1(mixed4* __restrict__ velm, con
* Perform the second step of Langevin integration.
* Perform the second step of Langevin integration.
*/
*/
extern
"C"
__global__
void
integrateLangevinPart2
(
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
mixed2
*
__restrict__
dt
)
{
extern
"C"
__global__
void
integrateLangevinPart2
(
int
numAtoms
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
const
mixed4
*
__restrict__
posDelta
,
mixed4
*
__restrict__
velm
,
const
mixed2
*
__restrict__
dt
)
{
#if __CUDA_ARCH__ >= 130
#if __CUDA_ARCH__ >= 130
double
invStepSize
=
1.0
/
dt
[
0
].
y
;
double
invStepSize
=
1.0
/
dt
[
0
].
y
;
#else
#else
...
@@ -39,7 +39,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
...
@@ -39,7 +39,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
float
correction
=
(
1.0
f
-
invStepSize
*
dt
[
0
].
y
)
/
dt
[
0
].
y
;
float
correction
=
(
1.0
f
-
invStepSize
*
dt
[
0
].
y
)
/
dt
[
0
].
y
;
#endif
#endif
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
numAtoms
)
{
mixed4
vel
=
velm
[
index
];
mixed4
vel
=
velm
[
index
];
if
(
vel
.
w
!=
0
)
{
if
(
vel
.
w
!=
0
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
...
@@ -78,7 +78,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
...
@@ -78,7 +78,7 @@ extern "C" __global__ void integrateLangevinPart2(real4* __restrict__ posq, real
* Select the step size to use for the next step.
* Select the step size to use for the next step.
*/
*/
extern
"C"
__global__
void
selectLangevinStepSize
(
mixed
maxStepSize
,
mixed
errorTol
,
mixed
tau
,
mixed
kT
,
mixed2
*
__restrict__
dt
,
extern
"C"
__global__
void
selectLangevinStepSize
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed
maxStepSize
,
mixed
errorTol
,
mixed
tau
,
mixed
kT
,
mixed2
*
__restrict__
dt
,
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed
*
__restrict__
paramBuffer
)
{
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed
*
__restrict__
paramBuffer
)
{
// Calculate the error.
// Calculate the error.
...
@@ -87,8 +87,8 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
...
@@ -87,8 +87,8 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
mixed
err
=
0
;
mixed
err
=
0
;
unsigned
int
index
=
threadIdx
.
x
;
unsigned
int
index
=
threadIdx
.
x
;
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
while
(
index
<
NUM_ATOMS
)
{
while
(
index
<
numAtoms
)
{
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]);
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
paddedNumAtoms
],
scale
*
force
[
index
+
paddedNumAtoms
*
2
]);
mixed
invMass
=
velm
[
index
].
w
;
mixed
invMass
=
velm
[
index
].
w
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
...
@@ -106,7 +106,7 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
...
@@ -106,7 +106,7 @@ extern "C" __global__ void selectLangevinStepSize(mixed maxStepSize, mixed error
if
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
==
0
)
{
if
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
==
0
)
{
// Select the new step size.
// Select the new step size.
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
NUM_ATOMS
*
3
));
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
numAtoms
*
3
));
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
oldStepSize
=
dt
[
0
].
y
;
mixed
oldStepSize
=
dt
[
0
].
y
;
if
(
oldStepSize
>
0.0
f
)
if
(
oldStepSize
>
0.0
f
)
...
...
platforms/cuda/src/kernels/verlet.cu
View file @
3cb25ad8
...
@@ -2,13 +2,13 @@
...
@@ -2,13 +2,13 @@
* Perform the first step of Verlet integration.
* Perform the first step of Verlet integration.
*/
*/
extern
"C"
__global__
void
integrateVerletPart1
(
const
mixed2
*
__restrict__
dt
,
const
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
integrateVerletPart1
(
int
numAtoms
,
int
paddedNumAtoms
,
const
mixed2
*
__restrict__
dt
,
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
)
{
const
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
mixed4
*
__restrict__
posDelta
)
{
const
mixed2
stepSize
=
dt
[
0
];
const
mixed2
stepSize
=
dt
[
0
];
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtPos
=
stepSize
.
y
;
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
mixed
scale
=
dtVel
/
(
mixed
)
0x100000000
;
const
mixed
scale
=
dtVel
/
(
mixed
)
0x100000000
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
...
@@ -19,8 +19,8 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
...
@@ -19,8 +19,8 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
real4
pos
=
posq
[
index
];
real4
pos
=
posq
[
index
];
#endif
#endif
velocity
.
x
+=
scale
*
force
[
index
]
*
velocity
.
w
;
velocity
.
x
+=
scale
*
force
[
index
]
*
velocity
.
w
;
velocity
.
y
+=
scale
*
force
[
index
+
PADDED_NUM_ATOMS
]
*
velocity
.
w
;
velocity
.
y
+=
scale
*
force
[
index
+
paddedNumAtoms
]
*
velocity
.
w
;
velocity
.
z
+=
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]
*
velocity
.
w
;
velocity
.
z
+=
scale
*
force
[
index
+
paddedNumAtoms
*
2
]
*
velocity
.
w
;
pos
.
x
=
velocity
.
x
*
dtPos
;
pos
.
x
=
velocity
.
x
*
dtPos
;
pos
.
y
=
velocity
.
y
*
dtPos
;
pos
.
y
=
velocity
.
y
*
dtPos
;
pos
.
z
=
velocity
.
z
*
dtPos
;
pos
.
z
=
velocity
.
z
*
dtPos
;
...
@@ -34,7 +34,7 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
...
@@ -34,7 +34,7 @@ extern "C" __global__ void integrateVerletPart1(const mixed2* __restrict__ dt, c
* Perform the second step of Verlet integration.
* Perform the second step of Verlet integration.
*/
*/
extern
"C"
__global__
void
integrateVerletPart2
(
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
extern
"C"
__global__
void
integrateVerletPart2
(
int
numAtoms
,
mixed2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
real4
*
__restrict__
posqCorrection
,
mixed4
*
__restrict__
velm
,
const
mixed4
*
__restrict__
posDelta
)
{
mixed2
stepSize
=
dt
[
0
];
mixed2
stepSize
=
dt
[
0
];
#if __CUDA_ARCH__ >= 130
#if __CUDA_ARCH__ >= 130
...
@@ -46,7 +46,7 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
...
@@ -46,7 +46,7 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
==
0
)
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
dt
[
0
].
x
=
stepSize
.
y
;
for
(;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed4
velocity
=
velm
[
index
];
mixed4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
if
(
velocity
.
w
!=
0.0
)
{
#ifdef USE_MIXED_PRECISION
#ifdef USE_MIXED_PRECISION
...
@@ -80,14 +80,14 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
...
@@ -80,14 +80,14 @@ extern "C" __global__ void integrateVerletPart2(mixed2* __restrict__ dt, real4*
* Select the step size to use for the next step.
* Select the step size to use for the next step.
*/
*/
extern
"C"
__global__
void
selectVerletStepSize
(
mixed
maxStepSize
,
mixed
errorTol
,
mixed2
*
__restrict__
dt
,
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
)
{
extern
"C"
__global__
void
selectVerletStepSize
(
int
numAtoms
,
int
paddedNumAtoms
,
mixed
maxStepSize
,
mixed
errorTol
,
mixed2
*
__restrict__
dt
,
const
mixed4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
)
{
// Calculate the error.
// Calculate the error.
extern
__shared__
mixed
error
[];
extern
__shared__
mixed
error
[];
mixed
err
=
0.0
f
;
mixed
err
=
0.0
f
;
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
const
mixed
scale
=
RECIP
((
mixed
)
0x100000000
);
for
(
int
index
=
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
threadIdx
.
x
;
index
<
numAtoms
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
],
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]);
mixed3
f
=
make_mixed3
(
scale
*
force
[
index
],
scale
*
force
[
index
+
paddedNumAtoms
],
scale
*
force
[
index
+
paddedNumAtoms
*
2
]);
mixed
invMass
=
velm
[
index
].
w
;
mixed
invMass
=
velm
[
index
].
w
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
err
+=
(
f
.
x
*
f
.
x
+
f
.
y
*
f
.
y
+
f
.
z
*
f
.
z
)
*
invMass
;
}
}
...
@@ -102,7 +102,7 @@ extern "C" __global__ void selectVerletStepSize(mixed maxStepSize, mixed errorTo
...
@@ -102,7 +102,7 @@ extern "C" __global__ void selectVerletStepSize(mixed maxStepSize, mixed errorTo
__syncthreads
();
__syncthreads
();
}
}
if
(
threadIdx
.
x
==
0
)
{
if
(
threadIdx
.
x
==
0
)
{
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
NUM_ATOMS
*
3
));
mixed
totalError
=
SQRT
(
error
[
0
]
/
(
numAtoms
*
3
));
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
newStepSize
=
SQRT
(
errorTol
/
totalError
);
mixed
oldStepSize
=
dt
[
0
].
y
;
mixed
oldStepSize
=
dt
[
0
].
y
;
if
(
oldStepSize
>
0.0
f
)
if
(
oldStepSize
>
0.0
f
)
...
...
platforms/cuda/tests/TestCudaCustomNonbondedForce.cpp
View file @
3cb25ad8
...
@@ -34,6 +34,9 @@
...
@@ -34,6 +34,9 @@
* This tests all the different force terms in the CUDA implementation of CustomNonbondedForce.
* This tests all the different force terms in the CUDA implementation of CustomNonbondedForce.
*/
*/
#ifdef WIN32
#define _USE_MATH_DEFINES // Needed to get M_PI
#endif
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/internal/AssertionUtilities.h"
#include "sfmt/SFMT.h"
#include "sfmt/SFMT.h"
#include "openmm/Context.h"
#include "openmm/Context.h"
...
@@ -42,6 +45,7 @@
...
@@ -42,6 +45,7 @@
#include "openmm/NonbondedForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "openmm/VerletIntegrator.h"
#include <cmath>
#include <iostream>
#include <iostream>
#include <vector>
#include <vector>
...
@@ -538,6 +542,179 @@ void testLongRangeCorrection() {
...
@@ -538,6 +542,179 @@ void testLongRangeCorrection() {
ASSERT_EQUAL_TOL
(
standardEnergy1
-
standardEnergy2
,
customEnergy1
-
customEnergy2
,
1e-4
);
ASSERT_EQUAL_TOL
(
standardEnergy1
-
standardEnergy2
,
customEnergy1
-
customEnergy2
,
1e-4
);
}
}
void
testInteractionGroups
()
{
const
int
numParticles
=
6
;
System
system
;
VerletIntegrator
integrator
(
0.01
);
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"v1+v2"
);
nonbonded
->
addPerParticleParameter
(
"v"
);
vector
<
double
>
params
(
1
,
0.001
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
params
);
params
[
0
]
*=
10
;
}
set
<
int
>
set1
,
set2
,
set3
,
set4
;
set1
.
insert
(
2
);
set2
.
insert
(
0
);
set2
.
insert
(
1
);
set2
.
insert
(
2
);
set2
.
insert
(
3
);
set2
.
insert
(
4
);
set2
.
insert
(
5
);
nonbonded
->
addInteractionGroup
(
set1
,
set2
);
// Particle 2 interacts with every other particle.
set3
.
insert
(
0
);
set3
.
insert
(
1
);
set4
.
insert
(
4
);
set4
.
insert
(
5
);
nonbonded
->
addInteractionGroup
(
set3
,
set4
);
// Particles 0 and 1 interact with 4 and 5.
nonbonded
->
addExclusion
(
1
,
2
);
// Add an exclusion to make sure it gets skipped.
system
.
addForce
(
nonbonded
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Energy
);
double
expectedEnergy
=
331.423
;
// Each digit is the number of interactions a particle particle is involved in.
ASSERT_EQUAL_TOL
(
expectedEnergy
,
state
.
getPotentialEnergy
(),
TOL
);
}
/**
 * Check interaction groups on a large periodic system of two-particle molecules.
 *
 * The forces are computed three times: with no interaction groups, with a single group in
 * which one particle interacts with everything else, and with that group widened to cover
 * all particles.  The force on the single particle must match in the first two cases, and
 * every force must match between the first and third cases.
 */
void testLargeInteractionGroup() {
    const int numMolecules = 300;
    const int numParticles = numMolecules*2;
    const double boxSize = 20.0;
    const int probeParticle = 151; // The one particle placed in the first set of the interaction group.

    // Create a large system.

    System system;
    system.setDefaultPeriodicBoxVectors(Vec3(boxSize, 0, 0), Vec3(0, boxSize, 0), Vec3(0, 0, boxSize));
    for (int i = 0; i < numParticles; i++)
        system.addParticle(1.0);
    CustomNonbondedForce* nonbonded = new CustomNonbondedForce("4*eps*((sigma/r)^12-(sigma/r)^6)+138.935456*q/r; q=q1*q2; sigma=0.5*(sigma1+sigma2); eps=sqrt(eps1*eps2)");
    nonbonded->addPerParticleParameter("q");
    nonbonded->addPerParticleParameter("sigma");
    nonbonded->addPerParticleParameter("eps");
    vector<Vec3> positions(numParticles);
    OpenMM_SFMT::SFMT sfmt;
    init_gen_rand(0, sfmt);
    vector<double> params(3);
    for (int i = 0; i < numMolecules; i++) {
        // Both halves of the system use the same charges and sigmas; only eps differs.
        const double eps = (i < numMolecules/2 ? 0.1 : 0.2);
        params[0] = 1.0;
        params[1] = 0.2;
        params[2] = eps;
        nonbonded->addParticle(params);
        params[0] = -1.0;
        params[1] = 0.1;
        nonbonded->addParticle(params);

        // Draw the coordinates in separate statements.  The evaluation order of function
        // arguments is unspecified, so three genrand_real2() calls inside a single Vec3(...)
        // expression could produce different positions on different compilers.
        const double x = boxSize*genrand_real2(sfmt);
        const double y = boxSize*genrand_real2(sfmt);
        const double z = boxSize*genrand_real2(sfmt);
        positions[2*i] = Vec3(x, y, z);
        positions[2*i+1] = Vec3(x+1.0, y, z);
        nonbonded->addExclusion(2*i, 2*i+1);
    }
    nonbonded->setNonbondedMethod(CustomNonbondedForce::CutoffPeriodic);
    system.addForce(nonbonded);

    // Compute the forces.

    VerletIntegrator integrator(0.01);
    Context context(system, integrator, platform);
    context.setPositions(positions);
    State state1 = context.getState(State::Forces);

    // Modify the force so only one particle interacts with everything else.

    set<int> set1, set2;
    set1.insert(probeParticle);
    for (int i = 0; i < numParticles; i++)
        set2.insert(i);
    nonbonded->addInteractionGroup(set1, set2);
    context.reinitialize();
    context.setPositions(positions);
    State state2 = context.getState(State::Forces);

    // The force on that one particle should be the same.

    ASSERT_EQUAL_VEC(state1.getForces()[probeParticle], state2.getForces()[probeParticle], 1e-4);

    // Modify the interaction group so it includes all interactions.  This should now reproduce the original forces
    // on all atoms.

    for (int i = 0; i < numParticles; i++)
        set1.insert(i);
    nonbonded->setInteractionGroupParameters(0, set1, set2);
    context.reinitialize();
    context.setPositions(positions);
    State state3 = context.getState(State::Forces);
    for (int i = 0; i < numParticles; i++)
        ASSERT_EQUAL_VEC(state1.getForces()[i], state3.getForces()[i], 1e-4);
}
void
testInteractionGroupLongRangeCorrection
()
{
const
int
numParticles
=
10
;
const
double
boxSize
=
10.0
;
const
double
cutoff
=
0.5
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
CustomNonbondedForce
*
nonbonded
=
new
CustomNonbondedForce
(
"c1*c2*r^-4"
);
nonbonded
->
addPerParticleParameter
(
"c"
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
double
>
params
(
1
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
params
[
0
]
=
(
i
%
2
==
0
?
1.1
:
2.0
);
nonbonded
->
addParticle
(
params
);
positions
[
i
]
=
Vec3
(
0.5
*
i
,
0
,
0
);
}
nonbonded
->
setNonbondedMethod
(
CustomNonbondedForce
::
CutoffPeriodic
);
nonbonded
->
setCutoffDistance
(
cutoff
);
system
.
addForce
(
nonbonded
);
// Setup nonbonded groups. They involve 1 interaction of type AA,
// 2 of type BB, and 5 of type AB.
set
<
int
>
set1
,
set2
,
set3
,
set4
,
set5
;
set1
.
insert
(
0
);
set1
.
insert
(
1
);
set1
.
insert
(
2
);
nonbonded
->
addInteractionGroup
(
set1
,
set1
);
set2
.
insert
(
3
);
set3
.
insert
(
4
);
set3
.
insert
(
6
);
set3
.
insert
(
8
);
nonbonded
->
addInteractionGroup
(
set2
,
set3
);
set4
.
insert
(
5
);
set5
.
insert
(
7
);
set5
.
insert
(
9
);
nonbonded
->
addInteractionGroup
(
set4
,
set5
);
// Compute energy with and without the correction.
VerletIntegrator
integrator
(
0.01
);
Context
context
(
system
,
integrator
,
platform
);
context
.
setPositions
(
positions
);
double
energy1
=
context
.
getState
(
State
::
Energy
).
getPotentialEnergy
();
nonbonded
->
setUseLongRangeCorrection
(
true
);
context
.
reinitialize
();
context
.
setPositions
(
positions
);
double
energy2
=
context
.
getState
(
State
::
Energy
).
getPotentialEnergy
();
// Check the result.
double
sum
=
(
1.1
*
1.1
+
2
*
2.0
*
2.0
+
5
*
1.1
*
2.0
)
*
2.0
;
int
numPairs
=
(
numParticles
*
(
numParticles
+
1
))
/
2
;
double
expected
=
2
*
M_PI
*
numParticles
*
numParticles
*
sum
/
(
numPairs
*
boxSize
*
boxSize
*
boxSize
);
ASSERT_EQUAL_TOL
(
expected
,
energy2
-
energy1
,
1e-4
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
...
@@ -553,6 +730,9 @@ int main(int argc, char* argv[]) {
...
@@ -553,6 +730,9 @@ int main(int argc, char* argv[]) {
testParallelComputation
();
testParallelComputation
();
testSwitchingFunction
();
testSwitchingFunction
();
testLongRangeCorrection
();
testLongRangeCorrection
();
testInteractionGroups
();
testLargeInteractionGroup
();
testInteractionGroupLongRangeCorrection
();
}
}
catch
(
const
exception
&
e
)
{
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
...
...
platforms/cuda/tests/TestCudaMonteCarloAnisotropicBarostat.cpp
View file @
3cb25ad8
...
@@ -53,44 +53,6 @@ using namespace std;
...
@@ -53,44 +53,6 @@ using namespace std;
CudaPlatform
platform
;
CudaPlatform
platform
;
void
testChangingBoxSize
()
{
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
4
,
0
,
0
),
Vec3
(
0
,
5
,
0
),
Vec3
(
0
,
0
,
6
));
system
.
addParticle
(
1.0
);
NonbondedForce
*
nb
=
new
NonbondedForce
();
nb
->
setNonbondedMethod
(
NonbondedForce
::
CutoffPeriodic
);
nb
->
setCutoffDistance
(
2.0
);
nb
->
addParticle
(
1
,
0.5
,
0.5
);
system
.
addForce
(
nb
);
LangevinIntegrator
integrator
(
300.0
,
1.0
,
0.01
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
;
positions
.
push_back
(
Vec3
());
context
.
setPositions
(
positions
);
Vec3
x
,
y
,
z
;
context
.
getState
(
State
::
Forces
).
getPeriodicBoxVectors
(
x
,
y
,
z
);
ASSERT_EQUAL_VEC
(
Vec3
(
4
,
0
,
0
),
x
,
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
5
,
0
),
y
,
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
6
),
z
,
0
);
context
.
setPeriodicBoxVectors
(
Vec3
(
7
,
0
,
0
),
Vec3
(
0
,
8
,
0
),
Vec3
(
0
,
0
,
9
));
context
.
getState
(
State
::
Forces
).
getPeriodicBoxVectors
(
x
,
y
,
z
);
ASSERT_EQUAL_VEC
(
Vec3
(
7
,
0
,
0
),
x
,
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
8
,
0
),
y
,
0
);
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
0
,
9
),
z
,
0
);
// Shrinking the box too small should produce an exception.
context
.
setPeriodicBoxVectors
(
Vec3
(
7
,
0
,
0
),
Vec3
(
0
,
3.9
,
0
),
Vec3
(
0
,
0
,
9
));
bool
ok
=
true
;
try
{
context
.
getState
(
State
::
Forces
).
getPeriodicBoxVectors
(
x
,
y
,
z
);
ok
=
false
;
}
catch
(
exception
&
ex
)
{
}
ASSERT
(
ok
);
}
void
testIdealGas
()
{
void
testIdealGas
()
{
const
int
numParticles
=
64
;
const
int
numParticles
=
64
;
const
int
frequency
=
10
;
const
int
frequency
=
10
;
...
@@ -112,7 +74,7 @@ void testIdealGas() {
...
@@ -112,7 +74,7 @@ void testIdealGas() {
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
positions
[
i
]
=
Vec3
(
initialLength
*
genrand_real2
(
sfmt
),
0.5
*
initialLength
*
genrand_real2
(
sfmt
),
2
*
initialLength
*
genrand_real2
(
sfmt
));
positions
[
i
]
=
Vec3
(
initialLength
*
genrand_real2
(
sfmt
),
0.5
*
initialLength
*
genrand_real2
(
sfmt
),
2
*
initialLength
*
genrand_real2
(
sfmt
));
}
}
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temp
[
0
],
frequency
);
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temp
[
0
],
true
,
true
,
true
,
frequency
);
system
.
addForce
(
barostat
);
system
.
addForce
(
barostat
);
// Test it for three different temperatures.
// Test it for three different temperatures.
...
@@ -170,7 +132,7 @@ void testIdealGasAxis(int axis) {
...
@@ -170,7 +132,7 @@ void testIdealGasAxis(int axis) {
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
positions
[
i
]
=
Vec3
(
initialLength
*
genrand_real2
(
sfmt
),
0.5
*
initialLength
*
genrand_real2
(
sfmt
),
2
*
initialLength
*
genrand_real2
(
sfmt
));
positions
[
i
]
=
Vec3
(
initialLength
*
genrand_real2
(
sfmt
),
0.5
*
initialLength
*
genrand_real2
(
sfmt
),
2
*
initialLength
*
genrand_real2
(
sfmt
));
}
}
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temp
[
0
],
frequency
,
scaleX
,
scaleY
,
scaleZ
);
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temp
[
0
],
scaleX
,
scaleY
,
scaleZ
,
frequency
);
system
.
addForce
(
barostat
);
system
.
addForce
(
barostat
);
// Test it for three different temperatures.
// Test it for three different temperatures.
...
@@ -226,7 +188,7 @@ void testRandomSeed() {
...
@@ -226,7 +188,7 @@ void testRandomSeed() {
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
1.0
:
-
1.0
),
1.0
,
5.0
);
}
}
system
.
addForce
(
forceField
);
system
.
addForce
(
forceField
);
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temp
,
1
);
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pressure
,
pressure
,
pressure
),
temp
,
true
,
true
,
true
,
1
);
system
.
addForce
(
barostat
);
system
.
addForce
(
barostat
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
...
@@ -332,7 +294,7 @@ void testEinsteinCrystal() {
...
@@ -332,7 +294,7 @@ void testEinsteinCrystal() {
system
.
addForce
(
force
);
system
.
addForce
(
force
);
system
.
addForce
(
nb
);
system
.
addForce
(
nb
);
// Create the barostat.
// Create the barostat.
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pres3
[
p
],
pres3
[
p
],
pres3
[
p
]),
temp
,
frequency
,
(
a
==
0
||
a
==
3
),
(
a
==
1
||
a
==
3
),
(
a
==
2
||
a
==
3
));
MonteCarloAnisotropicBarostat
*
barostat
=
new
MonteCarloAnisotropicBarostat
(
Vec3
(
pres3
[
p
],
pres3
[
p
],
pres3
[
p
]),
temp
,
(
a
==
0
||
a
==
3
),
(
a
==
1
||
a
==
3
),
(
a
==
2
||
a
==
3
)
,
frequency
);
system
.
addForce
(
barostat
);
system
.
addForce
(
barostat
);
barostat
->
setTemperature
(
temp
);
barostat
->
setTemperature
(
temp
);
LangevinIntegrator
integrator
(
temp
,
0.1
,
0.01
);
LangevinIntegrator
integrator
(
temp
,
0.1
,
0.01
);
...
@@ -422,7 +384,6 @@ int main(int argc, char* argv[]) {
...
@@ -422,7 +384,6 @@ int main(int argc, char* argv[]) {
try
{
try
{
if
(
argc
>
1
)
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
platform
.
setPropertyDefaultValue
(
"CudaPrecision"
,
string
(
argv
[
1
]));
testChangingBoxSize
();
testIdealGas
();
testIdealGas
();
testIdealGasAxis
(
0
);
testIdealGasAxis
(
0
);
testIdealGasAxis
(
1
);
testIdealGasAxis
(
1
);
...
...
platforms/opencl/include/OpenCLContext.h
View file @
3cb25ad8
...
@@ -9,7 +9,7 @@
...
@@ -9,7 +9,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
1
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -191,6 +191,12 @@ public:
...
@@ -191,6 +191,12 @@ public:
int
getDeviceIndex
()
{
int
getDeviceIndex
()
{
return
deviceIndex
;
return
deviceIndex
;
}
}
/**
* Get the index of the cl::Platform associated with this object.
*
* @return the current value of the platformIndex member
*/
int
getPlatformIndex
()
{
return
platformIndex
;
}
/**
/**
* Get the PlatformData object this context is part of.
* Get the PlatformData object this context is part of.
*/
*/
...
@@ -589,7 +595,6 @@ private:
...
@@ -589,7 +595,6 @@ private:
struct
MoleculeGroup
;
struct
MoleculeGroup
;
class
VirtualSiteInfo
;
class
VirtualSiteInfo
;
void
findMoleculeGroups
();
void
findMoleculeGroups
();
static
void
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
std
::
vector
<
int
>&
atomMolecule
,
std
::
vector
<
std
::
vector
<
int
>
>&
atomBonds
);
/**
/**
* Ensure that all molecules marked as "identical" really are identical. This should be
* Ensure that all molecules marked as "identical" really are identical. This should be
* called whenever force field parameters change. If necessary, it will rebuild the list
* called whenever force field parameters change. If necessary, it will rebuild the list
...
@@ -605,6 +610,7 @@ private:
...
@@ -605,6 +610,7 @@ private:
double
time
;
double
time
;
OpenCLPlatform
::
PlatformData
&
platformData
;
OpenCLPlatform
::
PlatformData
&
platformData
;
int
deviceIndex
;
int
deviceIndex
;
int
platformIndex
;
int
contextIndex
;
int
contextIndex
;
int
stepCount
;
int
stepCount
;
int
computeForceCount
;
int
computeForceCount
;
...
...
platforms/opencl/include/OpenCLKernels.h
View file @
3cb25ad8
...
@@ -639,7 +639,7 @@ private:
...
@@ -639,7 +639,7 @@ private:
class
OpenCLCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
class
OpenCLCalcCustomNonbondedForceKernel
:
public
CalcCustomNonbondedForceKernel
{
public:
public:
OpenCLCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
OpenCLCalcCustomNonbondedForceKernel
(
std
::
string
name
,
const
Platform
&
platform
,
OpenCLContext
&
cl
,
const
System
&
system
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
{
cl
(
cl
),
params
(
NULL
),
globals
(
NULL
),
tabulatedFunctionParams
(
NULL
),
interactionGroupData
(
NULL
),
forceCopy
(
NULL
),
system
(
system
)
,
hasInitializedKernel
(
false
)
{
}
}
~
OpenCLCalcCustomNonbondedForceKernel
();
~
OpenCLCalcCustomNonbondedForceKernel
();
/**
/**
...
@@ -666,15 +666,20 @@ public:
...
@@ -666,15 +666,20 @@ public:
*/
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
private:
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
);
OpenCLContext
&
cl
;
OpenCLContext
&
cl
;
OpenCLParameterSet
*
params
;
OpenCLParameterSet
*
params
;
OpenCLArray
*
globals
;
OpenCLArray
*
globals
;
OpenCLArray
*
tabulatedFunctionParams
;
OpenCLArray
*
tabulatedFunctionParams
;
OpenCLArray
*
interactionGroupData
;
cl
::
Kernel
interactionGroupKernel
;
std
::
vector
<
void
*>
interactionGroupArgs
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
std
::
string
>
globalParamNames
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
cl_float
>
globalParamValues
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
std
::
vector
<
OpenCLArray
*>
tabulatedFunctions
;
double
longRangeCoefficient
;
double
longRangeCoefficient
;
bool
hasInitializedLongRangeCorrection
;
bool
hasInitializedLongRangeCorrection
,
hasInitializedKernel
;
int
numGroupThreadBlocks
;
CustomNonbondedForce
*
forceCopy
;
CustomNonbondedForce
*
forceCopy
;
const
System
&
system
;
const
System
&
system
;
};
};
...
...
platforms/opencl/include/OpenCLParameterSet.h
View file @
3cb25ad8
...
@@ -81,6 +81,13 @@ public:
...
@@ -81,6 +81,13 @@ public:
*/
*/
template
<
class
T
>
template
<
class
T
>
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
void
setParameterValues
(
const
std
::
vector
<
std
::
vector
<
T
>
>&
values
);
/**
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data.
*
* @return a non-const reference to the buffers member, so the caller can modify the vector in place
*/
std
::
vector
<
OpenCLNonbondedUtilities
::
ParameterInfo
>&
getBuffers
()
{
return
buffers
;
}
/**
/**
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* Get a set of OpenCLNonbondedUtilities::ParameterInfo objects which describe the Buffers
* containing the data.
* containing the data.
...
...
platforms/opencl/sharedTarget/CMakeLists.txt
View file @
3cb25ad8
...
@@ -19,6 +19,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
...
@@ -19,6 +19,6 @@ ELSE (UNIX AND CMAKE_BUILD_TYPE MATCHES Debug)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
SET
(
MAIN_OPENMM_LIB
${
OPENMM_LIBRARY_NAME
}
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
ENDIF
(
UNIX AND CMAKE_BUILD_TYPE MATCHES Debug
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
OPENCL_LIBRARIES
}
${
PTHREADS_LIB
}
)
TARGET_LINK_LIBRARIES
(
${
SHARED_TARGET
}
${
MAIN_OPENMM_LIB
}
${
OPENCL_LIBRARIES
}
${
PTHREADS_LIB
}
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"-DOPENMM_OPENCL_BUILDING_SHARED_LIBRARY"
)
SET_TARGET_PROPERTIES
(
${
SHARED_TARGET
}
PROPERTIES COMPILE_FLAGS
"
-msse2
-DOPENMM_OPENCL_BUILDING_SHARED_LIBRARY"
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
INSTALL_TARGETS
(
/lib/plugins RUNTIME_DIRECTORY /lib/plugins
${
SHARED_TARGET
}
)
platforms/opencl/src/OpenCLContext.cpp
View file @
3cb25ad8
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
2
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
3
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -39,6 +39,7 @@
...
@@ -39,6 +39,7 @@
#include "openmm/Platform.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "openmm/VirtualSite.h"
#include "openmm/VirtualSite.h"
#include "openmm/internal/ContextImpl.h"
#include <algorithm>
#include <algorithm>
#include <fstream>
#include <fstream>
#include <iostream>
#include <iostream>
...
@@ -87,17 +88,25 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -87,17 +88,25 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
contextIndex
=
platformData
.
contexts
.
size
();
contextIndex
=
platformData
.
contexts
.
size
();
std
::
vector
<
cl
::
Platform
>
platforms
;
std
::
vector
<
cl
::
Platform
>
platforms
;
cl
::
Platform
::
get
(
&
platforms
);
cl
::
Platform
::
get
(
&
platforms
);
if
(
platformIndex
<
0
||
platformIndex
>=
(
int
)
platforms
.
size
())
throw
OpenMMException
(
"Illegal value for OpenCL platform index"
);
string
platformVendor
=
platforms
[
platformIndex
].
getInfo
<
CL_PLATFORM_VENDOR
>
();
vector
<
cl
::
Device
>
devices
;
platforms
[
platformIndex
].
getDevices
(
CL_DEVICE_TYPE_ALL
,
&
devices
);
const
int
minThreadBlockSize
=
32
;
const
int
minThreadBlockSize
=
32
;
if
(
deviceIndex
<
0
||
deviceIndex
>=
(
int
)
devices
.
size
())
{
// Try to figure out which device is the fastest.
int
bestSpeed
=
-
1
;
int
bestSpeed
=
-
1
;
int
bestDevice
=
-
1
;
int
bestPlatform
=
-
1
;
for
(
int
j
=
0
;
j
<
platforms
.
size
();
j
++
)
{
// if they supplied a valid platformIndex, we only look through that platform
if
(
j
!=
platformIndex
&&
platformIndex
>=
0
&&
platformIndex
<
(
int
)
platforms
.
size
())
continue
;
string
platformVendor
=
platforms
[
j
].
getInfo
<
CL_PLATFORM_VENDOR
>
();
vector
<
cl
::
Device
>
devices
;
platforms
[
j
].
getDevices
(
CL_DEVICE_TYPE_ALL
,
&
devices
);
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
devices
.
size
();
i
++
)
{
// if they supplied a valid deviceIndex, we only look through that one
if
(
i
!=
deviceIndex
&&
deviceIndex
>=
0
&&
deviceIndex
<
(
int
)
devices
.
size
())
continue
;
if
(
platformVendor
==
"Apple"
&&
devices
[
i
].
getInfo
<
CL_DEVICE_VENDOR
>
()
==
"AMD"
)
if
(
platformVendor
==
"Apple"
&&
devices
[
i
].
getInfo
<
CL_DEVICE_VENDOR
>
()
==
"AMD"
)
continue
;
// Don't use AMD GPUs on OS X due to serious bugs.
continue
;
// Don't use AMD GPUs on OS X due to serious bugs.
int
maxSize
=
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_WORK_ITEM_SIZES
>
()[
0
];
int
maxSize
=
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_WORK_ITEM_SIZES
>
()[
0
];
...
@@ -136,15 +145,26 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -136,15 +145,26 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
}
}
int
speed
=
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
()
*
processingElementsPerComputeUnit
*
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_CLOCK_FREQUENCY
>
();
int
speed
=
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
()
*
processingElementsPerComputeUnit
*
devices
[
i
].
getInfo
<
CL_DEVICE_MAX_CLOCK_FREQUENCY
>
();
if
(
maxSize
>=
minThreadBlockSize
&&
speed
>
bestSpeed
)
{
if
(
maxSize
>=
minThreadBlockSize
&&
speed
>
bestSpeed
)
{
deviceIndex
=
i
;
bestDevice
=
i
;
bestSpeed
=
speed
;
bestSpeed
=
speed
;
bestPlatform
=
j
;
}
}
}
}
}
}
if
(
deviceIndex
==
-
1
)
if
(
bestPlatform
==
-
1
)
throw
OpenMMException
(
"No compatible OpenCL platform is available"
);
if
(
bestDevice
==
-
1
)
throw
OpenMMException
(
"No compatible OpenCL device is available"
);
throw
OpenMMException
(
"No compatible OpenCL device is available"
);
device
=
devices
[
deviceIndex
];
this
->
deviceIndex
=
deviceIndex
;
vector
<
cl
::
Device
>
devices
;
platforms
[
bestPlatform
].
getDevices
(
CL_DEVICE_TYPE_ALL
,
&
devices
);
string
platformVendor
=
platforms
[
bestPlatform
].
getInfo
<
CL_PLATFORM_VENDOR
>
();
device
=
devices
[
bestDevice
];
this
->
deviceIndex
=
bestDevice
;
this
->
platformIndex
=
bestPlatform
;
if
(
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
()
<
minThreadBlockSize
)
if
(
device
.
getInfo
<
CL_DEVICE_MAX_WORK_GROUP_SIZE
>
()
<
minThreadBlockSize
)
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
throw
OpenMMException
(
"The specified OpenCL device is not compatible with OpenMM"
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
compilationDefines
[
"WORK_GROUP_SIZE"
]
=
intToString
(
ThreadBlockSize
);
...
@@ -226,7 +246,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
...
@@ -226,7 +246,7 @@ OpenCLContext::OpenCLContext(const System& system, int platformIndex, int device
compilationDefines
[
"SYNC_WARPS"
]
=
"barrier(CLK_LOCAL_MEM_FENCE)"
;
compilationDefines
[
"SYNC_WARPS"
]
=
"barrier(CLK_LOCAL_MEM_FENCE)"
;
vector
<
cl
::
Device
>
contextDevices
;
vector
<
cl
::
Device
>
contextDevices
;
contextDevices
.
push_back
(
device
);
contextDevices
.
push_back
(
device
);
cl_context_properties
cprops
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
p
latform
Index
](),
0
};
cl_context_properties
cprops
[]
=
{
CL_CONTEXT_PLATFORM
,
(
cl_context_properties
)
platforms
[
bestP
latform
](),
0
};
context
=
cl
::
Context
(
contextDevices
,
cprops
,
errorCallback
);
context
=
cl
::
Context
(
contextDevices
,
cprops
,
errorCallback
);
queue
=
cl
::
CommandQueue
(
context
,
device
);
queue
=
cl
::
CommandQueue
(
context
,
device
);
numAtoms
=
system
.
getNumParticles
();
numAtoms
=
system
.
getNumParticles
();
...
@@ -618,15 +638,6 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
...
@@ -618,15 +638,6 @@ void OpenCLContext::reduceBuffer(OpenCLArray& array, int numBuffers) {
executeKernel
(
reduceReal4Kernel
,
bufferSize
,
128
);
executeKernel
(
reduceReal4Kernel
,
bufferSize
,
128
);
}
}
void
OpenCLContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
// Recursively tag atoms as belonging to a particular molecule.
atomMolecule
[
atom
]
=
molecule
;
for
(
int
i
=
0
;
i
<
(
int
)
atomBonds
[
atom
].
size
();
i
++
)
if
(
atomMolecule
[
atomBonds
[
atom
][
i
]]
==
-
1
)
tagAtomsInMolecule
(
atomBonds
[
atom
][
i
],
molecule
,
atomMolecule
,
atomBonds
);
}
/**
/**
* This class ensures that atom reordering doesn't break virtual sites.
* This class ensures that atom reordering doesn't break virtual sites.
*/
*/
...
@@ -722,16 +733,14 @@ void OpenCLContext::findMoleculeGroups() {
...
@@ -722,16 +733,14 @@ void OpenCLContext::findMoleculeGroups() {
}
}
}
}
// Now
tag
atoms by which molecule they belong to.
// Now
identify
atoms by which molecule they belong to.
vector
<
int
>
atomMolecule
(
numAtoms
,
-
1
);
vector
<
vector
<
int
>
>
atomIndices
=
ContextImpl
::
findMolecules
(
numAtoms
,
atomBonds
);
int
numMolecules
=
0
;
int
numMolecules
=
atomIndices
.
size
();
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
vector
<
int
>
atomMolecule
(
numAtoms
);
if
(
atomMolecule
[
i
]
==
-
1
)
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
tagAtomsInMolecule
(
i
,
numMolecules
++
,
atomMolecule
,
atomBonds
);
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
vector
<
vector
<
int
>
>
atomIndices
(
numMolecules
);
atomMolecule
[
atomIndices
[
i
][
j
]]
=
i
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
atomIndices
[
atomMolecule
[
i
]].
push_back
(
i
);
// Construct a description of each molecule.
// Construct a description of each molecule.
...
...
platforms/opencl/src/OpenCLKernels.cpp
View file @
3cb25ad8
...
@@ -46,6 +46,7 @@
...
@@ -46,6 +46,7 @@
#include "lepton/ParsedExpression.h"
#include "lepton/ParsedExpression.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMRealType.h"
#include "SimTKOpenMMUtilities.h"
#include "SimTKOpenMMUtilities.h"
#include <algorithm>
#include <cmath>
#include <cmath>
#include <set>
#include <set>
...
@@ -1875,6 +1876,17 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
...
@@ -1875,6 +1876,17 @@ void OpenCLCalcNonbondedForceKernel::copyParametersToContext(ContextImpl& contex
class
OpenCLCustomNonbondedForceInfo
:
public
OpenCLForceInfo
{
class
OpenCLCustomNonbondedForceInfo
:
public
OpenCLForceInfo
{
public:
public:
OpenCLCustomNonbondedForceInfo
(
int
requiredBuffers
,
const
CustomNonbondedForce
&
force
)
:
OpenCLForceInfo
(
requiredBuffers
),
force
(
force
)
{
OpenCLCustomNonbondedForceInfo
(
int
requiredBuffers
,
const
CustomNonbondedForce
&
force
)
:
OpenCLForceInfo
(
requiredBuffers
),
force
(
force
)
{
if
(
force
.
getNumInteractionGroups
()
>
0
)
{
groupsForParticle
.
resize
(
force
.
getNumParticles
());
for
(
int
i
=
0
;
i
<
force
.
getNumInteractionGroups
();
i
++
)
{
set
<
int
>
set1
,
set2
;
force
.
getInteractionGroupParameters
(
i
,
set1
,
set2
);
for
(
set
<
int
>::
const_iterator
iter
=
set1
.
begin
();
iter
!=
set1
.
end
();
++
iter
)
groupsForParticle
[
*
iter
].
insert
(
2
*
i
);
for
(
set
<
int
>::
const_iterator
iter
=
set2
.
begin
();
iter
!=
set2
.
end
();
++
iter
)
groupsForParticle
[
*
iter
].
insert
(
2
*
i
+
1
);
}
}
}
}
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
bool
areParticlesIdentical
(
int
particle1
,
int
particle2
)
{
vector
<
double
>
params1
;
vector
<
double
>
params1
;
...
@@ -1884,6 +1896,8 @@ public:
...
@@ -1884,6 +1896,8 @@ public:
for
(
int
i
=
0
;
i
<
(
int
)
params1
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
params1
.
size
();
i
++
)
if
(
params1
[
i
]
!=
params2
[
i
])
if
(
params1
[
i
]
!=
params2
[
i
])
return
false
;
return
false
;
if
(
groupsForParticle
.
size
()
>
0
&&
groupsForParticle
[
particle1
]
!=
groupsForParticle
[
particle2
])
return
false
;
return
true
;
return
true
;
}
}
int
getNumParticleGroups
()
{
int
getNumParticleGroups
()
{
...
@@ -1901,6 +1915,7 @@ public:
...
@@ -1901,6 +1915,7 @@ public:
}
}
private:
private:
const
CustomNonbondedForce
&
force
;
const
CustomNonbondedForce
&
force
;
vector
<
set
<
int
>
>
groupsForParticle
;
};
};
OpenCLCalcCustomNonbondedForceKernel
::~
OpenCLCalcCustomNonbondedForceKernel
()
{
OpenCLCalcCustomNonbondedForceKernel
::~
OpenCLCalcCustomNonbondedForceKernel
()
{
...
@@ -1910,6 +1925,8 @@ OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
...
@@ -1910,6 +1925,8 @@ OpenCLCalcCustomNonbondedForceKernel::~OpenCLCalcCustomNonbondedForceKernel() {
delete
globals
;
delete
globals
;
if
(
tabulatedFunctionParams
!=
NULL
)
if
(
tabulatedFunctionParams
!=
NULL
)
delete
tabulatedFunctionParams
;
delete
tabulatedFunctionParams
;
if
(
interactionGroupData
!=
NULL
)
delete
interactionGroupData
;
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
delete
tabulatedFunctions
[
i
];
delete
tabulatedFunctions
[
i
];
if
(
forceCopy
!=
NULL
)
if
(
forceCopy
!=
NULL
)
...
@@ -1920,7 +1937,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -1920,7 +1937,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
int
forceIndex
;
int
forceIndex
;
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
for
(
forceIndex
=
0
;
forceIndex
<
system
.
getNumForces
()
&&
&
system
.
getForce
(
forceIndex
)
!=
&
force
;
++
forceIndex
)
;
;
string
prefix
=
"custom"
+
cl
.
intToString
(
forceIndex
)
+
"_"
;
string
prefix
=
(
force
.
getNumInteractionGroups
()
==
0
?
"custom"
+
cl
.
intToString
(
forceIndex
)
+
"_"
:
""
)
;
// Record parameters and exclusions.
// Record parameters and exclusions.
...
@@ -2021,6 +2038,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2021,6 +2038,9 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
replacements
[
"SWITCH_C5"
]
=
cl
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
(),
5.0
));
replacements
[
"SWITCH_C5"
]
=
cl
.
doubleToString
(
6
/
pow
(
force
.
getSwitchingDistance
()
-
force
.
getCutoffDistance
(),
5.0
));
}
}
string
source
=
cl
.
replaceStrings
(
OpenCLKernelSources
::
customNonbonded
,
replacements
);
string
source
=
cl
.
replaceStrings
(
OpenCLKernelSources
::
customNonbonded
,
replacements
);
if
(
force
.
getNumInteractionGroups
()
>
0
)
initInteractionGroups
(
force
,
source
);
else
{
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
cl
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
const
OpenCLNonbondedUtilities
::
ParameterInfo
&
buffer
=
params
->
getBuffers
()[
i
];
...
@@ -2030,6 +2050,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2030,6 +2050,7 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
globals
->
upload
(
globalParamValues
);
globals
->
upload
(
globalParamValues
);
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
cl_float
),
globals
->
getDeviceBuffer
()));
cl
.
getNonbondedUtilities
().
addArgument
(
OpenCLNonbondedUtilities
::
ParameterInfo
(
prefix
+
"globals"
,
"float"
,
1
,
sizeof
(
cl_float
),
globals
->
getDeviceBuffer
()));
}
}
}
cl
.
addForce
(
new
OpenCLCustomNonbondedForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
cl
.
addForce
(
new
OpenCLCustomNonbondedForceInfo
(
cl
.
getNonbondedUtilities
().
getNumForceBuffers
(),
force
));
// Record information for the long range correction.
// Record information for the long range correction.
...
@@ -2044,6 +2065,250 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
...
@@ -2044,6 +2065,250 @@ void OpenCLCalcCustomNonbondedForceKernel::initialize(const System& system, cons
}
}
}
}
void
OpenCLCalcCustomNonbondedForceKernel
::
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
string
&
interactionSource
)
{
// Process groups to form tiles.
vector
<
vector
<
int
>
>
atomLists
;
vector
<
pair
<
int
,
int
>
>
tiles
;
map
<
pair
<
int
,
int
>
,
int
>
duplicateInteractions
;
for
(
int
group
=
0
;
group
<
force
.
getNumInteractionGroups
();
group
++
)
{
// Get the list of atoms in this group and sort them.
set
<
int
>
set1
,
set2
;
force
.
getInteractionGroupParameters
(
group
,
set1
,
set2
);
vector
<
int
>
atoms1
,
atoms2
;
atoms1
.
insert
(
atoms1
.
begin
(),
set1
.
begin
(),
set1
.
end
());
atoms2
.
insert
(
atoms2
.
begin
(),
set2
.
begin
(),
set2
.
end
());
sort
(
atoms1
.
begin
(),
atoms1
.
end
());
sort
(
atoms2
.
begin
(),
atoms2
.
end
());
// Find how many tiles we will create for this group.
int
tileWidth
=
min
(
min
(
32
,
(
int
)
atoms1
.
size
()),
(
int
)
atoms2
.
size
());
int
numBlocks1
=
(
atoms1
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
int
numBlocks2
=
(
atoms2
.
size
()
+
tileWidth
-
1
)
/
tileWidth
;
// Add the tiles.
for
(
int
i
=
0
;
i
<
numBlocks1
;
i
++
)
for
(
int
j
=
0
;
j
<
numBlocks2
;
j
++
)
tiles
.
push_back
(
make_pair
(
atomLists
.
size
()
+
i
,
atomLists
.
size
()
+
numBlocks1
+
j
));
// Add the atom lists.
for
(
int
i
=
0
;
i
<
numBlocks1
;
i
++
)
{
vector
<
int
>
atoms
;
int
first
=
i
*
tileWidth
;
int
last
=
min
((
i
+
1
)
*
tileWidth
,
(
int
)
atoms1
.
size
());
for
(
int
j
=
first
;
j
<
last
;
j
++
)
atoms
.
push_back
(
atoms1
[
j
]);
atomLists
.
push_back
(
atoms
);
}
for
(
int
i
=
0
;
i
<
numBlocks2
;
i
++
)
{
vector
<
int
>
atoms
;
int
first
=
i
*
tileWidth
;
int
last
=
min
((
i
+
1
)
*
tileWidth
,
(
int
)
atoms2
.
size
());
for
(
int
j
=
first
;
j
<
last
;
j
++
)
atoms
.
push_back
(
atoms2
[
j
]);
atomLists
.
push_back
(
atoms
);
}
// If this group contains duplicate interactions, record that we need to skip them once.
for
(
int
i
=
0
;
i
<
(
int
)
atoms1
.
size
();
i
++
)
{
int
a1
=
atoms1
[
i
];
if
(
set2
.
find
(
a1
)
==
set2
.
end
())
continue
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms2
.
size
()
&&
atoms2
[
j
]
<
a1
;
j
++
)
{
int
a2
=
atoms2
[
j
];
if
(
set1
.
find
(
a2
)
!=
set1
.
end
())
{
pair
<
int
,
int
>
key
=
make_pair
(
a2
,
a1
);
if
(
duplicateInteractions
.
find
(
key
)
==
duplicateInteractions
.
end
())
duplicateInteractions
[
key
]
=
0
;
duplicateInteractions
[
key
]
++
;
}
}
}
}
// Build a lookup table for quickly identifying excluded interactions.
set
<
pair
<
int
,
int
>
>
exclusions
;
for
(
int
i
=
0
;
i
<
force
.
getNumExclusions
();
i
++
)
{
int
p1
,
p2
;
force
.
getExclusionParticles
(
i
,
p1
,
p2
);
exclusions
.
insert
(
make_pair
(
min
(
p1
,
p2
),
max
(
p1
,
p2
)));
}
// Build the exclusion flags for each tile. While we're at it, filter out tiles
// where all interactions are excluded, and sort the tiles by size.
vector
<
vector
<
int
>
>
exclusionFlags
(
tiles
.
size
());
vector
<
pair
<
int
,
int
>
>
tileOrder
;
for
(
int
tile
=
0
;
tile
<
tiles
.
size
();
tile
++
)
{
if
(
atomLists
[
tiles
[
tile
].
first
].
size
()
<
atomLists
[
tiles
[
tile
].
second
].
size
())
{
// For efficiency, we want the first axis to be the larger one.
int
swap
=
tiles
[
tile
].
first
;
tiles
[
tile
].
first
=
tiles
[
tile
].
second
;
tiles
[
tile
].
second
=
swap
;
}
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
vector
<
int
>
flags
(
atoms1
.
size
(),
(
int
)
(
1LL
<<
atoms2
.
size
())
-
1
);
int
numExcluded
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
atoms1
.
size
();
i
++
)
for
(
int
j
=
0
;
j
<
(
int
)
atoms2
.
size
();
j
++
)
{
int
a1
=
atoms1
[
i
];
int
a2
=
atoms2
[
j
];
bool
isExcluded
=
false
;
pair
<
int
,
int
>
key
=
make_pair
(
min
(
a1
,
a2
),
max
(
a1
,
a2
));
if
(
a1
==
a2
||
exclusions
.
find
(
key
)
!=
exclusions
.
end
())
isExcluded
=
true
;
// This is an excluded interaction.
else
if
(
duplicateInteractions
.
find
(
key
)
!=
duplicateInteractions
.
end
()
&&
duplicateInteractions
[
key
]
>
0
)
{
// Both atoms are in both sets, so skip duplicate interactions.
isExcluded
=
true
;
duplicateInteractions
[
key
]
--
;
}
if
(
isExcluded
)
{
flags
[
i
]
&=
-
1
-
(
1
<<
j
);
numExcluded
++
;
}
}
if
(
numExcluded
==
atoms1
.
size
()
*
atoms2
.
size
())
continue
;
// All interactions are excluded.
tileOrder
.
push_back
(
make_pair
((
int
)
-
atoms2
.
size
(),
tile
));
exclusionFlags
[
tile
]
=
flags
;
}
sort
(
tileOrder
.
begin
(),
tileOrder
.
end
());
// Merge tiles to get as close as possible to 32 along the first axis of each one.
vector
<
int
>
tileSetStart
;
tileSetStart
.
push_back
(
0
);
int
tileSetSize
=
0
;
for
(
int
i
=
0
;
i
<
tileOrder
.
size
();
i
++
)
{
int
tile
=
tileOrder
[
i
].
second
;
int
size
=
atomLists
[
tiles
[
tile
].
first
].
size
();
if
(
tileSetSize
+
size
>
32
)
{
tileSetStart
.
push_back
(
i
);
tileSetSize
=
0
;
}
tileSetSize
+=
size
;
}
tileSetStart
.
push_back
(
tileOrder
.
size
());
// Build the data structures.
int
numTileSets
=
tileSetStart
.
size
()
-
1
;
vector
<
mm_int4
>
groupData
;
for
(
int
tileSet
=
0
;
tileSet
<
numTileSets
;
tileSet
++
)
{
int
indexInTileSet
=
0
;
int
minSize
=
0
;
if
(
cl
.
getSIMDWidth
()
<
32
)
{
// We need to include a barrier inside the inner loop, so ensure that all
// threads will loop the same number of times.
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
minSize
=
max
(
minSize
,
(
int
)
atomLists
[
tiles
[
tileOrder
[
i
].
second
].
first
].
size
());
}
for
(
int
i
=
tileSetStart
[
tileSet
];
i
<
tileSetStart
[
tileSet
+
1
];
i
++
)
{
int
tile
=
tileOrder
[
i
].
second
;
vector
<
int
>&
atoms1
=
atomLists
[
tiles
[
tile
].
first
];
vector
<
int
>&
atoms2
=
atomLists
[
tiles
[
tile
].
second
];
int
range
=
indexInTileSet
+
((
indexInTileSet
+
max
(
minSize
,
(
int
)
atoms1
.
size
()))
<<
16
);
int
allFlags
=
(
1
<<
atoms2
.
size
())
-
1
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms1
.
size
();
j
++
)
{
int
a1
=
atoms1
[
j
];
int
a2
=
(
j
<
atoms2
.
size
()
?
atoms2
[
j
]
:
0
);
int
flags
=
(
exclusionFlags
[
tile
].
size
()
>
0
?
exclusionFlags
[
tile
][
j
]
:
allFlags
);
groupData
.
push_back
(
mm_int4
(
a1
,
a2
,
range
,
flags
<<
indexInTileSet
));
}
indexInTileSet
+=
atoms1
.
size
();
}
for
(;
indexInTileSet
<
32
;
indexInTileSet
++
)
groupData
.
push_back
(
mm_int4
(
0
,
0
,
minSize
<<
16
,
0
));
}
interactionGroupData
=
OpenCLArray
::
create
<
mm_int4
>
(
cl
,
groupData
.
size
(),
"interactionGroupData"
);
interactionGroupData
->
upload
(
groupData
);
// Create the kernel.
map
<
string
,
string
>
replacements
;
replacements
[
"COMPUTE_INTERACTION"
]
=
interactionSource
;
const
string
suffixes
[]
=
{
"x"
,
"y"
,
"z"
,
"w"
};
stringstream
localData
;
int
localDataSize
=
0
;
vector
<
OpenCLNonbondedUtilities
::
ParameterInfo
>&
buffers
=
params
->
getBuffers
();
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
localData
<<
buffers
[
i
].
getComponentType
()
<<
" params"
<<
(
i
+
1
)
<<
";
\n
"
;
else
{
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
localData
<<
buffers
[
i
].
getComponentType
()
<<
" params"
<<
(
i
+
1
)
<<
"_"
<<
suffixes
[
j
]
<<
";
\n
"
;
}
localDataSize
+=
buffers
[
i
].
getSize
();
}
replacements
[
"ATOM_PARAMETER_DATA"
]
=
localData
.
str
();
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
args
<<
", __global const "
<<
buffers
[
i
].
getType
()
<<
"* restrict global_params"
<<
(
i
+
1
);
if
(
globals
!=
NULL
)
args
<<
", __global const float* restrict globals"
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
stringstream
load1
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
load1
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"1 = global_params"
<<
(
i
+
1
)
<<
"[atom1];
\n
"
;
replacements
[
"LOAD_ATOM1_PARAMETERS"
]
=
load1
.
str
();
stringstream
loadLocal2
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
loadLocal2
<<
"localData[get_local_id(0)].params"
<<
(
i
+
1
)
<<
" = global_params"
<<
(
i
+
1
)
<<
"[atom2];
\n
"
;
else
{
loadLocal2
<<
buffers
[
i
].
getType
()
<<
" temp_params"
<<
(
i
+
1
)
<<
" = global_params"
<<
(
i
+
1
)
<<
"[atom2];
\n
"
;
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
loadLocal2
<<
"localData[get_local_id(0)].params"
<<
(
i
+
1
)
<<
"_"
<<
suffixes
[
j
]
<<
" = temp_params"
<<
(
i
+
1
)
<<
"."
<<
suffixes
[
j
]
<<
";
\n
"
;
}
}
replacements
[
"LOAD_LOCAL_PARAMETERS"
]
=
loadLocal2
.
str
();
stringstream
load2
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
{
if
(
buffers
[
i
].
getNumComponents
()
==
1
)
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 = localData[localIndex].params"
<<
(
i
+
1
)
<<
";
\n
"
;
else
{
load2
<<
buffers
[
i
].
getType
()
<<
" params"
<<
(
i
+
1
)
<<
"2 = ("
<<
buffers
[
i
].
getType
()
<<
") ("
;
for
(
int
j
=
0
;
j
<
buffers
[
i
].
getNumComponents
();
++
j
)
{
if
(
j
>
0
)
load2
<<
", "
;
load2
<<
"localData[localIndex].params"
<<
(
i
+
1
)
<<
"_"
<<
suffixes
[
j
];
}
load2
<<
");
\n
"
;
}
}
replacements
[
"LOAD_ATOM2_PARAMETERS"
]
=
load2
.
str
();
map
<
string
,
string
>
defines
;
if
(
force
.
getNonbondedMethod
()
!=
CustomNonbondedForce
::
NoCutoff
)
defines
[
"USE_CUTOFF"
]
=
"1"
;
if
(
force
.
getNonbondedMethod
()
==
CustomNonbondedForce
::
CutoffPeriodic
)
defines
[
"USE_PERIODIC"
]
=
"1"
;
defines
[
"THREAD_BLOCK_SIZE"
]
=
cl
.
intToString
(
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
double
cutoff
=
force
.
getCutoffDistance
();
defines
[
"CUTOFF_SQUARED"
]
=
cl
.
doubleToString
(
cutoff
*
cutoff
);
defines
[
"PADDED_NUM_ATOMS"
]
=
cl
.
intToString
(
cl
.
getPaddedNumAtoms
());
defines
[
"TILE_SIZE"
]
=
"32"
;
int
numContexts
=
cl
.
getPlatformData
().
contexts
.
size
();
int
startIndex
=
cl
.
getContextIndex
()
*
numTileSets
/
numContexts
;
int
endIndex
=
(
cl
.
getContextIndex
()
+
1
)
*
numTileSets
/
numContexts
;
defines
[
"FIRST_TILE"
]
=
cl
.
intToString
(
startIndex
);
defines
[
"LAST_TILE"
]
=
cl
.
intToString
(
endIndex
);
if
((
localDataSize
/
4
)
%
2
==
0
&&
!
cl
.
getUseDoublePrecision
())
defines
[
"PARAMETER_SIZE_IS_EVEN"
]
=
"1"
;
cl
::
Program
program
=
cl
.
createProgram
(
cl
.
replaceStrings
(
OpenCLKernelSources
::
customNonbondedGroups
,
replacements
),
defines
);
interactionGroupKernel
=
cl
::
Kernel
(
program
,
"computeInteractionGroups"
);
numGroupThreadBlocks
=
cl
.
getNonbondedUtilities
().
getNumForceThreadBlocks
();
}
double
OpenCLCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
double
OpenCLCalcCustomNonbondedForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
if
(
globals
!=
NULL
)
{
if
(
globals
!=
NULL
)
{
bool
changed
=
false
;
bool
changed
=
false
;
...
@@ -2065,6 +2330,25 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
...
@@ -2065,6 +2330,25 @@ double OpenCLCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool
longRangeCoefficient
=
CustomNonbondedForceImpl
::
calcLongRangeCorrection
(
*
forceCopy
,
context
.
getOwner
());
longRangeCoefficient
=
CustomNonbondedForceImpl
::
calcLongRangeCorrection
(
*
forceCopy
,
context
.
getOwner
());
hasInitializedLongRangeCorrection
=
true
;
hasInitializedLongRangeCorrection
=
true
;
}
}
if
(
interactionGroupData
!=
NULL
)
{
if
(
!
hasInitializedKernel
)
{
hasInitializedKernel
=
true
;
int
index
=
0
;
bool
useLong
=
cl
.
getSupports64BitGlobalAtomics
();
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
(
useLong
?
cl
.
getLongForceBuffer
()
:
cl
.
getForceBuffers
()).
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getEnergyBuffer
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
cl
.
getPosq
().
getDeviceBuffer
());
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
interactionGroupData
->
getDeviceBuffer
());
setPeriodicBoxSizeArg
(
cl
,
interactionGroupKernel
,
index
++
);
setInvPeriodicBoxSizeArg
(
cl
,
interactionGroupKernel
,
index
++
);
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
interactionGroupKernel
.
setArg
<
cl
::
Memory
>
(
index
++
,
params
->
getBuffers
()[
i
].
getMemory
());
if
(
globals
!=
NULL
)
interactionGroupKernel
.
setArg
<
cl
::
Buffer
>
(
index
++
,
globals
->
getDeviceBuffer
());
}
int
forceThreadBlockSize
=
max
(
32
,
cl
.
getNonbondedUtilities
().
getForceThreadBlockSize
());
cl
.
executeKernel
(
interactionGroupKernel
,
numGroupThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
mm_double4
boxSize
=
cl
.
getPeriodicBoxSizeDouble
();
return
longRangeCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
return
longRangeCoefficient
/
(
boxSize
.
x
*
boxSize
.
y
*
boxSize
.
z
);
}
}
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment