Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
6e434a02
Commit
6e434a02
authored
Jun 08, 2012
by
Peter Eastman
Browse files
Continuing to implement new CUDA platform: HarmonicBondForce and VerletIntegrator
parent
1f0ec7b5
Changes
16
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
6257 additions
and
611 deletions
+6257
-611
platforms/cuda2/src/CudaArray.cpp
platforms/cuda2/src/CudaArray.cpp
+5
-4
platforms/cuda2/src/CudaBondedUtilities.cpp
platforms/cuda2/src/CudaBondedUtilities.cpp
+27
-31
platforms/cuda2/src/CudaBondedUtilities.h
platforms/cuda2/src/CudaBondedUtilities.h
+4
-3
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+73
-90
platforms/cuda2/src/CudaContext.h
platforms/cuda2/src/CudaContext.h
+67
-66
platforms/cuda2/src/CudaForceInfo.h
platforms/cuda2/src/CudaForceInfo.h
+1
-9
platforms/cuda2/src/CudaIntegrationUtilities.cpp
platforms/cuda2/src/CudaIntegrationUtilities.cpp
+16
-16
platforms/cuda2/src/CudaKernelFactory.cpp
platforms/cuda2/src/CudaKernelFactory.cpp
+38
-37
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+5075
-0
platforms/cuda2/src/CudaKernels.h
platforms/cuda2/src/CudaKernels.h
+444
-354
platforms/cuda2/src/kernels/bondForce.cu
platforms/cuda2/src/kernels/bondForce.cu
+7
-0
platforms/cuda2/src/kernels/harmonicBondForce.cu
platforms/cuda2/src/kernels/harmonicBondForce.cu
+4
-0
platforms/cuda2/src/kernels/vectorOps.cu
platforms/cuda2/src/kernels/vectorOps.cu
+39
-1
platforms/cuda2/src/kernels/verlet.cu
platforms/cuda2/src/kernels/verlet.cu
+86
-0
platforms/cuda2/tests/TestCudaHarmonicBondForce.cpp
platforms/cuda2/tests/TestCudaHarmonicBondForce.cpp
+133
-0
platforms/cuda2/tests/TestCudaVerletIntegrator.cpp
platforms/cuda2/tests/TestCudaVerletIntegrator.cpp
+238
-0
No files found.
platforms/cuda2/src/CudaArray.cpp
View file @
6e434a02
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include "CudaArray.h"
#include "CudaContext.h"
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <vector>
#include <vector>
...
@@ -36,7 +37,7 @@ CudaArray::CudaArray(int size, int elementSize, const std::string& name) :
...
@@ -36,7 +37,7 @@ CudaArray::CudaArray(int size, int elementSize, const std::string& name) :
CUresult
result
=
cuMemAlloc
(
&
pointer
,
size
*
elementSize
);
CUresult
result
=
cuMemAlloc
(
&
pointer
,
size
*
elementSize
);
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error creating array "
<<
name
<<
": "
<<
result
;
str
<<
"Error creating array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
throw
OpenMMException
(
str
.
str
());
}
}
}
}
...
@@ -46,7 +47,7 @@ CudaArray::~CudaArray() {
...
@@ -46,7 +47,7 @@ CudaArray::~CudaArray() {
CUresult
result
=
cuMemFree
(
pointer
);
CUresult
result
=
cuMemFree
(
pointer
);
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error deleting array "
<<
name
<<
": "
<<
result
;
str
<<
"Error deleting array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
throw
OpenMMException
(
str
.
str
());
}
}
}
}
...
@@ -60,7 +61,7 @@ void CudaArray::upload(void* data, bool blocking) {
...
@@ -60,7 +61,7 @@ void CudaArray::upload(void* data, bool blocking) {
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
0
);
result
=
cuMemcpyHtoDAsync
(
pointer
,
data
,
size
*
elementSize
,
0
);
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error uploading array "
<<
name
<<
": "
<<
result
;
str
<<
"Error uploading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
throw
OpenMMException
(
str
.
str
());
}
}
}
}
...
@@ -73,7 +74,7 @@ void CudaArray::download(void* data, bool blocking) const {
...
@@ -73,7 +74,7 @@ void CudaArray::download(void* data, bool blocking) const {
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
0
);
result
=
cuMemcpyDtoHAsync
(
data
,
pointer
,
size
*
elementSize
,
0
);
if
(
result
!=
CUDA_SUCCESS
)
{
if
(
result
!=
CUDA_SUCCESS
)
{
std
::
stringstream
str
;
std
::
stringstream
str
;
str
<<
"Error downloading array "
<<
name
<<
": "
<<
result
;
str
<<
"Error downloading array "
<<
name
<<
": "
<<
CudaContext
::
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
str
.
str
());
throw
OpenMMException
(
str
.
str
());
}
}
}
}
platforms/cuda2/src/CudaBondedUtilities.cpp
View file @
6e434a02
...
@@ -26,6 +26,7 @@
...
@@ -26,6 +26,7 @@
#include "CudaBondedUtilities.h"
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaExpressionUtilities.h"
#include "CudaKernelSources.h"
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "CudaNonbondedUtilities.h"
#include "CudaNonbondedUtilities.h"
#include <iostream>
#include <iostream>
...
@@ -81,8 +82,8 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -81,8 +82,8 @@ void CudaBondedUtilities::initialize(const System& system) {
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
indexVec
[
bond
*
width
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
indexVec
[
bond
*
width
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
}
}
CudaArray
*
indices
=
CudaArray
::
create
<
unsigned
int
>
(
indexVec
.
size
()
,
"bondedIndices"
);
CudaArray
*
indices
=
new
CudaArray
(
numBonds
,
4
*
width
,
"bondedIndices"
);
indices
->
upload
(
indexVec
);
indices
->
upload
(
&
indexVec
[
0
]
);
atomIndices
[
i
].
push_back
(
indices
);
atomIndices
[
i
].
push_back
(
indices
);
startAtom
+=
width
;
startAtom
+=
width
;
}
}
...
@@ -91,13 +92,14 @@ void CudaBondedUtilities::initialize(const System& system) {
...
@@ -91,13 +92,14 @@ void CudaBondedUtilities::initialize(const System& system) {
// Create the kernel.
// Create the kernel.
stringstream
s
;
stringstream
s
;
s
<<
CudaKernelSources
::
vectorOps
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
prefixCode
[
i
];
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(long* __restrict__ forceBuffer, real* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups"
;
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(
unsigned long
long* __restrict__ forceBuffer, real* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
force
][
i
]
->
getElementSize
()
/
4
;
int
indexWidth
=
atomIndices
[
force
][
i
]
->
getElementSize
()
/
4
;
string
indexType
=
"u
nsigned int"
+
(
indexWidth
==
1
?
""
:
context
.
intToString
(
indexWidth
)
)
;
string
indexType
=
"u
int"
+
context
.
intToString
(
indexWidth
);
s
<<
", const "
<<
indexType
<<
"* __restrict__ atomIndices"
<<
force
<<
"_"
<<
i
;
s
<<
", const "
<<
indexType
<<
"* __restrict__ atomIndices"
<<
force
<<
"_"
<<
i
;
}
}
}
}
...
@@ -129,10 +131,10 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
...
@@ -129,10 +131,10 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
forceIndex
].
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
forceIndex
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
forceIndex
][
i
]
->
getElementSize
()
/
4
;
int
indexWidth
=
atomIndices
[
forceIndex
][
i
]
->
getElementSize
()
/
4
;
suffix
=
(
indexWidth
==
1
?
suffix1
:
suffix4
);
suffix
=
(
indexWidth
==
1
?
suffix1
:
suffix4
);
string
indexType
=
"u
nsigned int"
+
(
indexWidth
==
1
?
""
:
context
.
intToString
(
indexWidth
)
)
;
string
indexType
=
"u
int"
+
context
.
intToString
(
indexWidth
);
s
<<
" "
<<
indexType
<<
" atoms"
<<
i
<<
" = atomIndices"
<<
forceIndex
<<
"_"
<<
i
<<
"[index];
\n
"
;
s
<<
" "
<<
indexType
<<
" atoms"
<<
i
<<
" = atomIndices"
<<
forceIndex
<<
"_"
<<
i
<<
"[index];
\n
"
;
s
<<
" "
<<
indexType
<<
" buffers = bufferIndices"
<<
forceIndex
<<
"[index];
\n
"
;
int
atomsToLoad
=
min
(
indexWidth
,
numAtoms
-
startAtom
)
;
for
(
int
j
=
0
;
j
<
indexWidth
;
j
++
)
{
for
(
int
j
=
0
;
j
<
atomsToLoad
;
j
++
)
{
s
<<
" unsigned int atom"
<<
(
startAtom
+
j
+
1
)
<<
" = atoms"
<<
i
<<
suffix
[
j
]
<<
";
\n
"
;
s
<<
" unsigned int atom"
<<
(
startAtom
+
j
+
1
)
<<
" = atoms"
<<
i
<<
suffix
[
j
]
<<
";
\n
"
;
s
<<
" real4 pos"
<<
(
j
+
1
)
<<
" = posq[atom"
<<
(
j
+
1
)
<<
"];
\n
"
;
s
<<
" real4 pos"
<<
(
j
+
1
)
<<
" = posq[atom"
<<
(
j
+
1
)
<<
"];
\n
"
;
}
}
...
@@ -140,34 +142,28 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
...
@@ -140,34 +142,28 @@ string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int
}
}
s
<<
computeForce
<<
"
\n
"
;
s
<<
computeForce
<<
"
\n
"
;
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"], (long) (force.x*0xFFFFFFFF));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"], static_cast<unsigned long long>((long long) (force"
<<
(
i
+
1
)
<<
".x*0xFFFFFFFF)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], (long) (force.x*0xFFFFFFFF));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS], static_cast<unsigned long long>((long long) (force"
<<
(
i
+
1
)
<<
".y*0xFFFFFFFF)));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS*2], (long) (force.x*0xFFFFFFFF));
\n
"
;
s
<<
" atomicAdd(&forceBuffer[atom"
<<
(
i
+
1
)
<<
"+PADDED_NUM_ATOMS*2], static_cast<unsigned long long>((long long) (force"
<<
(
i
+
1
)
<<
".z*0xFFFFFFFF)));
\n
"
;
s
<<
" __threadfence_block();
\n
"
;
}
}
s
<<
"}
\n
"
;
s
<<
"}
\n
"
;
return
s
.
str
();
return
s
.
str
();
}
}
void
CudaBondedUtilities
::
computeInteractions
(
int
groups
)
{
void
CudaBondedUtilities
::
computeInteractions
(
int
groups
)
{
// if (!hasInitializedKernels) {
if
(
!
hasInitializedKernels
)
{
// hasInitializedKernels = true;
hasInitializedKernels
=
true
;
// for (int i = 0; i < (int) forceSets.size(); i++) {
kernelArgs
.
push_back
(
&
context
.
getForce
().
getDevicePointer
());
// int index = 0;
kernelArgs
.
push_back
(
&
context
.
getEnergyBuffer
().
getDevicePointer
());
// cl::Kernel& kernel = kernels[i];
kernelArgs
.
push_back
(
&
context
.
getPosq
().
getDevicePointer
());
// kernel.setArg<cl::Buffer>(index++, context.getForceBuffers().getDeviceBuffer());
kernelArgs
.
push_back
(
NULL
);
// kernel.setArg<cl::Buffer>(index++, context.getEnergyBuffer().getDeviceBuffer());
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
.
size
();
i
++
)
// kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
for
(
int
j
=
0
;
j
<
(
int
)
atomIndices
[
i
].
size
();
j
++
)
// index++;
kernelArgs
.
push_back
(
&
atomIndices
[
i
][
j
]
->
getDevicePointer
());
// for (int j = 0; j < (int) forceSets[i].size(); j++) {
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
// kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer());
kernelArgs
.
push_back
(
&
arguments
[
i
]);
// kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer());
}
// }
kernelArgs
[
3
]
=
&
groups
;
// for (int j = 0; j < (int) arguments.size(); j++)
context
.
executeKernel
(
kernel
,
&
kernelArgs
[
0
],
maxBonds
);
// kernel.setArg<cl::Memory>(index++, *arguments[j]);
// }
// }
// for (int i = 0; i < (int) kernels.size(); i++) {
// kernels[i].setArg<cl_int>(3, groups);
// context.executeKernel(kernels[i], maxBonds);
// }
}
}
platforms/cuda2/src/CudaBondedUtilities.h
View file @
6e434a02
...
@@ -59,7 +59,7 @@ namespace OpenMM {
...
@@ -59,7 +59,7 @@ namespace OpenMM {
* <li>The positions of those atoms will have been stored in the real4 variables "pos1", "pos2", ....</li>
* <li>The positions of those atoms will have been stored in the real4 variables "pos1", "pos2", ....</li>
* <li>A real variable called "energy" will exist. Your code should add the potential energy of the
* <li>A real variable called "energy" will exist. Your code should add the potential energy of the
* bond to that variable.</li>
* bond to that variable.</li>
* <li>Your code should define real
4
variables called "force1", "force2", ... that contain the force to
* <li>Your code should define real
3
variables called "force1", "force2", ... that contain the force to
* apply to each atom.</li>
* apply to each atom.</li>
* </ol>
* </ol>
*
*
...
@@ -69,8 +69,8 @@ namespace OpenMM {
...
@@ -69,8 +69,8 @@ namespace OpenMM {
* <tt><pre>
* <tt><pre>
* real4 delta = pos2-pos1;
* real4 delta = pos2-pos1;
* energy += delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
* energy += delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
* real
4
force1 = 2.0f*delta;
* real
3
force1 = 2.0f*delta;
* real
4
force2 = -2.0f*delta;
* real
3
force2 = -2.0f*delta;
* </pre></tt>
* </pre></tt>
*
*
* Interactions will often depend on parameters or other data. Call addArgument() to provide the data
* Interactions will often depend on parameters or other data. Call addArgument() to provide the data
...
@@ -129,6 +129,7 @@ private:
...
@@ -129,6 +129,7 @@ private:
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
std
::
vector
<
CudaArray
*>
>
atomIndices
;
std
::
vector
<
std
::
vector
<
CudaArray
*>
>
atomIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
std
::
string
>
prefixCode
;
std
::
vector
<
void
*>
kernelArgs
;
int
numForceBuffers
,
maxBonds
;
int
numForceBuffers
,
maxBonds
;
bool
hasInitializedKernels
;
bool
hasInitializedKernels
;
};
};
...
...
platforms/cuda2/src/CudaContext.cpp
View file @
6e434a02
...
@@ -30,7 +30,7 @@
...
@@ -30,7 +30,7 @@
#include <cmath>
#include <cmath>
#include "CudaContext.h"
#include "CudaContext.h"
#include "CudaArray.h"
#include "CudaArray.h"
//
#include "CudaBondedUtilities.h"
#include "CudaBondedUtilities.h"
#include "CudaForceInfo.h"
#include "CudaForceInfo.h"
#include "CudaIntegrationUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
...
@@ -67,8 +67,8 @@ bool CudaContext::hasInitializedCuda = false;
...
@@ -67,8 +67,8 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
compiler
(
compiler
),
const
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
compiler
(
compiler
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
velm
(
NULL
),
/*
force
Buffers(NULL), longForceBuffer
(NULL), energyBuffer(NULL), atomIndex(NULL),
*/
integration
(
NULL
),
expression
(
NULL
),
velm
(
NULL
),
force
(
NULL
),
energyBuffer
(
NULL
),
atomIndex
(
NULL
),
integration
(
NULL
),
expression
(
NULL
),
/*
bonded(NULL), nonbonded(NULL),*/
thread
(
NULL
)
{
bonded
(
NULL
),
/*
nonbonded(NULL),*/
thread
(
NULL
)
{
if
(
!
hasInitializedCuda
)
{
if
(
!
hasInitializedCuda
)
{
CHECK_RESULT2
(
cuInit
(
0
),
"Error initializing CUDA"
);
CHECK_RESULT2
(
cuInit
(
0
),
"Error initializing CUDA"
);
hasInitializedCuda
=
true
;
hasInitializedCuda
=
true
;
...
@@ -138,10 +138,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -138,10 +138,9 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
device
));
int
numThreadBlocksPerComputeUnit
=
6
;
int
numThreadBlocksPerComputeUnit
=
6
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
numThreadBlocks
=
numThreadBlocksPerComputeUnit
*
multiprocessors
;
//
bonded = new CudaBondedUtilities(*this);
bonded
=
new
CudaBondedUtilities
(
*
this
);
// nonbonded = new CudaNonbondedUtilities(*this);
// nonbonded = new CudaNonbondedUtilities(*this);
if
(
useDoublePrecision
)
{
if
(
useDoublePrecision
)
{
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
paddedNumAtoms
*
sizeof
(
double4
),
0
));
posq
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"posq"
);
posq
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"velm"
);
velm
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"make_real2"
]
=
"make_double2"
;
compilationDefines
[
"make_real2"
]
=
"make_double2"
;
...
@@ -149,7 +148,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -149,7 +148,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"make_real4"
]
=
"make_double4"
;
compilationDefines
[
"make_real4"
]
=
"make_double4"
;
}
}
else
{
else
{
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
paddedNumAtoms
*
sizeof
(
float4
),
0
));
posq
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"posq"
);
posq
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"velm"
);
velm
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
...
@@ -203,22 +201,16 @@ CudaContext::~CudaContext() {
...
@@ -203,22 +201,16 @@ CudaContext::~CudaContext() {
delete
posq
;
delete
posq
;
if
(
velm
!=
NULL
)
if
(
velm
!=
NULL
)
delete
velm
;
delete
velm
;
// if (force != NULL)
if
(
force
!=
NULL
)
// delete force;
delete
force
;
// if (forceBuffers != NULL)
if
(
energyBuffer
!=
NULL
)
// delete forceBuffers;
delete
energyBuffer
;
// if (longForceBuffer != NULL)
// delete longForceBuffer;
// if (energyBuffer != NULL)
// delete energyBuffer;
// if (atomIndex != NULL)
// delete atomIndex;
if
(
integration
!=
NULL
)
if
(
integration
!=
NULL
)
delete
integration
;
delete
integration
;
if
(
expression
!=
NULL
)
if
(
expression
!=
NULL
)
delete
expression
;
delete
expression
;
//
if (bonded != NULL)
if
(
bonded
!=
NULL
)
//
delete bonded;
delete
bonded
;
// if (nonbonded != NULL)
// if (nonbonded != NULL)
// delete nonbonded;
// delete nonbonded;
if
(
thread
!=
NULL
)
if
(
thread
!=
NULL
)
...
@@ -229,6 +221,17 @@ CudaContext::~CudaContext() {
...
@@ -229,6 +221,17 @@ CudaContext::~CudaContext() {
}
}
void
CudaContext
::
initialize
()
{
void
CudaContext
::
initialize
()
{
string
errorMessage
=
"Error initializing Context"
;
if
(
useDoublePrecision
)
{
energyBuffer
=
CudaArray
::
create
<
double
>
(
numThreadBlocks
*
ThreadBlockSize
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
4
,
numThreadBlocks
*
ThreadBlockSize
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
double
),
0
));
}
else
{
energyBuffer
=
CudaArray
::
create
<
float
>
(
numThreadBlocks
*
ThreadBlockSize
,
"energyBuffer"
);
int
pinnedBufferSize
=
max
(
paddedNumAtoms
*
6
,
numThreadBlocks
*
ThreadBlockSize
);
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
pinnedBufferSize
*
sizeof
(
float
),
0
));
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
double
mass
=
system
.
getParticleMass
(
i
);
double
mass
=
system
.
getParticleMass
(
i
);
if
(
useDoublePrecision
)
if
(
useDoublePrecision
)
...
@@ -237,11 +240,10 @@ void CudaContext::initialize() {
...
@@ -237,11 +240,10 @@ void CudaContext::initialize() {
((
float4
*
)
pinnedBuffer
)[
i
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
float
)
(
1.0
/
mass
));
((
float4
*
)
pinnedBuffer
)[
i
]
=
make_float4
(
0.0
f
,
0.0
f
,
0.0
f
,
mass
==
0.0
?
0.0
f
:
(
float
)
(
1.0
/
mass
));
}
}
velm
->
upload
(
pinnedBuffer
);
velm
->
upload
(
pinnedBuffer
);
// bonded->initialize(system);
bonded
->
initialize
(
system
);
force
=
CudaArray
::
create
<
long3
>
(
paddedNumAtoms
,
"force"
);
force
=
CudaArray
::
create
<
long
long
>
(
paddedNumAtoms
*
3
,
"force"
);
addAutoclearBuffer
(
force
->
getDevicePointer
(),
force
->
getSize
()
*
6
);
addAutoclearBuffer
(
force
->
getDevicePointer
(),
force
->
getSize
()
*
force
->
getElementSize
());
energyBuffer
=
CudaArray
::
create
<
float
>
(
numThreadBlocks
*
ThreadBlockSize
,
"energyBuffer"
);
addAutoclearBuffer
(
energyBuffer
->
getDevicePointer
(),
energyBuffer
->
getSize
()
*
energyBuffer
->
getElementSize
());
addAutoclearBuffer
(
energyBuffer
->
getDevicePointer
(),
energyBuffer
->
getSize
());
atomIndexDevice
=
CudaArray
::
create
<
int
>
(
paddedNumAtoms
,
"atomIndex"
);
atomIndexDevice
=
CudaArray
::
create
<
int
>
(
paddedNumAtoms
,
"atomIndex"
);
atomIndex
.
resize
(
paddedNumAtoms
);
atomIndex
.
resize
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
for
(
int
i
=
0
;
i
<
paddedNumAtoms
;
++
i
)
...
@@ -448,82 +450,63 @@ void CudaContext::executeKernel(CUfunction kernel, void** arguments, int threads
...
@@ -448,82 +450,63 @@ void CudaContext::executeKernel(CUfunction kernel, void** arguments, int threads
}
}
void
CudaContext
::
clearBuffer
(
CudaArray
&
array
)
{
void
CudaContext
::
clearBuffer
(
CudaArray
&
array
)
{
clearBuffer
(
array
.
getDevicePointer
(),
array
.
getSize
()
*
array
.
getElementSize
()
/
4
);
clearBuffer
(
array
.
getDevicePointer
(),
array
.
getSize
()
*
array
.
getElementSize
());
}
}
void
CudaContext
::
clearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
void
CudaContext
::
clearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
void
*
args
[]
=
{
&
memory
,
&
size
};
int
words
=
size
/
4
;
void
*
args
[]
=
{
&
memory
,
&
words
};
executeKernel
(
clearBufferKernel
,
args
,
size
,
128
);
executeKernel
(
clearBufferKernel
,
args
,
size
,
128
);
}
}
void
CudaContext
::
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
void
CudaContext
::
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
)
{
autoclearBuffers
.
push_back
(
memory
);
autoclearBuffers
.
push_back
(
memory
);
autoclearBufferSizes
.
push_back
(
size
);
autoclearBufferSizes
.
push_back
(
size
/
4
);
}
}
//void CudaContext::clearAutoclearBuffers() {
void
CudaContext
::
clearAutoclearBuffers
()
{
// int base = 0;
int
base
=
0
;
// int total = autoclearBufferSizes.size();
int
total
=
autoclearBufferSizes
.
size
();
// while (total-base >= 6) {
while
(
total
-
base
>=
6
)
{
// clearSixBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
void
*
args
[]
=
{
&
autoclearBuffers
[
base
],
&
autoclearBufferSizes
[
base
],
// clearSixBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
&
autoclearBuffers
[
base
+
1
],
&
autoclearBufferSizes
[
base
+
1
],
// clearSixBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
&
autoclearBuffers
[
base
+
2
],
&
autoclearBufferSizes
[
base
+
2
],
// clearSixBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
&
autoclearBuffers
[
base
+
3
],
&
autoclearBufferSizes
[
base
+
3
],
// clearSixBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
&
autoclearBuffers
[
base
+
4
],
&
autoclearBufferSizes
[
base
+
4
],
// clearSixBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
&
autoclearBuffers
[
base
+
5
],
&
autoclearBufferSizes
[
base
+
5
]};
// clearSixBuffersKernel.setArg<cl::Memory>(6, *autoclearBuffers[base+3]);
executeKernel
(
clearSixBuffersKernel
,
args
,
max
(
max
(
max
(
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
autoclearBufferSizes
[
base
+
3
]),
autoclearBufferSizes
[
base
+
4
]),
autoclearBufferSizes
[
base
+
5
]),
128
);
// clearSixBuffersKernel.setArg<cl_int>(7, autoclearBufferSizes[base+3]);
base
+=
6
;
// clearSixBuffersKernel.setArg<cl::Memory>(8, *autoclearBuffers[base+4]);
}
// clearSixBuffersKernel.setArg<cl_int>(9, autoclearBufferSizes[base+4]);
if
(
total
-
base
==
5
)
{
// clearSixBuffersKernel.setArg<cl::Memory>(10, *autoclearBuffers[base+5]);
void
*
args
[]
=
{
&
autoclearBuffers
[
base
],
&
autoclearBufferSizes
[
base
],
// clearSixBuffersKernel.setArg<cl_int>(11, autoclearBufferSizes[base+5]);
&
autoclearBuffers
[
base
+
1
],
&
autoclearBufferSizes
[
base
+
1
],
// executeKernel(clearSixBuffersKernel, max(max(max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), autoclearBufferSizes[base+4]), autoclearBufferSizes[base+5]), 128);
&
autoclearBuffers
[
base
+
2
],
&
autoclearBufferSizes
[
base
+
2
],
// base += 6;
&
autoclearBuffers
[
base
+
3
],
&
autoclearBufferSizes
[
base
+
3
],
// }
&
autoclearBuffers
[
base
+
4
],
&
autoclearBufferSizes
[
base
+
4
]};
// if (total-base == 5) {
executeKernel
(
clearFiveBuffersKernel
,
args
,
max
(
max
(
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
autoclearBufferSizes
[
base
+
3
]),
autoclearBufferSizes
[
base
+
4
]),
128
);
// clearFiveBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
}
// clearFiveBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
else
if
(
total
-
base
==
4
)
{
// clearFiveBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
void
*
args
[]
=
{
&
autoclearBuffers
[
base
],
&
autoclearBufferSizes
[
base
],
// clearFiveBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
&
autoclearBuffers
[
base
+
1
],
&
autoclearBufferSizes
[
base
+
1
],
// clearFiveBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
&
autoclearBuffers
[
base
+
2
],
&
autoclearBufferSizes
[
base
+
2
],
// clearFiveBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
&
autoclearBuffers
[
base
+
3
],
&
autoclearBufferSizes
[
base
+
3
]};
// clearFiveBuffersKernel.setArg<cl::Memory>(6, *autoclearBuffers[base+3]);
executeKernel
(
clearFourBuffersKernel
,
args
,
max
(
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
autoclearBufferSizes
[
base
+
3
]),
128
);
// clearFiveBuffersKernel.setArg<cl_int>(7, autoclearBufferSizes[base+3]);
}
// clearFiveBuffersKernel.setArg<cl::Memory>(8, *autoclearBuffers[base+4]);
else
if
(
total
-
base
==
3
)
{
// clearFiveBuffersKernel.setArg<cl_int>(9, autoclearBufferSizes[base+4]);
void
*
args
[]
=
{
&
autoclearBuffers
[
base
],
&
autoclearBufferSizes
[
base
],
// executeKernel(clearFiveBuffersKernel, max(max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), autoclearBufferSizes[base+4]), 128);
&
autoclearBuffers
[
base
+
1
],
&
autoclearBufferSizes
[
base
+
1
],
// }
&
autoclearBuffers
[
base
+
2
],
&
autoclearBufferSizes
[
base
+
2
]};
// else if (total-base == 4) {
executeKernel
(
clearThreeBuffersKernel
,
args
,
max
(
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
autoclearBufferSizes
[
base
+
2
]),
128
);
// clearFourBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
}
// clearFourBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
else
if
(
total
-
base
==
2
)
{
// clearFourBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
void
*
args
[]
=
{
&
autoclearBuffers
[
base
],
&
autoclearBufferSizes
[
base
],
// clearFourBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
&
autoclearBuffers
[
base
+
1
],
&
autoclearBufferSizes
[
base
+
1
]};
// clearFourBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
executeKernel
(
clearTwoBuffersKernel
,
args
,
max
(
autoclearBufferSizes
[
base
],
autoclearBufferSizes
[
base
+
1
]),
128
);
// clearFourBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
}
// clearFourBuffersKernel.setArg<cl::Memory>(6, *autoclearBuffers[base+3]);
else
if
(
total
-
base
==
1
)
{
// clearFourBuffersKernel.setArg<cl_int>(7, autoclearBufferSizes[base+3]);
clearBuffer
(
autoclearBuffers
[
base
],
autoclearBufferSizes
[
base
]
*
4
);
// executeKernel(clearFourBuffersKernel, max(max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), autoclearBufferSizes[base+3]), 128);
}
// }
}
// else if (total-base == 3) {
// clearThreeBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
// clearThreeBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
// clearThreeBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
// clearThreeBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
// clearThreeBuffersKernel.setArg<cl::Memory>(4, *autoclearBuffers[base+2]);
// clearThreeBuffersKernel.setArg<cl_int>(5, autoclearBufferSizes[base+2]);
// executeKernel(clearThreeBuffersKernel, max(max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), autoclearBufferSizes[base+2]), 128);
// }
// else if (total-base == 2) {
// clearTwoBuffersKernel.setArg<cl::Memory>(0, *autoclearBuffers[base]);
// clearTwoBuffersKernel.setArg<cl_int>(1, autoclearBufferSizes[base]);
// clearTwoBuffersKernel.setArg<cl::Memory>(2, *autoclearBuffers[base+1]);
// clearTwoBuffersKernel.setArg<cl_int>(3, autoclearBufferSizes[base+1]);
// executeKernel(clearTwoBuffersKernel, max(autoclearBufferSizes[base], autoclearBufferSizes[base+1]), 128);
// }
// else if (total-base == 1) {
// clearBuffer(*autoclearBuffers[base], autoclearBufferSizes[base]);
// }
//}
void
CudaContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
void
CudaContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
// Recursively tag atoms as belonging to a particular molecule.
// Recursively tag atoms as belonging to a particular molecule.
...
@@ -539,7 +522,7 @@ void CudaContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMo
...
@@ -539,7 +522,7 @@ void CudaContext::tagAtomsInMolecule(int atom, int molecule, vector<int>& atomMo
*/
*/
class
CudaContext
::
VirtualSiteInfo
:
public
CudaForceInfo
{
class
CudaContext
::
VirtualSiteInfo
:
public
CudaForceInfo
{
public:
public:
VirtualSiteInfo
(
const
System
&
system
)
:
CudaForceInfo
(
0
)
{
VirtualSiteInfo
(
const
System
&
system
)
{
for
(
int
i
=
0
;
i
<
system
.
getNumParticles
();
i
++
)
{
for
(
int
i
=
0
;
i
<
system
.
getNumParticles
();
i
++
)
{
if
(
system
.
isVirtualSite
(
i
))
{
if
(
system
.
isVirtualSite
(
i
))
{
siteTypes
.
push_back
(
&
typeid
(
system
.
getVirtualSite
(
i
)));
siteTypes
.
push_back
(
&
typeid
(
system
.
getVirtualSite
(
i
)));
...
...
platforms/cuda2/src/CudaContext.h
View file @
6e434a02
...
@@ -125,41 +125,42 @@ public:
...
@@ -125,41 +125,42 @@ public:
return
*
velm
;
return
*
velm
;
}
}
/**
/**
* Get the array which contains the force on each atom (re
s
presented as
a long3
in 64 bit fixed point).
* Get the array which contains the force on each atom (represented as
three long longs
in 64 bit fixed point).
*/
*/
CudaArray
&
getForce
()
{
CudaArray
&
getForce
()
{
return
*
force
;
return
*
force
;
}
}
// /**
/**
// * Get the array which contains the buffers in which forces are computed.
* Get the array which contains the buffer in which energy is computed.
// */
*/
// CudaArray<mm_float4>& getForceBuffers() {
CudaArray
&
getEnergyBuffer
()
{
// return *forceBuffers;
return
*
energyBuffer
;
// }
}
// /**
/**
// * Get the array which contains a contribution to each force represented as 64 bit fixed point.
* Get a pointer to a block of pinned memory that can be used for efficient transfers between host and device.
// */
* This is guaranteed to be at least as large as any of the arrays returned by methods of this class.
// CudaArray<cl_long>& getLongForceBuffer() {
*/
// return *longForceBuffer;
void
*
getPinnedBuffer
()
{
// }
return
pinnedBuffer
;
// /**
}
// * Get the array which contains the buffer in which energy is computed.
/**
// */
* Get the host-side vector which contains the index of each atom.
// CudaArray<cl_float>& getEnergyBuffer() {
*/
// return *energyBuffer;
const
std
::
vector
<
int
>&
getAtomIndex
()
const
{
// }
return
atomIndex
;
// /**
}
// * Get the array which contains the index of each atom.
/**
// */
* Get the array which contains the index of each atom.
// CudaArray<cl_int>& getAtomIndex() {
*/
// return *atomIndex;
CudaArray
&
getAtomIndexArray
()
{
// }
return
*
atomIndexDevice
;
// /**
}
// * Get the number of cells by which the positions are offset.
/**
// */
* Get the number of cells by which the positions are offset.
// std::vector<mm_int4>& getPosCellOffsets() {
*/
// return posCellOffsets;
std
::
vector
<
int4
>&
getPosCellOffsets
()
{
// }
return
posCellOffsets
;
}
/**
/**
* Replace all occurrences of a list of substrings.
* Replace all occurrences of a list of substrings.
*
*
...
@@ -210,20 +211,20 @@ public:
...
@@ -210,20 +211,20 @@ public:
* Set all elements of an array to 0.
* Set all elements of an array to 0.
*
*
* @param memory the memory to clear
* @param memory the memory to clear
* @param size the
number of 4-byte elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
*/
void
clearBuffer
(
CUdeviceptr
memory
,
int
size
);
void
clearBuffer
(
CUdeviceptr
memory
,
int
size
);
/**
/**
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
*
* @param memory the memory to clear
* @param memory the memory to clear
* @param size the
number of 4-byte elements in
the buffer
* @param size the
size of
the buffer
in bytes
*/
*/
void
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
);
void
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
);
//
/**
/**
//
* Clear all buffers that have been registered with addAutoclearBuffer().
* Clear all buffers that have been registered with addAutoclearBuffer().
//
*/
*/
//
void clearAutoclearBuffers();
void
clearAutoclearBuffers
();
/**
/**
* Get the current simulation time.
* Get the current simulation time.
*/
*/
...
@@ -309,27 +310,27 @@ public:
...
@@ -309,27 +310,27 @@ public:
/**
/**
* Convert a CUDA result code to the corresponding string description.
* Convert a CUDA result code to the corresponding string description.
*/
*/
std
::
string
getErrorString
(
CUresult
result
);
static
std
::
string
getErrorString
(
CUresult
result
);
//
/**
/**
//
* Get the size of the periodic box.
* Get the size of the periodic box.
//
*/
*/
//
float
4 getPeriodicBoxSize() const {
double
4
getPeriodicBoxSize
()
const
{
//
return periodicBoxSize;
return
periodicBoxSize
;
//
}
}
//
/**
/**
//
* Set the size of the periodic box.
* Set the size of the periodic box.
//
*/
*/
//
void setPeriodicBoxSize(double xsize, double ysize, double zsize) {
void
setPeriodicBoxSize
(
double
xsize
,
double
ysize
,
double
zsize
)
{
//
periodicBoxSize = make_
float4((float) xsize, (float) ysize, (float)
zsize, 0);
periodicBoxSize
=
make_
double4
(
xsize
,
ysize
,
zsize
,
0.
0
);
//
invPeriodicBoxSize = make_
float4((float)
(1.0/xsize
)
,
(float) (
1.0/ysize
)
,
(float) (
1.0/zsize
)
, 0);
invPeriodicBoxSize
=
make_
double4
(
1.0
/
xsize
,
1.0
/
ysize
,
1.0
/
zsize
,
0.
0
);
//
}
}
//
/**
/**
//
* Get the inverse of the size of the periodic box.
* Get the inverse of the size of the periodic box.
//
*/
*/
//
float
4 getInvPeriodicBoxSize() const {
double
4
getInvPeriodicBoxSize
()
const
{
//
return invPeriodicBoxSize;
return
invPeriodicBoxSize
;
//
}
}
/**
/**
* Get the CudaIntegrationUtilities for this context.
* Get the CudaIntegrationUtilities for this context.
*/
*/
...
@@ -342,12 +343,12 @@ public:
...
@@ -342,12 +343,12 @@ public:
CudaExpressionUtilities
&
getExpressionUtilities
()
{
CudaExpressionUtilities
&
getExpressionUtilities
()
{
return
*
expression
;
return
*
expression
;
}
}
//
/**
/**
//
* Get the CudaBondedUtilities for this context.
* Get the CudaBondedUtilities for this context.
//
*/
*/
//
CudaBondedUtilities& getBondedUtilities() {
CudaBondedUtilities
&
getBondedUtilities
()
{
//
return *bonded;
return
*
bonded
;
//
}
}
// /**
// /**
// * Get the CudaNonbondedUtilities for this context.
// * Get the CudaNonbondedUtilities for this context.
// */
// */
...
@@ -428,8 +429,8 @@ private:
...
@@ -428,8 +429,8 @@ private:
int
numThreadBlocks
;
int
numThreadBlocks
;
bool
useBlockingSync
,
useDoublePrecision
,
accumulateInDouble
,
contextIsValid
,
atomsWereReordered
,
moleculesInvalid
;
bool
useBlockingSync
,
useDoublePrecision
,
accumulateInDouble
,
contextIsValid
,
atomsWereReordered
,
moleculesInvalid
;
std
::
string
compiler
,
tempDir
,
gpuArchitecture
;
std
::
string
compiler
,
tempDir
,
gpuArchitecture
;
float
4
periodicBoxSize
;
double
4
periodicBoxSize
;
float
4
invPeriodicBoxSize
;
double
4
invPeriodicBoxSize
;
std
::
string
defaultOptimizationOptions
;
std
::
string
defaultOptimizationOptions
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
std
::
map
<
std
::
string
,
std
::
string
>
compilationDefines
;
CUcontext
context
;
CUcontext
context
;
...
@@ -456,7 +457,7 @@ private:
...
@@ -456,7 +457,7 @@ private:
std
::
vector
<
ReorderListener
*>
reorderListeners
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
CudaIntegrationUtilities
*
integration
;
CudaIntegrationUtilities
*
integration
;
CudaExpressionUtilities
*
expression
;
CudaExpressionUtilities
*
expression
;
//
CudaBondedUtilities* bonded;
CudaBondedUtilities
*
bonded
;
// CudaNonbondedUtilities* nonbonded;
// CudaNonbondedUtilities* nonbonded;
WorkThread
*
thread
;
WorkThread
*
thread
;
};
};
...
...
platforms/cuda2/src/CudaForceInfo.h
View file @
6e434a02
...
@@ -39,13 +39,7 @@ namespace OpenMM {
...
@@ -39,13 +39,7 @@ namespace OpenMM {
class
OPENMM_EXPORT
CudaForceInfo
{
class
OPENMM_EXPORT
CudaForceInfo
{
public:
public:
CudaForceInfo
(
int
requiredForceBuffers
)
:
requiredForceBuffers
(
requiredForceBuffers
)
{
CudaForceInfo
()
{
}
/**
* Get the number of force buffers this force requires.
*/
int
getRequiredForceBuffers
()
{
return
requiredForceBuffers
;
}
}
/**
/**
* Get whether or not two particles have identical force field parameters.
* Get whether or not two particles have identical force field parameters.
...
@@ -63,8 +57,6 @@ public:
...
@@ -63,8 +57,6 @@ public:
* Get whether two particle groups are identical.
* Get whether two particle groups are identical.
*/
*/
virtual
bool
areGroupsIdentical
(
int
group1
,
int
group2
);
virtual
bool
areGroupsIdentical
(
int
group1
,
int
group2
);
private:
int
requiredForceBuffers
;
};
};
}
// namespace OpenMM
}
// namespace OpenMM
...
...
platforms/cuda2/src/CudaIntegrationUtilities.cpp
View file @
6e434a02
...
@@ -672,15 +672,15 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
...
@@ -672,15 +672,15 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
delete
vsiteOutOfPlaneWeights
;
delete
vsiteOutOfPlaneWeights
;
}
}
//
void CudaIntegrationUtilities::applyConstraints(double tol) {
void
CudaIntegrationUtilities
::
applyConstraints
(
double
tol
)
{
//
applyConstraints(false, tol);
applyConstraints
(
false
,
tol
);
//
}
}
//
//
void CudaIntegrationUtilities::applyVelocityConstraints(double tol) {
void
CudaIntegrationUtilities
::
applyVelocityConstraints
(
double
tol
)
{
//
applyConstraints(true, tol);
applyConstraints
(
true
,
tol
);
//
}
}
//
//
void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double tol) {
void
CudaIntegrationUtilities
::
applyConstraints
(
bool
constrainVelocities
,
double
tol
)
{
// bool hasInitialized;
// bool hasInitialized;
// CUfunction settleKernel, shakeKernel, ccmaForceKernel, ccmaUpdateKernel;
// CUfunction settleKernel, shakeKernel, ccmaForceKernel, ccmaUpdateKernel;
// if (constrainVelocities) {
// if (constrainVelocities) {
...
@@ -772,19 +772,19 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
...
@@ -772,19 +772,19 @@ CudaIntegrationUtilities::~CudaIntegrationUtilities() {
// }
// }
// }
// }
// }
// }
//
}
}
//
//
void CudaIntegrationUtilities::computeVirtualSites() {
void
CudaIntegrationUtilities
::
computeVirtualSites
()
{
// if (numVsites > 0)
// if (numVsites > 0)
// context.executeKernel(vsitePositionKernel, numVsites);
// context.executeKernel(vsitePositionKernel, numVsites);
//
}
}
//
//
void CudaIntegrationUtilities::distributeForcesFromVirtualSites() {
void
CudaIntegrationUtilities
::
distributeForcesFromVirtualSites
()
{
// if (numVsites > 0) {
// if (numVsites > 0) {
// vsiteForceKernel.setArg<cl::Buffer>(1, context.getForce().getDeviceBuffer());
// vsiteForceKernel.setArg<cl::Buffer>(1, context.getForce().getDeviceBuffer());
// context.executeKernel(vsiteForceKernel, numVsites);
// context.executeKernel(vsiteForceKernel, numVsites);
// }
// }
//
}
}
void
CudaIntegrationUtilities
::
initRandomNumberGenerator
(
unsigned
int
randomNumberSeed
)
{
void
CudaIntegrationUtilities
::
initRandomNumberGenerator
(
unsigned
int
randomNumberSeed
)
{
if
(
random
!=
NULL
)
{
if
(
random
!=
NULL
)
{
...
...
platforms/cuda2/src/CudaKernelFactory.cpp
View file @
6e434a02
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
* -------------------------------------------------------------------------- */
* -------------------------------------------------------------------------- */
#include "CudaKernelFactory.h"
#include "CudaKernelFactory.h"
#include "CudaKernels.h"
//#include "CudaParallelKernels.h"
//#include "CudaParallelKernels.h"
#include "CudaPlatform.h"
#include "CudaPlatform.h"
#include "openmm/internal/ContextImpl.h"
#include "openmm/internal/ContextImpl.h"
...
@@ -66,64 +67,64 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
...
@@ -66,64 +67,64 @@ KernelImpl* CudaKernelFactory::createKernelImpl(std::string name, const Platform
// if (name == CalcCustomCompoundBondForceKernel::Name())
// if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
// return new CudaParallelCalcCustomCompoundBondForceKernel(name, platform, data, context.getSystem());
// }
// }
//
CudaContext& c
l
= *data.contexts[0];
CudaContext
&
c
u
=
*
data
.
contexts
[
0
];
//
if (name == CalcForcesAndEnergyKernel::Name())
if
(
name
==
CalcForcesAndEnergyKernel
::
Name
())
//
return new CudaCalcForcesAndEnergyKernel(name, platform, c
l
);
return
new
CudaCalcForcesAndEnergyKernel
(
name
,
platform
,
c
u
);
//
if (name == UpdateStateDataKernel::Name())
if
(
name
==
UpdateStateDataKernel
::
Name
())
//
return new CudaUpdateStateDataKernel(name, platform, c
l
);
return
new
CudaUpdateStateDataKernel
(
name
,
platform
,
c
u
);
//
if (name == ApplyConstraintsKernel::Name())
if
(
name
==
ApplyConstraintsKernel
::
Name
())
//
return new CudaApplyConstraintsKernel(name, platform, c
l
);
return
new
CudaApplyConstraintsKernel
(
name
,
platform
,
c
u
);
//
if (name == VirtualSitesKernel::Name())
if
(
name
==
VirtualSitesKernel
::
Name
())
//
return new CudaVirtualSitesKernel(name, platform, c
l
);
return
new
CudaVirtualSitesKernel
(
name
,
platform
,
c
u
);
//
if (name == CalcHarmonicBondForceKernel::Name())
if
(
name
==
CalcHarmonicBondForceKernel
::
Name
())
//
return new CudaCalcHarmonicBondForceKernel(name, platform, c
l
, context.getSystem());
return
new
CudaCalcHarmonicBondForceKernel
(
name
,
platform
,
c
u
,
context
.
getSystem
());
// if (name == CalcCustomBondForceKernel::Name())
// if (name == CalcCustomBondForceKernel::Name())
// return new CudaCalcCustomBondForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomBondForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcHarmonicAngleForceKernel::Name())
// if (name == CalcHarmonicAngleForceKernel::Name())
// return new CudaCalcHarmonicAngleForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcHarmonicAngleForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcCustomAngleForceKernel::Name())
// if (name == CalcCustomAngleForceKernel::Name())
// return new CudaCalcCustomAngleForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomAngleForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcPeriodicTorsionForceKernel::Name())
// if (name == CalcPeriodicTorsionForceKernel::Name())
// return new CudaCalcPeriodicTorsionForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcPeriodicTorsionForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcRBTorsionForceKernel::Name())
// if (name == CalcRBTorsionForceKernel::Name())
// return new CudaCalcRBTorsionForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcRBTorsionForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcCMAPTorsionForceKernel::Name())
// if (name == CalcCMAPTorsionForceKernel::Name())
// return new CudaCalcCMAPTorsionForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCMAPTorsionForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcCustomTorsionForceKernel::Name())
// if (name == CalcCustomTorsionForceKernel::Name())
// return new CudaCalcCustomTorsionForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomTorsionForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcNonbondedForceKernel::Name())
// if (name == CalcNonbondedForceKernel::Name())
// return new CudaCalcNonbondedForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcNonbondedForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcCustomNonbondedForceKernel::Name())
// if (name == CalcCustomNonbondedForceKernel::Name())
// return new CudaCalcCustomNonbondedForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomNonbondedForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcGBSAOBCForceKernel::Name())
// if (name == CalcGBSAOBCForceKernel::Name())
// return new CudaCalcGBSAOBCForceKernel(name, platform, c
l
);
// return new CudaCalcGBSAOBCForceKernel(name, platform, c
u
);
// if (name == CalcCustomGBForceKernel::Name())
// if (name == CalcCustomGBForceKernel::Name())
// return new CudaCalcCustomGBForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomGBForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcCustomExternalForceKernel::Name())
// if (name == CalcCustomExternalForceKernel::Name())
// return new CudaCalcCustomExternalForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomExternalForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcCustomHbondForceKernel::Name())
// if (name == CalcCustomHbondForceKernel::Name())
// return new CudaCalcCustomHbondForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomHbondForceKernel(name, platform, c
u
, context.getSystem());
// if (name == CalcCustomCompoundBondForceKernel::Name())
// if (name == CalcCustomCompoundBondForceKernel::Name())
// return new CudaCalcCustomCompoundBondForceKernel(name, platform, c
l
, context.getSystem());
// return new CudaCalcCustomCompoundBondForceKernel(name, platform, c
u
, context.getSystem());
//
if (name == IntegrateVerletStepKernel::Name())
if
(
name
==
IntegrateVerletStepKernel
::
Name
())
//
return new CudaIntegrateVerletStepKernel(name, platform, c
l
);
return
new
CudaIntegrateVerletStepKernel
(
name
,
platform
,
c
u
);
// if (name == IntegrateLangevinStepKernel::Name())
// if (name == IntegrateLangevinStepKernel::Name())
// return new CudaIntegrateLangevinStepKernel(name, platform, c
l
);
// return new CudaIntegrateLangevinStepKernel(name, platform, c
u
);
// if (name == IntegrateBrownianStepKernel::Name())
// if (name == IntegrateBrownianStepKernel::Name())
// return new CudaIntegrateBrownianStepKernel(name, platform, c
l
);
// return new CudaIntegrateBrownianStepKernel(name, platform, c
u
);
// if (name == IntegrateVariableVerletStepKernel::Name())
// if (name == IntegrateVariableVerletStepKernel::Name())
// return new CudaIntegrateVariableVerletStepKernel(name, platform, c
l
);
// return new CudaIntegrateVariableVerletStepKernel(name, platform, c
u
);
// if (name == IntegrateVariableLangevinStepKernel::Name())
// if (name == IntegrateVariableLangevinStepKernel::Name())
// return new CudaIntegrateVariableLangevinStepKernel(name, platform, c
l
);
// return new CudaIntegrateVariableLangevinStepKernel(name, platform, c
u
);
// if (name == IntegrateCustomStepKernel::Name())
// if (name == IntegrateCustomStepKernel::Name())
// return new CudaIntegrateCustomStepKernel(name, platform, c
l
);
// return new CudaIntegrateCustomStepKernel(name, platform, c
u
);
// if (name == ApplyAndersenThermostatKernel::Name())
// if (name == ApplyAndersenThermostatKernel::Name())
// return new CudaApplyAndersenThermostatKernel(name, platform, c
l
);
// return new CudaApplyAndersenThermostatKernel(name, platform, c
u
);
// if (name == ApplyMonteCarloBarostatKernel::Name())
// if (name == ApplyMonteCarloBarostatKernel::Name())
// return new CudaApplyMonteCarloBarostatKernel(name, platform, c
l
);
// return new CudaApplyMonteCarloBarostatKernel(name, platform, c
u
);
//
if (name == CalcKineticEnergyKernel::Name())
if
(
name
==
CalcKineticEnergyKernel
::
Name
())
//
return new CudaCalcKineticEnergyKernel(name, platform, c
l
);
return
new
CudaCalcKineticEnergyKernel
(
name
,
platform
,
c
u
);
// if (name == RemoveCMMotionKernel::Name())
// if (name == RemoveCMMotionKernel::Name())
// return new CudaRemoveCMMotionKernel(name, platform, c
l
);
// return new CudaRemoveCMMotionKernel(name, platform, c
u
);
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
throw
OpenMMException
((
std
::
string
(
"Tried to create kernel with illegal kernel name '"
)
+
name
+
"'"
).
c_str
());
}
}
platforms/cuda2/src/CudaKernels.cpp
0 → 100644
View file @
6e434a02
This diff is collapsed.
Click to expand it.
platforms/cuda2/src/CudaKernels.h
View file @
6e434a02
This diff is collapsed.
Click to expand it.
platforms/cuda2/src/kernels/bondForce.cu
0 → 100644
View file @
6e434a02
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real
r
=
SQRT
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
COMPUTE_FORCE
dEdR
=
(
r
>
0
)
?
(
dEdR
/
r
)
:
0
;
delta
*=
dEdR
;
real3
force1
=
delta
;
real3
force2
=
-
delta
;
platforms/cuda2/src/kernels/harmonicBondForce.cu
0 → 100644
View file @
6e434a02
float2
bondParams
=
PARAMS
[
index
];
real
deltaIdeal
=
r
-
bondParams
.
x
;
energy
+=
0.5
f
*
bondParams
.
y
*
deltaIdeal
*
deltaIdeal
;
real
dEdR
=
bondParams
.
y
*
deltaIdeal
;
platforms/cuda2/src/kernels/vectorOps.cu
View file @
6e434a02
...
@@ -42,7 +42,7 @@ inline __device__ double4 make_double4(double a) {
...
@@ -42,7 +42,7 @@ inline __device__ double4 make_double4(double a) {
// Negate a vector.
// Negate a vector.
inline
__device__
int2
operator
*
(
int2
a
)
{
inline
__device__
int2
operator
-
(
int2
a
)
{
return
make_int2
(
-
a
.
x
,
-
a
.
y
);
return
make_int2
(
-
a
.
x
,
-
a
.
y
);
}
}
...
@@ -455,3 +455,41 @@ inline __device__ double3 operator*(double a, double3 b) {
...
@@ -455,3 +455,41 @@ inline __device__ double3 operator*(double a, double3 b) {
inline
__device__
double4
operator
*
(
double
a
,
double4
b
)
{
inline
__device__
double4
operator
*
(
double
a
,
double4
b
)
{
return
make_double4
(
a
*
b
.
x
,
a
*
b
.
y
,
a
*
b
.
z
,
a
*
b
.
w
);
return
make_double4
(
a
*
b
.
x
,
a
*
b
.
y
,
a
*
b
.
z
,
a
*
b
.
w
);
}
}
// *= operator (multiply vector by constant)
inline
__device__
void
operator
*=
(
int2
&
a
,
int
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
}
inline
__device__
void
operator
*=
(
int3
&
a
,
int
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
a
.
z
*=
b
;
}
inline
__device__
void
operator
*=
(
int4
&
a
,
int
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
a
.
z
*=
b
;
a
.
w
*=
b
;
}
inline
__device__
void
operator
*=
(
float2
&
a
,
float
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
}
inline
__device__
void
operator
*=
(
float3
&
a
,
float
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
a
.
z
*=
b
;
}
inline
__device__
void
operator
*=
(
float4
&
a
,
float
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
a
.
z
*=
b
;
a
.
w
*=
b
;
}
inline
__device__
void
operator
*=
(
double2
&
a
,
double
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
}
inline
__device__
void
operator
*=
(
double3
&
a
,
double
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
a
.
z
*=
b
;
}
inline
__device__
void
operator
*=
(
double4
&
a
,
double
b
)
{
a
.
x
*=
b
;
a
.
y
*=
b
;
a
.
z
*=
b
;
a
.
w
*=
b
;
}
platforms/cuda2/src/kernels/verlet.cu
0 → 100644
View file @
6e434a02
/**
* Perform the first step of Verlet integration.
*/
extern
"C"
__global__
void
integrateVerletPart1
(
const
real2
*
__restrict__
dt
,
const
real4
*
__restrict__
posq
,
real4
*
__restrict__
velm
,
const
long
long
*
__restrict__
force
,
real4
*
__restrict__
posDelta
)
{
const
real2
stepSize
=
dt
[
0
];
const
real
dtPos
=
stepSize
.
y
;
const
real
dtVel
=
0.5
f
*
(
stepSize
.
x
+
stepSize
.
y
);
const
real
scale
=
dtVel
/
(
real
)
0xFFFFFFFF
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
real4
pos
=
posq
[
index
];
velocity
.
x
+=
scale
*
force
[
index
]
*
velocity
.
w
;
velocity
.
y
+=
scale
*
force
[
index
+
PADDED_NUM_ATOMS
]
*
velocity
.
w
;
velocity
.
z
+=
scale
*
force
[
index
+
PADDED_NUM_ATOMS
*
2
]
*
velocity
.
w
;
pos
.
x
=
velocity
.
x
*
dtPos
;
pos
.
y
=
velocity
.
y
*
dtPos
;
pos
.
z
=
velocity
.
z
*
dtPos
;
posDelta
[
index
]
=
pos
;
velm
[
index
]
=
velocity
;
}
}
}
/**
* Perform the second step of Verlet integration.
*/
extern
"C"
__global__
void
integrateVerletPart2
(
real2
*
__restrict__
dt
,
real4
*
__restrict__
posq
,
real4
*
__restrict__
velm
,
const
real4
*
__restrict__
posDelta
)
{
real2
stepSize
=
dt
[
0
];
double
oneOverDt
=
1.0
/
stepSize
.
y
;
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
==
0
)
dt
[
0
].
x
=
stepSize
.
y
;
for
(;
index
<
NUM_ATOMS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
velocity
=
velm
[
index
];
if
(
velocity
.
w
!=
0.0
)
{
real4
pos
=
posq
[
index
];
real4
delta
=
posDelta
[
index
];
pos
.
x
+=
delta
.
x
;
pos
.
y
+=
delta
.
y
;
pos
.
z
+=
delta
.
z
;
velocity
=
make_real4
((
real
)
(
delta
.
x
*
oneOverDt
),
(
real
)
(
delta
.
y
*
oneOverDt
),
(
real
)
(
delta
.
z
*
oneOverDt
),
velocity
.
w
);
posq
[
index
]
=
pos
;
velm
[
index
]
=
velocity
;
}
}
}
/**
* Select the step size to use for the next step.
*/
//
//extern "C" __global__ void selectVerletStepSize(real maxStepSize, real errorTol, real2* __restrict__ dt, const real4* __restrict__ velm, const real4* __restrict__ force, __local real* __restrict__ error) {
// // Calculate the error.
//
// real err = 0.0f;
// for (int index = threadIdx.x; index < NUM_ATOMS; index += blockDim.x*gridDim.x) {
// real4 f = force[index];
// real invMass = velm[index].w;
// err += (f.x*f.x + f.y*f.y + f.z*f.z)*invMass;
// }
// error[threadIdx.x] = err;
// __syncthreads;
//
// // Sum the errors from all threads.
//
// for (unsigned int offset = 1; offset < get_local_size(0); offset *= 2) {
// if (threadIdx.x+offset < get_local_size(0) && (threadIdx.x&(2*offset-1)) == 0)
// error[threadIdx.x] += error[threadIdx.x+offset];
// __syncthreads;
// }
// if (threadIdx.x == 0) {
// real totalError = sqrt(error[0]/(NUM_ATOMS*3));
// real newStepSize = sqrt(errorTol/totalError);
// real oldStepSize = dt[0].y;
// if (oldStepSize > 0.0f)
// newStepSize = min(newStepSize, oldStepSize*2.0f); // For safety, limit how quickly dt can increase.
// if (newStepSize > oldStepSize && newStepSize < 1.1f*oldStepSize)
// newStepSize = oldStepSize; // Keeping dt constant between steps improves the behavior of the integrator.
// if (newStepSize > maxStepSize)
// newStepSize = maxStepSize;
// dt[0].y = newStepSize;
// }
//}
platforms/cuda2/tests/TestCudaHarmonicBondForce.cpp
0 → 100644
View file @
6e434a02
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of HarmonicBondForce.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include <iostream>
#include <map>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
void
testBonds
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
VerletIntegrator
integrator
(
0.01
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
forceField
->
addBond
(
0
,
1
,
1.5
,
0.8
);
forceField
->
addBond
(
1
,
2
,
1.2
,
0.7
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
3
);
positions
[
0
]
=
Vec3
(
0
,
2
,
0
);
positions
[
1
]
=
Vec3
(
0
,
0
,
0
);
positions
[
2
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
State
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
{
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
0.8
*
0.5
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.7
*
0.2
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
forces
[
0
][
0
]
-
forces
[
2
][
0
],
-
forces
[
0
][
1
]
-
forces
[
2
][
1
],
-
forces
[
0
][
2
]
-
forces
[
2
][
2
]),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
0.5
*
0.8
*
0.5
*
0.5
+
0.5
*
0.7
*
0.2
*
0.2
,
state
.
getPotentialEnergy
(),
TOL
);
}
// Try changing the bond parameters and make sure it's still correct.
forceField
->
setBondParameters
(
0
,
0
,
1
,
1.6
,
0.9
);
forceField
->
setBondParameters
(
1
,
1
,
2
,
1.3
,
0.8
);
forceField
->
updateParametersInContext
(
context
);
state
=
context
.
getState
(
State
::
Forces
|
State
::
Energy
);
{
const
vector
<
Vec3
>&
forces
=
state
.
getForces
();
ASSERT_EQUAL_VEC
(
Vec3
(
0
,
-
0.9
*
0.4
,
0
),
forces
[
0
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.8
*
0.3
,
0
,
0
),
forces
[
2
],
TOL
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
forces
[
0
][
0
]
-
forces
[
2
][
0
],
-
forces
[
0
][
1
]
-
forces
[
2
][
1
],
-
forces
[
0
][
2
]
-
forces
[
2
][
2
]),
forces
[
1
],
TOL
);
ASSERT_EQUAL_TOL
(
0.5
*
0.9
*
0.4
*
0.4
+
0.5
*
0.8
*
0.3
*
0.3
,
state
.
getPotentialEnergy
(),
TOL
);
}
}
void
testParallelComputation
()
{
CudaPlatform
platform
;
System
system
;
const
int
numParticles
=
200
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
system
.
addParticle
(
1.0
);
HarmonicBondForce
*
force
=
new
HarmonicBondForce
();
for
(
int
i
=
1
;
i
<
numParticles
;
i
++
)
force
->
addBond
(
i
-
1
,
i
,
1.1
,
i
);
system
.
addForce
(
force
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
positions
[
i
]
=
Vec3
(
i
,
0
,
0
);
VerletIntegrator
integrator1
(
0.01
);
Context
context1
(
system
,
integrator1
,
platform
);
context1
.
setPositions
(
positions
);
State
state1
=
context1
.
getState
(
State
::
Forces
|
State
::
Energy
);
VerletIntegrator
integrator2
(
0.01
);
string
deviceIndex
=
platform
.
getPropertyValue
(
context1
,
CudaPlatform
::
CudaDeviceIndex
());
map
<
string
,
string
>
props
;
props
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
+
","
+
deviceIndex
;
Context
context2
(
system
,
integrator2
,
platform
,
props
);
context2
.
setPositions
(
positions
);
State
state2
=
context2
.
getState
(
State
::
Forces
|
State
::
Energy
);
ASSERT_EQUAL_TOL
(
state1
.
getPotentialEnergy
(),
state2
.
getPotentialEnergy
(),
1e-5
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
ASSERT_EQUAL_VEC
(
state1
.
getForces
()[
i
],
state2
.
getForces
()[
i
],
1e-5
);
}
int
main
()
{
try
{
testBonds
();
// testParallelComputation();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
platforms/cuda2/tests/TestCudaVerletIntegrator.cpp
0 → 100644
View file @
6e434a02
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of VerletIntegrator.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "openmm/Context.h"
#include "CudaPlatform.h"
#include "openmm/HarmonicBondForce.h"
#include "openmm/NonbondedForce.h"
#include "openmm/System.h"
#include "openmm/VerletIntegrator.h"
#include "../src/SimTKUtilities/SimTKOpenMMRealType.h"
#include "sfmt/SFMT.h"
#include <iostream>
#include <vector>
using
namespace
OpenMM
;
using
namespace
std
;
const
double
TOL
=
1e-5
;
/**
* Compute the energy of a state, taking into account the half step offset between
* positions and velocities.
*/
static
double
computeEnergy
(
const
State
&
state
,
const
System
&
system
,
double
dt
)
{
const
vector
<
Vec3
>&
v
=
state
.
getVelocities
();
const
vector
<
Vec3
>&
f
=
state
.
getForces
();
double
energy
=
0.0
;
for
(
int
i
=
0
;
i
<
system
.
getNumParticles
();
i
++
)
{
double
m
=
system
.
getParticleMass
(
i
);
Vec3
vel
=
v
[
i
]
+
f
[
i
]
*
(
0.5
*
dt
/
m
);
energy
+=
0.5
*
m
*
vel
.
dot
(
vel
);
}
return
energy
+
state
.
getPotentialEnergy
();
}
void
testSingleBond
()
{
CudaPlatform
platform
;
System
system
;
system
.
addParticle
(
2.0
);
system
.
addParticle
(
2.0
);
VerletIntegrator
integrator
(
0.01
);
HarmonicBondForce
*
forceField
=
new
HarmonicBondForce
();
forceField
->
addBond
(
0
,
1
,
1.5
,
1
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
2
);
positions
[
0
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
context
.
setPositions
(
positions
);
// This is simply a harmonic oscillator, so compare it to the analytical solution.
const
double
freq
=
1.0
;;
State
state
=
context
.
getState
(
State
::
Energy
);
const
double
initialEnergy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
state
=
context
.
getState
(
State
::
Positions
|
State
::
Velocities
|
State
::
Energy
);
double
time
=
state
.
getTime
();
double
expectedDist
=
1.5
+
0.5
*
std
::
cos
(
freq
*
time
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
0
],
0.02
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedDist
,
0
,
0
),
state
.
getPositions
()[
1
],
0.02
);
double
expectedSpeed
=
-
0.5
*
freq
*
std
::
sin
(
freq
*
time
);
ASSERT_EQUAL_VEC
(
Vec3
(
-
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
0
],
0.02
);
ASSERT_EQUAL_VEC
(
Vec3
(
0.5
*
expectedSpeed
,
0
,
0
),
state
.
getVelocities
()[
1
],
0.02
);
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
integrator
.
step
(
1
);
}
ASSERT_EQUAL_TOL
(
10.0
,
context
.
getState
(
0
).
getTime
(),
1e-5
);
}
void
testConstraints
()
{
const
int
numParticles
=
8
;
const
int
numConstraints
=
5
;
const
double
temp
=
100.0
;
CudaPlatform
platform
;
System
system
;
VerletIntegrator
integrator
(
0.001
);
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
10.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
system
.
addConstraint
(
0
,
1
,
1.0
);
system
.
addConstraint
(
1
,
2
,
1.0
);
system
.
addConstraint
(
2
,
3
,
1.0
);
system
.
addConstraint
(
4
,
5
,
1.0
);
system
.
addConstraint
(
6
,
7
,
1.0
);
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
positions
[
i
]
=
Vec3
(
i
/
2
,
(
i
+
1
)
/
2
,
0
);
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
}
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
double
initialEnergy
=
0.0
;
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Energy
|
State
::
Velocities
|
State
::
Forces
);
for
(
int
j
=
0
;
j
<
numConstraints
;
++
j
)
{
int
particle1
,
particle2
;
double
distance
;
system
.
getConstraintParameters
(
j
,
particle1
,
particle2
,
distance
);
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
1e-4
);
}
double
energy
=
computeEnergy
(
state
,
system
,
integrator
.
getStepSize
());
if
(
i
==
1
)
initialEnergy
=
energy
;
else
if
(
i
>
1
)
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
integrator
.
step
(
1
);
}
}
void
testConstrainedClusters
()
{
const
int
numParticles
=
7
;
const
double
temp
=
500.0
;
CudaPlatform
platform
;
System
system
;
VerletIntegrator
integrator
(
0.001
);
integrator
.
setConstraintTolerance
(
1e-5
);
NonbondedForce
*
forceField
=
new
NonbondedForce
();
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
{
system
.
addParticle
(
i
>
1
?
1.0
:
10.0
);
forceField
->
addParticle
((
i
%
2
==
0
?
0.2
:
-
0.2
),
0.5
,
5.0
);
}
system
.
addConstraint
(
0
,
1
,
1.0
);
system
.
addConstraint
(
0
,
2
,
1.0
);
system
.
addConstraint
(
0
,
3
,
1.0
);
system
.
addConstraint
(
0
,
4
,
1.0
);
system
.
addConstraint
(
1
,
5
,
1.0
);
system
.
addConstraint
(
1
,
6
,
1.0
);
system
.
addConstraint
(
2
,
3
,
sqrt
(
2.0
));
system
.
addConstraint
(
2
,
4
,
sqrt
(
2.0
));
system
.
addConstraint
(
3
,
4
,
sqrt
(
2.0
));
system
.
addConstraint
(
5
,
6
,
sqrt
(
2.0
));
system
.
addForce
(
forceField
);
Context
context
(
system
,
integrator
,
platform
);
vector
<
Vec3
>
positions
(
numParticles
);
positions
[
0
]
=
Vec3
(
0
,
0
,
0
);
positions
[
1
]
=
Vec3
(
1
,
0
,
0
);
positions
[
2
]
=
Vec3
(
-
1
,
0
,
0
);
positions
[
3
]
=
Vec3
(
0
,
1
,
0
);
positions
[
4
]
=
Vec3
(
0
,
0
,
1
);
positions
[
5
]
=
Vec3
(
2
,
0
,
0
);
positions
[
6
]
=
Vec3
(
1
,
1
,
0
);
vector
<
Vec3
>
velocities
(
numParticles
);
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
++
i
)
velocities
[
i
]
=
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
);
context
.
setPositions
(
positions
);
context
.
setVelocities
(
velocities
);
// Simulate it and see whether the constraints remain satisfied.
double
initialEnergy
=
0.0
;
for
(
int
i
=
0
;
i
<
1000
;
++
i
)
{
State
state
=
context
.
getState
(
State
::
Positions
|
State
::
Energy
|
State
::
Velocities
|
State
::
Forces
);
for
(
int
j
=
0
;
j
<
system
.
getNumConstraints
();
++
j
)
{
int
particle1
,
particle2
;
double
distance
;
system
.
getConstraintParameters
(
j
,
particle1
,
particle2
,
distance
);
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
2e-5
);
}
double
energy
=
computeEnergy
(
state
,
system
,
integrator
.
getStepSize
());
if
(
i
==
1
)
initialEnergy
=
energy
;
else
if
(
i
>
1
)
ASSERT_EQUAL_TOL
(
initialEnergy
,
energy
,
0.01
);
integrator
.
step
(
1
);
}
}
int
main
()
{
try
{
testSingleBond
();
// testConstraints();
// testConstrainedClusters();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment