Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
1f0ec7b5
Commit
1f0ec7b5
authored
Jun 07, 2012
by
Peter Eastman
Browse files
Continuing to implement new CUDA platform
parent
99cebd08
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1625 additions
and
102 deletions
+1625
-102
platforms/cuda2/src/CudaBondedUtilities.cpp
platforms/cuda2/src/CudaBondedUtilities.cpp
+173
-0
platforms/cuda2/src/CudaBondedUtilities.h
platforms/cuda2/src/CudaBondedUtilities.h
+138
-0
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+38
-27
platforms/cuda2/src/CudaContext.h
platforms/cuda2/src/CudaContext.h
+16
-22
platforms/cuda2/src/CudaExpressionUtilities.cpp
platforms/cuda2/src/CudaExpressionUtilities.cpp
+21
-34
platforms/cuda2/src/CudaExpressionUtilities.h
platforms/cuda2/src/CudaExpressionUtilities.h
+14
-19
platforms/cuda2/src/CudaIntegrationUtilities.cpp
platforms/cuda2/src/CudaIntegrationUtilities.cpp
+856
-0
platforms/cuda2/src/CudaIntegrationUtilities.h
platforms/cuda2/src/CudaIntegrationUtilities.h
+153
-0
platforms/cuda2/src/kernels/random.cu
platforms/cuda2/src/kernels/random.cu
+118
-0
platforms/cuda2/tests/TestCudaRandom.cpp
platforms/cuda2/tests/TestCudaRandom.cpp
+98
-0
No files found.
platforms/cuda2/src/CudaBondedUtilities.cpp
0 → 100644
View file @
1f0ec7b5
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaBondedUtilities.h"
#include "CudaExpressionUtilities.h"
#include "openmm/OpenMMException.h"
#include "CudaNonbondedUtilities.h"
#include <iostream>
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Create a CudaBondedUtilities bound to a context.  Nothing is allocated or compiled here;
 * the index arrays and the kernel are created later by initialize().
 *
 * @param context    the CudaContext used to generate and compile the bonded kernel
 */
CudaBondedUtilities::CudaBondedUtilities(CudaContext& context) : context(context), kernel(NULL), numForceBuffers(0), maxBonds(0),
        hasInitializedKernels(false) {
    // kernel is explicitly nulled because initialize() returns before assigning it when no
    // interaction has been registered; without this the handle would hold an indeterminate value.
}
/**
 * Delete every CudaArray of atom indices that initialize() created.
 */
CudaBondedUtilities::~CudaBondedUtilities() {
    // atomIndices is a list (one entry per interaction) of lists of owned arrays.
    for (vector<vector<CudaArray*> >::iterator perForce = atomIndices.begin(); perForce != atomIndices.end(); ++perForce) {
        for (vector<CudaArray*>::iterator array = perForce->begin(); array != perForce->end(); ++array)
            delete *array;
    }
}
/**
 * Register a bonded interaction to be evaluated by the generated kernel.
 *
 * @param atoms      one entry per bond, each listing the atoms involved in that bond
 * @param source     the code that evaluates a single bond
 * @param group      the force group the interaction belongs to
 */
void CudaBondedUtilities::addInteraction(const vector<vector<int> >& atoms, const string& source, int group) {
    // An interaction without any bonds is silently ignored: nothing is recorded for it.
    if (atoms.empty())
        return;
    forceAtoms.push_back(atoms);
    forceSource.push_back(source);
    forceGroup.push_back(group);
}
/**
 * Register a block of device memory that should be passed to the generated kernel.
 *
 * @param data       the device memory to pass
 * @param type       the element type stored in that memory, as it should appear in the kernel signature
 * @return the name under which interaction code can refer to the argument
 */
std::string CudaBondedUtilities::addArgument(CUdeviceptr data, const string& type) {
    arguments.push_back(data);
    argTypes.push_back(type);
    // The size is read after the push, so the first argument is named "customArg1".
    string name("customArg");
    name += context.intToString(arguments.size());
    return name;
}
/**
 * Record a piece of source code that initialize() emits ahead of the kernel definition.
 *
 * @param source     the code to emit
 */
void CudaBondedUtilities::addPrefixCode(const string& source) {
    // Appended at the end so that prefix blocks are emitted in the order they were added.
    prefixCode.insert(prefixCode.end(), source);
}
void
CudaBondedUtilities
::
initialize
(
const
System
&
system
)
{
int
numForces
=
forceAtoms
.
size
();
if
(
numForces
==
0
)
return
;
// Build the lists of atom indices.
atomIndices
.
resize
(
numForces
);
for
(
int
i
=
0
;
i
<
numForces
;
i
++
)
{
int
numBonds
=
forceAtoms
[
i
].
size
();
int
numAtoms
=
forceAtoms
[
i
][
0
].
size
();
int
startAtom
=
0
;
while
(
startAtom
<
numAtoms
)
{
int
width
=
max
(
numAtoms
-
startAtom
,
4
);
if
(
width
==
3
)
width
=
4
;
vector
<
unsigned
int
>
indexVec
(
width
*
numBonds
);
for
(
int
bond
=
0
;
bond
<
numBonds
;
bond
++
)
{
for
(
int
atom
=
0
;
atom
<
width
;
atom
++
)
indexVec
[
bond
*
width
+
atom
]
=
forceAtoms
[
i
][
bond
][
startAtom
+
atom
];
}
CudaArray
*
indices
=
CudaArray
::
create
<
unsigned
int
>
(
indexVec
.
size
(),
"bondedIndices"
);
indices
->
upload
(
indexVec
);
atomIndices
[
i
].
push_back
(
indices
);
startAtom
+=
width
;
}
}
// Create the kernel.
stringstream
s
;
for
(
int
i
=
0
;
i
<
(
int
)
prefixCode
.
size
();
i
++
)
s
<<
prefixCode
[
i
];
s
<<
"extern
\"
C
\"
__global__ void computeBondedForces(long* __restrict__ forceBuffer, real* __restrict__ energyBuffer, const real4* __restrict__ posq, int groups"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
atomIndices
[
force
].
size
();
i
++
)
{
int
indexWidth
=
atomIndices
[
force
][
i
]
->
getElementSize
()
/
4
;
string
indexType
=
"unsigned int"
+
(
indexWidth
==
1
?
""
:
context
.
intToString
(
indexWidth
));
s
<<
", const "
<<
indexType
<<
"* __restrict__ atomIndices"
<<
force
<<
"_"
<<
i
;
}
}
for
(
int
i
=
0
;
i
<
(
int
)
arguments
.
size
();
i
++
)
s
<<
", "
<<
argTypes
[
i
]
<<
"* customArg"
<<
(
i
+
1
);
s
<<
") {
\n
"
;
s
<<
"real energy = 0;
\n
"
;
for
(
int
force
=
0
;
force
<
numForces
;
force
++
)
s
<<
createForceSource
(
force
,
forceAtoms
[
force
].
size
(),
forceAtoms
[
force
][
0
].
size
(),
forceGroup
[
force
],
forceSource
[
force
]);
s
<<
"energyBuffer[blockIdx.x*blockDim.x+threadIdx.x] += energy;
\n
"
;
s
<<
"}
\n
"
;
map
<
string
,
string
>
defines
;
defines
[
"PADDED_NUM_ATOMS"
]
=
context
.
intToString
(
context
.
getPaddedNumAtoms
());
CUmodule
module
=
context
.
createModule
(
s
.
str
(),
defines
);
kernel
=
context
.
getKernel
(
module
,
"computeBondedForces"
);
forceAtoms
.
clear
();
forceSource
.
clear
();
}
/**
 * Generate the section of the kernel that evaluates one interaction: a loop over its bonds
 * that loads the atom indices and positions, runs the caller supplied code, and accumulates
 * the forces that code produced.  Also updates maxBonds.
 *
 * @param forceIndex     the index of the interaction, used to select its atomIndices arrays
 * @param numBonds       the number of bonds in the interaction
 * @param numAtoms       the number of atoms in each bond
 * @param group          the force group of the interaction
 * @param computeForce   the code that evaluates a single bond; it must define the variables
 *                       force1, force2, ... (one per atom)
 * @return the generated source
 */
string CudaBondedUtilities::createForceSource(int forceIndex, int numBonds, int numAtoms, int group, const string& computeForce) {
    maxBonds = max(maxBonds, numBonds);
    string suffix1[] = {""};
    string suffix4[] = {".x", ".y", ".z", ".w"};
    string* suffix;
    stringstream s;

    // The whole loop is skipped unless the interaction's group is among the requested ones.

    s<<"if ((groups&"<<(1<<group)<<") != 0)\n";
    s<<"for (unsigned int index = blockIdx.x*blockDim.x+threadIdx.x; index < "<<numBonds<<"; index += blockDim.x*gridDim.x) {\n";

    // Load the atom indices and positions.  Atoms are numbered from 1 across all index arrays
    // of this interaction, so the number is derived from startAtom rather than from the
    // position inside the current array.

    int startAtom = 0;
    for (int i = 0; i < (int) atomIndices[forceIndex].size(); i++) {
        int indexWidth = atomIndices[forceIndex][i]->getElementSize()/4;
        suffix = (indexWidth == 1 ? suffix1 : suffix4);
        // CUDA names its unsigned vector types uint2/uint4; "unsigned int4" is not a type.
        string indexType = (indexWidth == 1 ? string("unsigned int") : "uint"+context.intToString(indexWidth));
        s<<"    "<<indexType<<" atoms"<<i<<" = atomIndices"<<forceIndex<<"_"<<i<<"[index];\n";
        for (int j = 0; j < indexWidth; j++) {
            int atomNumber = startAtom+j+1;
            s<<"    unsigned int atom"<<atomNumber<<" = atoms"<<i<<suffix[j]<<";\n";
            s<<"    real4 pos"<<atomNumber<<" = posq[atom"<<atomNumber<<"];\n";
        }
        startAtom += indexWidth;
    }
    s<<computeForce<<"\n";

    // Accumulate the force on each atom.  The buffer holds all x components, then all y
    // components, then all z components, each section PADDED_NUM_ATOMS long.
    // NOTE(review): CUDA's atomicAdd() has no overload for long; confirm that this cast and the
    // forceBuffer element type declared in initialize() agree with an available overload.

    for (int i = 0; i < numAtoms; i++) {
        s<<"    atomicAdd(&forceBuffer[atom"<<(i+1)<<"], (long) (force"<<(i+1)<<".x*0xFFFFFFFF));\n";
        s<<"    atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS], (long) (force"<<(i+1)<<".y*0xFFFFFFFF));\n";
        s<<"    atomicAdd(&forceBuffer[atom"<<(i+1)<<"+PADDED_NUM_ATOMS*2], (long) (force"<<(i+1)<<".z*0xFFFFFFFF));\n";
    }
    s<<"}\n";
    return s.str();
}
/**
 * Compute the bonded interactions.
 *
 * This function currently has no effect: its body consists only of the commented out
 * OpenCL implementation below, kept as a reference for the CUDA version still to be written.
 *
 * @param groups    a set of bit flags for which force groups to include (currently unused)
 */
void
CudaBondedUtilities
::
computeInteractions
(
int
groups
)
{
// Reference implementation (OpenCL API, not compiled): bind the kernel arguments once, then
// on every call set the group flags and launch each kernel over maxBonds work items.
// if (!hasInitializedKernels) {
// hasInitializedKernels = true;
// for (int i = 0; i < (int) forceSets.size(); i++) {
// int index = 0;
// cl::Kernel& kernel = kernels[i];
// kernel.setArg<cl::Buffer>(index++, context.getForceBuffers().getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, context.getEnergyBuffer().getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, context.getPosq().getDeviceBuffer());
// index++;
// for (int j = 0; j < (int) forceSets[i].size(); j++) {
// kernel.setArg<cl::Buffer>(index++, atomIndices[forceSets[i][j]]->getDeviceBuffer());
// kernel.setArg<cl::Buffer>(index++, bufferIndices[forceSets[i][j]]->getDeviceBuffer());
// }
// for (int j = 0; j < (int) arguments.size(); j++)
// kernel.setArg<cl::Memory>(index++, *arguments[j]);
// }
// }
// for (int i = 0; i < (int) kernels.size(); i++) {
// kernels[i].setArg<cl_int>(3, groups);
// context.executeKernel(kernels[i], maxBonds);
// }
}
platforms/cuda2/src/CudaBondedUtilities.h
0 → 100644
View file @
1f0ec7b5
#ifndef OPENMM_CUDABONDEDUTILITIES_H_
#define OPENMM_CUDABONDEDUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2011-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include "CudaContext.h"
#include "openmm/System.h"
#include <string>
#include <vector>
namespace
OpenMM
{
/**
* This class provides a generic mechanism for evaluating bonded interactions. You write only
* the source code needed to compute one interaction, and this class takes care of creating
* and executing a complete kernel that loops over bonds, evaluates each one, and accumulates
* the resulting forces and energies. This offers two advantages. First, it simplifies the
* task of writing a new Force. Second, it allows multiple forces to be evaluated by a single
* kernel, which reduces overhead and improves performance.
*
* A "bonded interaction" means an interaction that affects a small, fixed set of particles.
* The interaction energy may depend on the positions of only those particles, and the list of
* particles forming a "bond" may not change with time. Examples of bonded interactions
* include HarmonicBondForce, HarmonicAngleForce, and PeriodicTorsionForce.
*
* To create a bonded interaction, call addInteraction(). You pass to it a block of source
* code for evaluating the interaction. The inputs and outputs for that source code are as
* follows:
*
* <ol>
* <li>The index of the bond being evaluated will have been stored in the unsigned int variable "index".</li>
* <li>The indices of the atoms forming that bond will have been stored in the unsigned int variables "atom1",
* "atom2", ....</li>
* <li>The positions of those atoms will have been stored in the real4 variables "pos1", "pos2", ....</li>
* <li>A real variable called "energy" will exist. Your code should add the potential energy of the
* bond to that variable.</li>
* <li>Your code should define real4 variables called "force1", "force2", ... that contain the force to
* apply to each atom.</li>
* </ol>
*
* As a simple example, the following source code would be used to implement a pairwise interaction of
* the form E=r^2:
*
* <tt><pre>
* real4 delta = pos2-pos1;
* energy += delta.x*delta.x + delta.y*delta.y + delta.z*delta.z;
* real4 force1 = 2.0f*delta;
* real4 force2 = -2.0f*delta;
* </pre></tt>
*
* Interactions will often depend on parameters or other data. Call addArgument() to provide the data
* to this class. It will be passed to the interaction kernel as an argument, and you can refer to it
* from your interaction code.
*/
class
OPENMM_EXPORT
CudaBondedUtilities
{
public:
CudaBondedUtilities
(
CudaContext
&
context
);
~
CudaBondedUtilities
();
/**
* Add a bonded interaction.
*
* @param atoms this should have one entry for each bond, and that entry should contain the list
* of atoms involved in the bond. Every entry must have the same number of atoms.
* @param source the code to evaluate the interaction
* @param group the force group in which the interaction should be calculated
*/
void
addInteraction
(
const
std
::
vector
<
std
::
vector
<
int
>
>&
atoms
,
const
std
::
string
&
source
,
int
group
);
/**
* Add an argument that should be passed to the interaction kernel.
*
* @param data the device memory containing the data to pass
* @param type the data type contained in the memory (e.g. "float4")
* @return the name that will be used for the argument. Any code you pass to addInteraction() should
* refer to it by this name.
*/
std
::
string
addArgument
(
CUdeviceptr
data
,
const
std
::
string
&
type
);
/**
* Add some Cuda code that should be included in the program, before the start of the kernel.
* This can be used, for example, to define functions that will be called by the kernel.
*
* @param source the code to include
*/
void
addPrefixCode
(
const
std
::
string
&
source
);
/**
* Initialize this object in preparation for a simulation.
*/
void
initialize
(
const
System
&
system
);
/**
* Compute the bonded interactions.
*
* @param groups a set of bit flags for which force groups to include
*/
void
computeInteractions
(
int
groups
);
private:
std
::
string
createForceSource
(
int
forceIndex
,
int
numBonds
,
int
numAtoms
,
int
group
,
const
std
::
string
&
computeForce
);
CudaContext
&
context
;
CUfunction
kernel
;
std
::
vector
<
std
::
vector
<
std
::
vector
<
int
>
>
>
forceAtoms
;
std
::
vector
<
std
::
vector
<
int
>
>
indexWidth
;
std
::
vector
<
std
::
string
>
forceSource
;
std
::
vector
<
int
>
forceGroup
;
std
::
vector
<
CUdeviceptr
>
arguments
;
std
::
vector
<
std
::
string
>
argTypes
;
std
::
vector
<
std
::
vector
<
CudaArray
*>
>
atomIndices
;
std
::
vector
<
std
::
string
>
prefixCode
;
int
numForceBuffers
,
maxBonds
;
bool
hasInitializedKernels
;
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDABONDEDUTILITIES_H_*/
platforms/cuda2/src/CudaContext.cpp
View file @
1f0ec7b5
...
@@ -32,7 +32,7 @@
...
@@ -32,7 +32,7 @@
#include "CudaArray.h"
#include "CudaArray.h"
//#include "CudaBondedUtilities.h"
//#include "CudaBondedUtilities.h"
#include "CudaForceInfo.h"
#include "CudaForceInfo.h"
//
#include "CudaIntegrationUtilities.h"
#include "CudaIntegrationUtilities.h"
#include "CudaKernelSources.h"
#include "CudaKernelSources.h"
//#include "CudaNonbondedUtilities.h"
//#include "CudaNonbondedUtilities.h"
#include "hilbert.h"
#include "hilbert.h"
...
@@ -40,6 +40,7 @@
...
@@ -40,6 +40,7 @@
#include "openmm/Platform.h"
#include "openmm/Platform.h"
#include "openmm/System.h"
#include "openmm/System.h"
#include "openmm/VirtualSite.h"
#include "openmm/VirtualSite.h"
#include "CudaExpressionUtilities.h"
#include <algorithm>
#include <algorithm>
#include <cstdlib>
#include <cstdlib>
#include <fstream>
#include <fstream>
...
@@ -66,8 +67,8 @@ bool CudaContext::hasInitializedCuda = false;
...
@@ -66,8 +67,8 @@ bool CudaContext::hasInitializedCuda = false;
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
CudaContext
::
CudaContext
(
const
System
&
system
,
int
deviceIndex
,
bool
useBlockingSync
,
const
string
&
precision
,
const
string
&
compiler
,
const
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
compiler
(
compiler
),
const
string
&
tempDir
,
CudaPlatform
::
PlatformData
&
platformData
)
:
system
(
system
),
compiler
(
compiler
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
time
(
0.0
),
platformData
(
platformData
),
stepCount
(
0
),
computeForceCount
(
0
),
contextIsValid
(
false
),
atomsWereReordered
(
false
),
pinnedBuffer
(
NULL
),
posq
(
NULL
),
velm
(
NULL
),
/*forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL), integration(NULL),
velm
(
NULL
),
/*forceBuffers(NULL), longForceBuffer(NULL), energyBuffer(NULL), atomIndex(NULL),
*/
integration
(
NULL
),
expression
(
NULL
),
bonded(NULL), nonbonded(NULL),*/
thread
(
NULL
)
{
/*
bonded(NULL), nonbonded(NULL),*/
thread
(
NULL
)
{
if
(
!
hasInitializedCuda
)
{
if
(
!
hasInitializedCuda
)
{
CHECK_RESULT2
(
cuInit
(
0
),
"Error initializing CUDA"
);
CHECK_RESULT2
(
cuInit
(
0
),
"Error initializing CUDA"
);
hasInitializedCuda
=
true
;
hasInitializedCuda
=
true
;
...
@@ -143,11 +144,17 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -143,11 +144,17 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
paddedNumAtoms
*
sizeof
(
double4
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
paddedNumAtoms
*
sizeof
(
double4
),
0
));
posq
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"posq"
);
posq
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"velm"
);
velm
=
CudaArray
::
create
<
double4
>
(
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"make_real2"
]
=
"make_double2"
;
compilationDefines
[
"make_real3"
]
=
"make_double3"
;
compilationDefines
[
"make_real4"
]
=
"make_double4"
;
}
}
else
{
else
{
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
paddedNumAtoms
*
sizeof
(
float4
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
(
&
pinnedBuffer
,
paddedNumAtoms
*
sizeof
(
float4
),
0
));
posq
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"posq"
);
posq
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"posq"
);
velm
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"velm"
);
velm
=
CudaArray
::
create
<
float4
>
(
paddedNumAtoms
,
"velm"
);
compilationDefines
[
"make_real2"
]
=
"make_float2"
;
compilationDefines
[
"make_real3"
]
=
"make_float3"
;
compilationDefines
[
"make_real4"
]
=
"make_float4"
;
}
}
posCellOffsets
.
resize
(
paddedNumAtoms
,
make_int4
(
0
,
0
,
0
,
0
));
posCellOffsets
.
resize
(
paddedNumAtoms
,
make_int4
(
0
,
0
,
0
,
0
));
...
@@ -160,8 +167,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -160,8 +167,6 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFourBuffersKernel
=
getKernel
(
utilities
,
"clearFourBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearFiveBuffersKernel
=
getKernel
(
utilities
,
"clearFiveBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
clearSixBuffersKernel
=
getKernel
(
utilities
,
"clearSixBuffers"
);
reduceFloat4Kernel
=
getKernel
(
utilities
,
"reduceFloat4Buffer"
);
reduceForcesKernel
=
getKernel
(
utilities
,
"reduceForces"
);
// Set defines based on the requested precision.
// Set defines based on the requested precision.
...
@@ -170,14 +175,21 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -170,14 +175,21 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
compilationDefines
[
"RECIP"
]
=
useDoublePrecision
?
"1.0/"
:
"1.0f/"
;
compilationDefines
[
"RECIP"
]
=
useDoublePrecision
?
"1.0/"
:
"1.0f/"
;
compilationDefines
[
"EXP"
]
=
useDoublePrecision
?
"exp"
:
"expf"
;
compilationDefines
[
"EXP"
]
=
useDoublePrecision
?
"exp"
:
"expf"
;
compilationDefines
[
"LOG"
]
=
useDoublePrecision
?
"log"
:
"logf"
;
compilationDefines
[
"LOG"
]
=
useDoublePrecision
?
"log"
:
"logf"
;
compilationDefines
[
"COS"
]
=
useDoublePrecision
?
"cos"
:
"cosf"
;
compilationDefines
[
"SIN"
]
=
useDoublePrecision
?
"sin"
:
"sinf"
;
compilationDefines
[
"TAN"
]
=
useDoublePrecision
?
"tan"
:
"tanf"
;
compilationDefines
[
"ACOS"
]
=
useDoublePrecision
?
"acos"
:
"acosf"
;
compilationDefines
[
"ASIN"
]
=
useDoublePrecision
?
"asin"
:
"asinf"
;
compilationDefines
[
"ATAN"
]
=
useDoublePrecision
?
"atan"
:
"atanf"
;
// Create the work thread used for parallelization when running on multiple devices.
// Create the work thread used for parallelization when running on multiple devices.
thread
=
new
WorkThread
();
thread
=
new
WorkThread
();
//
// // Create the integration utilities object.
// Create utilities objects.
//
// integration = new CudaIntegrationUtilities(*this, system);
integration
=
new
CudaIntegrationUtilities
(
*
this
,
system
);
expression
=
new
CudaExpressionUtilities
(
*
this
);
}
}
CudaContext
::~
CudaContext
()
{
CudaContext
::~
CudaContext
()
{
...
@@ -201,8 +213,10 @@ CudaContext::~CudaContext() {
...
@@ -201,8 +213,10 @@ CudaContext::~CudaContext() {
// delete energyBuffer;
// delete energyBuffer;
// if (atomIndex != NULL)
// if (atomIndex != NULL)
// delete atomIndex;
// delete atomIndex;
// if (integration != NULL)
if
(
integration
!=
NULL
)
// delete integration;
delete
integration
;
if
(
expression
!=
NULL
)
delete
expression
;
// if (bonded != NULL)
// if (bonded != NULL)
// delete bonded;
// delete bonded;
// if (nonbonded != NULL)
// if (nonbonded != NULL)
...
@@ -272,6 +286,18 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
...
@@ -272,6 +286,18 @@ CUmodule CudaContext::createModule(const string source, const map<string, string
}
}
if
(
!
compilationDefines
.
empty
())
if
(
!
compilationDefines
.
empty
())
src
<<
endl
;
src
<<
endl
;
if
(
useDoublePrecision
)
{
src
<<
"typedef double real;
\n
"
;
src
<<
"typedef double2 real2;
\n
"
;
src
<<
"typedef double3 real3;
\n
"
;
src
<<
"typedef double4 real4;
\n
"
;
}
else
{
src
<<
"typedef float real;
\n
"
;
src
<<
"typedef float2 real2;
\n
"
;
src
<<
"typedef float3 real3;
\n
"
;
src
<<
"typedef float4 real4;
\n
"
;
}
for
(
map
<
string
,
string
>::
const_iterator
iter
=
defines
.
begin
();
iter
!=
defines
.
end
();
++
iter
)
{
for
(
map
<
string
,
string
>::
const_iterator
iter
=
defines
.
begin
();
iter
!=
defines
.
end
();
++
iter
)
{
src
<<
"#define "
<<
iter
->
first
;
src
<<
"#define "
<<
iter
->
first
;
if
(
!
iter
->
second
.
empty
())
if
(
!
iter
->
second
.
empty
())
...
@@ -498,22 +524,7 @@ void CudaContext::addAutoclearBuffer(CUdeviceptr memory, int size) {
...
@@ -498,22 +524,7 @@ void CudaContext::addAutoclearBuffer(CUdeviceptr memory, int size) {
// clearBuffer(*autoclearBuffers[base], autoclearBufferSizes[base]);
// clearBuffer(*autoclearBuffers[base], autoclearBufferSizes[base]);
// }
// }
//}
//}
//
//void CudaContext::reduceForces() {
// if (supports64BitGlobalAtomics)
// executeKernel(reduceForcesKernel, paddedNumAtoms, 128);
// else
// reduceBuffer(*forceBuffers, numForceBuffers);
//}
//
//void CudaContext::reduceBuffer(CudaArray<mm_float4>& array, int numBuffers) {
// int bufferSize = array.getSize()/numBuffers;
// reduceFloat4Kernel.setArg<cl::Buffer>(0, array.getDeviceBuffer());
// reduceFloat4Kernel.setArg<cl_int>(1, bufferSize);
// reduceFloat4Kernel.setArg<cl_int>(2, numBuffers);
// executeKernel(reduceFloat4Kernel, bufferSize, 128);
//}
//
void
CudaContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
void
CudaContext
::
tagAtomsInMolecule
(
int
atom
,
int
molecule
,
vector
<
int
>&
atomMolecule
,
vector
<
vector
<
int
>
>&
atomBonds
)
{
// Recursively tag atoms as belonging to a particular molecule.
// Recursively tag atoms as belonging to a particular molecule.
...
...
platforms/cuda2/src/CudaContext.h
View file @
1f0ec7b5
...
@@ -46,6 +46,7 @@ namespace OpenMM {
...
@@ -46,6 +46,7 @@ namespace OpenMM {
class
CudaArray
;
class
CudaArray
;
class
CudaForceInfo
;
class
CudaForceInfo
;
class
CudaExpressionUtilities
;
class
CudaIntegrationUtilities
;
class
CudaIntegrationUtilities
;
class
CudaBondedUtilities
;
class
CudaBondedUtilities
;
class
CudaNonbondedUtilities
;
class
CudaNonbondedUtilities
;
...
@@ -216,25 +217,13 @@ public:
...
@@ -216,25 +217,13 @@ public:
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
* Register a buffer that should be automatically cleared (all elements set to 0) at the start of each force or energy computation.
*
*
* @param memory the memory to clear
* @param memory the memory to clear
* @param size the number of
float/doubl
e elements in the buffer
* @param size the number of
4-byt
e elements in the buffer
*/
*/
void
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
);
void
addAutoclearBuffer
(
CUdeviceptr
memory
,
int
size
);
// /**
// /**
// * Clear all buffers that have been registered with addAutoclearBuffer().
// * Clear all buffers that have been registered with addAutoclearBuffer().
// */
// */
// void clearAutoclearBuffers();
// void clearAutoclearBuffers();
// /**
// * Given a collection of buffers packed into an array, sum them and store
// * the sum in the first buffer.
// *
// * @param array the array containing the buffers to reduce
// * @param numBuffers the number of buffers packed into the array
// */
// void reduceBuffer(CudaArray<mm_float4>& array, int numBuffers);
// /**
// * Sum the buffesr containing forces.
// */
// void reduceForces();
/**
/**
* Get the current simulation time.
* Get the current simulation time.
*/
*/
...
@@ -341,12 +330,18 @@ public:
...
@@ -341,12 +330,18 @@ public:
// float4 getInvPeriodicBoxSize() const {
// float4 getInvPeriodicBoxSize() const {
// return invPeriodicBoxSize;
// return invPeriodicBoxSize;
// }
// }
// /**
/**
// * Get the CudaIntegrationUtilities for this context.
* Get the CudaIntegrationUtilities for this context.
// */
*/
// CudaIntegrationUtilities& getIntegrationUtilities() {
CudaIntegrationUtilities
&
getIntegrationUtilities
()
{
// return *integration;
return
*
integration
;
// }
}
/**
* Get the CudaExpressionUtilities for this context.
*/
CudaExpressionUtilities
&
getExpressionUtilities
()
{
return
*
expression
;
}
// /**
// /**
// * Get the CudaBondedUtilities for this context.
// * Get the CudaBondedUtilities for this context.
// */
// */
...
@@ -445,8 +440,6 @@ private:
...
@@ -445,8 +440,6 @@ private:
CUfunction
clearFourBuffersKernel
;
CUfunction
clearFourBuffersKernel
;
CUfunction
clearFiveBuffersKernel
;
CUfunction
clearFiveBuffersKernel
;
CUfunction
clearSixBuffersKernel
;
CUfunction
clearSixBuffersKernel
;
CUfunction
reduceFloat4Kernel
;
CUfunction
reduceForcesKernel
;
std
::
vector
<
CudaForceInfo
*>
forces
;
std
::
vector
<
CudaForceInfo
*>
forces
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
Molecule
>
molecules
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
std
::
vector
<
MoleculeGroup
>
moleculeGroups
;
...
@@ -461,7 +454,8 @@ private:
...
@@ -461,7 +454,8 @@ private:
std
::
vector
<
CUdeviceptr
>
autoclearBuffers
;
std
::
vector
<
CUdeviceptr
>
autoclearBuffers
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
int
>
autoclearBufferSizes
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
std
::
vector
<
ReorderListener
*>
reorderListeners
;
// CudaIntegrationUtilities* integration;
CudaIntegrationUtilities
*
integration
;
CudaExpressionUtilities
*
expression
;
// CudaBondedUtilities* bonded;
// CudaBondedUtilities* bonded;
// CudaNonbondedUtilities* nonbonded;
// CudaNonbondedUtilities* nonbonded;
WorkThread
*
thread
;
WorkThread
*
thread
;
...
...
platforms/cuda2/src/CudaExpressionUtilities.cpp
View file @
1f0ec7b5
...
@@ -33,19 +33,6 @@ using namespace OpenMM;
...
@@ -33,19 +33,6 @@ using namespace OpenMM;
using
namespace
Lepton
;
using
namespace
Lepton
;
using
namespace
std
;
using
namespace
std
;
string
CudaExpressionUtilities
::
doubleToString
(
double
value
)
{
stringstream
s
;
s
.
precision
(
8
);
s
<<
scientific
<<
value
<<
"f"
;
return
s
.
str
();
}
string
CudaExpressionUtilities
::
intToString
(
int
value
)
{
stringstream
s
;
s
<<
value
;
return
s
.
str
();
}
string
CudaExpressionUtilities
::
createExpressions
(
const
map
<
string
,
ParsedExpression
>&
expressions
,
const
map
<
string
,
string
>&
variables
,
string
CudaExpressionUtilities
::
createExpressions
(
const
map
<
string
,
ParsedExpression
>&
expressions
,
const
map
<
string
,
string
>&
variables
,
const
vector
<
pair
<
string
,
string
>
>&
functions
,
const
string
&
prefix
,
const
string
&
functionParams
,
const
string
&
tempType
)
{
const
vector
<
pair
<
string
,
string
>
>&
functions
,
const
string
&
prefix
,
const
string
&
functionParams
,
const
string
&
tempType
)
{
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variableNodes
;
vector
<
pair
<
ExpressionTreeNode
,
string
>
>
variableNodes
;
...
@@ -75,13 +62,13 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -75,13 +62,13 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
return
;
return
;
for
(
int
i
=
0
;
i
<
(
int
)
node
.
getChildren
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
node
.
getChildren
().
size
();
i
++
)
processExpression
(
out
,
node
.
getChildren
()[
i
],
temps
,
functions
,
prefix
,
functionParams
,
allExpressions
,
tempType
);
processExpression
(
out
,
node
.
getChildren
()[
i
],
temps
,
functions
,
prefix
,
functionParams
,
allExpressions
,
tempType
);
string
name
=
prefix
+
intToString
(
temps
.
size
());
string
name
=
prefix
+
context
.
intToString
(
temps
.
size
());
bool
hasRecordedNode
=
false
;
bool
hasRecordedNode
=
false
;
out
<<
tempType
<<
" "
<<
name
<<
" = "
;
out
<<
tempType
<<
" "
<<
name
<<
" = "
;
switch
(
node
.
getOperation
().
getId
())
{
switch
(
node
.
getOperation
().
getId
())
{
case
Operation
::
CONSTANT
:
case
Operation
::
CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
Constant
*>
(
&
node
.
getOperation
())
->
getValue
());
break
;
break
;
case
Operation
::
VARIABLE
:
case
Operation
::
VARIABLE
:
throw
OpenMMException
(
"Unknown variable in expression: "
+
node
.
getOperation
().
getName
());
throw
OpenMMException
(
"Unknown variable in expression: "
+
node
.
getOperation
().
getName
());
...
@@ -107,7 +94,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -107,7 +94,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
string
valueName
=
name
;
string
valueName
=
name
;
string
derivName
=
name
;
string
derivName
=
name
;
if
(
valueNode
!=
NULL
&&
derivNode
!=
NULL
)
{
if
(
valueNode
!=
NULL
&&
derivNode
!=
NULL
)
{
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
if
(
isDeriv
)
{
if
(
isDeriv
)
{
valueName
=
name2
;
valueName
=
name2
;
...
@@ -120,14 +107,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -120,14 +107,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
}
}
out
<<
"{
\n
"
;
out
<<
"{
\n
"
;
out
<<
"float4 params = "
<<
functionParams
<<
"["
<<
i
<<
"];
\n
"
;
out
<<
"float4 params = "
<<
functionParams
<<
"["
<<
i
<<
"];
\n
"
;
out
<<
"
float
x = "
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
";
\n
"
;
out
<<
"
real
x = "
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
";
\n
"
;
out
<<
"if (x >= params.x && x <= params.y) {
\n
"
;
out
<<
"if (x >= params.x && x <= params.y) {
\n
"
;
out
<<
"x = (x-params.x)*params.z;
\n
"
;
out
<<
"x = (x-params.x)*params.z;
\n
"
;
out
<<
"int index = (int) (floor(x));
\n
"
;
out
<<
"int index = (int) (floor(x));
\n
"
;
out
<<
"index = min(index, (int) params.w);
\n
"
;
out
<<
"index = min(index, (int) params.w);
\n
"
;
out
<<
"float4 coeff = "
<<
functions
[
i
].
second
<<
"[index];
\n
"
;
out
<<
"float4 coeff = "
<<
functions
[
i
].
second
<<
"[index];
\n
"
;
out
<<
"
float
b = x-index;
\n
"
;
out
<<
"
real
b = x-index;
\n
"
;
out
<<
"
float
a = 1.0f-b;
\n
"
;
out
<<
"
real
a = 1.0f-b;
\n
"
;
if
(
valueNode
!=
NULL
)
if
(
valueNode
!=
NULL
)
out
<<
valueName
<<
" = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(params.z*params.z);
\n
"
;
out
<<
valueName
<<
" = a*coeff.x+b*coeff.y+((a*a*a-a)*coeff.z+(b*b*b-b)*coeff.w)/(params.z*params.z);
\n
"
;
if
(
derivNode
!=
NULL
)
if
(
derivNode
!=
NULL
)
...
@@ -164,7 +151,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -164,7 +151,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"-"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
"-"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
break
;
case
Operation
::
SQRT
:
case
Operation
::
SQRT
:
out
<<
"
sqrt
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
SQRT
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
EXP
:
case
Operation
::
EXP
:
out
<<
"EXP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"EXP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
...
@@ -173,31 +160,31 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -173,31 +160,31 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"LOG("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"LOG("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
SIN
:
case
Operation
::
SIN
:
out
<<
"
sin
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
SIN
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
COS
:
case
Operation
::
COS
:
out
<<
"
cos
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
COS
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
SEC
:
case
Operation
::
SEC
:
out
<<
"
1.0f/cos
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
RECIP(COS
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")
)
"
;
break
;
break
;
case
Operation
::
CSC
:
case
Operation
::
CSC
:
out
<<
"
1.0f/sin
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
RECIP(SIN
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")
)
"
;
break
;
break
;
case
Operation
::
TAN
:
case
Operation
::
TAN
:
out
<<
"
tan
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
TAN
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
COT
:
case
Operation
::
COT
:
out
<<
"
1.0f/tan
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
RECIP(TAN
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")
)
"
;
break
;
break
;
case
Operation
::
ASIN
:
case
Operation
::
ASIN
:
out
<<
"
asin
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
ASIN
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
ACOS
:
case
Operation
::
ACOS
:
out
<<
"
acos
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
ACOS
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
ATAN
:
case
Operation
::
ATAN
:
out
<<
"
atan
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"
ATAN
("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
SINH
:
case
Operation
::
SINH
:
out
<<
"sinh("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"sinh("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
...
@@ -236,10 +223,10 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -236,10 +223,10 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"RECIP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
out
<<
"RECIP("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
")"
;
break
;
break
;
case
Operation
::
ADD_CONSTANT
:
case
Operation
::
ADD_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
AddConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"+"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
break
;
case
Operation
::
MULTIPLY_CONSTANT
:
case
Operation
::
MULTIPLY_CONSTANT
:
out
<<
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
out
<<
context
.
doubleToString
(
dynamic_cast
<
const
Operation
::
MultiplyConstant
*>
(
&
node
.
getOperation
())
->
getValue
())
<<
"*"
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
);
break
;
break
;
case
Operation
::
POWER_CONSTANT
:
case
Operation
::
POWER_CONSTANT
:
{
{
...
@@ -266,14 +253,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -266,14 +253,14 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
for
(
map
<
int
,
const
ExpressionTreeNode
*>::
const_iterator
iter
=
powers
.
begin
();
iter
!=
powers
.
end
();
++
iter
)
{
for
(
map
<
int
,
const
ExpressionTreeNode
*>::
const_iterator
iter
=
powers
.
begin
();
iter
!=
powers
.
end
();
++
iter
)
{
if
(
iter
->
first
!=
exponent
)
{
if
(
iter
->
first
!=
exponent
)
{
exponents
.
push_back
(
iter
->
first
>=
0
?
iter
->
first
:
-
iter
->
first
);
exponents
.
push_back
(
iter
->
first
>=
0
?
iter
->
first
:
-
iter
->
first
);
string
name2
=
prefix
+
intToString
(
temps
.
size
());
string
name2
=
prefix
+
context
.
intToString
(
temps
.
size
());
names
.
push_back
(
name2
);
names
.
push_back
(
name2
);
temps
.
push_back
(
make_pair
(
*
iter
->
second
,
name2
));
temps
.
push_back
(
make_pair
(
*
iter
->
second
,
name2
));
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
out
<<
tempType
<<
" "
<<
name2
<<
" = 0.0f;
\n
"
;
}
}
}
}
out
<<
"{
\n
"
;
out
<<
"{
\n
"
;
out
<<
"
float
multiplier = "
<<
(
exponent
<
0.0
?
"
1.0f/
"
:
""
)
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
";
\n
"
;
out
<<
"
real
multiplier = "
<<
(
exponent
<
0.0
?
"
RECIP(
"
:
"
(
"
)
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
"
)
;
\n
"
;
bool
done
=
false
;
bool
done
=
false
;
while
(
!
done
)
{
while
(
!
done
)
{
done
=
true
;
done
=
true
;
...
@@ -295,7 +282,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -295,7 +282,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"}"
;
out
<<
"}"
;
}
}
else
else
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
doubleToString
(
exponent
)
<<
")"
;
out
<<
"pow("
<<
getTempName
(
node
.
getChildren
()[
0
],
temps
)
<<
", "
<<
context
.
doubleToString
(
exponent
)
<<
")"
;
break
;
break
;
}
}
case
Operation
::
MIN
:
case
Operation
::
MIN
:
...
...
platforms/cuda2/src/CudaExpressionUtilities.h
View file @
1f0ec7b5
...
@@ -45,6 +45,8 @@ namespace OpenMM {
...
@@ -45,6 +45,8 @@ namespace OpenMM {
class
OPENMM_EXPORT
CudaExpressionUtilities
{
class
OPENMM_EXPORT
CudaExpressionUtilities
{
public:
public:
CudaExpressionUtilities
(
CudaContext
&
context
)
:
context
(
context
)
{
}
/**
/**
* Generate the source code for calculating a set of expressions.
* Generate the source code for calculating a set of expressions.
*
*
...
@@ -54,10 +56,10 @@ public:
...
@@ -54,10 +56,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "
float
")
* @param tempType the type of value to use for temporary variables (defaults to "
real
")
*/
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
map
<
std
::
string
,
std
::
string
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
float
"
);
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
real
"
);
/**
/**
* Generate the source code for calculating a set of expressions.
* Generate the source code for calculating a set of expressions.
*
*
...
@@ -67,10 +69,10 @@ public:
...
@@ -67,10 +69,10 @@ public:
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param functions defines the variable name for each tabulated function that may appear in the expressions
* @param prefix a prefix to put in front of temporary variables
* @param prefix a prefix to put in front of temporary variables
* @param functionParams the variable name containing the parameters for each tabulated function
* @param functionParams the variable name containing the parameters for each tabulated function
* @param tempType the type of value to use for temporary variables (defaults to "
float
")
* @param tempType the type of value to use for temporary variables (defaults to "
real
")
*/
*/
static
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
std
::
string
createExpressions
(
const
std
::
map
<
std
::
string
,
Lepton
::
ParsedExpression
>&
expressions
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
variables
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
float
"
);
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
string
&
tempType
=
"
real
"
);
/**
/**
* Calculate the spline coefficients for a tabulated function that appears in expressions.
* Calculate the spline coefficients for a tabulated function that appears in expressions.
*
*
...
@@ -79,26 +81,19 @@ public:
...
@@ -79,26 +81,19 @@ public:
* @param max the value of the independent variable corresponding to the last element of values
* @param max the value of the independent variable corresponding to the last element of values
* @return the spline coefficients
* @return the spline coefficients
*/
*/
static
std
::
vector
<
float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
std
::
vector
<
float4
>
computeFunctionCoefficients
(
const
std
::
vector
<
double
>&
values
,
double
min
,
double
max
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
doubleToString
(
double
value
);
/**
* Convert a number to a string in a format suitable for including in a kernel.
*/
static
std
::
string
intToString
(
int
value
);
class
FunctionPlaceholder
;
class
FunctionPlaceholder
;
private:
private:
static
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
void
processExpression
(
std
::
stringstream
&
out
,
const
Lepton
::
ExpressionTreeNode
&
node
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
,
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
vector
<
std
::
pair
<
std
::
string
,
std
::
string
>
>&
functions
,
const
std
::
string
&
prefix
,
const
std
::
string
&
functionParams
,
const
std
::
vector
<
Lepton
::
ParsedExpression
>&
allExpressions
,
const
std
::
string
&
tempType
);
const
std
::
vector
<
Lepton
::
ParsedExpression
>&
allExpressions
,
const
std
::
string
&
tempType
);
static
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
std
::
string
getTempName
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
std
::
vector
<
std
::
pair
<
Lepton
::
ExpressionTreeNode
,
std
::
string
>
>&
temps
);
static
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
void
findRelatedTabulatedFunctions
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
const
Lepton
::
ExpressionTreeNode
*&
valueNode
,
const
Lepton
::
ExpressionTreeNode
*&
derivNode
);
const
Lepton
::
ExpressionTreeNode
*&
valueNode
,
const
Lepton
::
ExpressionTreeNode
*&
derivNode
);
static
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
void
findRelatedPowers
(
const
Lepton
::
ExpressionTreeNode
&
node
,
const
Lepton
::
ExpressionTreeNode
&
searchNode
,
std
::
map
<
int
,
const
Lepton
::
ExpressionTreeNode
*>&
powers
);
std
::
map
<
int
,
const
Lepton
::
ExpressionTreeNode
*>&
powers
);
CudaContext
&
context
;
};
};
/**
/**
...
...
platforms/cuda2/src/CudaIntegrationUtilities.cpp
0 → 100644
View file @
1f0ec7b5
This diff is collapsed.
Click to expand it.
platforms/cuda2/src/CudaIntegrationUtilities.h
0 → 100644
View file @
1f0ec7b5
#ifndef OPENMM_CUDAINTEGRATIONUTILITIES_H_
#define OPENMM_CUDAINTEGRATIONUTILITIES_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "openmm/System.h"
#include "CudaContext.h"
#include "openmm/internal/windowsExport.h"
#include <iosfwd>
namespace
OpenMM
{
/**
 * Shared helper for the integrators: it exposes common workspace arrays, generates
 * random numbers, and enforces constraints.
 */
class OPENMM_EXPORT CudaIntegrationUtilities {
public:
    CudaIntegrationUtilities(CudaContext& context, const System& system);
    ~CudaIntegrationUtilities();

    /**
     * Access the array of position deltas.
     */
    CudaArray& getPosDelta() {
        return *posDelta;
    }

    /**
     * Access the array of random values.  Every element is a float4 whose four components
     * are independent, normally distributed random numbers with mean 0 and variance 1.
     */
    CudaArray& getRandom() {
        return *random;
    }

    /**
     * Access the array holding the current step size.
     */
    CudaArray& getStepSize() {
        return *stepSize;
    }

    /**
     * Constrain the atom positions.
     *
     * @param tol    the constraint tolerance
     */
    void applyConstraints(double tol);

    /**
     * Constrain the atom velocities.
     *
     * @param tol    the constraint tolerance
     */
    void applyVelocityConstraints(double tol);

    /**
     * Set up the random number generator.
     *
     * @param randomNumberSeed    the seed to initialize the generator with
     */
    void initRandomNumberGenerator(unsigned int randomNumberSeed);

    /**
     * Make sure enough random numbers are available in the array, generating a new batch if not.
     *
     * @param numValues    how many random float4's the caller is going to consume
     * @return the index in the array at which the caller should start reading
     */
    int prepareRandomNumbers(int numValues);

    /**
     * Compute the positions of all virtual sites.
     */
    void computeVirtualSites();

    /**
     * Move the forces acting on virtual sites onto the atoms they are based on.
     */
    void distributeForcesFromVirtualSites();

    /**
     * Write a checkpoint that records the current state of the random number generator.
     *
     * @param stream    the output stream that receives the checkpoint data
     */
    void createCheckpoint(std::ostream& stream);

    /**
     * Restore a checkpoint previously written by createCheckpoint().
     *
     * @param stream    the input stream the checkpoint data is read from
     */
    void loadCheckpoint(std::istream& stream);

private:
    // Overload behind the two public constraint entry points.
    // NOTE(review): presumably constrainVelocities selects velocities vs. positions - confirm in the .cpp.
    void applyConstraints(bool constrainVelocities, double tol);

    CudaContext& context;

    // CUDA function handles (one declaration per handle; declaration order is unchanged).
    CUfunction settlePosKernel;
    CUfunction settleVelKernel;
    CUfunction shakePosKernel;
    CUfunction shakeVelKernel;
    CUfunction ccmaDirectionsKernel;
    CUfunction ccmaPosForceKernel;
    CUfunction ccmaVelForceKernel;
    CUfunction ccmaMultiplyKernel;
    CUfunction ccmaPosUpdateKernel;
    CUfunction ccmaVelUpdateKernel;
    CUfunction vsitePositionKernel;
    CUfunction vsiteForceKernel;
    CUfunction randomKernel;

    // Arrays held by pointer.
    // NOTE(review): ownership and release are not visible in this header - confirm against the destructor.
    CudaArray* posDelta;
    CudaArray* settleAtoms;
    CudaArray* settleParams;
    CudaArray* shakeAtoms;
    CudaArray* shakeParams;
    CudaArray* random;
    CudaArray* randomSeed;
    CudaArray* stepSize;
    CudaArray* ccmaAtoms;
    CudaArray* ccmaDistance;
    CudaArray* ccmaReducedMass;
    CudaArray* ccmaAtomConstraints;
    CudaArray* ccmaNumAtomConstraints;
    CudaArray* ccmaConstraintMatrixColumn;
    CudaArray* ccmaConstraintMatrixValue;
    CudaArray* ccmaDelta1;
    CudaArray* ccmaDelta2;
    CudaArray* ccmaConverged;
    int* ccmaConvergedMemory;
    CudaArray* vsite2AvgAtoms;
    CudaArray* vsite2AvgWeights;
    CudaArray* vsite3AvgAtoms;
    CudaArray* vsite3AvgWeights;
    CudaArray* vsiteOutOfPlaneAtoms;
    CudaArray* vsiteOutOfPlaneWeights;

    // Scalar bookkeeping.
    int randomPos;
    int lastSeed;
    int numVsites;
    bool hasInitializedPosConstraintKernels;
    bool hasInitializedVelConstraintKernels;

    // Helper types that are only forward declared here.
    struct ShakeCluster;
    struct ConstraintOrderer;
};
}
// namespace OpenMM
#endif
/*OPENMM_CUDAINTEGRATIONUTILITIES_H_*/
platforms/cuda2/src/kernels/random.cu
0 → 100644
View file @
1f0ec7b5
/**
* Generate random numbers
*/
extern
"C"
__global__
void
generateRandomNumbers
(
int
numValues
,
float4
*
__restrict__
random
,
uint4
*
__restrict__
seed
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
uint4
state
=
seed
[
index
];
unsigned
int
carry
=
0
;
while
(
index
<
numValues
)
{
float4
value
;
// Generate first value.
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
unsigned
int
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
unsigned
int
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x1
=
(
float
)
max
(
state
.
x
+
state
.
y
+
state
.
w
,
0x00000001u
)
/
(
float
)
0xffffffff
;
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
x1
=
sqrt
(
-
2.0
f
*
log
(
x1
));
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x2
=
(
float
)(
state
.
x
+
state
.
y
+
state
.
w
)
/
(
float
)
0xffffffff
;
value
.
x
=
x1
*
cos
(
2.0
f
*
3.14159265
f
*
x2
);
// Generate second value.
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x3
=
(
float
)
max
(
state
.
x
+
state
.
y
+
state
.
w
,
0x00000001u
)
/
(
float
)
0xffffffff
;
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
x3
=
sqrt
(
-
2.0
f
*
log
(
x3
));
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x4
=
(
float
)(
state
.
x
+
state
.
y
+
state
.
w
)
/
(
float
)
0xffffffff
;
value
.
y
=
x3
*
cos
(
2.0
f
*
3.14159265
f
*
x4
);
// Generate third value.
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x5
=
(
float
)
max
(
state
.
x
+
state
.
y
+
state
.
w
,
0x00000001u
)
/
(
float
)
0xffffffff
;
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
x5
=
sqrt
(
-
2.0
f
*
log
(
x5
));
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x6
=
(
float
)(
state
.
x
+
state
.
y
+
state
.
w
)
/
(
float
)
0xffffffff
;
value
.
z
=
x5
*
cos
(
2.0
f
*
3.14159265
f
*
x6
);
// Generate fourth value.
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x7
=
(
float
)
max
(
state
.
x
+
state
.
y
+
state
.
w
,
0x00000001u
)
/
(
float
)
0xffffffff
;
state
.
x
=
state
.
x
*
69069
+
1
;
state
.
y
^=
state
.
y
<<
13
;
state
.
y
^=
state
.
y
>>
17
;
state
.
y
^=
state
.
y
<<
5
;
x7
=
sqrt
(
-
2.0
f
*
log
(
x7
));
k
=
(
state
.
z
>>
2
)
+
(
state
.
w
>>
3
)
+
(
carry
>>
2
);
m
=
state
.
w
+
state
.
w
+
state
.
z
+
carry
;
state
.
z
=
state
.
w
;
state
.
w
=
m
;
carry
=
k
>>
30
;
float
x8
=
(
float
)(
state
.
x
+
state
.
y
+
state
.
w
)
/
(
float
)
0xffffffff
;
value
.
w
=
x7
*
cos
(
2.0
f
*
3.14159265
f
*
x8
);
// Record the values.
random
[
index
]
=
value
;
index
+=
blockDim
.
x
*
gridDim
.
x
;
}
seed
[
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
]
=
state
;
}
platforms/cuda2/tests/TestCudaRandom.cpp
0 → 100644
View file @
1f0ec7b5
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2008-2012 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
/**
* This tests the CUDA implementation of random number generation.
*/
#include "openmm/internal/AssertionUtilities.h"
#include "../src/CudaArray.h"
#include "../src/CudaContext.h"
#include "../src/CudaIntegrationUtilities.h"
#include "openmm/System.h"
#include <iostream>
using
namespace
OpenMM
;
using
namespace
std
;
/**
 * Check that the random numbers produced on the device look like samples from a
 * standard normal distribution.
 *
 * A context is created for a system of 5000 particles, the generator is seeded with 0,
 * and one full array of random float4 values is generated and downloaded.  The mean and
 * the second, third and fourth cumulants of all the float components are then compared
 * with the values expected for a unit Gaussian (0, 1, 0 and 0), using tolerances that
 * shrink as the sample count grows.
 */
void testGaussian() {
    int numAtoms = 5000;
    System system;
    for (int i = 0; i < numAtoms; i++)
        system.addParticle(1.0);
    CudaPlatform platform;
    CudaPlatform::PlatformData platformData(system, "", "true", "single",
            platform.getPropertyDefaultValue(CudaPlatform::CudaCompiler()),
            platform.getPropertyDefaultValue(CudaPlatform::CudaTempDirectory()));
    CudaContext& context = *platformData.contexts[0];
    context.initialize();
    context.getIntegrationUtilities().initRandomNumberGenerator(0);
    CudaArray& random = context.getIntegrationUtilities().getRandom();
    context.getIntegrationUtilities().prepareRandomNumbers(random.getSize());

    // The array holds float4 elements, so the host buffer needs one float4 per element.
    // numValues counts the individual floats; the buffer was previously sized with it,
    // which allocated four times more float4's than the array contains.
    const int numElements = random.getSize();
    const int numValues = numElements*4;
    vector<float4> values(numElements);
    random.download(values);
    float* data = reinterpret_cast<float*>(&values[0]);

    // Accumulate the first four raw moments in double precision.
    double mean = 0.0;
    double var = 0.0;
    double skew = 0.0;
    double kurtosis = 0.0;
    for (int i = 0; i < numValues; i++) {
        double value = data[i];
        mean += value;
        var += value*value;
        skew += value*value*value;
        kurtosis += value*value*value*value;
    }
    mean /= numValues;
    var /= numValues;
    skew /= numValues;
    kurtosis /= numValues;

    // Convert the raw moments into cumulants.
    double c2 = var-mean*mean;
    double c3 = skew-3*var*mean+2*mean*mean*mean;
    double c4 = kurtosis-4*skew*mean-3*var*var+12*var*mean*mean-6*mean*mean*mean*mean;
    ASSERT_EQUAL_TOL(0.0, mean, 3.0/sqrt((double) numValues));
    ASSERT_EQUAL_TOL(1.0, c2, 3.0/pow(numValues, 1.0/3.0));
    ASSERT_EQUAL_TOL(0.0, c3, 3.0/pow(numValues, 1.0/4.0));
    ASSERT_EQUAL_TOL(0.0, c4, 3.0/pow(numValues, 1.0/4.0));
}
int
main
()
{
try
{
testGaussian
();
}
catch
(
const
exception
&
e
)
{
cout
<<
"exception: "
<<
e
.
what
()
<<
endl
;
return
1
;
}
cout
<<
"Done"
<<
endl
;
return
0
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment