Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
863447a6
"platforms/vscode:/vscode.git/clone" did not exist on "ee2d72298716fd5ffaee368e74b15e86257ccec1"
Commit
863447a6
authored
May 30, 2013
by
peastman
Browse files
Created CUDA implementation of ring polymer contraction
parent
2e0ebb67
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
287 additions
and
2 deletions
+287
-2
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
+82
-1
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
+8
-1
plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
+121
-0
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
+76
-0
No files found.
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.cpp
View file @
863447a6
...
...
@@ -69,6 +69,10 @@ CudaIntegrateRPMDStepKernel::~CudaIntegrateRPMDStepKernel() {
delete
positions
;
if
(
velocities
!=
NULL
)
delete
velocities
;
if
(
contractedForces
!=
NULL
)
delete
contractedForces
;
if
(
contractedPositions
!=
NULL
)
delete
contractedPositions
;
}
void
CudaIntegrateRPMDStepKernel
::
initialize
(
const
System
&
system
,
const
RPMDIntegrator
&
integrator
)
{
...
...
@@ -106,6 +110,34 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
temp
[
i
]
=
make_float4
(
0
,
0
,
0
,
1
);
velocities
->
upload
(
temp
);
}
// Build a list of contractions.
groupsNotContracted
=
-
1
;
const
map
<
int
,
int
>&
contractions
=
integrator
.
getContractions
();
int
maxContractedCopies
=
0
;
for
(
map
<
int
,
int
>::
const_iterator
iter
=
contractions
.
begin
();
iter
!=
contractions
.
end
();
++
iter
)
{
int
group
=
iter
->
first
;
int
copies
=
iter
->
second
;
if
(
group
<
0
||
group
>
31
)
throw
OpenMMException
(
"RPMDIntegrator: Force group must be between 0 and 31"
);
if
(
copies
<
0
||
copies
>
numCopies
)
throw
OpenMMException
(
"RPMDIntegrator: Number of copies for contraction cannot be greater than the total number of copies being simulated"
);
if
(
copies
!=
numCopies
)
{
if
(
groupsByCopies
.
find
(
copies
)
==
groupsByCopies
.
end
())
{
groupsByCopies
[
copies
]
=
1
<<
group
;
groupsNotContracted
-=
1
<<
group
;
if
(
copies
>
maxContractedCopies
)
maxContractedCopies
=
copies
;
}
else
groupsByCopies
[
copies
]
|=
1
<<
group
;
}
}
if
(
maxContractedCopies
>
0
)
{
contractedForces
=
CudaArray
::
create
<
long
long
>
(
cu
,
maxContractedCopies
*
paddedParticles
*
3
,
"rpmdContractedForces"
);
contractedPositions
=
new
CudaArray
(
cu
,
maxContractedCopies
*
paddedParticles
,
elementSize
,
"rpmdContractedPositions"
);
}
// Create kernels.
...
...
@@ -129,6 +161,23 @@ void CudaIntegrateRPMDStepKernel::initialize(const System& system, const RPMDInt
copyToContextKernel
=
cu
.
getKernel
(
module
,
"copyDataToContext"
);
copyFromContextKernel
=
cu
.
getKernel
(
module
,
"copyDataFromContext"
);
translateKernel
=
cu
.
getKernel
(
module
,
"applyCellTranslations"
);
// Create kernels for doing contractions.
for
(
map
<
int
,
int
>::
const_iterator
iter
=
groupsByCopies
.
begin
();
iter
!=
groupsByCopies
.
end
();
++
iter
)
{
int
copies
=
iter
->
first
;
replacements
.
clear
();
replacements
[
"NUM_CONTRACTED_COPIES"
]
=
cu
.
intToString
(
copies
);
replacements
[
"POS_SCALE"
]
=
cu
.
doubleToString
(
1.0
/
numCopies
);
replacements
[
"FORCE_SCALE"
]
=
cu
.
doubleToString
(
1.0
/
copies
);
replacements
[
"FFT_Q_FORWARD"
]
=
createFFT
(
numCopies
,
"q"
,
true
);
replacements
[
"FFT_Q_BACKWARD"
]
=
createFFT
(
copies
,
"q"
,
false
);
replacements
[
"FFT_F_FORWARD"
]
=
createFFT
(
copies
,
"f"
,
true
);
replacements
[
"FFT_F_BACKWARD"
]
=
createFFT
(
numCopies
,
"f"
,
false
);
module
=
cu
.
createModule
(
cu
.
replaceStrings
(
CudaKernelSources
::
vectorOps
+
CudaRpmdKernelSources
::
rpmdContraction
,
replacements
),
defines
,
""
);
positionContractionKernels
[
copies
]
=
cu
.
getKernel
(
module
,
"contractPositions"
);
forceContractionKernels
[
copies
]
=
cu
.
getKernel
(
module
,
"contractForces"
);
}
}
void
CudaIntegrateRPMDStepKernel
::
execute
(
ContextImpl
&
context
,
const
RPMDIntegrator
&
integrator
,
bool
forcesAreValid
)
{
...
...
@@ -191,17 +240,49 @@ void CudaIntegrateRPMDStepKernel::execute(ContextImpl& context, const RPMDIntegr
}
void
CudaIntegrateRPMDStepKernel
::
computeForces
(
ContextImpl
&
context
)
{
// Compute forces from all groups that didn't have a specified contraction.
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
{
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
context
.
computeVirtualSites
();
context
.
updateContextState
();
context
.
calcForcesAndEnergy
(
true
,
false
);
context
.
calcForcesAndEnergy
(
true
,
false
,
groupsNotContracted
);
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
forces
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
positions
->
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
}
// Now loop over contractions and compute forces from them.
for
(
map
<
int
,
int
>::
const_iterator
iter
=
groupsByCopies
.
begin
();
iter
!=
groupsByCopies
.
end
();
++
iter
)
{
int
copies
=
iter
->
first
;
int
groupFlags
=
iter
->
second
;
// Find the contracted positions.
void
*
contractPosArgs
[]
=
{
&
positions
->
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
()};
cu
.
executeKernel
(
positionContractionKernels
[
copies
],
contractPosArgs
,
numParticles
*
numCopies
,
workgroupSize
);
// Compute forces.
for
(
int
i
=
0
;
i
<
copies
;
i
++
)
{
void
*
copyToContextArgs
[]
=
{
&
velocities
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyToContextKernel
,
copyToContextArgs
,
cu
.
getNumAtoms
());
context
.
computeVirtualSites
();
context
.
calcForcesAndEnergy
(
true
,
false
,
groupFlags
);
void
*
copyFromContextArgs
[]
=
{
&
cu
.
getForce
().
getDevicePointer
(),
&
contractedForces
->
getDevicePointer
(),
&
cu
.
getVelm
().
getDevicePointer
(),
&
velocities
->
getDevicePointer
(),
&
cu
.
getPosq
().
getDevicePointer
(),
&
contractedPositions
->
getDevicePointer
(),
&
cu
.
getAtomIndexArray
().
getDevicePointer
(),
&
i
};
cu
.
executeKernel
(
copyFromContextKernel
,
copyFromContextArgs
,
cu
.
getNumAtoms
());
}
// Apply the forces to the original copies.
void
*
contractForceArgs
[]
=
{
&
forces
->
getDevicePointer
(),
&
contractedForces
->
getDevicePointer
()};
cu
.
executeKernel
(
forceContractionKernels
[
copies
],
contractForceArgs
,
numParticles
*
numCopies
,
workgroupSize
);
}
}
double
CudaIntegrateRPMDStepKernel
::
computeKineticEnergy
(
ContextImpl
&
context
,
const
RPMDIntegrator
&
integrator
)
{
...
...
plugins/rpmd/platforms/cuda/src/CudaRpmdKernels.h
View file @
863447a6
...
...
@@ -35,6 +35,7 @@
#include "openmm/RpmdKernels.h"
#include "CudaContext.h"
#include "CudaArray.h"
#include <map>
namespace
OpenMM
{
...
...
@@ -45,7 +46,7 @@ namespace OpenMM {
class
CudaIntegrateRPMDStepKernel
:
public
IntegrateRPMDStepKernel
{
public:
CudaIntegrateRPMDStepKernel
(
std
::
string
name
,
const
Platform
&
platform
,
CudaContext
&
cu
)
:
IntegrateRPMDStepKernel
(
name
,
platform
),
cu
(
cu
),
forces
(
NULL
),
positions
(
NULL
),
velocities
(
NULL
)
{
IntegrateRPMDStepKernel
(
name
,
platform
),
cu
(
cu
),
forces
(
NULL
),
positions
(
NULL
),
velocities
(
NULL
)
,
contractedForces
(
NULL
),
contractedPositions
(
NULL
)
{
}
~
CudaIntegrateRPMDStepKernel
();
/**
...
...
@@ -88,10 +89,16 @@ private:
std
::
string
createFFT
(
int
size
,
const
std
::
string
&
variable
,
bool
forward
);
CudaContext
&
cu
;
int
numCopies
,
numParticles
,
workgroupSize
;
std
::
map
<
int
,
int
>
groupsByCopies
;
int
groupsNotContracted
;
CudaArray
*
forces
;
CudaArray
*
positions
;
CudaArray
*
velocities
;
CudaArray
*
contractedForces
;
CudaArray
*
contractedPositions
;
CUfunction
pileKernel
,
stepKernel
,
velocitiesKernel
,
copyToContextKernel
,
copyFromContextKernel
,
translateKernel
;
std
::
map
<
int
,
CUfunction
>
positionContractionKernels
;
std
::
map
<
int
,
CUfunction
>
forceContractionKernels
;
};
}
// namespace OpenMM
...
...
plugins/rpmd/platforms/cuda/src/kernels/rpmdContraction.cu
0 → 100644
View file @
863447a6
__device__
mixed3
multiplyComplexRealPart
(
mixed2
c1
,
mixed3
c2r
,
mixed3
c2i
)
{
return
c1
.
x
*
c2r
-
c1
.
y
*
c2i
;
}
__device__
mixed3
multiplyComplexImagPart
(
mixed2
c1
,
mixed3
c2r
,
mixed3
c2i
)
{
return
c1
.
x
*
c2i
+
c1
.
y
*
c2r
;
}
__device__
mixed3
multiplyComplexRealPartConj
(
mixed2
c1
,
mixed3
c2r
,
mixed3
c2i
)
{
return
c1
.
x
*
c2r
+
c1
.
y
*
c2i
;
}
__device__
mixed3
multiplyComplexImagPartConj
(
mixed2
c1
,
mixed3
c2r
,
mixed3
c2i
)
{
return
c1
.
x
*
c2i
-
c1
.
y
*
c2r
;
}
/**
* Compute the contracted positions
*/
extern
"C"
__global__
void
contractPositions
(
mixed4
*
posq
,
mixed4
*
contracted
)
{
const
int
numBlocks
=
(
blockDim
.
x
*
gridDim
.
x
)
/
NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
__shared__
mixed3
q
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed2
w
[
NUM_COPIES
];
mixed3
*
qreal
=
&
q
[
blockStart
];
mixed3
*
qimag
=
&
q
[
blockStart
+
blockDim
.
x
];
mixed3
*
tempreal
=
&
temp
[
blockStart
];
mixed3
*
tempimag
=
&
temp
[
blockStart
+
blockDim
.
x
];
if
(
threadIdx
.
x
<
NUM_COPIES
)
w
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
__syncthreads
();
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
// Load the particle position.
mixed4
particlePosq
=
posq
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
];
qreal
[
indexInBlock
]
=
make_mixed3
(
particlePosq
.
x
,
particlePosq
.
y
,
particlePosq
.
z
);
qimag
[
indexInBlock
]
=
make_mixed3
(
0
);
// Forward FFT.
__syncthreads
();
FFT_Q_FORWARD
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
// Compress the data to remove high frequencies.
int
start
=
(
NUM_CONTRACTED_COPIES
+
1
)
/
2
;
tempreal
[
indexInBlock
]
=
qreal
[
indexInBlock
];
tempimag
[
indexInBlock
]
=
qimag
[
indexInBlock
];
__syncthreads
();
if
(
indexInBlock
<
NUM_CONTRACTED_COPIES
)
{
qreal
[
indexInBlock
]
=
tempreal
[
indexInBlock
<
start
?
indexInBlock
:
indexInBlock
+
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)];
qimag
[
indexInBlock
]
=
tempimag
[
indexInBlock
<
start
?
indexInBlock
:
indexInBlock
+
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)];
}
__syncthreads
();
FFT_Q_BACKWARD
}
// Store results.
if
(
indexInBlock
<
NUM_CONTRACTED_COPIES
)
contracted
[
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
]
=
make_mixed4
(
POS_SCALE
*
qreal
[
indexInBlock
].
x
,
POS_SCALE
*
qreal
[
indexInBlock
].
y
,
POS_SCALE
*
qreal
[
indexInBlock
].
z
,
particlePosq
.
w
);
}
}
/**
* Apply the contracted forces to all copies.
*/
extern
"C"
__global__
void
contractForces
(
long
long
*
force
,
long
long
*
contracted
)
{
const
int
numBlocks
=
(
blockDim
.
x
*
gridDim
.
x
)
/
NUM_COPIES
;
const
int
blockStart
=
NUM_COPIES
*
(
threadIdx
.
x
/
NUM_COPIES
);
const
int
indexInBlock
=
threadIdx
.
x
-
blockStart
;
const
mixed
forceScale
=
1
/
(
mixed
)
0x100000000
;
__shared__
mixed3
f
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed3
temp
[
2
*
THREAD_BLOCK_SIZE
];
__shared__
mixed2
w
[
NUM_COPIES
];
mixed3
*
freal
=
&
f
[
blockStart
];
mixed3
*
fimag
=
&
f
[
blockStart
+
blockDim
.
x
];
mixed3
*
tempreal
=
&
temp
[
blockStart
];
mixed3
*
tempimag
=
&
temp
[
blockStart
+
blockDim
.
x
];
if
(
threadIdx
.
x
<
NUM_COPIES
)
w
[
indexInBlock
]
=
make_mixed2
(
cos
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
),
sin
(
-
indexInBlock
*
2
*
M_PI
/
NUM_COPIES
));
__syncthreads
();
for
(
int
particle
=
(
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
)
/
NUM_COPIES
;
particle
<
NUM_ATOMS
;
particle
+=
numBlocks
)
{
// Load the force.
int
forceIndex
=
particle
+
indexInBlock
*
PADDED_NUM_ATOMS
*
3
;
if
(
indexInBlock
<
NUM_CONTRACTED_COPIES
)
{
freal
[
indexInBlock
]
=
make_mixed3
(
contracted
[
forceIndex
],
contracted
[
forceIndex
+
PADDED_NUM_ATOMS
],
contracted
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]);
fimag
[
indexInBlock
]
=
make_mixed3
(
0
);
}
__syncthreads
();
// Forward FFT.
if
(
NUM_CONTRACTED_COPIES
>
1
)
{
FFT_F_FORWARD
}
// Set the high frequency components to 0.
int
start
=
(
NUM_CONTRACTED_COPIES
+
1
)
/
2
;
int
end
=
NUM_COPIES
-
NUM_CONTRACTED_COPIES
+
start
;
tempreal
[
indexInBlock
]
=
freal
[
indexInBlock
];
tempimag
[
indexInBlock
]
=
fimag
[
indexInBlock
];
__syncthreads
();
if
(
indexInBlock
>=
start
)
{
freal
[
indexInBlock
]
=
(
indexInBlock
<
end
?
make_mixed3
(
0
)
:
tempreal
[
indexInBlock
-
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)]);
fimag
[
indexInBlock
]
=
(
indexInBlock
<
end
?
make_mixed3
(
0
)
:
tempimag
[
indexInBlock
-
(
NUM_COPIES
-
NUM_CONTRACTED_COPIES
)]);
}
__syncthreads
();
FFT_F_BACKWARD
// Store results.
force
[
forceIndex
]
=
FORCE_SCALE
*
freal
[
indexInBlock
].
x
;
force
[
forceIndex
+
PADDED_NUM_ATOMS
]
=
FORCE_SCALE
*
freal
[
indexInBlock
].
y
;
force
[
forceIndex
+
PADDED_NUM_ATOMS
*
2
]
=
FORCE_SCALE
*
freal
[
indexInBlock
].
z
;
}
}
plugins/rpmd/platforms/cuda/tests/TestCudaRpmd.cpp
View file @
863447a6
...
...
@@ -355,6 +355,82 @@ void testVirtualSites() {
ASSERT_USUALLY_EQUAL_TOL
(
expectedKE
,
meanKE
,
1e-2
);
}
void
testContractions
()
{
const
int
gridSize
=
3
;
const
int
numMolecules
=
gridSize
*
gridSize
*
gridSize
;
const
int
numParticles
=
numMolecules
*
2
;
const
int
numCopies
=
10
;
const
double
spacing
=
2.0
;
const
double
cutoff
=
3.0
;
const
double
boxSize
=
spacing
*
(
gridSize
+
1
);
const
double
temperature
=
300.0
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
boxSize
,
0
,
0
),
Vec3
(
0
,
boxSize
,
0
),
Vec3
(
0
,
0
,
boxSize
));
HarmonicBondForce
*
bonds
=
new
HarmonicBondForce
();
system
.
addForce
(
bonds
);
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
setCutoffDistance
(
cutoff
);
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
nonbonded
->
setForceGroup
(
1
);
nonbonded
->
setReciprocalSpaceForceGroup
(
2
);
system
.
addForce
(
nonbonded
);
// Create a cloud of molecules.
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
vector
<
Vec3
>
positions
(
numParticles
);
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
-
0.2
,
0.2
,
0.2
);
nonbonded
->
addParticle
(
0.2
,
0.2
,
0.2
);
nonbonded
->
addException
(
2
*
i
,
2
*
i
+
1
,
0
,
1
,
0
);
bonds
->
addBond
(
2
*
i
,
2
*
i
+
1
,
1.0
,
10000.0
);
}
map
<
int
,
int
>
contractions
;
contractions
[
1
]
=
3
;
contractions
[
2
]
=
1
;
RPMDIntegrator
integ
(
numCopies
,
temperature
,
10.0
,
0.001
,
contractions
);
Platform
&
platform
=
Platform
::
getPlatformByName
(
"CUDA"
);
Context
context
(
system
,
integ
,
platform
);
for
(
int
copy
=
0
;
copy
<
numCopies
;
copy
++
)
{
for
(
int
i
=
0
;
i
<
gridSize
;
i
++
)
for
(
int
j
=
0
;
j
<
gridSize
;
j
++
)
for
(
int
k
=
0
;
k
<
gridSize
;
k
++
)
{
Vec3
pos
=
Vec3
(
spacing
*
(
i
+
0.02
*
genrand_real2
(
sfmt
)),
spacing
*
(
j
+
0.02
*
genrand_real2
(
sfmt
)),
spacing
*
(
k
+
0.02
*
genrand_real2
(
sfmt
)));
int
index
=
k
+
gridSize
*
(
j
+
gridSize
*
i
);
positions
[
2
*
index
]
=
pos
;
positions
[
2
*
index
+
1
]
=
Vec3
(
pos
[
0
]
+
1.0
,
pos
[
1
],
pos
[
2
]);
}
integ
.
setPositions
(
copy
,
positions
);
}
// Check the temperature.
const
int
numSteps
=
1000
;
integ
.
step
(
1000
);
vector
<
double
>
ke
(
numCopies
,
0.0
);
for
(
int
i
=
0
;
i
<
numSteps
;
i
++
)
{
integ
.
step
(
1
);
vector
<
State
>
state
(
numCopies
);
for
(
int
j
=
0
;
j
<
numCopies
;
j
++
)
state
[
j
]
=
integ
.
getState
(
j
,
State
::
Velocities
,
true
);
for
(
int
j
=
0
;
j
<
numParticles
;
j
++
)
{
for
(
int
k
=
0
;
k
<
numCopies
;
k
++
)
{
Vec3
v
=
state
[
k
].
getVelocities
()[
j
];
ke
[
k
]
+=
0.5
*
system
.
getParticleMass
(
j
)
*
v
.
dot
(
v
);
}
}
}
double
meanKE
=
0.0
;
for
(
int
i
=
0
;
i
<
numCopies
;
i
++
)
meanKE
+=
ke
[
i
];
meanKE
/=
numSteps
*
numCopies
;
double
expectedKE
=
0.5
*
numCopies
*
numParticles
*
3
*
BOLTZ
*
temperature
;
ASSERT_USUALLY_EQUAL_TOL
(
expectedKE
,
meanKE
,
1e-2
);
}
int
main
(
int
argc
,
char
*
argv
[])
{
try
{
registerRPMDCudaKernelFactories
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment