Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cf112a25
Commit
cf112a25
authored
Jul 03, 2012
by
Peter Eastman
Browse files
Fixed bugs when using double precision
parent
dac874af
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
141 additions
and
88 deletions
+141
-88
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+57
-32
platforms/cuda2/src/CudaContext.h
platforms/cuda2/src/CudaContext.h
+5
-0
platforms/cuda2/src/CudaIntegrationUtilities.cpp
platforms/cuda2/src/CudaIntegrationUtilities.cpp
+4
-3
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+2
-2
platforms/cuda2/src/CudaNonbondedUtilities.cpp
platforms/cuda2/src/CudaNonbondedUtilities.cpp
+8
-2
platforms/cuda2/src/kernels/nonbondedExceptions.cu
platforms/cuda2/src/kernels/nonbondedExceptions.cu
+1
-1
platforms/cuda2/src/kernels/periodicTorsionForce.cu
platforms/cuda2/src/kernels/periodicTorsionForce.cu
+1
-1
platforms/cuda2/src/kernels/rbTorsionForce.cu
platforms/cuda2/src/kernels/rbTorsionForce.cu
+2
-2
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
+61
-45
No files found.
platforms/cuda2/src/CudaContext.cpp
View file @
cf112a25
...
@@ -766,25 +766,43 @@ void CudaContext::validateMolecules() {
...
@@ -766,25 +766,43 @@ void CudaContext::validateMolecules() {
// atoms to their original order, rebuild the list of identical molecules, and sort them
// atoms to their original order, rebuild the list of identical molecules, and sort them
// again.
// again.
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
newPosq
(
paddedNumAtoms
);
vector
<
float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
float4
>
newVelm
(
paddedNumAtoms
);
vector
<
int4
>
newCellOffsets
(
numAtoms
);
vector
<
int4
>
newCellOffsets
(
numAtoms
);
posq
->
download
(
oldPosq
);
if
(
useDoublePrecision
)
{
velm
->
download
(
oldVelm
);
vector
<
double4
>
oldPosq
(
paddedNumAtoms
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
vector
<
double4
>
newPosq
(
paddedNumAtoms
);
int
index
=
atomIndex
[
i
];
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
newPosq
[
index
]
=
oldPosq
[
i
];
vector
<
double4
>
newVelm
(
paddedNumAtoms
);
newVelm
[
index
]
=
oldVelm
[
i
];
posq
->
download
(
oldPosq
);
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
}
else
{
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
newPosq
(
paddedNumAtoms
);
vector
<
float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
float4
>
newVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
}
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
atomIndex
[
i
]
=
i
;
atomIndex
[
i
]
=
i
;
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
atomIndexDevice
->
upload
(
atomIndex
);
findMoleculeGroups
();
findMoleculeGroups
();
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
...
@@ -797,16 +815,23 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -797,16 +815,23 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
if
(
moleculesInvalid
)
if
(
moleculesInvalid
)
validateMolecules
();
validateMolecules
();
atomsWereReordered
=
true
;
atomsWereReordered
=
true
;
if
(
useDoublePrecision
)
reorderAtomsImpl
<
double
,
double4
>
(
enforcePeriodic
);
else
reorderAtomsImpl
<
float
,
float4
>
(
enforcePeriodic
);
}
template
<
class
Real
,
class
Real4
>
void
CudaContext
::
reorderAtomsImpl
(
bool
enforcePeriodic
)
{
// Find the range of positions and the number of bins along each axis.
// Find the range of positions and the number of bins along each axis.
vector
<
float
4
>
oldPosq
(
paddedNumAtoms
);
vector
<
Real
4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float
4
>
oldVelm
(
paddedNumAtoms
);
vector
<
Real
4
>
oldVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
velm
->
download
(
oldVelm
);
float
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
float
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
float
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
if
(
nonbonded
->
getUsePeriodic
())
{
if
(
nonbonded
->
getUsePeriodic
())
{
minx
=
miny
=
minz
=
0.0
;
minx
=
miny
=
minz
=
0.0
;
maxx
=
periodicBoxSize
.
x
;
maxx
=
periodicBoxSize
.
x
;
...
@@ -815,7 +840,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -815,7 +840,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
}
}
else
{
else
{
for
(
int
i
=
1
;
i
<
numAtoms
;
i
++
)
{
for
(
int
i
=
1
;
i
<
numAtoms
;
i
++
)
{
const
float
4
&
pos
=
oldPosq
[
i
];
const
Real
4
&
pos
=
oldPosq
[
i
];
minx
=
min
(
minx
,
pos
.
x
);
minx
=
min
(
minx
,
pos
.
x
);
maxx
=
max
(
maxx
,
pos
.
x
);
maxx
=
max
(
maxx
,
pos
.
x
);
miny
=
min
(
miny
,
pos
.
y
);
miny
=
min
(
miny
,
pos
.
y
);
...
@@ -828,8 +853,8 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -828,8 +853,8 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
// Loop over each group of identical molecules and reorder them.
// Loop over each group of identical molecules and reorder them.
vector
<
int
>
originalIndex
(
numAtoms
);
vector
<
int
>
originalIndex
(
numAtoms
);
vector
<
float
4
>
newPosq
(
paddedNumAtoms
);
vector
<
Real
4
>
newPosq
(
paddedNumAtoms
);
vector
<
float
4
>
newVelm
(
paddedNumAtoms
);
vector
<
Real
4
>
newVelm
(
paddedNumAtoms
);
vector
<
int4
>
newCellOffsets
(
numAtoms
);
vector
<
int4
>
newCellOffsets
(
numAtoms
);
for
(
int
group
=
0
;
group
<
(
int
)
moleculeGroups
.
size
();
group
++
)
{
for
(
int
group
=
0
;
group
<
(
int
)
moleculeGroups
.
size
();
group
++
)
{
// Find the center of each molecule.
// Find the center of each molecule.
...
@@ -837,15 +862,15 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -837,15 +862,15 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
MoleculeGroup
&
mol
=
moleculeGroups
[
group
];
MoleculeGroup
&
mol
=
moleculeGroups
[
group
];
int
numMolecules
=
mol
.
offsets
.
size
();
int
numMolecules
=
mol
.
offsets
.
size
();
vector
<
int
>&
atoms
=
mol
.
atoms
;
vector
<
int
>&
atoms
=
mol
.
atoms
;
vector
<
float
4
>
molPos
(
numMolecules
);
vector
<
Real
4
>
molPos
(
numMolecules
);
float
invNumAtoms
=
1.0
f
/
atoms
.
size
();
Real
invNumAtoms
=
(
Real
)
(
1.0
/
atoms
.
size
()
)
;
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
molPos
[
i
].
x
=
0.0
f
;
molPos
[
i
].
x
=
0.0
f
;
molPos
[
i
].
y
=
0.0
f
;
molPos
[
i
].
y
=
0.0
f
;
molPos
[
i
].
z
=
0.0
f
;
molPos
[
i
].
z
=
0.0
f
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
const
float
4
&
pos
=
oldPosq
[
atom
];
const
Real
4
&
pos
=
oldPosq
[
atom
];
molPos
[
i
].
x
+=
pos
.
x
;
molPos
[
i
].
x
+=
pos
.
x
;
molPos
[
i
].
y
+=
pos
.
y
;
molPos
[
i
].
y
+=
pos
.
y
;
molPos
[
i
].
z
+=
pos
.
z
;
molPos
[
i
].
z
+=
pos
.
z
;
...
@@ -861,9 +886,9 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -861,9 +886,9 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSize
.
x
);
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSize
.
x
);
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSize
.
y
);
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSize
.
y
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSize
.
z
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSize
.
z
);
float
dx
=
xcell
*
periodicBoxSize
.
x
;
Real
dx
=
xcell
*
periodicBoxSize
.
x
;
float
dy
=
ycell
*
periodicBoxSize
.
y
;
Real
dy
=
ycell
*
periodicBoxSize
.
y
;
float
dz
=
zcell
*
periodicBoxSize
.
z
;
Real
dz
=
zcell
*
periodicBoxSize
.
z
;
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
y
-=
dy
;
molPos
[
i
].
y
-=
dy
;
...
@@ -871,7 +896,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -871,7 +896,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
if
(
enforcePeriodic
)
{
if
(
enforcePeriodic
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
float
4
p
=
oldPosq
[
atom
];
Real
4
p
=
oldPosq
[
atom
];
p
.
x
-=
dx
;
p
.
x
-=
dx
;
p
.
y
-=
dy
;
p
.
y
-=
dy
;
p
.
z
-=
dz
;
p
.
z
-=
dz
;
...
@@ -888,12 +913,12 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
...
@@ -888,12 +913,12 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
// Select a bin for each molecule, then sort them by bin.
// Select a bin for each molecule, then sort them by bin.
bool
useHilbert
=
(
numMolecules
>
5000
||
atoms
.
size
()
>
8
);
// For small systems, a simple zigzag curve works better than a Hilbert curve.
bool
useHilbert
=
(
numMolecules
>
5000
||
atoms
.
size
()
>
8
);
// For small systems, a simple zigzag curve works better than a Hilbert curve.
float
binWidth
;
Real
binWidth
;
if
(
useHilbert
)
if
(
useHilbert
)
binWidth
=
(
float
)(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
binWidth
=
(
Real
)(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
else
else
binWidth
=
(
float
)(
0.2
*
nonbonded
->
getCutoffDistance
());
binWidth
=
(
Real
)(
0.2
*
nonbonded
->
getCutoffDistance
());
float
invBinWidth
=
1.0
f
/
binWidth
;
Real
invBinWidth
=
(
Real
)
(
1.0
/
binWidth
)
;
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
*
invBinWidth
);
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
*
invBinWidth
);
int
ybins
=
1
+
(
int
)
((
maxy
-
miny
)
*
invBinWidth
);
int
ybins
=
1
+
(
int
)
((
maxy
-
miny
)
*
invBinWidth
);
vector
<
pair
<
int
,
int
>
>
molBins
(
numMolecules
);
vector
<
pair
<
int
,
int
>
>
molBins
(
numMolecules
);
...
...
platforms/cuda2/src/CudaContext.h
View file @
cf112a25
...
@@ -448,6 +448,11 @@ private:
...
@@ -448,6 +448,11 @@ private:
* of molecules and resort the atoms.
* of molecules and resort the atoms.
*/
*/
void
validateMolecules
();
void
validateMolecules
();
/**
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template
<
class
Real
,
class
Real4
>
void
reorderAtomsImpl
(
bool
enforcePeriodic
);
static
bool
hasInitializedCuda
;
static
bool
hasInitializedCuda
;
const
System
&
system
;
const
System
&
system
;
double
time
,
computeCapability
;
double
time
,
computeCapability
;
...
...
platforms/cuda2/src/CudaIntegrationUtilities.cpp
View file @
cf112a25
...
@@ -717,16 +717,17 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
...
@@ -717,16 +717,17 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
ccmaForceKernel
=
ccmaPosForceKernel
;
ccmaForceKernel
=
ccmaPosForceKernel
;
}
}
float
floatTol
=
(
float
)
tol
;
float
floatTol
=
(
float
)
tol
;
void
*
tolPointer
=
(
context
.
getUseDoublePrecision
()
?
(
void
*
)
&
tol
:
(
void
*
)
&
floatTol
);
if
(
settleAtoms
!=
NULL
)
{
if
(
settleAtoms
!=
NULL
)
{
int
numClusters
=
settleAtoms
->
getSize
();
int
numClusters
=
settleAtoms
->
getSize
();
void
*
args
[]
=
{
&
numClusters
,
&
floatTol
,
&
context
.
getPosq
().
getDevicePointer
(),
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
&
posDelta
->
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
posDelta
->
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
settleAtoms
->
getDevicePointer
(),
&
settleParams
->
getDevicePointer
()};
&
settleAtoms
->
getDevicePointer
(),
&
settleParams
->
getDevicePointer
()};
context
.
executeKernel
(
settleKernel
,
args
,
settleAtoms
->
getSize
());
context
.
executeKernel
(
settleKernel
,
args
,
settleAtoms
->
getSize
());
}
}
if
(
shakeAtoms
!=
NULL
)
{
if
(
shakeAtoms
!=
NULL
)
{
int
numClusters
=
shakeAtoms
->
getSize
();
int
numClusters
=
shakeAtoms
->
getSize
();
void
*
args
[]
=
{
&
numClusters
,
&
floatTol
,
&
context
.
getPosq
().
getDevicePointer
(),
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
shakeAtoms
->
getDevicePointer
(),
&
shakeParams
->
getDevicePointer
()};
&
shakeAtoms
->
getDevicePointer
(),
&
shakeParams
->
getDevicePointer
()};
context
.
executeKernel
(
shakeKernel
,
args
,
shakeAtoms
->
getSize
());
context
.
executeKernel
(
shakeKernel
,
args
,
shakeAtoms
->
getSize
());
...
@@ -738,7 +739,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
...
@@ -738,7 +739,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
void
*
forceArgs
[]
=
{
&
ccmaAtoms
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
void
*
forceArgs
[]
=
{
&
ccmaAtoms
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
ccmaReducedMass
->
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaConvergedDeviceMemory
,
&
ccmaReducedMass
->
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaConvergedDeviceMemory
,
&
floatTol
,
&
i
};
tolPointer
,
&
i
};
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConstraintMatrixColumn
->
getDevicePointer
(),
&
ccmaConstraintMatrixValue
->
getDevicePointer
(),
&
ccmaConvergedDeviceMemory
,
&
i
};
&
ccmaConstraintMatrixColumn
->
getDevicePointer
(),
&
ccmaConstraintMatrixValue
->
getDevicePointer
(),
&
ccmaConvergedDeviceMemory
,
&
i
};
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
->
getDevicePointer
(),
&
ccmaAtomConstraints
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
->
getDevicePointer
(),
&
ccmaAtomConstraints
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
...
...
platforms/cuda2/src/CudaKernels.cpp
View file @
cf112a25
...
@@ -1451,7 +1451,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1451,7 +1451,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
pmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
cu
.
addAutoclearBuffer
(
pmeGrid
->
getDevicePointer
(),
pmeGrid
->
getSize
()
*
sizeof
(
float2
)
);
cu
.
addAutoclearBuffer
(
pmeGrid
->
getDevicePointer
(),
pmeGrid
->
getSize
()
*
2
*
elementSize
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
...
@@ -1459,7 +1459,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1459,7 +1459,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
cufftResult
result
=
cufftPlan3d
(
&
fft
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
CUFFT_C2C
);
cufftResult
result
=
cufftPlan3d
(
&
fft
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2Z
:
CUFFT_C2C
);
if
(
result
!=
CUFFT_SUCCESS
)
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
hasInitializedFFT
=
true
;
hasInitializedFFT
=
true
;
...
...
platforms/cuda2/src/CudaNonbondedUtilities.cpp
View file @
cf112a25
...
@@ -234,8 +234,14 @@ void CudaNonbondedUtilities::initialize(const System& system) {
...
@@ -234,8 +234,14 @@ void CudaNonbondedUtilities::initialize(const System& system) {
interactingTiles
=
CudaArray
::
create
<
ushort2
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactingTiles
=
CudaArray
::
create
<
ushort2
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactionFlags
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
maxTiles
,
"interactionFlags"
);
interactionFlags
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
maxTiles
,
"interactionFlags"
);
interactionCount
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
1
,
"interactionCount"
);
interactionCount
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
1
,
"interactionCount"
);
blockCenter
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
if
(
context
.
getUseDoublePrecision
())
{
blockBoundingBox
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
blockCenter
=
CudaArray
::
create
<
double4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
CudaArray
::
create
<
double4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
}
else
{
blockCenter
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
}
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedInteractionCount
,
sizeof
(
unsigned
int
),
0
));
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedInteractionCount
,
sizeof
(
unsigned
int
),
0
));
pinnedInteractionCount
[
0
]
=
0
;
pinnedInteractionCount
[
0
]
=
0
;
interactionCount
->
upload
(
pinnedInteractionCount
);
interactionCount
->
upload
(
pinnedInteractionCount
);
...
...
platforms/cuda2/src/kernels/nonbondedExceptions.cu
View file @
cf112a25
real
4
exceptionParams
=
PARAMS
[
index
];
float
4
exceptionParams
=
PARAMS
[
index
];
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
real
invR
=
RSQRT
(
r2
);
...
...
platforms/cuda2/src/kernels/periodicTorsionForce.cu
View file @
cf112a25
real
4
torsionParams
=
PARAMS
[
index
];
float
4
torsionParams
=
PARAMS
[
index
];
real
deltaAngle
=
torsionParams
.
z
*
theta
-
torsionParams
.
y
;
real
deltaAngle
=
torsionParams
.
z
*
theta
-
torsionParams
.
y
;
energy
+=
torsionParams
.
x
*
(
1.0
f
+
COS
(
deltaAngle
));
energy
+=
torsionParams
.
x
*
(
1.0
f
+
COS
(
deltaAngle
));
real
sinDeltaAngle
=
SIN
(
deltaAngle
);
real
sinDeltaAngle
=
SIN
(
deltaAngle
);
...
...
platforms/cuda2/src/kernels/rbTorsionForce.cu
View file @
cf112a25
real
4
torsionParams1
=
PARAMS1
[
index
];
float
4
torsionParams1
=
PARAMS1
[
index
];
real
2
torsionParams2
=
PARAMS2
[
index
];
float
2
torsionParams2
=
PARAMS2
[
index
];
if
(
theta
<
0
)
if
(
theta
<
0
)
theta
+=
PI
;
theta
+=
PI
;
else
else
...
...
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
View file @
cf112a25
...
@@ -476,30 +476,46 @@ void testBlockInteractions(bool periodic) {
...
@@ -476,30 +476,46 @@ void testBlockInteractions(bool periodic) {
// Verify that the bounds of each block were calculated correctly.
// Verify that the bounds of each block were calculated correctly.
vector
<
float4
>
posq
(
cuContext
.
getPosq
().
getSize
());
vector
<
double4
>
posq
(
cuContext
.
getPosq
().
getSize
());
cuContext
.
getPosq
().
download
(
posq
);
vector
<
double4
>
blockCenters
(
numBlocks
);
vector
<
float4
>
blockCenters
(
numBlocks
);
vector
<
double4
>
blockBoundingBoxes
(
numBlocks
);
vector
<
float4
>
blockBoundingBoxes
(
numBlocks
);
if
(
cuContext
.
getUseDoublePrecision
())
{
nb
.
getBlockCenters
().
download
(
blockCenters
);
cuContext
.
getPosq
().
download
(
posq
);
nb
.
getBlockBoundingBoxes
().
download
(
blockBoundingBoxes
);
nb
.
getBlockCenters
().
download
(
blockCenters
);
nb
.
getBlockBoundingBoxes
().
download
(
blockBoundingBoxes
);
}
else
{
vector
<
float4
>
posqf
(
cuContext
.
getPosq
().
getSize
());
vector
<
float4
>
blockCentersf
(
numBlocks
);
vector
<
float4
>
blockBoundingBoxesf
(
numBlocks
);
cuContext
.
getPosq
().
download
(
posqf
);
nb
.
getBlockCenters
().
download
(
blockCentersf
);
nb
.
getBlockBoundingBoxes
().
download
(
blockBoundingBoxesf
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_double4
(
posqf
[
i
].
x
,
posqf
[
i
].
y
,
posqf
[
i
].
z
,
posqf
[
i
].
w
);
for
(
int
i
=
0
;
i
<
numBlocks
;
i
++
)
{
blockCenters
[
i
]
=
make_double4
(
blockCentersf
[
i
].
x
,
blockCentersf
[
i
].
y
,
blockCentersf
[
i
].
z
,
blockCentersf
[
i
].
w
);
blockBoundingBoxes
[
i
]
=
make_double4
(
blockBoundingBoxesf
[
i
].
x
,
blockBoundingBoxesf
[
i
].
y
,
blockBoundingBoxesf
[
i
].
z
,
blockBoundingBoxesf
[
i
].
w
);
}
}
for
(
int
i
=
0
;
i
<
numBlocks
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numBlocks
;
i
++
)
{
float
4
gridSize
=
blockBoundingBoxes
[
i
];
double
4
gridSize
=
blockBoundingBoxes
[
i
];
float
4
center
=
blockCenters
[
i
];
double
4
center
=
blockCenters
[
i
];
if
(
periodic
)
{
if
(
periodic
)
{
ASSERT
(
gridSize
.
x
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
x
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
y
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
y
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
z
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
z
<
0.5
*
boxSize
);
}
}
float
minx
=
0.0
,
maxx
=
0.0
,
miny
=
0.0
,
maxy
=
0.0
,
minz
=
0.0
,
maxz
=
0.0
,
radius
=
0.0
;
double
minx
=
0.0
,
maxx
=
0.0
,
miny
=
0.0
,
maxy
=
0.0
,
minz
=
0.0
,
maxz
=
0.0
,
radius
=
0.0
;
for
(
int
j
=
0
;
j
<
blockSize
;
j
++
)
{
for
(
int
j
=
0
;
j
<
blockSize
;
j
++
)
{
float
4
pos
=
posq
[
i
*
blockSize
+
j
];
double
4
pos
=
posq
[
i
*
blockSize
+
j
];
float
dx
=
pos
.
x
-
center
.
x
;
double
dx
=
pos
.
x
-
center
.
x
;
float
dy
=
pos
.
y
-
center
.
y
;
double
dy
=
pos
.
y
-
center
.
y
;
float
dz
=
pos
.
z
-
center
.
z
;
double
dz
=
pos
.
z
-
center
.
z
;
if
(
periodic
)
{
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
}
ASSERT
(
abs
(
dx
)
<
gridSize
.
x
+
TOL
);
ASSERT
(
abs
(
dx
)
<
gridSize
.
x
+
TOL
);
ASSERT
(
abs
(
dy
)
<
gridSize
.
y
+
TOL
);
ASSERT
(
abs
(
dy
)
<
gridSize
.
y
+
TOL
);
...
@@ -538,21 +554,21 @@ void testBlockInteractions(bool periodic) {
...
@@ -538,21 +554,21 @@ void testBlockInteractions(bool periodic) {
// Make sure this tile really should have been flagged based on bounding volumes.
// Make sure this tile really should have been flagged based on bounding volumes.
float
4
gridSize1
=
blockBoundingBoxes
[
x
];
double
4
gridSize1
=
blockBoundingBoxes
[
x
];
float
4
gridSize2
=
blockBoundingBoxes
[
y
];
double
4
gridSize2
=
blockBoundingBoxes
[
y
];
float
4
center1
=
blockCenters
[
x
];
double
4
center1
=
blockCenters
[
x
];
float
4
center2
=
blockCenters
[
y
];
double
4
center2
=
blockCenters
[
y
];
float
dx
=
center1
.
x
-
center2
.
x
;
double
dx
=
center1
.
x
-
center2
.
x
;
float
dy
=
center1
.
y
-
center2
.
y
;
double
dy
=
center1
.
y
-
center2
.
y
;
float
dz
=
center1
.
z
-
center2
.
z
;
double
dz
=
center1
.
z
-
center2
.
z
;
if
(
periodic
)
{
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
}
dx
=
max
(
0.0
f
,
abs
(
dx
)
-
gridSize1
.
x
-
gridSize2
.
x
);
dx
=
max
(
0.0
,
abs
(
dx
)
-
gridSize1
.
x
-
gridSize2
.
x
);
dy
=
max
(
0.0
f
,
abs
(
dy
)
-
gridSize1
.
y
-
gridSize2
.
y
);
dy
=
max
(
0.0
,
abs
(
dy
)
-
gridSize1
.
y
-
gridSize2
.
y
);
dz
=
max
(
0.0
f
,
abs
(
dz
)
-
gridSize1
.
z
-
gridSize2
.
z
);
dz
=
max
(
0.0
,
abs
(
dz
)
-
gridSize1
.
z
-
gridSize2
.
z
);
ASSERT
(
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
)
<
cutoff
+
TOL
);
ASSERT
(
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
)
<
cutoff
+
TOL
);
// Check the interaction flags.
// Check the interaction flags.
...
@@ -560,16 +576,16 @@ void testBlockInteractions(bool periodic) {
...
@@ -560,16 +576,16 @@ void testBlockInteractions(bool periodic) {
unsigned
int
flags
=
interactionFlags
[
i
];
unsigned
int
flags
=
interactionFlags
[
i
];
for
(
int
atom2
=
0
;
atom2
<
32
;
atom2
++
)
{
for
(
int
atom2
=
0
;
atom2
<
32
;
atom2
++
)
{
if
((
flags
&
1
)
==
0
)
{
if
((
flags
&
1
)
==
0
)
{
float
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
double
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
float
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
double
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
float
dx
=
pos2
.
x
-
pos1
.
x
;
double
dx
=
pos2
.
x
-
pos1
.
x
;
float
dy
=
pos2
.
y
-
pos1
.
y
;
double
dy
=
pos2
.
y
-
pos1
.
y
;
float
dz
=
pos2
.
z
-
pos1
.
z
;
double
dz
=
pos2
.
z
-
pos1
.
z
;
if
(
periodic
)
{
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
}
ASSERT
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
);
ASSERT
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
);
}
}
...
@@ -585,16 +601,16 @@ void testBlockInteractions(bool periodic) {
...
@@ -585,16 +601,16 @@ void testBlockInteractions(bool periodic) {
unsigned
int
y
=
(
unsigned
int
)
std
::
floor
(
numBlocks
+
0.5
-
std
::
sqrt
((
numBlocks
+
0.5
)
*
(
numBlocks
+
0.5
)
-
2
*
i
));
unsigned
int
y
=
(
unsigned
int
)
std
::
floor
(
numBlocks
+
0.5
-
std
::
sqrt
((
numBlocks
+
0.5
)
*
(
numBlocks
+
0.5
)
-
2
*
i
));
unsigned
int
x
=
(
i
-
y
*
numBlocks
+
y
*
(
y
+
1
)
/
2
);
unsigned
int
x
=
(
i
-
y
*
numBlocks
+
y
*
(
y
+
1
)
/
2
);
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
float
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
double
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
for
(
int
atom2
=
0
;
atom2
<
blockSize
;
++
atom2
)
{
for
(
int
atom2
=
0
;
atom2
<
blockSize
;
++
atom2
)
{
float
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
double
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
float
dx
=
pos1
.
x
-
pos2
.
x
;
double
dx
=
pos1
.
x
-
pos2
.
x
;
float
dy
=
pos1
.
y
-
pos2
.
y
;
double
dy
=
pos1
.
y
-
pos2
.
y
;
float
dz
=
pos1
.
z
-
pos2
.
z
;
double
dz
=
pos1
.
z
-
pos2
.
z
;
if
(
periodic
)
{
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
}
ASSERT
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
);
ASSERT
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
);
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment