Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
cf112a25
"platforms/vscode:/vscode.git/clone" did not exist on "389563ef15ce705d8366ebbe705b8e65a8e69fd8"
Commit
cf112a25
authored
Jul 03, 2012
by
Peter Eastman
Browse files
Fixed bugs when using double precision
parent
dac874af
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
141 additions
and
88 deletions
+141
-88
platforms/cuda2/src/CudaContext.cpp
platforms/cuda2/src/CudaContext.cpp
+57
-32
platforms/cuda2/src/CudaContext.h
platforms/cuda2/src/CudaContext.h
+5
-0
platforms/cuda2/src/CudaIntegrationUtilities.cpp
platforms/cuda2/src/CudaIntegrationUtilities.cpp
+4
-3
platforms/cuda2/src/CudaKernels.cpp
platforms/cuda2/src/CudaKernels.cpp
+2
-2
platforms/cuda2/src/CudaNonbondedUtilities.cpp
platforms/cuda2/src/CudaNonbondedUtilities.cpp
+8
-2
platforms/cuda2/src/kernels/nonbondedExceptions.cu
platforms/cuda2/src/kernels/nonbondedExceptions.cu
+1
-1
platforms/cuda2/src/kernels/periodicTorsionForce.cu
platforms/cuda2/src/kernels/periodicTorsionForce.cu
+1
-1
platforms/cuda2/src/kernels/rbTorsionForce.cu
platforms/cuda2/src/kernels/rbTorsionForce.cu
+2
-2
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
+61
-45
No files found.
platforms/cuda2/src/CudaContext.cpp
View file @
cf112a25
...
...
@@ -766,25 +766,43 @@ void CudaContext::validateMolecules() {
// atoms to their original order, rebuild the list of identical molecules, and sort them
// again.
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
newPosq
(
paddedNumAtoms
);
vector
<
float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
float4
>
newVelm
(
paddedNumAtoms
);
vector
<
int4
>
newCellOffsets
(
numAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
if
(
useDoublePrecision
)
{
vector
<
double4
>
oldPosq
(
paddedNumAtoms
);
vector
<
double4
>
newPosq
(
paddedNumAtoms
);
vector
<
double4
>
oldVelm
(
paddedNumAtoms
);
vector
<
double4
>
newVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
}
else
{
vector
<
float4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float4
>
newPosq
(
paddedNumAtoms
);
vector
<
float4
>
oldVelm
(
paddedNumAtoms
);
vector
<
float4
>
newVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
int
index
=
atomIndex
[
i
];
newPosq
[
index
]
=
oldPosq
[
i
];
newVelm
[
index
]
=
oldVelm
[
i
];
newCellOffsets
[
index
]
=
posCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
}
for
(
int
i
=
0
;
i
<
numAtoms
;
i
++
)
{
atomIndex
[
i
]
=
i
;
posCellOffsets
[
i
]
=
newCellOffsets
[
i
];
}
posq
->
upload
(
newPosq
);
velm
->
upload
(
newVelm
);
atomIndexDevice
->
upload
(
atomIndex
);
findMoleculeGroups
();
for
(
int
i
=
0
;
i
<
(
int
)
reorderListeners
.
size
();
i
++
)
...
...
@@ -797,16 +815,23 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
if
(
moleculesInvalid
)
validateMolecules
();
atomsWereReordered
=
true
;
if
(
useDoublePrecision
)
reorderAtomsImpl
<
double
,
double4
>
(
enforcePeriodic
);
else
reorderAtomsImpl
<
float
,
float4
>
(
enforcePeriodic
);
}
template
<
class
Real
,
class
Real4
>
void
CudaContext
::
reorderAtomsImpl
(
bool
enforcePeriodic
)
{
// Find the range of positions and the number of bins along each axis.
vector
<
float
4
>
oldPosq
(
paddedNumAtoms
);
vector
<
float
4
>
oldVelm
(
paddedNumAtoms
);
vector
<
Real
4
>
oldPosq
(
paddedNumAtoms
);
vector
<
Real
4
>
oldVelm
(
paddedNumAtoms
);
posq
->
download
(
oldPosq
);
velm
->
download
(
oldVelm
);
float
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
float
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
float
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
Real
minx
=
oldPosq
[
0
].
x
,
maxx
=
oldPosq
[
0
].
x
;
Real
miny
=
oldPosq
[
0
].
y
,
maxy
=
oldPosq
[
0
].
y
;
Real
minz
=
oldPosq
[
0
].
z
,
maxz
=
oldPosq
[
0
].
z
;
if
(
nonbonded
->
getUsePeriodic
())
{
minx
=
miny
=
minz
=
0.0
;
maxx
=
periodicBoxSize
.
x
;
...
...
@@ -815,7 +840,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
}
else
{
for
(
int
i
=
1
;
i
<
numAtoms
;
i
++
)
{
const
float
4
&
pos
=
oldPosq
[
i
];
const
Real
4
&
pos
=
oldPosq
[
i
];
minx
=
min
(
minx
,
pos
.
x
);
maxx
=
max
(
maxx
,
pos
.
x
);
miny
=
min
(
miny
,
pos
.
y
);
...
...
@@ -828,8 +853,8 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
// Loop over each group of identical molecules and reorder them.
vector
<
int
>
originalIndex
(
numAtoms
);
vector
<
float
4
>
newPosq
(
paddedNumAtoms
);
vector
<
float
4
>
newVelm
(
paddedNumAtoms
);
vector
<
Real
4
>
newPosq
(
paddedNumAtoms
);
vector
<
Real
4
>
newVelm
(
paddedNumAtoms
);
vector
<
int4
>
newCellOffsets
(
numAtoms
);
for
(
int
group
=
0
;
group
<
(
int
)
moleculeGroups
.
size
();
group
++
)
{
// Find the center of each molecule.
...
...
@@ -837,15 +862,15 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
MoleculeGroup
&
mol
=
moleculeGroups
[
group
];
int
numMolecules
=
mol
.
offsets
.
size
();
vector
<
int
>&
atoms
=
mol
.
atoms
;
vector
<
float
4
>
molPos
(
numMolecules
);
float
invNumAtoms
=
1.0
f
/
atoms
.
size
();
vector
<
Real
4
>
molPos
(
numMolecules
);
Real
invNumAtoms
=
(
Real
)
(
1.0
/
atoms
.
size
()
)
;
for
(
int
i
=
0
;
i
<
numMolecules
;
i
++
)
{
molPos
[
i
].
x
=
0.0
f
;
molPos
[
i
].
y
=
0.0
f
;
molPos
[
i
].
z
=
0.0
f
;
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
const
float
4
&
pos
=
oldPosq
[
atom
];
const
Real
4
&
pos
=
oldPosq
[
atom
];
molPos
[
i
].
x
+=
pos
.
x
;
molPos
[
i
].
y
+=
pos
.
y
;
molPos
[
i
].
z
+=
pos
.
z
;
...
...
@@ -861,9 +886,9 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
int
xcell
=
(
int
)
floor
(
molPos
[
i
].
x
*
invPeriodicBoxSize
.
x
);
int
ycell
=
(
int
)
floor
(
molPos
[
i
].
y
*
invPeriodicBoxSize
.
y
);
int
zcell
=
(
int
)
floor
(
molPos
[
i
].
z
*
invPeriodicBoxSize
.
z
);
float
dx
=
xcell
*
periodicBoxSize
.
x
;
float
dy
=
ycell
*
periodicBoxSize
.
y
;
float
dz
=
zcell
*
periodicBoxSize
.
z
;
Real
dx
=
xcell
*
periodicBoxSize
.
x
;
Real
dy
=
ycell
*
periodicBoxSize
.
y
;
Real
dz
=
zcell
*
periodicBoxSize
.
z
;
if
(
dx
!=
0.0
f
||
dy
!=
0.0
f
||
dz
!=
0.0
f
)
{
molPos
[
i
].
x
-=
dx
;
molPos
[
i
].
y
-=
dy
;
...
...
@@ -871,7 +896,7 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
if
(
enforcePeriodic
)
{
for
(
int
j
=
0
;
j
<
(
int
)
atoms
.
size
();
j
++
)
{
int
atom
=
atoms
[
j
]
+
mol
.
offsets
[
i
];
float
4
p
=
oldPosq
[
atom
];
Real
4
p
=
oldPosq
[
atom
];
p
.
x
-=
dx
;
p
.
y
-=
dy
;
p
.
z
-=
dz
;
...
...
@@ -888,12 +913,12 @@ void CudaContext::reorderAtoms(bool enforcePeriodic) {
// Select a bin for each molecule, then sort them by bin.
bool
useHilbert
=
(
numMolecules
>
5000
||
atoms
.
size
()
>
8
);
// For small systems, a simple zigzag curve works better than a Hilbert curve.
float
binWidth
;
Real
binWidth
;
if
(
useHilbert
)
binWidth
=
(
float
)(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
binWidth
=
(
Real
)(
max
(
max
(
maxx
-
minx
,
maxy
-
miny
),
maxz
-
minz
)
/
255.0
);
else
binWidth
=
(
float
)(
0.2
*
nonbonded
->
getCutoffDistance
());
float
invBinWidth
=
1.0
f
/
binWidth
;
binWidth
=
(
Real
)(
0.2
*
nonbonded
->
getCutoffDistance
());
Real
invBinWidth
=
(
Real
)
(
1.0
/
binWidth
)
;
int
xbins
=
1
+
(
int
)
((
maxx
-
minx
)
*
invBinWidth
);
int
ybins
=
1
+
(
int
)
((
maxy
-
miny
)
*
invBinWidth
);
vector
<
pair
<
int
,
int
>
>
molBins
(
numMolecules
);
...
...
platforms/cuda2/src/CudaContext.h
View file @
cf112a25
...
...
@@ -448,6 +448,11 @@ private:
* of molecules and resort the atoms.
*/
void
validateMolecules
();
/**
* This is the internal implementation of reorderAtoms(), templatized by the numerical precision in use.
*/
template
<
class
Real
,
class
Real4
>
void
reorderAtomsImpl
(
bool
enforcePeriodic
);
static
bool
hasInitializedCuda
;
const
System
&
system
;
double
time
,
computeCapability
;
...
...
platforms/cuda2/src/CudaIntegrationUtilities.cpp
View file @
cf112a25
...
...
@@ -717,16 +717,17 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
ccmaForceKernel
=
ccmaPosForceKernel
;
}
float
floatTol
=
(
float
)
tol
;
void
*
tolPointer
=
(
context
.
getUseDoublePrecision
()
?
(
void
*
)
&
tol
:
(
void
*
)
&
floatTol
);
if
(
settleAtoms
!=
NULL
)
{
int
numClusters
=
settleAtoms
->
getSize
();
void
*
args
[]
=
{
&
numClusters
,
&
floatTol
,
&
context
.
getPosq
().
getDevicePointer
(),
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
&
posDelta
->
getDevicePointer
(),
&
context
.
getVelm
().
getDevicePointer
(),
&
settleAtoms
->
getDevicePointer
(),
&
settleParams
->
getDevicePointer
()};
context
.
executeKernel
(
settleKernel
,
args
,
settleAtoms
->
getSize
());
}
if
(
shakeAtoms
!=
NULL
)
{
int
numClusters
=
shakeAtoms
->
getSize
();
void
*
args
[]
=
{
&
numClusters
,
&
floatTol
,
&
context
.
getPosq
().
getDevicePointer
(),
void
*
args
[]
=
{
&
numClusters
,
tolPointer
,
&
context
.
getPosq
().
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
shakeAtoms
->
getDevicePointer
(),
&
shakeParams
->
getDevicePointer
()};
context
.
executeKernel
(
shakeKernel
,
args
,
shakeAtoms
->
getSize
());
...
...
@@ -738,7 +739,7 @@ void CudaIntegrationUtilities::applyConstraints(bool constrainVelocities, double
void
*
forceArgs
[]
=
{
&
ccmaAtoms
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
constrainVelocities
?
&
context
.
getVelm
().
getDevicePointer
()
:
&
posDelta
->
getDevicePointer
(),
&
ccmaReducedMass
->
getDevicePointer
(),
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaConvergedDeviceMemory
,
&
floatTol
,
&
i
};
tolPointer
,
&
i
};
void
*
multiplyArgs
[]
=
{
&
ccmaDelta1
->
getDevicePointer
(),
&
ccmaDelta2
->
getDevicePointer
(),
&
ccmaConstraintMatrixColumn
->
getDevicePointer
(),
&
ccmaConstraintMatrixValue
->
getDevicePointer
(),
&
ccmaConvergedDeviceMemory
,
&
i
};
void
*
updateArgs
[]
=
{
&
ccmaNumAtomConstraints
->
getDevicePointer
(),
&
ccmaAtomConstraints
->
getDevicePointer
(),
&
ccmaDistance
->
getDevicePointer
(),
...
...
platforms/cuda2/src/CudaKernels.cpp
View file @
cf112a25
...
...
@@ -1451,7 +1451,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
int
elementSize
=
(
cu
.
getUseDoublePrecision
()
?
sizeof
(
double
)
:
sizeof
(
float
));
pmeGrid
=
new
CudaArray
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
,
2
*
elementSize
,
"pmeGrid"
);
cu
.
addAutoclearBuffer
(
pmeGrid
->
getDevicePointer
(),
pmeGrid
->
getSize
()
*
sizeof
(
float2
)
);
cu
.
addAutoclearBuffer
(
pmeGrid
->
getDevicePointer
(),
pmeGrid
->
getSize
()
*
2
*
elementSize
);
pmeBsplineModuliX
=
new
CudaArray
(
cu
,
gridSizeX
,
elementSize
,
"pmeBsplineModuliX"
);
pmeBsplineModuliY
=
new
CudaArray
(
cu
,
gridSizeY
,
elementSize
,
"pmeBsplineModuliY"
);
pmeBsplineModuliZ
=
new
CudaArray
(
cu
,
gridSizeZ
,
elementSize
,
"pmeBsplineModuliZ"
);
...
...
@@ -1459,7 +1459,7 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeAtomRange
=
CudaArray
::
create
<
int
>
(
cu
,
gridSizeX
*
gridSizeY
*
gridSizeZ
+
1
,
"pmeAtomRange"
);
pmeAtomGridIndex
=
CudaArray
::
create
<
int2
>
(
cu
,
numParticles
,
"pmeAtomGridIndex"
);
sort
=
new
CudaSort
(
cu
,
new
SortTrait
(),
cu
.
getNumAtoms
());
cufftResult
result
=
cufftPlan3d
(
&
fft
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
CUFFT_C2C
);
cufftResult
result
=
cufftPlan3d
(
&
fft
,
gridSizeX
,
gridSizeY
,
gridSizeZ
,
cu
.
getUseDoublePrecision
()
?
CUFFT_Z2Z
:
CUFFT_C2C
);
if
(
result
!=
CUFFT_SUCCESS
)
throw
OpenMMException
(
"Error initializing FFT: "
+
cu
.
intToString
(
result
));
hasInitializedFFT
=
true
;
...
...
platforms/cuda2/src/CudaNonbondedUtilities.cpp
View file @
cf112a25
...
...
@@ -234,8 +234,14 @@ void CudaNonbondedUtilities::initialize(const System& system) {
interactingTiles
=
CudaArray
::
create
<
ushort2
>
(
context
,
maxTiles
,
"interactingTiles"
);
interactionFlags
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
maxTiles
,
"interactionFlags"
);
interactionCount
=
CudaArray
::
create
<
unsigned
int
>
(
context
,
1
,
"interactionCount"
);
blockCenter
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
if
(
context
.
getUseDoublePrecision
())
{
blockCenter
=
CudaArray
::
create
<
double4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
CudaArray
::
create
<
double4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
}
else
{
blockCenter
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockCenter"
);
blockBoundingBox
=
CudaArray
::
create
<
float4
>
(
context
,
numAtomBlocks
,
"blockBoundingBox"
);
}
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedInteractionCount
,
sizeof
(
unsigned
int
),
0
));
pinnedInteractionCount
[
0
]
=
0
;
interactionCount
->
upload
(
pinnedInteractionCount
);
...
...
platforms/cuda2/src/kernels/nonbondedExceptions.cu
View file @
cf112a25
real
4
exceptionParams
=
PARAMS
[
index
];
float
4
exceptionParams
=
PARAMS
[
index
];
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real
r2
=
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
;
real
invR
=
RSQRT
(
r2
);
...
...
platforms/cuda2/src/kernels/periodicTorsionForce.cu
View file @
cf112a25
real
4
torsionParams
=
PARAMS
[
index
];
float
4
torsionParams
=
PARAMS
[
index
];
real
deltaAngle
=
torsionParams
.
z
*
theta
-
torsionParams
.
y
;
energy
+=
torsionParams
.
x
*
(
1.0
f
+
COS
(
deltaAngle
));
real
sinDeltaAngle
=
SIN
(
deltaAngle
);
...
...
platforms/cuda2/src/kernels/rbTorsionForce.cu
View file @
cf112a25
real
4
torsionParams1
=
PARAMS1
[
index
];
real
2
torsionParams2
=
PARAMS2
[
index
];
float
4
torsionParams1
=
PARAMS1
[
index
];
float
2
torsionParams2
=
PARAMS2
[
index
];
if
(
theta
<
0
)
theta
+=
PI
;
else
...
...
platforms/cuda2/tests/TestCudaNonbondedForce.cpp
View file @
cf112a25
...
...
@@ -476,30 +476,46 @@ void testBlockInteractions(bool periodic) {
// Verify that the bounds of each block were calculated correctly.
vector
<
float4
>
posq
(
cuContext
.
getPosq
().
getSize
());
cuContext
.
getPosq
().
download
(
posq
);
vector
<
float4
>
blockCenters
(
numBlocks
);
vector
<
float4
>
blockBoundingBoxes
(
numBlocks
);
nb
.
getBlockCenters
().
download
(
blockCenters
);
nb
.
getBlockBoundingBoxes
().
download
(
blockBoundingBoxes
);
vector
<
double4
>
posq
(
cuContext
.
getPosq
().
getSize
());
vector
<
double4
>
blockCenters
(
numBlocks
);
vector
<
double4
>
blockBoundingBoxes
(
numBlocks
);
if
(
cuContext
.
getUseDoublePrecision
())
{
cuContext
.
getPosq
().
download
(
posq
);
nb
.
getBlockCenters
().
download
(
blockCenters
);
nb
.
getBlockBoundingBoxes
().
download
(
blockBoundingBoxes
);
}
else
{
vector
<
float4
>
posqf
(
cuContext
.
getPosq
().
getSize
());
vector
<
float4
>
blockCentersf
(
numBlocks
);
vector
<
float4
>
blockBoundingBoxesf
(
numBlocks
);
cuContext
.
getPosq
().
download
(
posqf
);
nb
.
getBlockCenters
().
download
(
blockCentersf
);
nb
.
getBlockBoundingBoxes
().
download
(
blockBoundingBoxesf
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
posq
[
i
]
=
make_double4
(
posqf
[
i
].
x
,
posqf
[
i
].
y
,
posqf
[
i
].
z
,
posqf
[
i
].
w
);
for
(
int
i
=
0
;
i
<
numBlocks
;
i
++
)
{
blockCenters
[
i
]
=
make_double4
(
blockCentersf
[
i
].
x
,
blockCentersf
[
i
].
y
,
blockCentersf
[
i
].
z
,
blockCentersf
[
i
].
w
);
blockBoundingBoxes
[
i
]
=
make_double4
(
blockBoundingBoxesf
[
i
].
x
,
blockBoundingBoxesf
[
i
].
y
,
blockBoundingBoxesf
[
i
].
z
,
blockBoundingBoxesf
[
i
].
w
);
}
}
for
(
int
i
=
0
;
i
<
numBlocks
;
i
++
)
{
float
4
gridSize
=
blockBoundingBoxes
[
i
];
float
4
center
=
blockCenters
[
i
];
double
4
gridSize
=
blockBoundingBoxes
[
i
];
double
4
center
=
blockCenters
[
i
];
if
(
periodic
)
{
ASSERT
(
gridSize
.
x
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
y
<
0.5
*
boxSize
);
ASSERT
(
gridSize
.
z
<
0.5
*
boxSize
);
}
float
minx
=
0.0
,
maxx
=
0.0
,
miny
=
0.0
,
maxy
=
0.0
,
minz
=
0.0
,
maxz
=
0.0
,
radius
=
0.0
;
double
minx
=
0.0
,
maxx
=
0.0
,
miny
=
0.0
,
maxy
=
0.0
,
minz
=
0.0
,
maxz
=
0.0
,
radius
=
0.0
;
for
(
int
j
=
0
;
j
<
blockSize
;
j
++
)
{
float
4
pos
=
posq
[
i
*
blockSize
+
j
];
float
dx
=
pos
.
x
-
center
.
x
;
float
dy
=
pos
.
y
-
center
.
y
;
float
dz
=
pos
.
z
-
center
.
z
;
double
4
pos
=
posq
[
i
*
blockSize
+
j
];
double
dx
=
pos
.
x
-
center
.
x
;
double
dy
=
pos
.
y
-
center
.
y
;
double
dz
=
pos
.
z
-
center
.
z
;
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
ASSERT
(
abs
(
dx
)
<
gridSize
.
x
+
TOL
);
ASSERT
(
abs
(
dy
)
<
gridSize
.
y
+
TOL
);
...
...
@@ -538,21 +554,21 @@ void testBlockInteractions(bool periodic) {
// Make sure this tile really should have been flagged based on bounding volumes.
float
4
gridSize1
=
blockBoundingBoxes
[
x
];
float
4
gridSize2
=
blockBoundingBoxes
[
y
];
float
4
center1
=
blockCenters
[
x
];
float
4
center2
=
blockCenters
[
y
];
float
dx
=
center1
.
x
-
center2
.
x
;
float
dy
=
center1
.
y
-
center2
.
y
;
float
dz
=
center1
.
z
-
center2
.
z
;
double
4
gridSize1
=
blockBoundingBoxes
[
x
];
double
4
gridSize2
=
blockBoundingBoxes
[
y
];
double
4
center1
=
blockCenters
[
x
];
double
4
center2
=
blockCenters
[
y
];
double
dx
=
center1
.
x
-
center2
.
x
;
double
dy
=
center1
.
y
-
center2
.
y
;
double
dz
=
center1
.
z
-
center2
.
z
;
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
dx
=
max
(
0.0
f
,
abs
(
dx
)
-
gridSize1
.
x
-
gridSize2
.
x
);
dy
=
max
(
0.0
f
,
abs
(
dy
)
-
gridSize1
.
y
-
gridSize2
.
y
);
dz
=
max
(
0.0
f
,
abs
(
dz
)
-
gridSize1
.
z
-
gridSize2
.
z
);
dx
=
max
(
0.0
,
abs
(
dx
)
-
gridSize1
.
x
-
gridSize2
.
x
);
dy
=
max
(
0.0
,
abs
(
dy
)
-
gridSize1
.
y
-
gridSize2
.
y
);
dz
=
max
(
0.0
,
abs
(
dz
)
-
gridSize1
.
z
-
gridSize2
.
z
);
ASSERT
(
sqrt
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
)
<
cutoff
+
TOL
);
// Check the interaction flags.
...
...
@@ -560,16 +576,16 @@ void testBlockInteractions(bool periodic) {
unsigned
int
flags
=
interactionFlags
[
i
];
for
(
int
atom2
=
0
;
atom2
<
32
;
atom2
++
)
{
if
((
flags
&
1
)
==
0
)
{
float
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
double
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
float
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
float
dx
=
pos2
.
x
-
pos1
.
x
;
float
dy
=
pos2
.
y
-
pos1
.
y
;
float
dz
=
pos2
.
z
-
pos1
.
z
;
double
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
double
dx
=
pos2
.
x
-
pos1
.
x
;
double
dy
=
pos2
.
y
-
pos1
.
y
;
double
dz
=
pos2
.
z
-
pos1
.
z
;
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
ASSERT
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
);
}
...
...
@@ -585,16 +601,16 @@ void testBlockInteractions(bool periodic) {
unsigned
int
y
=
(
unsigned
int
)
std
::
floor
(
numBlocks
+
0.5
-
std
::
sqrt
((
numBlocks
+
0.5
)
*
(
numBlocks
+
0.5
)
-
2
*
i
));
unsigned
int
x
=
(
i
-
y
*
numBlocks
+
y
*
(
y
+
1
)
/
2
);
for
(
int
atom1
=
0
;
atom1
<
blockSize
;
++
atom1
)
{
float
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
double
4
pos1
=
posq
[
x
*
blockSize
+
atom1
];
for
(
int
atom2
=
0
;
atom2
<
blockSize
;
++
atom2
)
{
float
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
float
dx
=
pos1
.
x
-
pos2
.
x
;
float
dy
=
pos1
.
y
-
pos2
.
y
;
float
dz
=
pos1
.
z
-
pos2
.
z
;
double
4
pos2
=
posq
[
y
*
blockSize
+
atom2
];
double
dx
=
pos1
.
x
-
pos2
.
x
;
double
dy
=
pos1
.
y
-
pos2
.
y
;
double
dz
=
pos1
.
z
-
pos2
.
z
;
if
(
periodic
)
{
dx
-=
(
float
)(
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
)
;
dy
-=
(
float
)(
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
)
;
dz
-=
(
float
)(
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
)
;
dx
-=
floor
(
0.5
+
dx
/
boxSize
)
*
boxSize
;
dy
-=
floor
(
0.5
+
dy
/
boxSize
)
*
boxSize
;
dz
-=
floor
(
0.5
+
dz
/
boxSize
)
*
boxSize
;
}
ASSERT
(
dx
*
dx
+
dy
*
dy
+
dz
*
dz
>
cutoff
*
cutoff
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment