Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
73183c61
"vscode:/vscode.git/clone" did not exist on "7e544edd83b5c2d8c472739ddf6703060596ae02"
Commit
73183c61
authored
May 31, 2016
by
ChayaSt
Browse files
resolved conflict
parents
0e218233
32e08b87
Changes
267
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
328 additions
and
201 deletions
+328
-201
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+3
-4
platforms/cuda/src/CudaExpressionUtilities.cpp
platforms/cuda/src/CudaExpressionUtilities.cpp
+8
-8
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+71
-24
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+20
-15
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+23
-3
platforms/cuda/src/kernels/angleForce.cu
platforms/cuda/src/kernels/angleForce.cu
+4
-0
platforms/cuda/src/kernels/bondForce.cu
platforms/cuda/src/kernels/bondForce.cu
+3
-0
platforms/cuda/src/kernels/cmapTorsionForce.cu
platforms/cuda/src/kernels/cmapTorsionForce.cu
+10
-0
platforms/cuda/src/kernels/customCentroidBond.cu
platforms/cuda/src/kernels/customCentroidBond.cu
+5
-2
platforms/cuda/src/kernels/customCompoundBond.cu
platforms/cuda/src/kernels/customCompoundBond.cu
+4
-1
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+21
-23
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+26
-28
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+47
-51
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+29
-28
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+10
-10
platforms/cuda/src/kernels/torsionForce.cu
platforms/cuda/src/kernels/torsionForce.cu
+5
-0
platforms/cuda/tests/CudaTests.h
platforms/cuda/tests/CudaTests.h
+2
-2
platforms/cuda/tests/TestCudaFFT3D.cpp
platforms/cuda/tests/TestCudaFFT3D.cpp
+1
-1
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+35
-0
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+1
-1
No files found.
platforms/cuda/src/CudaContext.cpp
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -106,7 +106,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -106,7 +106,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
useMixedPrecision
=
false
;
useMixedPrecision
=
false
;
}
}
else
else
throw
OpenMMException
(
"Illegal value for
Cuda
Precision: "
+
precision
);
throw
OpenMMException
(
"Illegal value for Precision: "
+
precision
);
char
*
cacheVariable
=
getenv
(
"OPENMM_CACHE_DIR"
);
char
*
cacheVariable
=
getenv
(
"OPENMM_CACHE_DIR"
);
cacheDir
=
(
cacheVariable
==
NULL
?
tempDir
:
string
(
cacheVariable
));
cacheDir
=
(
cacheVariable
==
NULL
?
tempDir
:
string
(
cacheVariable
));
#ifdef WIN32
#ifdef WIN32
...
@@ -121,7 +121,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -121,7 +121,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string
errorMessage
=
"Error initializing Context"
;
string
errorMessage
=
"Error initializing Context"
;
CHECK_RESULT
(
cuDeviceGetCount
(
&
numDevices
));
CHECK_RESULT
(
cuDeviceGetCount
(
&
numDevices
));
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
numDevices
)
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
numDevices
)
throw
OpenMMException
(
"Illegal value for
Cuda
DeviceIndex: "
+
intToString
(
deviceIndex
));
throw
OpenMMException
(
"Illegal value for DeviceIndex: "
+
intToString
(
deviceIndex
));
vector
<
int
>
devicePrecedence
;
vector
<
int
>
devicePrecedence
;
if
(
deviceIndex
==
-
1
)
{
if
(
deviceIndex
==
-
1
)
{
...
@@ -1131,7 +1131,6 @@ void CudaContext::reorderAtoms() {
...
@@ -1131,7 +1131,6 @@ void CudaContext::reorderAtoms() {
reorderAtomsImpl
<
float
,
float4
,
double
,
double4
>
();
reorderAtomsImpl
<
float
,
float4
,
double
,
double4
>
();
else
else
reorderAtomsImpl
<
float
,
float4
,
float
,
float4
>
();
reorderAtomsImpl
<
float
,
float4
,
float
,
float4
>
();
nonbonded
->
updateNeighborListSize
();
}
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
...
...
platforms/cuda/src/CudaExpressionUtilities.cpp
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -174,8 +174,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -174,8 +174,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"float4 c[4];
\n
"
;
out
<<
"float4 c[4];
\n
"
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -217,9 +217,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -217,9 +217,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
");
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"float4 c[16];
\n
"
;
out
<<
"float4 c[16];
\n
"
;
for
(
int
j
=
0
;
j
<
16
;
j
++
)
for
(
int
j
=
0
;
j
<
16
;
j
++
)
...
@@ -254,7 +254,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -254,7 +254,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
for
(
int
k
=
3
;
k
>=
0
;
k
--
)
for
(
int
k
=
3
;
k
>=
0
;
k
--
)
for
(
int
m
=
0
;
m
<
4
;
m
++
)
{
for
(
int
m
=
0
;
m
<
4
;
m
++
)
{
int
base
=
4
*
m
;
int
base
=
4
*
m
;
string
suffix
=
suffixes
[
m
];
string
suffix
=
suffixes
[
k
];
out
<<
"derivy["
<<
m
<<
"] = da*derivy["
<<
m
<<
"] + (3*c["
<<
(
base
+
3
)
<<
"]"
<<
suffix
<<
"*db + 2*c["
<<
(
base
+
2
)
<<
"]"
<<
suffix
<<
")*db + c["
<<
(
base
+
1
)
<<
"]"
<<
suffix
<<
";
\n
"
;
out
<<
"derivy["
<<
m
<<
"] = da*derivy["
<<
m
<<
"] + (3*c["
<<
(
base
+
3
)
<<
"]"
<<
suffix
<<
"*db + 2*c["
<<
(
base
+
2
)
<<
"]"
<<
suffix
<<
")*db + c["
<<
(
base
+
1
)
<<
"]"
<<
suffix
<<
";
\n
"
;
}
}
out
<<
nodeNames
[
j
]
<<
" = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));
\n
"
;
out
<<
nodeNames
[
j
]
<<
" = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));
\n
"
;
...
@@ -271,7 +271,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -271,7 +271,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
nodeNames
[
j
]
<<
" *= "
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
nodeNames
[
j
]
<<
" *= "
<<
paramsFloat
[
11
]
<<
";
\n
"
;
}
}
else
else
throw
OpenMMException
(
"Unsupported derivative order for Continuous
2
DFunction"
);
throw
OpenMMException
(
"Unsupported derivative order for Continuous
3
DFunction"
);
}
}
out
<<
"}
\n
"
;
out
<<
"}
\n
"
;
}
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
73183c61
This diff is collapsed.
Click to expand it.
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
73183c61
...
@@ -65,12 +65,14 @@ private:
...
@@ -65,12 +65,14 @@ private:
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks to use.
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
string
errorMessage
=
"Error initializing nonbonded utilities"
;
int
multiprocessors
;
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
context
.
getDevice
()));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
context
.
getDevice
()));
CHECK_RESULT
(
cuEventCreate
(
&
downloadCountEvent
,
0
));
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedCountBuffer
,
sizeof
(
int
),
CU_MEMHOSTALLOC_PORTABLE
));
numForceThreadBlocks
=
4
*
multiprocessors
;
numForceThreadBlocks
=
4
*
multiprocessors
;
forceThreadBlockSize
=
(
context
.
getComputeCapability
()
<
2.0
?
128
:
256
);
forceThreadBlockSize
=
(
context
.
getComputeCapability
()
<
2.0
?
128
:
256
);
}
}
...
@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
...
@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete
rebuildNeighborList
;
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedCountBuffer
);
cuEventDestroy
(
downloadCountEvent
);
}
}
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
...
@@ -375,17 +380,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -375,17 +380,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
do
{
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
forceRebuildNeighborList
=
false
;
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
lastCutoff
=
kernels
.
cutoffDistance
;
interactionCount
->
download
(
pinnedCountBuffer
,
false
);
cuEventRecord
(
downloadCountEvent
,
context
.
getCurrentStream
());
}
}
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
@@ -398,20 +400,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
...
@@ -398,20 +400,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
if
(
useCutoff
&&
numTiles
>
0
)
{
cuEventSynchronize
(
downloadCountEvent
);
updateNeighborListSize
();
}
}
}
bool
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
bool
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
false
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
if
(
pinnedCountBuffer
[
0
]
<=
(
unsigned
int
)
maxTiles
)
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
maxTiles
)
return
false
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
// this from happening in the future.
maxTiles
=
(
int
)
(
1.2
*
pinned
InteractionCount
[
0
]);
maxTiles
=
(
int
)
(
1.2
*
pinned
CountBuffer
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
maxTiles
=
totalTiles
;
...
@@ -428,6 +432,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
...
@@ -428,6 +432,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
return
true
;
}
}
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
73183c61
...
@@ -62,6 +62,14 @@ extern "C" OPENMM_EXPORT_CUDA void registerPlatforms() {
...
@@ -62,6 +62,14 @@ extern "C" OPENMM_EXPORT_CUDA void registerPlatforms() {
#endif
#endif
CudaPlatform
::
CudaPlatform
()
{
CudaPlatform
::
CudaPlatform
()
{
deprecatedPropertyReplacements
[
"CudaDeviceIndex"
]
=
CudaDeviceIndex
();
deprecatedPropertyReplacements
[
"CudaDeviceName"
]
=
CudaDeviceName
();
deprecatedPropertyReplacements
[
"CudaUseBlockingSync"
]
=
CudaUseBlockingSync
();
deprecatedPropertyReplacements
[
"CudaPrecision"
]
=
CudaPrecision
();
deprecatedPropertyReplacements
[
"CudaUseCpuPme"
]
=
CudaUseCpuPme
();
deprecatedPropertyReplacements
[
"CudaTempDirectory"
]
=
CudaTempDirectory
();
deprecatedPropertyReplacements
[
"CudaDisablePmeStream"
]
=
CudaDisablePmeStream
();
deprecatedPropertyReplacements
[
"CudaDeterministicForces"
]
=
CudaDeterministicForces
();
CudaKernelFactory
*
factory
=
new
CudaKernelFactory
();
CudaKernelFactory
*
factory
=
new
CudaKernelFactory
();
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
UpdateStateDataKernel
::
Name
(),
factory
);
registerKernelFactory
(
UpdateStateDataKernel
::
Name
(),
factory
);
...
@@ -102,12 +110,14 @@ CudaPlatform::CudaPlatform() {
...
@@ -102,12 +110,14 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaTempDirectory
());
platformProperties
.
push_back
(
CudaTempDirectory
());
platformProperties
.
push_back
(
CudaHostCompiler
());
platformProperties
.
push_back
(
CudaHostCompiler
());
platformProperties
.
push_back
(
CudaDisablePmeStream
());
platformProperties
.
push_back
(
CudaDisablePmeStream
());
platformProperties
.
push_back
(
CudaDeterministicForces
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
setPropertyDefaultValue
(
CudaDisablePmeStream
(),
"false"
);
setPropertyDefaultValue
(
CudaDisablePmeStream
(),
"false"
);
setPropertyDefaultValue
(
CudaDeterministicForces
(),
"false"
);
#ifdef _MSC_VER
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
...
@@ -142,7 +152,10 @@ bool CudaPlatform::supportsDoublePrecision() const {
...
@@ -142,7 +152,10 @@ bool CudaPlatform::supportsDoublePrecision() const {
const
string
&
CudaPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
string
&
CudaPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
PlatformData
*
data
=
reinterpret_cast
<
const
PlatformData
*>
(
impl
.
getPlatformData
());
const
PlatformData
*
data
=
reinterpret_cast
<
const
PlatformData
*>
(
impl
.
getPlatformData
());
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
property
);
string
propertyName
=
property
;
if
(
deprecatedPropertyReplacements
.
find
(
property
)
!=
deprecatedPropertyReplacements
.
end
())
propertyName
=
deprecatedPropertyReplacements
.
find
(
property
)
->
second
;
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
propertyName
);
if
(
value
!=
data
->
propertyValues
.
end
())
if
(
value
!=
data
->
propertyValues
.
end
())
return
value
->
second
;
return
value
->
second
;
return
Platform
::
getPropertyValue
(
context
,
property
);
return
Platform
::
getPropertyValue
(
context
,
property
);
...
@@ -168,10 +181,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
...
@@ -168,10 +181,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaHostCompiler
())
:
properties
.
find
(
CudaHostCompiler
())
->
second
);
getPropertyDefaultValue
(
CudaHostCompiler
())
:
properties
.
find
(
CudaHostCompiler
())
->
second
);
string
pmeStreamPropValue
=
(
properties
.
find
(
CudaDisablePmeStream
())
==
properties
.
end
()
?
string
pmeStreamPropValue
=
(
properties
.
find
(
CudaDisablePmeStream
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDisablePmeStream
())
:
properties
.
find
(
CudaDisablePmeStream
())
->
second
);
getPropertyDefaultValue
(
CudaDisablePmeStream
())
:
properties
.
find
(
CudaDisablePmeStream
())
->
second
);
string
deterministicForcesValue
=
(
properties
.
find
(
CudaDeterministicForces
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDeterministicForces
())
:
properties
.
find
(
CudaDeterministicForces
())
->
second
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
transform
(
pmeStreamPropValue
.
begin
(),
pmeStreamPropValue
.
end
(),
pmeStreamPropValue
.
begin
(),
::
tolower
);
transform
(
pmeStreamPropValue
.
begin
(),
pmeStreamPropValue
.
end
(),
pmeStreamPropValue
.
begin
(),
::
tolower
);
transform
(
deterministicForcesValue
.
begin
(),
deterministicForcesValue
.
end
(),
deterministicForcesValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
if
(
!
supportsKernels
(
pmeKernelName
))
...
@@ -180,7 +196,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
...
@@ -180,7 +196,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
threads
));
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
deterministicForcesValue
,
threads
));
}
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
@@ -189,7 +206,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
...
@@ -189,7 +206,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
}
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
const
string
&
deterministicForcesProperty
,
int
numThreads
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
vector
<
string
>
devices
;
...
@@ -230,6 +248,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
...
@@ -230,6 +248,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
}
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
...
@@ -239,6 +258,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
...
@@ -239,6 +258,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
propertyValues
[
CudaPlatform
::
CudaHostCompiler
()]
=
hostCompilerProperty
;
propertyValues
[
CudaPlatform
::
CudaHostCompiler
()]
=
hostCompilerProperty
;
propertyValues
[
CudaPlatform
::
CudaDisablePmeStream
()]
=
disablePmeStream
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaDisablePmeStream
()]
=
disablePmeStream
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaDeterministicForces
()]
=
deterministicForces
?
"true"
:
"false"
;
contextEnergy
.
resize
(
contexts
.
size
());
contextEnergy
.
resize
(
contexts
.
size
());
// Determine whether peer-to-peer copying is supported, and enable it if so.
// Determine whether peer-to-peer copying is supported, and enable it if so.
...
...
platforms/cuda/src/kernels/angleForce.cu
View file @
73183c61
real3
v0
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
v0
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
v1
=
make_real3
(
pos2
.
x
-
pos3
.
x
,
pos2
.
y
-
pos3
.
y
,
pos2
.
z
-
pos3
.
z
);
real3
v1
=
make_real3
(
pos2
.
x
-
pos3
.
x
,
pos2
.
y
-
pos3
.
y
,
pos2
.
z
-
pos3
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
#endif
real3
cp
=
cross
(
v0
,
v1
);
real3
cp
=
cross
(
v0
,
v1
);
real
rp
=
cp
.
x
*
cp
.
x
+
cp
.
y
*
cp
.
y
+
cp
.
z
*
cp
.
z
;
real
rp
=
cp
.
x
*
cp
.
x
+
cp
.
y
*
cp
.
y
+
cp
.
z
*
cp
.
z
;
rp
=
max
(
SQRT
(
rp
),
(
real
)
1.0e-06
f
);
rp
=
max
(
SQRT
(
rp
),
(
real
)
1.0e-06
f
);
...
...
platforms/cuda/src/kernels/bondForce.cu
View file @
73183c61
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r
=
SQRT
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
real
r
=
SQRT
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
COMPUTE_FORCE
COMPUTE_FORCE
dEdR
=
(
r
>
0
)
?
(
dEdR
/
r
)
:
0
;
dEdR
=
(
r
>
0
)
?
(
dEdR
/
r
)
:
0
;
...
...
platforms/cuda/src/kernels/cmapTorsionForce.cu
View file @
73183c61
...
@@ -5,6 +5,11 @@ const real PI = (real) 3.14159265358979323846;
...
@@ -5,6 +5,11 @@ const real PI = (real) 3.14159265358979323846;
real3
v0a
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v0a
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v1a
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v1a
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v2a
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
real3
v2a
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0a
)
APPLY_PERIODIC_TO_DELTA
(
v1a
)
APPLY_PERIODIC_TO_DELTA
(
v2a
)
#endif
real3
cp0a
=
cross
(
v0a
,
v1a
);
real3
cp0a
=
cross
(
v0a
,
v1a
);
real3
cp1a
=
cross
(
v1a
,
v2a
);
real3
cp1a
=
cross
(
v1a
,
v2a
);
real
cosangle
=
dot
(
normalize
(
cp0a
),
normalize
(
cp1a
));
real
cosangle
=
dot
(
normalize
(
cp0a
),
normalize
(
cp1a
));
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real3
v0b
=
make_real3
(
pos5
.
x
-
pos6
.
x
,
pos5
.
y
-
pos6
.
y
,
pos5
.
z
-
pos6
.
z
);
real3
v0b
=
make_real3
(
pos5
.
x
-
pos6
.
x
,
pos5
.
y
-
pos6
.
y
,
pos5
.
z
-
pos6
.
z
);
real3
v1b
=
make_real3
(
pos7
.
x
-
pos6
.
x
,
pos7
.
y
-
pos6
.
y
,
pos7
.
z
-
pos6
.
z
);
real3
v1b
=
make_real3
(
pos7
.
x
-
pos6
.
x
,
pos7
.
y
-
pos6
.
y
,
pos7
.
z
-
pos6
.
z
);
real3
v2b
=
make_real3
(
pos7
.
x
-
pos8
.
x
,
pos7
.
y
-
pos8
.
y
,
pos7
.
z
-
pos8
.
z
);
real3
v2b
=
make_real3
(
pos7
.
x
-
pos8
.
x
,
pos7
.
y
-
pos8
.
y
,
pos7
.
z
-
pos8
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0b
)
APPLY_PERIODIC_TO_DELTA
(
v1b
)
APPLY_PERIODIC_TO_DELTA
(
v2b
)
#endif
real3
cp0b
=
cross
(
v0b
,
v1b
);
real3
cp0b
=
cross
(
v0b
,
v1b
);
real3
cp1b
=
cross
(
v1b
,
v2b
);
real3
cp1b
=
cross
(
v1b
,
v2b
);
cosangle
=
dot
(
normalize
(
cp0b
),
normalize
(
cp1b
));
cosangle
=
dot
(
normalize
(
cp0b
),
normalize
(
cp1b
));
...
...
platforms/cuda/src/kernels/customCentroidBond.cu
View file @
73183c61
...
@@ -66,8 +66,11 @@ inline __device__ real3 trim(real4 v) {
...
@@ -66,8 +66,11 @@ inline __device__ real3 trim(real4 v) {
/**
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
*/
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
)
{
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
,
bool
periodic
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
return
result
;
}
}
...
@@ -105,7 +108,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -105,7 +108,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Compute the forces on groups based on the bonds.
* Compute the forces on groups based on the bonds.
*/
*/
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
const
int
*
__restrict__
bondGroups
const
int
*
__restrict__
bondGroups
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
EXTRA_ARGS
)
{
EXTRA_ARGS
)
{
mixed
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
...
platforms/cuda/src/kernels/customCompoundBond.cu
View file @
73183c61
...
@@ -8,8 +8,11 @@ inline __device__ real3 ccb_trim(real4 v) {
...
@@ -8,8 +8,11 @@ inline __device__ real3 ccb_trim(real4 v) {
/**
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
*/
inline
__device__
real4
ccb_delta
(
real4
vec1
,
real4
vec2
)
{
inline
__device__
real4
ccb_delta
(
real4
vec1
,
real4
vec2
,
bool
periodic
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
return
result
;
}
}
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
73183c61
...
@@ -168,6 +168,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -168,6 +168,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -191,39 +193,35 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -191,39 +193,35 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
73183c61
...
@@ -146,6 +146,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -146,6 +146,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -167,39 +169,35 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -167,39 +169,35 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
#else
}
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
else
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
#endif
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
{
y
+=
(
x
<
y
?
-
1
:
1
);
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
73183c61
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -225,39 +227,35 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -225,39 +227,35 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -580,39 +580,35 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -580,39 +580,35 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
#else
}
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
else
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
#endif
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
{
y
+=
(
x
<
y
?
-
1
:
1
);
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
73183c61
...
@@ -117,7 +117,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -117,7 +117,9 @@ extern "C" __global__ void computeNonbonded(
#ifndef ENABLE_SHUFFLE
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
#endif
#endif
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
...
@@ -309,8 +311,11 @@ extern "C" __global__ void computeNonbonded(
...
@@ -309,8 +311,11 @@ extern "C" __global__ void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
// of them (no cutoff).
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
#else
...
@@ -335,39 +340,35 @@ extern "C" __global__ void computeNonbonded(
...
@@ -335,39 +340,35 @@ extern "C" __global__ void computeNonbonded(
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
#else
}
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
else
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
#endif
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
{
y
+=
(
x
<
y
?
-
1
:
1
);
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
// Load atom data for this tile.
// Load atom data for this tile.
...
...
platforms/cuda/src/kernels/pme.cu
View file @
73183c61
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// Compute the index of the grid point each atom is associated with.
// Compute the index of the grid point each atom is associated with.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
real4
pos
=
posq
[
i
];
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
pos
.
z
*
recipBoxVecZ
.
z
);
...
@@ -18,7 +20,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
...
@@ -18,7 +20,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
}
}
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -28,9 +31,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -28,9 +31,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS
(
pos
)
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
pos
.
z
*
recipBoxVecZ
.
z
);
...
@@ -83,7 +84,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -83,7 +84,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
#elif __CUDA_ARCH__ < 200
#elif __CUDA_ARCH__ < 200
|| defined(USE_DETERMINISTIC_FORCES)
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
int
gridIndex
=
index
;
int
gridIndex
=
index
;
gridIndex
=
(
gridIndex
%
2
==
0
?
gridIndex
/
2
:
(
gridIndex
+
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
)
/
2
);
gridIndex
=
(
gridIndex
%
2
==
0
?
gridIndex
/
2
:
(
gridIndex
+
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
)
/
2
);
...
@@ -197,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -197,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern
"C"
__global__
extern
"C"
__global__
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -209,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -209,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real3
force
=
make_real3
(
0
);
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS
(
pos
)
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
pos
.
z
*
recipBoxVecZ
.
z
);
...
...
platforms/cuda/src/kernels/torsionForce.cu
View file @
73183c61
...
@@ -2,6 +2,11 @@ const real PI = (real) 3.14159265358979323846;
...
@@ -2,6 +2,11 @@ const real PI = (real) 3.14159265358979323846;
real3
v0
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v0
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v1
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v1
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v2
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
real3
v2
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
APPLY_PERIODIC_TO_DELTA
(
v2
)
#endif
real3
cp0
=
cross
(
v0
,
v1
);
real3
cp0
=
cross
(
v0
,
v1
);
real3
cp1
=
cross
(
v1
,
v2
);
real3
cp1
=
cross
(
v1
,
v2
);
real
cosangle
=
dot
(
normalize
(
cp0
),
normalize
(
cp1
));
real
cosangle
=
dot
(
normalize
(
cp0
),
normalize
(
cp1
));
...
...
platforms/cuda/tests/CudaTests.h
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2015 Stanford University and the Authors.
*
* Portions copyright (c) 2015
-2016
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -39,5 +39,5 @@ OpenMM::CudaPlatform platform;
...
@@ -39,5 +39,5 @@ OpenMM::CudaPlatform platform;
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
if
(
argc
>
1
)
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"
Cuda
Precision"
,
std
::
string
(
argv
[
1
]));
platform
.
setPropertyDefaultValue
(
"Precision"
,
std
::
string
(
argv
[
1
]));
}
}
platforms/cuda/tests/TestCudaFFT3D.cpp
View file @
73183c61
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
73183c61
...
@@ -118,9 +118,44 @@ void testReordering() {
...
@@ -118,9 +118,44 @@ void testReordering() {
}
}
}
}
void
testDeterministicForces
()
{
// Check that the CudaDeterministicForces property works correctly.
const
int
numParticles
=
1000
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
6
,
0
,
0
),
Vec3
(
2.1
,
6
,
0
),
Vec3
(
-
1.5
,
-
0.5
,
6
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
i
%
2
==
0
?
1
:
-
1
,
1
,
0
);
positions
.
push_back
(
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
)
*
6
);
}
VerletIntegrator
integrator
(
0.001
);
map
<
string
,
string
>
properties
;
properties
[
CudaPlatform
::
CudaDeterministicForces
()]
=
"true"
;
Context
context
(
system
,
integrator
,
platform
,
properties
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
);
State
state2
=
context
.
getState
(
State
::
Forces
);
// All forces should be *exactly* equal.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
0
],
state2
.
getForces
()[
i
][
0
]);
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
1
],
state2
.
getForces
()[
i
][
1
]);
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
2
],
state2
.
getForces
()[
i
][
2
]);
}
}
void
runPlatformTests
()
{
void
runPlatformTests
()
{
testParallelComputation
(
NonbondedForce
::
NoCutoff
);
testParallelComputation
(
NonbondedForce
::
NoCutoff
);
testParallelComputation
(
NonbondedForce
::
Ewald
);
testParallelComputation
(
NonbondedForce
::
Ewald
);
testParallelComputation
(
NonbondedForce
::
PME
);
testParallelComputation
(
NonbondedForce
::
PME
);
testReordering
();
testReordering
();
testDeterministicForces
();
}
}
platforms/cuda/tests/TestCudaRandom.cpp
View file @
73183c61
...
@@ -56,7 +56,7 @@ void testGaussian() {
...
@@ -56,7 +56,7 @@ void testGaussian() {
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
Prev
1
2
3
4
5
6
7
8
…
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment