Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
73183c61
Commit
73183c61
authored
May 31, 2016
by
ChayaSt
Browse files
resolved conflict
parents
0e218233
32e08b87
Changes
267
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
328 additions
and
201 deletions
+328
-201
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+3
-4
platforms/cuda/src/CudaExpressionUtilities.cpp
platforms/cuda/src/CudaExpressionUtilities.cpp
+8
-8
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+71
-24
platforms/cuda/src/CudaNonbondedUtilities.cpp
platforms/cuda/src/CudaNonbondedUtilities.cpp
+20
-15
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+23
-3
platforms/cuda/src/kernels/angleForce.cu
platforms/cuda/src/kernels/angleForce.cu
+4
-0
platforms/cuda/src/kernels/bondForce.cu
platforms/cuda/src/kernels/bondForce.cu
+3
-0
platforms/cuda/src/kernels/cmapTorsionForce.cu
platforms/cuda/src/kernels/cmapTorsionForce.cu
+10
-0
platforms/cuda/src/kernels/customCentroidBond.cu
platforms/cuda/src/kernels/customCentroidBond.cu
+5
-2
platforms/cuda/src/kernels/customCompoundBond.cu
platforms/cuda/src/kernels/customCompoundBond.cu
+4
-1
platforms/cuda/src/kernels/customGBEnergyN2.cu
platforms/cuda/src/kernels/customGBEnergyN2.cu
+21
-23
platforms/cuda/src/kernels/customGBValueN2.cu
platforms/cuda/src/kernels/customGBValueN2.cu
+26
-28
platforms/cuda/src/kernels/gbsaObc1.cu
platforms/cuda/src/kernels/gbsaObc1.cu
+47
-51
platforms/cuda/src/kernels/nonbonded.cu
platforms/cuda/src/kernels/nonbonded.cu
+29
-28
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+10
-10
platforms/cuda/src/kernels/torsionForce.cu
platforms/cuda/src/kernels/torsionForce.cu
+5
-0
platforms/cuda/tests/CudaTests.h
platforms/cuda/tests/CudaTests.h
+2
-2
platforms/cuda/tests/TestCudaFFT3D.cpp
platforms/cuda/tests/TestCudaFFT3D.cpp
+1
-1
platforms/cuda/tests/TestCudaNonbondedForce.cpp
platforms/cuda/tests/TestCudaNonbondedForce.cpp
+35
-0
platforms/cuda/tests/TestCudaRandom.cpp
platforms/cuda/tests/TestCudaRandom.cpp
+1
-1
No files found.
platforms/cuda/src/CudaContext.cpp
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -106,7 +106,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -106,7 +106,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
useMixedPrecision
=
false
;
useMixedPrecision
=
false
;
}
}
else
else
throw
OpenMMException
(
"Illegal value for
Cuda
Precision: "
+
precision
);
throw
OpenMMException
(
"Illegal value for Precision: "
+
precision
);
char
*
cacheVariable
=
getenv
(
"OPENMM_CACHE_DIR"
);
char
*
cacheVariable
=
getenv
(
"OPENMM_CACHE_DIR"
);
cacheDir
=
(
cacheVariable
==
NULL
?
tempDir
:
string
(
cacheVariable
));
cacheDir
=
(
cacheVariable
==
NULL
?
tempDir
:
string
(
cacheVariable
));
#ifdef WIN32
#ifdef WIN32
...
@@ -121,7 +121,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
...
@@ -121,7 +121,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string
errorMessage
=
"Error initializing Context"
;
string
errorMessage
=
"Error initializing Context"
;
CHECK_RESULT
(
cuDeviceGetCount
(
&
numDevices
));
CHECK_RESULT
(
cuDeviceGetCount
(
&
numDevices
));
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
numDevices
)
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
numDevices
)
throw
OpenMMException
(
"Illegal value for
Cuda
DeviceIndex: "
+
intToString
(
deviceIndex
));
throw
OpenMMException
(
"Illegal value for DeviceIndex: "
+
intToString
(
deviceIndex
));
vector
<
int
>
devicePrecedence
;
vector
<
int
>
devicePrecedence
;
if
(
deviceIndex
==
-
1
)
{
if
(
deviceIndex
==
-
1
)
{
...
@@ -1131,7 +1131,6 @@ void CudaContext::reorderAtoms() {
...
@@ -1131,7 +1131,6 @@ void CudaContext::reorderAtoms() {
reorderAtomsImpl
<
float
,
float4
,
double
,
double4
>
();
reorderAtomsImpl
<
float
,
float4
,
double
,
double4
>
();
else
else
reorderAtomsImpl
<
float
,
float4
,
float
,
float4
>
();
reorderAtomsImpl
<
float
,
float4
,
float
,
float4
>
();
nonbonded
->
updateNeighborListSize
();
}
}
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
template
<
class
Real
,
class
Real4
,
class
Mixed
,
class
Mixed4
>
...
...
platforms/cuda/src/CudaExpressionUtilities.cpp
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -174,8 +174,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -174,8 +174,8 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"if (x >= "
<<
paramsFloat
[
2
]
<<
" && x <= "
<<
paramsFloat
[
3
]
<<
" && y >= "
<<
paramsFloat
[
4
]
<<
" && y <= "
<<
paramsFloat
[
5
]
<<
") {
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
2
]
<<
")*"
<<
paramsFloat
[
6
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
4
]
<<
")*"
<<
paramsFloat
[
7
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"int coeffIndex = 4*(s+"
<<
paramsInt
[
0
]
<<
"*t);
\n
"
;
out
<<
"float4 c[4];
\n
"
;
out
<<
"float4 c[4];
\n
"
;
for
(
int
j
=
0
;
j
<
4
;
j
++
)
for
(
int
j
=
0
;
j
<
4
;
j
++
)
...
@@ -217,9 +217,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -217,9 +217,9 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"x = (x - "
<<
paramsFloat
[
3
]
<<
")*"
<<
paramsFloat
[
9
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"y = (y - "
<<
paramsFloat
[
5
]
<<
")*"
<<
paramsFloat
[
10
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"z = (z - "
<<
paramsFloat
[
7
]
<<
")*"
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
");
\n
"
;
out
<<
"int s = min((int) floor(x), "
<<
paramsInt
[
0
]
<<
"
-1
);
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
");
\n
"
;
out
<<
"int t = min((int) floor(y), "
<<
paramsInt
[
1
]
<<
"
-1
);
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
");
\n
"
;
out
<<
"int u = min((int) floor(z), "
<<
paramsInt
[
2
]
<<
"
-1
);
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"int coeffIndex = 16*(s+"
<<
paramsInt
[
0
]
<<
"*(t+"
<<
paramsInt
[
1
]
<<
"*u));
\n
"
;
out
<<
"float4 c[16];
\n
"
;
out
<<
"float4 c[16];
\n
"
;
for
(
int
j
=
0
;
j
<
16
;
j
++
)
for
(
int
j
=
0
;
j
<
16
;
j
++
)
...
@@ -254,7 +254,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -254,7 +254,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
for
(
int
k
=
3
;
k
>=
0
;
k
--
)
for
(
int
k
=
3
;
k
>=
0
;
k
--
)
for
(
int
m
=
0
;
m
<
4
;
m
++
)
{
for
(
int
m
=
0
;
m
<
4
;
m
++
)
{
int
base
=
4
*
m
;
int
base
=
4
*
m
;
string
suffix
=
suffixes
[
m
];
string
suffix
=
suffixes
[
k
];
out
<<
"derivy["
<<
m
<<
"] = da*derivy["
<<
m
<<
"] + (3*c["
<<
(
base
+
3
)
<<
"]"
<<
suffix
<<
"*db + 2*c["
<<
(
base
+
2
)
<<
"]"
<<
suffix
<<
")*db + c["
<<
(
base
+
1
)
<<
"]"
<<
suffix
<<
";
\n
"
;
out
<<
"derivy["
<<
m
<<
"] = da*derivy["
<<
m
<<
"] + (3*c["
<<
(
base
+
3
)
<<
"]"
<<
suffix
<<
"*db + 2*c["
<<
(
base
+
2
)
<<
"]"
<<
suffix
<<
")*db + c["
<<
(
base
+
1
)
<<
"]"
<<
suffix
<<
";
\n
"
;
}
}
out
<<
nodeNames
[
j
]
<<
" = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));
\n
"
;
out
<<
nodeNames
[
j
]
<<
" = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));
\n
"
;
...
@@ -271,7 +271,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
...
@@ -271,7 +271,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
nodeNames
[
j
]
<<
" *= "
<<
paramsFloat
[
11
]
<<
";
\n
"
;
out
<<
nodeNames
[
j
]
<<
" *= "
<<
paramsFloat
[
11
]
<<
";
\n
"
;
}
}
else
else
throw
OpenMMException
(
"Unsupported derivative order for Continuous
2
DFunction"
);
throw
OpenMMException
(
"Unsupported derivative order for Continuous
3
DFunction"
);
}
}
out
<<
"}
\n
"
;
out
<<
"}
\n
"
;
}
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
73183c61
...
@@ -94,6 +94,7 @@ void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
...
@@ -94,6 +94,7 @@ void CudaCalcForcesAndEnergyKernel::initialize(const System& system) {
}
}
void
CudaCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
void
CudaCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
,
int
groups
)
{
cu
.
setForcesValid
(
true
);
cu
.
setAsCurrent
();
cu
.
setAsCurrent
();
cu
.
clearAutoclearBuffers
();
cu
.
clearAutoclearBuffers
();
for
(
vector
<
CudaContext
::
ForcePreComputation
*>::
iterator
iter
=
cu
.
getPreComputations
().
begin
();
iter
!=
cu
.
getPreComputations
().
end
();
++
iter
)
for
(
vector
<
CudaContext
::
ForcePreComputation
*>::
iterator
iter
=
cu
.
getPreComputations
().
begin
();
iter
!=
cu
.
getPreComputations
().
end
();
++
iter
)
...
@@ -125,6 +126,8 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
...
@@ -125,6 +126,8 @@ double CudaCalcForcesAndEnergyKernel::finishComputation(ContextImpl& context, bo
sum
+=
energy
[
i
];
sum
+=
energy
[
i
];
}
}
}
}
if
(
!
cu
.
getForcesValid
())
valid
=
false
;
return
sum
;
return
sum
;
}
}
...
@@ -340,10 +343,27 @@ void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3
...
@@ -340,10 +343,27 @@ void CudaUpdateStateDataKernel::getPeriodicBoxVectors(ContextImpl& context, Vec3
cu
.
getPeriodicBoxVectors
(
a
,
b
,
c
);
cu
.
getPeriodicBoxVectors
(
a
,
b
,
c
);
}
}
void CudaUpdateStateDataKernel::setPeriodicBoxVectors(ContextImpl& context, const Vec3& a, const Vec3& b, const Vec3& c)
const
{
void
CudaUpdateStateDataKernel
::
setPeriodicBoxVectors
(
ContextImpl
&
context
,
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
)
{
vector
<
CudaContext
*>&
contexts
=
cu
.
getPlatformData
().
contexts
;
vector
<
CudaContext
*>&
contexts
=
cu
.
getPlatformData
().
contexts
;
// If any particles have been wrapped to the first periodic box, we need to unwrap them
// to avoid changing their positions.
vector
<
Vec3
>
positions
;
for
(
int
i
=
0
;
i
<
(
int
)
cu
.
getPosCellOffsets
().
size
();
i
++
)
{
int4
&
offset
=
cu
.
getPosCellOffsets
()[
i
];
if
(
offset
.
x
!=
0
||
offset
.
y
!=
0
||
offset
.
z
!=
0
)
{
getPositions
(
context
,
positions
);
break
;
}
}
// Update the vectors.
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
contexts
.
size
();
i
++
)
contexts
[
i
]
->
setPeriodicBoxVectors
(
a
,
b
,
c
);
contexts
[
i
]
->
setPeriodicBoxVectors
(
a
,
b
,
c
);
if
(
positions
.
size
()
>
0
)
setPositions
(
context
,
positions
);
}
}
void
CudaUpdateStateDataKernel
::
createCheckpoint
(
ContextImpl
&
context
,
ostream
&
stream
)
{
void
CudaUpdateStateDataKernel
::
createCheckpoint
(
ContextImpl
&
context
,
ostream
&
stream
)
{
...
@@ -502,6 +522,7 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
...
@@ -502,6 +522,7 @@ void CudaCalcHarmonicBondForceKernel::initialize(const System& system, const Har
}
}
params
->
upload
(
paramVector
);
params
->
upload
(
paramVector
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
harmonicBondForce
;
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
harmonicBondForce
;
replacements
[
"PARAMS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params
->
getDevicePointer
(),
"float2"
);
replacements
[
"PARAMS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params
->
getDevicePointer
(),
"float2"
);
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
bondForce
,
replacements
),
force
.
getForceGroup
());
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
bondForce
,
replacements
),
force
.
getForceGroup
());
...
@@ -638,6 +659,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
...
@@ -638,6 +659,7 @@ void CudaCalcCustomBondForceKernel::initialize(const System& system, const Custo
vector
<
pair
<
string
,
string
>
>
functionNames
;
vector
<
pair
<
string
,
string
>
>
functionNames
;
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
);
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
bondForce
,
replacements
),
force
.
getForceGroup
());
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
bondForce
,
replacements
),
force
.
getForceGroup
());
}
}
...
@@ -737,6 +759,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
...
@@ -737,6 +759,7 @@ void CudaCalcHarmonicAngleForceKernel::initialize(const System& system, const Ha
}
}
params
->
upload
(
paramVector
);
params
->
upload
(
paramVector
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
harmonicAngleForce
;
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
harmonicAngleForce
;
replacements
[
"PARAMS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params
->
getDevicePointer
(),
"float2"
);
replacements
[
"PARAMS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params
->
getDevicePointer
(),
"float2"
);
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
angleForce
,
replacements
),
force
.
getForceGroup
());
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
angleForce
,
replacements
),
force
.
getForceGroup
());
...
@@ -874,6 +897,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
...
@@ -874,6 +897,7 @@ void CudaCalcCustomAngleForceKernel::initialize(const System& system, const Cust
vector
<
pair
<
string
,
string
>
>
functionNames
;
vector
<
pair
<
string
,
string
>
>
functionNames
;
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
);
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
angleForce
,
replacements
),
force
.
getForceGroup
());
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
angleForce
,
replacements
),
force
.
getForceGroup
());
}
}
...
@@ -974,6 +998,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
...
@@ -974,6 +998,7 @@ void CudaCalcPeriodicTorsionForceKernel::initialize(const System& system, const
}
}
params
->
upload
(
paramVector
);
params
->
upload
(
paramVector
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
periodicTorsionForce
;
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
periodicTorsionForce
;
replacements
[
"PARAMS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params
->
getDevicePointer
(),
"float4"
);
replacements
[
"PARAMS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params
->
getDevicePointer
(),
"float4"
);
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
torsionForce
,
replacements
),
force
.
getForceGroup
());
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
torsionForce
,
replacements
),
force
.
getForceGroup
());
...
@@ -1069,6 +1094,7 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
...
@@ -1069,6 +1094,7 @@ void CudaCalcRBTorsionForceKernel::initialize(const System& system, const RBTors
params1
->
upload
(
paramVector1
);
params1
->
upload
(
paramVector1
);
params2
->
upload
(
paramVector2
);
params2
->
upload
(
paramVector2
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
rbTorsionForce
;
replacements
[
"COMPUTE_FORCE"
]
=
CudaKernelSources
::
rbTorsionForce
;
replacements
[
"PARAMS1"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params1
->
getDevicePointer
(),
"float4"
);
replacements
[
"PARAMS1"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params1
->
getDevicePointer
(),
"float4"
);
replacements
[
"PARAMS2"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params2
->
getDevicePointer
(),
"float2"
);
replacements
[
"PARAMS2"
]
=
cu
.
getBondedUtilities
().
addArgument
(
params2
->
getDevicePointer
(),
"float2"
);
...
@@ -1186,6 +1212,7 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
...
@@ -1186,6 +1212,7 @@ void CudaCalcCMAPTorsionForceKernel::initialize(const System& system, const CMAP
mapPositions
->
upload
(
mapPositionsVec
);
mapPositions
->
upload
(
mapPositionsVec
);
torsionMaps
->
upload
(
torsionMapsVec
);
torsionMaps
->
upload
(
torsionMapsVec
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COEFF"
]
=
cu
.
getBondedUtilities
().
addArgument
(
coefficients
->
getDevicePointer
(),
"float4"
);
replacements
[
"COEFF"
]
=
cu
.
getBondedUtilities
().
addArgument
(
coefficients
->
getDevicePointer
(),
"float4"
);
replacements
[
"MAP_POS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
mapPositions
->
getDevicePointer
(),
"int2"
);
replacements
[
"MAP_POS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
mapPositions
->
getDevicePointer
(),
"int2"
);
replacements
[
"MAPS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
torsionMaps
->
getDevicePointer
(),
"int"
);
replacements
[
"MAPS"
]
=
cu
.
getBondedUtilities
().
addArgument
(
torsionMaps
->
getDevicePointer
(),
"int"
);
...
@@ -1341,6 +1368,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
...
@@ -1341,6 +1368,7 @@ void CudaCalcCustomTorsionForceKernel::initialize(const System& system, const Cu
vector
<
pair
<
string
,
string
>
>
functionNames
;
vector
<
pair
<
string
,
string
>
>
functionNames
;
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
);
compute
<<
cu
.
getExpressionUtilities
().
createExpressions
(
expressions
,
variables
,
functions
,
functionNames
,
"temp"
);
map
<
string
,
string
>
replacements
;
map
<
string
,
string
>
replacements
;
replacements
[
"APPLY_PERIODIC"
]
=
(
force
.
usesPeriodicBoundaryConditions
()
?
"1"
:
"0"
);
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
replacements
[
"COMPUTE_FORCE"
]
=
compute
.
str
();
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
torsionForce
,
replacements
),
force
.
getForceGroup
());
cu
.
getBondedUtilities
().
addInteraction
(
atoms
,
cu
.
replaceStrings
(
CudaKernelSources
::
torsionForce
,
replacements
),
force
.
getForceGroup
());
}
}
...
@@ -1693,6 +1721,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
...
@@ -1693,6 +1721,8 @@ void CudaCalcNonbondedForceKernel::initialize(const System& system, const Nonbon
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
pmeDefines
[
"USE_DOUBLE_PRECISION"
]
=
"1"
;
if
(
usePmeStream
)
if
(
usePmeStream
)
pmeDefines
[
"USE_PME_STREAM"
]
=
"1"
;
pmeDefines
[
"USE_PME_STREAM"
]
=
"1"
;
if
(
cu
.
getPlatformData
().
deterministicForces
)
pmeDefines
[
"USE_DETERMINISTIC_FORCES"
]
=
"1"
;
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
CUmodule
module
=
cu
.
createModule
(
CudaKernelSources
::
vectorOps
+
CudaKernelSources
::
pme
,
pmeDefines
);
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
if
(
cu
.
getPlatformData
().
useCpuPme
)
{
// Create the CPU PME kernel.
// Create the CPU PME kernel.
...
@@ -1911,16 +1941,18 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1911,16 +1941,18 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
// Execute the reciprocal space kernels.
// Execute the reciprocal space kernels.
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
void
*
gridIndexArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
pmeAtomGridIndex
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
]};
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
cu
.
executeKernel
(
pmeGridIndexKernel
,
gridIndexArgs
,
cu
.
getNumAtoms
());
sort
->
sort
(
*
pmeAtomGridIndex
);
sort
->
sort
(
*
pmeAtomGridIndex
);
void
*
spreadArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
void
*
spreadArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeSpreadChargeKernel
,
spreadArgs
,
cu
.
getNumAtoms
(),
128
);
cu
.
executeKernel
(
pmeSpreadChargeKernel
,
spreadArgs
,
cu
.
getNumAtoms
(),
128
);
if (cu.getUseDoublePrecision() || cu.getComputeCapability() < 2.0) {
if
(
cu
.
getUseDoublePrecision
()
||
cu
.
getComputeCapability
()
<
2.0
||
cu
.
getPlatformData
().
deterministicForces
)
{
void
*
finishSpreadArgs
[]
=
{
&
directPmeGrid
->
getDevicePointer
()};
void
*
finishSpreadArgs
[]
=
{
&
directPmeGrid
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
directPmeGrid
->
getSize
());
cu
.
executeKernel
(
pmeFinishSpreadChargeKernel
,
finishSpreadArgs
,
directPmeGrid
->
getSize
());
}
}
...
@@ -1957,8 +1989,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
...
@@ -1957,8 +1989,9 @@ double CudaCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeF
fft
->
execFFT
(
*
reciprocalPmeGrid
,
*
directPmeGrid
,
false
);
fft
->
execFFT
(
*
reciprocalPmeGrid
,
*
directPmeGrid
,
false
);
}
}
void* interpolateArgs[] = {&cu.getPosq().getDevicePointer(), &cu.getForce().getDevicePointer(), &directPmeGrid->getDevicePointer(),
void
*
interpolateArgs
[]
=
{
&
cu
.
getPosq
().
getDevicePointer
(),
&
cu
.
getForce
().
getDevicePointer
(),
&
directPmeGrid
->
getDevicePointer
(),
cu
.
getPeriodicBoxSizePointer
(),
cu.getPeriodicBoxSizePointer(), recipBoxVectorPointer[0], recipBoxVectorPointer[1], recipBoxVectorPointer[2], &pmeAtomGridIndex->getDevicePointer()};
cu
.
getInvPeriodicBoxSizePointer
(),
cu
.
getPeriodicBoxVecXPointer
(),
cu
.
getPeriodicBoxVecYPointer
(),
cu
.
getPeriodicBoxVecZPointer
(),
recipBoxVectorPointer
[
0
],
recipBoxVectorPointer
[
1
],
recipBoxVectorPointer
[
2
],
&
pmeAtomGridIndex
->
getDevicePointer
()};
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
cu
.
executeKernel
(
pmeInterpolateForceKernel
,
interpolateArgs
,
cu
.
getNumAtoms
(),
128
);
if
(
usePmeStream
)
{
if
(
usePmeStream
)
{
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
cuEventRecord
(
pmeSyncEvent
,
pmeStream
);
...
@@ -2157,6 +2190,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2157,6 +2190,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
map
<
string
,
Lepton
::
CustomFunction
*>
functions
;
map
<
string
,
Lepton
::
CustomFunction
*>
functions
;
vector
<
pair
<
string
,
string
>
>
functionDefinitions
;
vector
<
pair
<
string
,
string
>
>
functionDefinitions
;
vector
<
const
TabulatedFunction
*>
functionList
;
vector
<
const
TabulatedFunction
*>
functionList
;
vector
<
string
>
tableTypes
;
for
(
int
i
=
0
;
i
<
force
.
getNumTabulatedFunctions
();
i
++
)
{
for
(
int
i
=
0
;
i
<
force
.
getNumTabulatedFunctions
();
i
++
)
{
functionList
.
push_back
(
&
force
.
getTabulatedFunction
(
i
));
functionList
.
push_back
(
&
force
.
getTabulatedFunction
(
i
));
string
name
=
force
.
getTabulatedFunctionName
(
i
);
string
name
=
force
.
getTabulatedFunctionName
(
i
);
...
@@ -2168,6 +2202,10 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2168,6 +2202,10 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
tabulatedFunctions
.
push_back
(
CudaArray
::
create
<
float
>
(
cu
,
f
.
size
(),
"TabulatedFunction"
));
tabulatedFunctions
.
push_back
(
CudaArray
::
create
<
float
>
(
cu
,
f
.
size
(),
"TabulatedFunction"
));
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
upload
(
f
);
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
width
,
width
*
sizeof
(
float
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDevicePointer
()));
cu
.
getNonbondedUtilities
().
addArgument
(
CudaNonbondedUtilities
::
ParameterInfo
(
arrayName
,
"float"
,
width
,
width
*
sizeof
(
float
),
tabulatedFunctions
[
tabulatedFunctions
.
size
()
-
1
]
->
getDevicePointer
()));
if
(
width
==
1
)
tableTypes
.
push_back
(
"float"
);
else
tableTypes
.
push_back
(
"float"
+
cu
.
intToString
(
width
));
}
}
// Record information for the expressions.
// Record information for the expressions.
...
@@ -2220,7 +2258,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2220,7 +2258,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
}
}
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbonded
,
replacements
);
string
source
=
cu
.
replaceStrings
(
CudaKernelSources
::
customNonbonded
,
replacements
);
if
(
force
.
getNumInteractionGroups
()
>
0
)
if
(
force
.
getNumInteractionGroups
()
>
0
)
initInteractionGroups(force, source);
initInteractionGroups
(
force
,
source
,
tableTypes
);
else
{
else
{
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
cu
.
getNonbondedUtilities
().
addInteraction
(
useCutoff
,
usePeriodic
,
true
,
force
.
getCutoffDistance
(),
exclusionList
,
source
,
force
.
getForceGroup
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
{
...
@@ -2246,7 +2284,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
...
@@ -2246,7 +2284,7 @@ void CudaCalcCustomNonbondedForceKernel::initialize(const System& system, const
}
}
}
}
void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbondedForce& force, const string& interactionSource) {
void
CudaCalcCustomNonbondedForceKernel
::
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
string
&
interactionSource
,
const
vector
<
string
>&
tableTypes
)
{
// Process groups to form tiles.
// Process groups to form tiles.
vector
<
vector
<
int
>
>
atomLists
;
vector
<
vector
<
int
>
>
atomLists
;
...
@@ -2429,6 +2467,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
...
@@ -2429,6 +2467,8 @@ void CudaCalcCustomNonbondedForceKernel::initInteractionGroups(const CustomNonbo
stringstream
args
;
stringstream
args
;
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
buffers
.
size
();
i
++
)
args
<<
", const "
<<
buffers
[
i
].
getType
()
<<
"* __restrict__ global_params"
<<
(
i
+
1
);
args
<<
", const "
<<
buffers
[
i
].
getType
()
<<
"* __restrict__ global_params"
<<
(
i
+
1
);
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
args
<<
", const "
<<
tableTypes
[
i
]
<<
"* __restrict__ table"
<<
i
;
if
(
globals
!=
NULL
)
if
(
globals
!=
NULL
)
args
<<
", const float* __restrict__ globals"
;
args
<<
", const float* __restrict__ globals"
;
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
replacements
[
"PARAMETER_ARGUMENTS"
]
=
args
.
str
();
...
@@ -2519,6 +2559,8 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
...
@@ -2519,6 +2559,8 @@ double CudaCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool in
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxVecZPointer
());
interactionGroupArgs
.
push_back
(
cu
.
getPeriodicBoxVecZPointer
());
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
params
->
getBuffers
().
size
();
i
++
)
interactionGroupArgs
.
push_back
(
&
params
->
getBuffers
()[
i
].
getMemory
());
interactionGroupArgs
.
push_back
(
&
params
->
getBuffers
()[
i
].
getMemory
());
for
(
int
i
=
0
;
i
<
(
int
)
tabulatedFunctions
.
size
();
i
++
)
interactionGroupArgs
.
push_back
(
&
tabulatedFunctions
[
i
]
->
getDevicePointer
());
if
(
globals
!=
NULL
)
if
(
globals
!=
NULL
)
interactionGroupArgs
.
push_back
(
&
globals
->
getDevicePointer
());
interactionGroupArgs
.
push_back
(
&
globals
->
getDevicePointer
());
}
}
...
@@ -4446,6 +4488,11 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4446,6 +4488,11 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
groupForcesArgs
.
push_back
(
NULL
);
// Energy buffer hasn't been created yet
groupForcesArgs
.
push_back
(
NULL
);
// Energy buffer hasn't been created yet
groupForcesArgs
.
push_back
(
&
centerPositions
->
getDevicePointer
());
groupForcesArgs
.
push_back
(
&
centerPositions
->
getDevicePointer
());
groupForcesArgs
.
push_back
(
&
bondGroups
->
getDevicePointer
());
groupForcesArgs
.
push_back
(
&
bondGroups
->
getDevicePointer
());
groupForcesArgs
.
push_back
(
cu
.
getPeriodicBoxSizePointer
());
groupForcesArgs
.
push_back
(
cu
.
getInvPeriodicBoxSizePointer
());
groupForcesArgs
.
push_back
(
cu
.
getPeriodicBoxVecXPointer
());
groupForcesArgs
.
push_back
(
cu
.
getPeriodicBoxVecYPointer
());
groupForcesArgs
.
push_back
(
cu
.
getPeriodicBoxVecZPointer
());
// Record the tabulated functions.
// Record the tabulated functions.
...
@@ -4526,7 +4573,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4526,7 +4573,7 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
const
vector
<
int
>&
groups
=
iter
->
second
;
const
vector
<
int
>&
groups
=
iter
->
second
;
string
deltaName
=
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]];
string
deltaName
=
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
compute<<"real4 delta"<<deltaName<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName
<<
" = delta("
<<
posNames
[
groups
[
0
]]
<<
", "
<<
posNames
[
groups
[
1
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName
);
computedDeltas
.
insert
(
deltaName
);
}
}
compute
<<
"real r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
compute
<<
"real r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
...
@@ -4540,11 +4587,11 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4540,11 +4587,11 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
string
deltaName2
=
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]];
string
deltaName2
=
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[0]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName1
<<
" = delta("
<<
posNames
[
groups
[
1
]]
<<
", "
<<
posNames
[
groups
[
0
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[1]]<<", "<<posNames[groups[2]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName2
<<
" = delta("
<<
posNames
[
groups
[
1
]]
<<
", "
<<
posNames
[
groups
[
2
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
compute
<<
"real "
<<
angleName
<<
" = computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"real "
<<
angleName
<<
" = computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
...
@@ -4561,15 +4608,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
...
@@ -4561,15 +4608,15 @@ void CudaCalcCustomCentroidBondForceKernel::initialize(const System& system, con
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]]
+
atomNames
[
groups
[
3
]];
string
dihedralName
=
"dihedral_"
+
atomNames
[
groups
[
0
]]
+
atomNames
[
groups
[
1
]]
+
atomNames
[
groups
[
2
]]
+
atomNames
[
groups
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute<<"real4 delta"<<deltaName1<<" = delta("<<posNames[groups[0]]<<", "<<posNames[groups[1]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName1
<<
" = delta("
<<
posNames
[
groups
[
0
]]
<<
", "
<<
posNames
[
groups
[
1
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute<<"real4 delta"<<deltaName2<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[1]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName2
<<
" = delta("
<<
posNames
[
groups
[
2
]]
<<
", "
<<
posNames
[
groups
[
1
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
compute<<"real4 delta"<<deltaName3<<" = delta("<<posNames[groups[2]]<<", "<<posNames[groups[3]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName3
<<
" = delta("
<<
posNames
[
groups
[
2
]]
<<
", "
<<
posNames
[
groups
[
3
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName3
);
computedDeltas
.
insert
(
deltaName3
);
}
}
compute
<<
"real4 "
<<
crossName1
<<
" = computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"real4 "
<<
crossName1
<<
" = computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
...
@@ -4860,7 +4907,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
...
@@ -4860,7 +4907,7 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
const
vector
<
int
>&
atoms
=
iter
->
second
;
const
vector
<
int
>&
atoms
=
iter
->
second
;
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
string
deltaName
=
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]];
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName
)
==
0
)
{
compute<<"real4 delta"<<deltaName<<" = ccb_delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName
);
computedDeltas
.
insert
(
deltaName
);
}
}
compute
<<
"real r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
compute
<<
"real r_"
<<
deltaName
<<
" = sqrt(delta"
<<
deltaName
<<
".w);
\n
"
;
...
@@ -4874,11 +4921,11 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
...
@@ -4874,11 +4921,11 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
deltaName2
=
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
string
angleName
=
"angle_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute<<"real4 delta"<<deltaName1<<" = ccb_delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[0]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
0
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute<<"real4 delta"<<deltaName2<<" = ccb_delta("<<posNames[atoms[1]]<<", "<<posNames[atoms[2]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
1
]]
<<
", "
<<
posNames
[
atoms
[
2
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
compute
<<
"real "
<<
angleName
<<
" = ccb_computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"real "
<<
angleName
<<
" = ccb_computeAngle(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
...
@@ -4895,15 +4942,15 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
...
@@ -4895,15 +4942,15 @@ void CudaCalcCustomCompoundBondForceKernel::initialize(const System& system, con
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
crossName2
=
"cross_"
+
deltaName2
+
"_"
+
deltaName3
;
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
string
dihedralName
=
"dihedral_"
+
atomNames
[
atoms
[
0
]]
+
atomNames
[
atoms
[
1
]]
+
atomNames
[
atoms
[
2
]]
+
atomNames
[
atoms
[
3
]];
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName1
)
==
0
)
{
compute<<"real4 delta"<<deltaName1<<" = ccb_delta("<<posNames[atoms[0]]<<", "<<posNames[atoms[1]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName1
<<
" = ccb_delta("
<<
posNames
[
atoms
[
0
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName1
);
computedDeltas
.
insert
(
deltaName1
);
}
}
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName2
)
==
0
)
{
compute<<"real4 delta"<<deltaName2<<" = ccb_delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[1]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName2
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
1
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName2
);
computedDeltas
.
insert
(
deltaName2
);
}
}
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
if
(
computedDeltas
.
count
(
deltaName3
)
==
0
)
{
compute<<"real4 delta"<<deltaName3<<" = ccb_delta("<<posNames[atoms[2]]<<", "<<posNames[atoms[3]]<<");\n";
compute
<<
"real4 delta"
<<
deltaName3
<<
" = ccb_delta("
<<
posNames
[
atoms
[
2
]]
<<
", "
<<
posNames
[
atoms
[
3
]]
<<
"
, "
<<
force
.
usesPeriodicBoundaryConditions
()
<<
", periodicBoxSize, invPeriodicBoxSize, periodicBoxVecX, periodicBoxVecY, periodicBoxVecZ
);
\n
"
;
computedDeltas
.
insert
(
deltaName3
);
computedDeltas
.
insert
(
deltaName3
);
}
}
compute
<<
"real4 "
<<
crossName1
<<
" = ccb_computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
compute
<<
"real4 "
<<
crossName1
<<
" = ccb_computeCross(delta"
<<
deltaName1
<<
", delta"
<<
deltaName2
<<
");
\n
"
;
...
@@ -6236,9 +6283,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
...
@@ -6236,9 +6283,9 @@ void CudaIntegrateCustomStepKernel::prepareForComputation(ContextImpl& context,
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
for
(
int
step
=
0
;
step
<
numSteps
;
step
++
)
{
string
expr
;
string
expr
;
integrator
.
getComputationStep
(
step
,
stepType
[
step
],
variable
[
step
],
expr
);
integrator
.
getComputationStep
(
step
,
stepType
[
step
],
variable
[
step
],
expr
);
if (stepType[step] == CustomIntegrator::
Begin
WhileBlock)
if
(
stepType
[
step
]
==
CustomIntegrator
::
WhileBlock
Start
)
blockEnd
[
blockEnd
[
step
]]
=
step
;
// Record where to branch back to.
blockEnd
[
blockEnd
[
step
]]
=
step
;
// Record where to branch back to.
if (stepType[step] == CustomIntegrator::ComputeGlobal || stepType[step] == CustomIntegrator::
Begin
IfBlock || stepType[step] == CustomIntegrator::
Begin
WhileBlock)
if
(
stepType
[
step
]
==
CustomIntegrator
::
ComputeGlobal
||
stepType
[
step
]
==
CustomIntegrator
::
IfBlock
Start
||
stepType
[
step
]
==
CustomIntegrator
::
WhileBlock
Start
)
for
(
int
i
=
0
;
i
<
(
int
)
expression
[
step
].
size
();
i
++
)
for
(
int
i
=
0
;
i
<
(
int
)
expression
[
step
].
size
();
i
++
)
globalExpressions
[
step
].
push_back
(
expression
[
step
][
i
].
createCompiledExpression
());
globalExpressions
[
step
].
push_back
(
expression
[
step
][
i
].
createCompiledExpression
());
}
}
...
@@ -6685,15 +6732,15 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
...
@@ -6685,15 +6732,15 @@ void CudaIntegrateCustomStepKernel::execute(ContextImpl& context, CustomIntegrat
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainVelocities
)
{
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
ConstrainVelocities
)
{
cu
.
getIntegrationUtilities
().
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
cu
.
getIntegrationUtilities
().
applyVelocityConstraints
(
integrator
.
getConstraintTolerance
());
}
}
else if (stepType[step] == CustomIntegrator::
Begin
IfBlock) {
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
IfBlock
Start
)
{
if
(
!
evaluateCondition
(
step
))
if
(
!
evaluateCondition
(
step
))
nextStep
=
blockEnd
[
step
]
+
1
;
nextStep
=
blockEnd
[
step
]
+
1
;
}
}
else if (stepType[step] == CustomIntegrator::
Begin
WhileBlock) {
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
WhileBlock
Start
)
{
if
(
!
evaluateCondition
(
step
))
if
(
!
evaluateCondition
(
step
))
nextStep
=
blockEnd
[
step
]
+
1
;
nextStep
=
blockEnd
[
step
]
+
1
;
}
}
else if (stepType[step] == CustomIntegrator::
End
Block) {
else
if
(
stepType
[
step
]
==
CustomIntegrator
::
Block
End
)
{
if
(
blockEnd
[
step
]
!=
-
1
)
if
(
blockEnd
[
step
]
!=
-
1
)
nextStep
=
blockEnd
[
step
];
// Return to the start of a while block.
nextStep
=
blockEnd
[
step
];
// Return to the start of a while block.
}
}
...
@@ -6958,7 +7005,7 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d
...
@@ -6958,7 +7005,7 @@ void CudaApplyMonteCarloBarostatKernel::scaleCoordinates(ContextImpl& context, d
std
::
stringstream
m
;
std
::
stringstream
m
;
m
<<
"Error saving positions for MC barostat: "
<<
cu
.
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
m
<<
"Error saving positions for MC barostat: "
<<
cu
.
getErrorString
(
result
)
<<
" ("
<<
result
<<
")"
;
throw
OpenMMException
(
m
.
str
());
throw
OpenMMException
(
m
.
str
());
}
}
float
scalefX
=
(
float
)
scaleX
;
float
scalefX
=
(
float
)
scaleX
;
float
scalefY
=
(
float
)
scaleY
;
float
scalefY
=
(
float
)
scaleY
;
float
scalefZ
=
(
float
)
scaleZ
;
float
scalefZ
=
(
float
)
scaleZ
;
...
...
platforms/cuda/src/CudaNonbondedUtilities.cpp
View file @
73183c61
...
@@ -65,12 +65,14 @@ private:
...
@@ -65,12 +65,14 @@ private:
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
CudaNonbondedUtilities
::
CudaNonbondedUtilities
(
CudaContext
&
context
)
:
context
(
context
),
useCutoff
(
false
),
usePeriodic
(
false
),
anyExclusions
(
false
),
usePadding
(
true
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
exclusionIndices
(
NULL
),
exclusionRowIndices
(
NULL
),
exclusionTiles
(
NULL
),
exclusions
(
NULL
),
interactingTiles
(
NULL
),
interactingAtoms
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
interactionCount
(
NULL
),
blockCenter
(
NULL
),
blockBoundingBox
(
NULL
),
sortedBlocks
(
NULL
),
sortedBlockCenter
(
NULL
),
sortedBlockBoundingBox
(
NULL
),
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
oldPositions
(
NULL
),
rebuildNeighborList
(
NULL
),
blockSorter
(
NULL
),
pinnedCountBuffer
(
NULL
),
forceRebuildNeighborList
(
true
),
lastCutoff
(
0.0
),
groupFlags
(
0
)
{
// Decide how many thread blocks to use.
// Decide how many thread blocks to use.
string
errorMessage
=
"Error initializing nonbonded utilities"
;
string
errorMessage
=
"Error initializing nonbonded utilities"
;
int
multiprocessors
;
int
multiprocessors
;
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
context
.
getDevice
()));
CHECK_RESULT
(
cuDeviceGetAttribute
(
&
multiprocessors
,
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT
,
context
.
getDevice
()));
CHECK_RESULT
(
cuEventCreate
(
&
downloadCountEvent
,
0
));
CHECK_RESULT
(
cuMemHostAlloc
((
void
**
)
&
pinnedCountBuffer
,
sizeof
(
int
),
CU_MEMHOSTALLOC_PORTABLE
));
numForceThreadBlocks
=
4
*
multiprocessors
;
numForceThreadBlocks
=
4
*
multiprocessors
;
forceThreadBlockSize
=
(
context
.
getComputeCapability
()
<
2.0
?
128
:
256
);
forceThreadBlockSize
=
(
context
.
getComputeCapability
()
<
2.0
?
128
:
256
);
}
}
...
@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
...
@@ -106,6 +108,9 @@ CudaNonbondedUtilities::~CudaNonbondedUtilities() {
delete
rebuildNeighborList
;
delete
rebuildNeighborList
;
if
(
blockSorter
!=
NULL
)
if
(
blockSorter
!=
NULL
)
delete
blockSorter
;
delete
blockSorter
;
if
(
pinnedCountBuffer
!=
NULL
)
cuMemFreeHost
(
pinnedCountBuffer
);
cuEventDestroy
(
downloadCountEvent
);
}
}
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
void
CudaNonbondedUtilities
::
addInteraction
(
bool
usesCutoff
,
bool
usesPeriodic
,
bool
usesExclusions
,
double
cutoffDistance
,
const
vector
<
vector
<
int
>
>&
exclusionList
,
const
string
&
kernel
,
int
forceGroup
)
{
...
@@ -375,17 +380,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
...
@@ -375,17 +380,14 @@ void CudaNonbondedUtilities::prepareInteractions(int forceGroups) {
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
if
(
lastCutoff
!=
kernels
.
cutoffDistance
)
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
bool
rebuild
=
false
;
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
do
{
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
kernels
.
findBlockBoundsKernel
,
&
findBlockBoundsArgs
[
0
],
context
.
getNumAtoms
());
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
blockSorter
->
sort
(
*
sortedBlocks
);
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
context
.
executeKernel
(
kernels
.
sortBoxDataKernel
,
&
sortBoxDataArgs
[
0
],
context
.
getNumAtoms
());
forceRebuildNeighborList
=
false
;
context
.
executeKernel
(
kernels
.
findInteractingBlocksKernel
,
&
findInteractingBlocksArgs
[
0
],
context
.
getNumAtoms
(),
256
);
forceRebuildNeighborList
=
false
;
if
(
context
.
getComputeForceCount
()
==
1
)
rebuild
=
updateNeighborListSize
();
// This is the first time step, so check whether our initial guess was large enough.
}
while
(
rebuild
);
lastCutoff
=
kernels
.
cutoffDistance
;
lastCutoff
=
kernels
.
cutoffDistance
;
interactionCount
->
download
(
pinnedCountBuffer
,
false
);
cuEventRecord
(
downloadCountEvent
,
context
.
getCurrentStream
());
}
}
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
void
CudaNonbondedUtilities
::
computeInteractions
(
int
forceGroups
,
bool
includeForces
,
bool
includeEnergy
)
{
...
@@ -398,20 +400,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
...
@@ -398,20 +400,22 @@ void CudaNonbondedUtilities::computeInteractions(int forceGroups, bool includeFo
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
kernel
=
createInteractionKernel
(
kernels
.
source
,
parameters
,
arguments
,
true
,
true
,
forceGroups
,
includeForces
,
includeEnergy
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
context
.
executeKernel
(
kernel
,
&
forceArgs
[
0
],
numForceThreadBlocks
*
forceThreadBlockSize
,
forceThreadBlockSize
);
}
}
if
(
useCutoff
&&
numTiles
>
0
)
{
cuEventSynchronize
(
downloadCountEvent
);
updateNeighborListSize
();
}
}
}
bool
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
bool
CudaNonbondedUtilities
::
updateNeighborListSize
()
{
if
(
!
useCutoff
)
if
(
!
useCutoff
)
return
false
;
return
false
;
unsigned
int
*
pinnedInteractionCount
=
(
unsigned
int
*
)
context
.
getPinnedBuffer
();
if
(
pinnedCountBuffer
[
0
]
<=
(
unsigned
int
)
maxTiles
)
interactionCount
->
download
(
pinnedInteractionCount
);
if
(
pinnedInteractionCount
[
0
]
<=
(
unsigned
int
)
maxTiles
)
return
false
;
return
false
;
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// The most recent timestep had too many interactions to fit in the arrays. Make the arrays bigger to prevent
// this from happening in the future.
// this from happening in the future.
maxTiles
=
(
int
)
(
1.2
*
pinned
InteractionCount
[
0
]);
maxTiles
=
(
int
)
(
1.2
*
pinned
CountBuffer
[
0
]);
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
int
totalTiles
=
context
.
getNumAtomBlocks
()
*
(
context
.
getNumAtomBlocks
()
+
1
)
/
2
;
if
(
maxTiles
>
totalTiles
)
if
(
maxTiles
>
totalTiles
)
maxTiles
=
totalTiles
;
maxTiles
=
totalTiles
;
...
@@ -428,6 +432,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
...
@@ -428,6 +432,7 @@ bool CudaNonbondedUtilities::updateNeighborListSize() {
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
forceArgs
[
17
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
findInteractingBlocksArgs
[
7
]
=
&
interactingAtoms
->
getDevicePointer
();
forceRebuildNeighborList
=
true
;
forceRebuildNeighborList
=
true
;
context
.
setForcesValid
(
false
);
return
true
;
return
true
;
}
}
...
...
platforms/cuda/src/CudaPlatform.cpp
View file @
73183c61
...
@@ -62,6 +62,14 @@ extern "C" OPENMM_EXPORT_CUDA void registerPlatforms() {
...
@@ -62,6 +62,14 @@ extern "C" OPENMM_EXPORT_CUDA void registerPlatforms() {
#endif
#endif
CudaPlatform
::
CudaPlatform
()
{
CudaPlatform
::
CudaPlatform
()
{
deprecatedPropertyReplacements
[
"CudaDeviceIndex"
]
=
CudaDeviceIndex
();
deprecatedPropertyReplacements
[
"CudaDeviceName"
]
=
CudaDeviceName
();
deprecatedPropertyReplacements
[
"CudaUseBlockingSync"
]
=
CudaUseBlockingSync
();
deprecatedPropertyReplacements
[
"CudaPrecision"
]
=
CudaPrecision
();
deprecatedPropertyReplacements
[
"CudaUseCpuPme"
]
=
CudaUseCpuPme
();
deprecatedPropertyReplacements
[
"CudaTempDirectory"
]
=
CudaTempDirectory
();
deprecatedPropertyReplacements
[
"CudaDisablePmeStream"
]
=
CudaDisablePmeStream
();
deprecatedPropertyReplacements
[
"CudaDeterministicForces"
]
=
CudaDeterministicForces
();
CudaKernelFactory
*
factory
=
new
CudaKernelFactory
();
CudaKernelFactory
*
factory
=
new
CudaKernelFactory
();
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
UpdateStateDataKernel
::
Name
(),
factory
);
registerKernelFactory
(
UpdateStateDataKernel
::
Name
(),
factory
);
...
@@ -102,12 +110,14 @@ CudaPlatform::CudaPlatform() {
...
@@ -102,12 +110,14 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaTempDirectory
());
platformProperties
.
push_back
(
CudaTempDirectory
());
platformProperties
.
push_back
(
CudaHostCompiler
());
platformProperties
.
push_back
(
CudaHostCompiler
());
platformProperties
.
push_back
(
CudaDisablePmeStream
());
platformProperties
.
push_back
(
CudaDisablePmeStream
());
platformProperties
.
push_back
(
CudaDeterministicForces
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
setPropertyDefaultValue
(
CudaDisablePmeStream
(),
"false"
);
setPropertyDefaultValue
(
CudaDisablePmeStream
(),
"false"
);
setPropertyDefaultValue
(
CudaDeterministicForces
(),
"false"
);
#ifdef _MSC_VER
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
...
@@ -142,7 +152,10 @@ bool CudaPlatform::supportsDoublePrecision() const {
...
@@ -142,7 +152,10 @@ bool CudaPlatform::supportsDoublePrecision() const {
const
string
&
CudaPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
string
&
CudaPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
PlatformData
*
data
=
reinterpret_cast
<
const
PlatformData
*>
(
impl
.
getPlatformData
());
const
PlatformData
*
data
=
reinterpret_cast
<
const
PlatformData
*>
(
impl
.
getPlatformData
());
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
property
);
string
propertyName
=
property
;
if
(
deprecatedPropertyReplacements
.
find
(
property
)
!=
deprecatedPropertyReplacements
.
end
())
propertyName
=
deprecatedPropertyReplacements
.
find
(
property
)
->
second
;
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
propertyName
);
if
(
value
!=
data
->
propertyValues
.
end
())
if
(
value
!=
data
->
propertyValues
.
end
())
return
value
->
second
;
return
value
->
second
;
return
Platform
::
getPropertyValue
(
context
,
property
);
return
Platform
::
getPropertyValue
(
context
,
property
);
...
@@ -168,10 +181,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
...
@@ -168,10 +181,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaHostCompiler
())
:
properties
.
find
(
CudaHostCompiler
())
->
second
);
getPropertyDefaultValue
(
CudaHostCompiler
())
:
properties
.
find
(
CudaHostCompiler
())
->
second
);
string
pmeStreamPropValue
=
(
properties
.
find
(
CudaDisablePmeStream
())
==
properties
.
end
()
?
string
pmeStreamPropValue
=
(
properties
.
find
(
CudaDisablePmeStream
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDisablePmeStream
())
:
properties
.
find
(
CudaDisablePmeStream
())
->
second
);
getPropertyDefaultValue
(
CudaDisablePmeStream
())
:
properties
.
find
(
CudaDisablePmeStream
())
->
second
);
string
deterministicForcesValue
=
(
properties
.
find
(
CudaDeterministicForces
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDeterministicForces
())
:
properties
.
find
(
CudaDeterministicForces
())
->
second
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
transform
(
pmeStreamPropValue
.
begin
(),
pmeStreamPropValue
.
end
(),
pmeStreamPropValue
.
begin
(),
::
tolower
);
transform
(
pmeStreamPropValue
.
begin
(),
pmeStreamPropValue
.
end
(),
pmeStreamPropValue
.
begin
(),
::
tolower
);
transform
(
deterministicForcesValue
.
begin
(),
deterministicForcesValue
.
end
(),
deterministicForcesValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
if
(
!
supportsKernels
(
pmeKernelName
))
...
@@ -180,7 +196,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
...
@@ -180,7 +196,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
threads
));
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
deterministicForcesValue
,
threads
));
}
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
@@ -189,7 +206,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
...
@@ -189,7 +206,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
}
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
const
string
&
deterministicForcesProperty
,
int
numThreads
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
vector
<
string
>
devices
;
...
@@ -230,6 +248,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
...
@@ -230,6 +248,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
}
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
...
@@ -239,6 +258,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
...
@@ -239,6 +258,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
propertyValues
[
CudaPlatform
::
CudaHostCompiler
()]
=
hostCompilerProperty
;
propertyValues
[
CudaPlatform
::
CudaHostCompiler
()]
=
hostCompilerProperty
;
propertyValues
[
CudaPlatform
::
CudaDisablePmeStream
()]
=
disablePmeStream
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaDisablePmeStream
()]
=
disablePmeStream
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaDeterministicForces
()]
=
deterministicForces
?
"true"
:
"false"
;
contextEnergy
.
resize
(
contexts
.
size
());
contextEnergy
.
resize
(
contexts
.
size
());
// Determine whether peer-to-peer copying is supported, and enable it if so.
// Determine whether peer-to-peer copying is supported, and enable it if so.
...
...
platforms/cuda/src/kernels/angleForce.cu
View file @
73183c61
real3
v0
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
v0
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
v1
=
make_real3
(
pos2
.
x
-
pos3
.
x
,
pos2
.
y
-
pos3
.
y
,
pos2
.
z
-
pos3
.
z
);
real3
v1
=
make_real3
(
pos2
.
x
-
pos3
.
x
,
pos2
.
y
-
pos3
.
y
,
pos2
.
z
-
pos3
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
#endif
real3
cp
=
cross
(
v0
,
v1
);
real3
cp
=
cross
(
v0
,
v1
);
real
rp
=
cp
.
x
*
cp
.
x
+
cp
.
y
*
cp
.
y
+
cp
.
z
*
cp
.
z
;
real
rp
=
cp
.
x
*
cp
.
x
+
cp
.
y
*
cp
.
y
+
cp
.
z
*
cp
.
z
;
rp
=
max
(
SQRT
(
rp
),
(
real
)
1.0e-06
f
);
rp
=
max
(
SQRT
(
rp
),
(
real
)
1.0e-06
f
);
...
...
platforms/cuda/src/kernels/bondForce.cu
View file @
73183c61
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r
=
SQRT
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
real
r
=
SQRT
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
COMPUTE_FORCE
COMPUTE_FORCE
dEdR
=
(
r
>
0
)
?
(
dEdR
/
r
)
:
0
;
dEdR
=
(
r
>
0
)
?
(
dEdR
/
r
)
:
0
;
...
...
platforms/cuda/src/kernels/cmapTorsionForce.cu
View file @
73183c61
...
@@ -5,6 +5,11 @@ const real PI = (real) 3.14159265358979323846;
...
@@ -5,6 +5,11 @@ const real PI = (real) 3.14159265358979323846;
real3
v0a
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v0a
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v1a
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v1a
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v2a
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
real3
v2a
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0a
)
APPLY_PERIODIC_TO_DELTA
(
v1a
)
APPLY_PERIODIC_TO_DELTA
(
v2a
)
#endif
real3
cp0a
=
cross
(
v0a
,
v1a
);
real3
cp0a
=
cross
(
v0a
,
v1a
);
real3
cp1a
=
cross
(
v1a
,
v2a
);
real3
cp1a
=
cross
(
v1a
,
v2a
);
real
cosangle
=
dot
(
normalize
(
cp0a
),
normalize
(
cp1a
));
real
cosangle
=
dot
(
normalize
(
cp0a
),
normalize
(
cp1a
));
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real3
v0b
=
make_real3
(
pos5
.
x
-
pos6
.
x
,
pos5
.
y
-
pos6
.
y
,
pos5
.
z
-
pos6
.
z
);
real3
v0b
=
make_real3
(
pos5
.
x
-
pos6
.
x
,
pos5
.
y
-
pos6
.
y
,
pos5
.
z
-
pos6
.
z
);
real3
v1b
=
make_real3
(
pos7
.
x
-
pos6
.
x
,
pos7
.
y
-
pos6
.
y
,
pos7
.
z
-
pos6
.
z
);
real3
v1b
=
make_real3
(
pos7
.
x
-
pos6
.
x
,
pos7
.
y
-
pos6
.
y
,
pos7
.
z
-
pos6
.
z
);
real3
v2b
=
make_real3
(
pos7
.
x
-
pos8
.
x
,
pos7
.
y
-
pos8
.
y
,
pos7
.
z
-
pos8
.
z
);
real3
v2b
=
make_real3
(
pos7
.
x
-
pos8
.
x
,
pos7
.
y
-
pos8
.
y
,
pos7
.
z
-
pos8
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0b
)
APPLY_PERIODIC_TO_DELTA
(
v1b
)
APPLY_PERIODIC_TO_DELTA
(
v2b
)
#endif
real3
cp0b
=
cross
(
v0b
,
v1b
);
real3
cp0b
=
cross
(
v0b
,
v1b
);
real3
cp1b
=
cross
(
v1b
,
v2b
);
real3
cp1b
=
cross
(
v1b
,
v2b
);
cosangle
=
dot
(
normalize
(
cp0b
),
normalize
(
cp1b
));
cosangle
=
dot
(
normalize
(
cp0b
),
normalize
(
cp1b
));
...
...
platforms/cuda/src/kernels/customCentroidBond.cu
View file @
73183c61
...
@@ -66,8 +66,11 @@ inline __device__ real3 trim(real4 v) {
...
@@ -66,8 +66,11 @@ inline __device__ real3 trim(real4 v) {
/**
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
*/
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
)
{
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
,
bool
periodic
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
return
result
;
}
}
...
@@ -105,7 +108,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
...
@@ -105,7 +108,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Compute the forces on groups based on the bonds.
* Compute the forces on groups based on the bonds.
*/
*/
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
const
int
*
__restrict__
bondGroups
const
int
*
__restrict__
bondGroups
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
EXTRA_ARGS
)
{
EXTRA_ARGS
)
{
mixed
energy
=
0
;
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
...
platforms/cuda/src/kernels/customCompoundBond.cu
View file @
73183c61
...
@@ -8,8 +8,11 @@ inline __device__ real3 ccb_trim(real4 v) {
...
@@ -8,8 +8,11 @@ inline __device__ real3 ccb_trim(real4 v) {
/**
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
*/
inline
__device__
real4
ccb_delta
(
real4
vec1
,
real4
vec2
)
{
inline
__device__
real4
ccb_delta
(
real4
vec1
,
real4
vec2
,
bool
periodic
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
return
result
;
}
}
...
...
platforms/cuda/src/kernels/customGBEnergyN2.cu
View file @
73183c61
...
@@ -168,6 +168,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -168,6 +168,8 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -191,39 +193,35 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
...
@@ -191,39 +193,35 @@ extern "C" __global__ void computeN2Energy(unsigned long long* __restrict__ forc
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/customGBValueN2.cu
View file @
73183c61
...
@@ -146,6 +146,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -146,6 +146,8 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -167,39 +169,35 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
...
@@ -167,39 +169,35 @@ extern "C" __global__ void computeN2Value(const real4* __restrict__ posq, const
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
#else
}
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
else
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
#endif
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
{
y
+=
(
x
<
y
?
-
1
:
1
);
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/gbsaObc1.cu
View file @
73183c61
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -204,6 +204,8 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -225,39 +227,35 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
...
@@ -225,39 +227,35 @@ extern "C" __global__ void computeBornSum(unsigned long long* __restrict__ globa
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
}
#else
else
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
#endif
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
{
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
)
);
y
+
=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -559,6 +557,8 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
unsigned
int
numTiles
=
interactionCount
[
0
];
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
pos
=
(
int
)
(
warp
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
int
end
=
(
int
)
((
warp
+
1
)
*
(
numTiles
>
maxTiles
?
NUM_BLOCKS
*
((
long
long
)
NUM_BLOCKS
+
1
)
/
2
:
(
long
)
numTiles
)
/
totalWarps
);
#else
#else
...
@@ -580,39 +580,35 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
...
@@ -580,39 +580,35 @@ extern "C" __global__ void computeGBSAForce1(unsigned long long* __restrict__ fo
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
CUTOFF
);
#else
}
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
else
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
#endif
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
{
y
+=
(
x
<
y
?
-
1
:
1
);
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
...
...
platforms/cuda/src/kernels/nonbonded.cu
View file @
73183c61
...
@@ -117,7 +117,9 @@ extern "C" __global__ void computeNonbonded(
...
@@ -117,7 +117,9 @@ extern "C" __global__ void computeNonbonded(
#ifndef ENABLE_SHUFFLE
#ifndef ENABLE_SHUFFLE
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
__shared__
AtomData
localData
[
THREAD_BLOCK_SIZE
];
#endif
#endif
// First loop: process tiles that contain exclusions.
// First loop: process tiles that contain exclusions.
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
firstExclusionTile
=
FIRST_EXCLUSION_TILE
+
warp
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
const
unsigned
int
lastExclusionTile
=
FIRST_EXCLUSION_TILE
+
(
warp
+
1
)
*
(
LAST_EXCLUSION_TILE
-
FIRST_EXCLUSION_TILE
)
/
totalWarps
;
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
for
(
int
pos
=
firstExclusionTile
;
pos
<
lastExclusionTile
;
pos
++
)
{
...
@@ -309,8 +311,11 @@ extern "C" __global__ void computeNonbonded(
...
@@ -309,8 +311,11 @@ extern "C" __global__ void computeNonbonded(
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// Second loop: tiles without exclusions, either from the neighbor list (with cutoff) or just enumerating all
// of them (no cutoff).
// of them (no cutoff).
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
const
unsigned
int
numTiles
=
interactionCount
[
0
];
const
unsigned
int
numTiles
=
interactionCount
[
0
];
if
(
numTiles
>
maxTiles
)
return
;
// There wasn't enough memory for the neighbor list.
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
pos
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
warp
*
(
long
long
)
numTileIndices
/
totalWarps
:
warp
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
int
end
=
(
int
)
(
numTiles
>
maxTiles
?
startTileIndex
+
(
warp
+
1
)
*
(
long
long
)
numTileIndices
/
totalWarps
:
(
warp
+
1
)
*
(
long
long
)
numTiles
/
totalWarps
);
#else
#else
...
@@ -335,39 +340,35 @@ extern "C" __global__ void computeNonbonded(
...
@@ -335,39 +340,35 @@ extern "C" __global__ void computeNonbonded(
int
x
,
y
;
int
x
,
y
;
bool
singlePeriodicCopy
=
false
;
bool
singlePeriodicCopy
=
false
;
#ifdef USE_CUTOFF
#ifdef USE_CUTOFF
if
(
numTiles
<=
maxTiles
)
{
x
=
tiles
[
pos
];
x
=
tiles
[
pos
];
real4
blockSizeX
=
blockSize
[
x
];
real4
blockSizeX
=
blockSize
[
x
];
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
singlePeriodicCopy
=
(
0.5
f
*
periodicBoxSize
.
x
-
blockSizeX
.
x
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
y
-
blockSizeX
.
y
>=
MAX_CUTOFF
&&
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
0.5
f
*
periodicBoxSize
.
z
-
blockSizeX
.
z
>=
MAX_CUTOFF
);
#else
}
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
else
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
#endif
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
{
y
+=
(
x
<
y
?
-
1
:
1
);
y
=
(
int
)
floor
(
NUM_BLOCKS
+
0.5
f
-
SQRT
((
NUM_BLOCKS
+
0.5
f
)
*
(
NUM_BLOCKS
+
0.5
f
)
-
2
*
pos
));
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
if
(
x
<
y
||
x
>=
NUM_BLOCKS
)
{
// Occasionally happens due to roundoff error.
}
y
+=
(
x
<
y
?
-
1
:
1
);
x
=
(
pos
-
y
*
NUM_BLOCKS
+
y
*
(
y
+
1
)
/
2
);
}
// Skip over tiles that have exclusions, since they were already processed.
// Skip over tiles that have exclusions, since they were already processed.
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
while
(
skipTiles
[
tbx
+
TILE_SIZE
-
1
]
<
pos
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
if
(
skipBase
+
tgx
<
NUM_TILES_WITH_EXCLUSIONS
)
{
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
ushort2
tile
=
exclusionTiles
[
skipBase
+
tgx
];
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
skipTiles
[
threadIdx
.
x
]
=
tile
.
x
+
tile
.
y
*
NUM_BLOCKS
-
tile
.
y
*
(
tile
.
y
+
1
)
/
2
;
}
else
skipTiles
[
threadIdx
.
x
]
=
end
;
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
else
currentSkipIndex
++
;
skipTiles
[
threadIdx
.
x
]
=
end
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
skipBase
+=
TILE_SIZE
;
currentSkipIndex
=
tbx
;
}
}
while
(
skipTiles
[
currentSkipIndex
]
<
pos
)
currentSkipIndex
++
;
includeTile
=
(
skipTiles
[
currentSkipIndex
]
!=
pos
);
#endif
if
(
includeTile
)
{
if
(
includeTile
)
{
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
unsigned
int
atom1
=
x
*
TILE_SIZE
+
tgx
;
// Load atom data for this tile.
// Load atom data for this tile.
...
...
platforms/cuda/src/kernels/pme.cu
View file @
73183c61
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
extern
"C"
__global__
void
findAtomGridIndex
(
const
real4
*
__restrict__
posq
,
int2
*
__restrict__
pmeAtomGridIndex
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
)
{
// Compute the index of the grid point each atom is associated with.
// Compute the index of the grid point each atom is associated with.
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
real4
pos
=
posq
[
i
];
real4
pos
=
posq
[
i
];
APPLY_PERIODIC_TO_POS
(
pos
)
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
pos
.
z
*
recipBoxVecZ
.
z
);
...
@@ -18,7 +20,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
...
@@ -18,7 +20,8 @@ extern "C" __global__ void findAtomGridIndex(const real4* __restrict__ posq, int
}
}
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
extern
"C"
__global__
void
gridSpreadCharge
(
const
real4
*
__restrict__
posq
,
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -28,9 +31,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -28,9 +31,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
NUM_ATOMS
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS
(
pos
)
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
pos
.
z
*
recipBoxVecZ
.
z
);
...
@@ -83,7 +84,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
...
@@ -83,7 +84,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
#ifdef USE_DOUBLE_PRECISION
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
#elif __CUDA_ARCH__ < 200
#elif __CUDA_ARCH__ < 200
|| defined(USE_DETERMINISTIC_FORCES)
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
int
gridIndex
=
index
;
int
gridIndex
=
index
;
gridIndex
=
(
gridIndex
%
2
==
0
?
gridIndex
/
2
:
(
gridIndex
+
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
)
/
2
);
gridIndex
=
(
gridIndex
%
2
==
0
?
gridIndex
/
2
:
(
gridIndex
+
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
)
/
2
);
...
@@ -197,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
...
@@ -197,7 +198,8 @@ gridEvaluateEnergy(real2* __restrict__ halfcomplex_pmeGrid, mixed* __restrict__
extern
"C"
__global__
extern
"C"
__global__
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
void
gridInterpolateForce
(
const
real4
*
__restrict__
posq
,
unsigned
long
long
*
__restrict__
forceBuffers
,
const
real
*
__restrict__
originalPmeGrid
,
real4
periodicBoxSize
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
,
real3
recipBoxVecX
,
real3
recipBoxVecY
,
real3
recipBoxVecZ
,
const
int2
*
__restrict__
pmeAtomGridIndex
)
{
real3
data
[
PME_ORDER
];
real3
data
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
real3
ddata
[
PME_ORDER
];
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
const
real
scale
=
RECIP
(
PME_ORDER
-
1
);
...
@@ -209,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
...
@@ -209,9 +211,7 @@ void gridInterpolateForce(const real4* __restrict__ posq, unsigned long long* __
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
int
atom
=
pmeAtomGridIndex
[
i
].
x
;
real3
force
=
make_real3
(
0
);
real3
force
=
make_real3
(
0
);
real4
pos
=
posq
[
atom
];
real4
pos
=
posq
[
atom
];
pos
.
x
-=
floor
(
pos
.
x
*
recipBoxVecX
.
x
)
*
periodicBoxSize
.
x
;
APPLY_PERIODIC_TO_POS
(
pos
)
pos
.
y
-=
floor
(
pos
.
y
*
recipBoxVecY
.
y
)
*
periodicBoxSize
.
y
;
pos
.
z
-=
floor
(
pos
.
z
*
recipBoxVecZ
.
z
)
*
periodicBoxSize
.
z
;
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
real3
t
=
make_real3
(
pos
.
x
*
recipBoxVecX
.
x
+
pos
.
y
*
recipBoxVecY
.
x
+
pos
.
z
*
recipBoxVecZ
.
x
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
y
*
recipBoxVecY
.
y
+
pos
.
z
*
recipBoxVecZ
.
y
,
pos
.
z
*
recipBoxVecZ
.
z
);
pos
.
z
*
recipBoxVecZ
.
z
);
...
...
platforms/cuda/src/kernels/torsionForce.cu
View file @
73183c61
...
@@ -2,6 +2,11 @@ const real PI = (real) 3.14159265358979323846;
...
@@ -2,6 +2,11 @@ const real PI = (real) 3.14159265358979323846;
real3
v0
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v0
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v1
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v1
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v2
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
real3
v2
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
APPLY_PERIODIC_TO_DELTA
(
v2
)
#endif
real3
cp0
=
cross
(
v0
,
v1
);
real3
cp0
=
cross
(
v0
,
v1
);
real3
cp1
=
cross
(
v1
,
v2
);
real3
cp1
=
cross
(
v1
,
v2
);
real
cosangle
=
dot
(
normalize
(
cp0
),
normalize
(
cp1
));
real
cosangle
=
dot
(
normalize
(
cp0
),
normalize
(
cp1
));
...
...
platforms/cuda/tests/CudaTests.h
View file @
73183c61
...
@@ -6,7 +6,7 @@
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* *
* Portions copyright (c) 2015 Stanford University and the Authors.
*
* Portions copyright (c) 2015
-2016
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Authors: Peter Eastman *
* Contributors: *
* Contributors: *
* *
* *
...
@@ -39,5 +39,5 @@ OpenMM::CudaPlatform platform;
...
@@ -39,5 +39,5 @@ OpenMM::CudaPlatform platform;
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
if
(
argc
>
1
)
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"
Cuda
Precision"
,
std
::
string
(
argv
[
1
]));
platform
.
setPropertyDefaultValue
(
"Precision"
,
std
::
string
(
argv
[
1
]));
}
}
platforms/cuda/tests/TestCudaFFT3D.cpp
View file @
73183c61
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
...
@@ -56,7 +56,7 @@ void testTransform(bool realToComplex, int xsize, int ysize, int zsize) {
system
.
addParticle
(
0.0
);
system
.
addParticle
(
0.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
OpenMM_SFMT
::
SFMT
sfmt
;
OpenMM_SFMT
::
SFMT
sfmt
;
...
...
platforms/cuda/tests/TestCudaNonbondedForce.cpp
View file @
73183c61
...
@@ -118,9 +118,44 @@ void testReordering() {
...
@@ -118,9 +118,44 @@ void testReordering() {
}
}
}
}
void
testDeterministicForces
()
{
// Check that the CudaDeterministicForces property works correctly.
const
int
numParticles
=
1000
;
System
system
;
system
.
setDefaultPeriodicBoxVectors
(
Vec3
(
6
,
0
,
0
),
Vec3
(
2.1
,
6
,
0
),
Vec3
(
-
1.5
,
-
0.5
,
6
));
NonbondedForce
*
nonbonded
=
new
NonbondedForce
();
nonbonded
->
setNonbondedMethod
(
NonbondedForce
::
PME
);
system
.
addForce
(
nonbonded
);
vector
<
Vec3
>
positions
;
OpenMM_SFMT
::
SFMT
sfmt
;
init_gen_rand
(
0
,
sfmt
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
system
.
addParticle
(
1.0
);
nonbonded
->
addParticle
(
i
%
2
==
0
?
1
:
-
1
,
1
,
0
);
positions
.
push_back
(
Vec3
(
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
,
genrand_real2
(
sfmt
)
-
0.5
)
*
6
);
}
VerletIntegrator
integrator
(
0.001
);
map
<
string
,
string
>
properties
;
properties
[
CudaPlatform
::
CudaDeterministicForces
()]
=
"true"
;
Context
context
(
system
,
integrator
,
platform
,
properties
);
context
.
setPositions
(
positions
);
State
state1
=
context
.
getState
(
State
::
Forces
);
State
state2
=
context
.
getState
(
State
::
Forces
);
// All forces should be *exactly* equal.
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
0
],
state2
.
getForces
()[
i
][
0
]);
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
1
],
state2
.
getForces
()[
i
][
1
]);
ASSERT_EQUAL
(
state1
.
getForces
()[
i
][
2
],
state2
.
getForces
()[
i
][
2
]);
}
}
void
runPlatformTests
()
{
void
runPlatformTests
()
{
testParallelComputation
(
NonbondedForce
::
NoCutoff
);
testParallelComputation
(
NonbondedForce
::
NoCutoff
);
testParallelComputation
(
NonbondedForce
::
Ewald
);
testParallelComputation
(
NonbondedForce
::
Ewald
);
testParallelComputation
(
NonbondedForce
::
PME
);
testParallelComputation
(
NonbondedForce
::
PME
);
testReordering
();
testReordering
();
testDeterministicForces
();
}
}
platforms/cuda/tests/TestCudaRandom.cpp
View file @
73183c61
...
@@ -56,7 +56,7 @@ void testGaussian() {
...
@@ -56,7 +56,7 @@ void testGaussian() {
system
.
addParticle
(
1.0
);
system
.
addParticle
(
1.0
);
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
CudaPlatform
::
PlatformData
platformData
(
NULL
,
system
,
""
,
"true"
,
platform
.
getPropertyDefaultValue
(
"CudaPrecision"
),
"false"
,
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaTempDirectory
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
1
);
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaHostCompiler
()),
platform
.
getPropertyDefaultValue
(
CudaPlatform
::
CudaDisablePmeStream
()),
"false"
,
1
);
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
CudaContext
&
context
=
*
platformData
.
contexts
[
0
];
context
.
initialize
();
context
.
initialize
();
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
context
.
getIntegrationUtilities
().
initRandomNumberGenerator
(
0
);
...
...
Prev
1
2
3
4
5
6
7
8
…
14
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment