Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
2e9c418a
Commit
2e9c418a
authored
May 05, 2016
by
peastman
Browse files
Merge branch 'master' into gayberne
parents
8f532e31
a4d327f5
Changes
254
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
337 additions
and
163 deletions
+337
-163
platforms/cpu/src/CpuCustomNonbondedForce.cpp
platforms/cpu/src/CpuCustomNonbondedForce.cpp
+4
-3
platforms/cpu/src/CpuKernels.cpp
platforms/cpu/src/CpuKernels.cpp
+73
-74
platforms/cpu/src/CpuNeighborList.cpp
platforms/cpu/src/CpuNeighborList.cpp
+66
-22
platforms/cpu/src/CpuPlatform.cpp
platforms/cpu/src/CpuPlatform.cpp
+32
-2
platforms/cpu/tests/TestCpuNeighborList.cpp
platforms/cpu/tests/TestCpuNeighborList.cpp
+6
-6
platforms/cuda/include/CudaKernels.h
platforms/cuda/include/CudaKernels.h
+2
-2
platforms/cuda/include/CudaPlatform.h
platforms/cuda/include/CudaPlatform.h
+16
-9
platforms/cuda/src/CudaContext.cpp
platforms/cuda/src/CudaContext.cpp
+3
-3
platforms/cuda/src/CudaExpressionUtilities.cpp
platforms/cuda/src/CudaExpressionUtilities.cpp
+3
-3
platforms/cuda/src/CudaKernels.cpp
platforms/cuda/src/CudaKernels.cpp
+70
-27
platforms/cuda/src/CudaPlatform.cpp
platforms/cuda/src/CudaPlatform.cpp
+23
-3
platforms/cuda/src/kernels/angleForce.cu
platforms/cuda/src/kernels/angleForce.cu
+4
-0
platforms/cuda/src/kernels/bondForce.cu
platforms/cuda/src/kernels/bondForce.cu
+3
-0
platforms/cuda/src/kernels/cmapTorsionForce.cu
platforms/cuda/src/kernels/cmapTorsionForce.cu
+10
-0
platforms/cuda/src/kernels/customCentroidBond.cu
platforms/cuda/src/kernels/customCentroidBond.cu
+5
-2
platforms/cuda/src/kernels/customCompoundBond.cu
platforms/cuda/src/kernels/customCompoundBond.cu
+4
-1
platforms/cuda/src/kernels/customManyParticle.cu
platforms/cuda/src/kernels/customManyParticle.cu
+5
-3
platforms/cuda/src/kernels/pme.cu
platforms/cuda/src/kernels/pme.cu
+1
-1
platforms/cuda/src/kernels/torsionForce.cu
platforms/cuda/src/kernels/torsionForce.cu
+5
-0
platforms/cuda/tests/CudaTests.h
platforms/cuda/tests/CudaTests.h
+2
-2
No files found.
platforms/cpu/src/CpuCustomNonbondedForce.cpp
View file @
2e9c418a
/* Portions copyright (c) 2009-201
4
Stanford University and Simbios.
/* Portions copyright (c) 2009-201
6
Stanford University and Simbios.
* Contributors: Peter Eastman
*
* Permission is hereby granted, free of charge, to any person obtaining
...
...
@@ -190,7 +190,8 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
int
blockIndex
=
gmx_atomic_fetch_add
(
reinterpret_cast
<
gmx_atomic_t
*>
(
atomicCounter
),
1
);
if
(
blockIndex
>=
neighborList
->
getNumBlocks
())
break
;
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
4
*
blockIndex
];
const
int
blockSize
=
neighborList
->
getBlockSize
();
const
int
*
blockAtom
=
&
neighborList
->
getSortedAtoms
()[
blockSize
*
blockIndex
];
const
vector
<
int
>&
neighbors
=
neighborList
->
getBlockNeighbors
(
blockIndex
);
const
vector
<
char
>&
exclusions
=
neighborList
->
getBlockExclusions
(
blockIndex
);
for
(
int
i
=
0
;
i
<
(
int
)
neighbors
.
size
();
i
++
)
{
...
...
@@ -199,7 +200,7 @@ void CpuCustomNonbondedForce::threadComputeForce(ThreadPool& threads, int thread
ReferenceForce
::
setVariable
(
data
.
energyParticleParams
[
j
*
2
],
atomParameters
[
first
][
j
]);
ReferenceForce
::
setVariable
(
data
.
forceParticleParams
[
j
*
2
],
atomParameters
[
first
][
j
]);
}
for
(
int
k
=
0
;
k
<
4
;
k
++
)
{
for
(
int
k
=
0
;
k
<
blockSize
;
k
++
)
{
if
((
exclusions
[
i
]
&
(
1
<<
k
))
==
0
)
{
int
second
=
blockAtom
[
k
];
for
(
int
j
=
0
;
j
<
(
int
)
paramNames
.
size
();
j
++
)
{
...
...
platforms/cpu/src/CpuKernels.cpp
View file @
2e9c418a
...
...
@@ -230,6 +230,7 @@ CpuCalcForcesAndEnergyKernel::CpuCalcForcesAndEnergyKernel(std::string name, con
void
CpuCalcForcesAndEnergyKernel
::
initialize
(
const
System
&
system
)
{
referenceKernel
.
getAs
<
ReferenceCalcForcesAndEnergyKernel
>
().
initialize
(
system
);
lastPositions
.
resize
(
system
.
getNumParticles
(),
Vec3
(
1e10
,
1e10
,
1e10
));
}
void
CpuCalcForcesAndEnergyKernel
::
beginComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
)
{
...
...
@@ -237,11 +238,60 @@ void CpuCalcForcesAndEnergyKernel::beginComputation(ContextImpl& context, bool i
// Convert positions to single precision and clear the forces.
InitForceTask
task
(
context
.
getSystem
().
getNumParticles
(),
context
,
data
);
int
numParticles
=
context
.
getSystem
().
getNumParticles
();
InitForceTask
task
(
numParticles
,
context
,
data
);
data
.
threads
.
execute
(
task
);
data
.
threads
.
waitForThreads
();
if
(
!
task
.
positionsValid
)
throw
OpenMMException
(
"Particle coordinate is nan"
);
// Determine whether we need to recompute the neighbor list.
if
(
data
.
neighborList
!=
NULL
)
{
double
padding
=
data
.
paddedCutoff
-
data
.
cutoff
;;
bool
needRecompute
=
false
;
double
closeCutoff2
=
0.25
*
padding
*
padding
;
double
farCutoff2
=
0.5
*
padding
*
padding
;
int
maxNumMoved
=
numParticles
/
10
;
vector
<
int
>
moved
;
vector
<
RealVec
>&
posData
=
extractPositions
(
context
);
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
RealVec
delta
=
posData
[
i
]
-
lastPositions
[
i
];
double
dist2
=
delta
.
dot
(
delta
);
if
(
dist2
>
closeCutoff2
)
{
moved
.
push_back
(
i
);
if
(
dist2
>
farCutoff2
||
moved
.
size
()
>
maxNumMoved
)
{
needRecompute
=
true
;
break
;
}
}
}
if
(
!
needRecompute
&&
moved
.
size
()
>
0
)
{
// Some particles have moved further than half the padding distance. Look for pairs
// that are missing from the neighbor list.
int
numMoved
=
moved
.
size
();
double
cutoff2
=
data
.
cutoff
*
data
.
cutoff
;
double
paddedCutoff2
=
data
.
paddedCutoff
*
data
.
paddedCutoff
;
for
(
int
i
=
1
;
i
<
numMoved
&&
!
needRecompute
;
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
RealVec
delta
=
posData
[
moved
[
i
]]
-
posData
[
moved
[
j
]];
if
(
delta
.
dot
(
delta
)
<
cutoff2
)
{
// These particles should interact. See if they are in the neighbor list.
RealVec
oldDelta
=
lastPositions
[
moved
[
i
]]
-
lastPositions
[
moved
[
j
]];
if
(
oldDelta
.
dot
(
oldDelta
)
>
paddedCutoff2
)
{
needRecompute
=
true
;
break
;
}
}
}
}
if
(
needRecompute
)
{
data
.
neighborList
->
computeNeighborList
(
numParticles
,
data
.
posq
,
data
.
exclusions
,
extractBoxVectors
(
context
),
data
.
isPeriodic
,
data
.
paddedCutoff
,
data
.
threads
);
lastPositions
=
posData
;
}
}
}
double
CpuCalcForcesAndEnergyKernel
::
finishComputation
(
ContextImpl
&
context
,
bool
includeForce
,
bool
includeEnergy
,
int
groups
,
bool
&
valid
)
{
...
...
@@ -283,6 +333,7 @@ void CpuCalcHarmonicAngleForceKernel::initialize(const System& system, const Har
angleParamArray
[
i
][
1
]
=
(
RealOpenMM
)
k
;
}
bondForce
.
initialize
(
system
.
getNumParticles
(),
numAngles
,
3
,
angleIndexArray
,
data
.
threads
);
usePeriodic
=
force
.
usesPeriodicBoundaryConditions
();
}
double
CpuCalcHarmonicAngleForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -290,6 +341,8 @@ double CpuCalcHarmonicAngleForceKernel::execute(ContextImpl& context, bool inclu
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
RealOpenMM
energy
=
0
;
ReferenceAngleBondIxn
angleBond
;
if
(
usePeriodic
)
angleBond
.
setPeriodic
(
extractBoxVectors
(
context
));
bondForce
.
calculateForce
(
posData
,
angleParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
angleBond
);
return
energy
;
}
...
...
@@ -343,6 +396,7 @@ void CpuCalcPeriodicTorsionForceKernel::initialize(const System& system, const P
torsionParamArray
[
i
][
2
]
=
(
RealOpenMM
)
periodicity
;
}
bondForce
.
initialize
(
system
.
getNumParticles
(),
numTorsions
,
4
,
torsionIndexArray
,
data
.
threads
);
usePeriodic
=
force
.
usesPeriodicBoundaryConditions
();
}
double
CpuCalcPeriodicTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -350,6 +404,8 @@ double CpuCalcPeriodicTorsionForceKernel::execute(ContextImpl& context, bool inc
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
RealOpenMM
energy
=
0
;
ReferenceProperDihedralBond
periodicTorsionBond
;
if
(
usePeriodic
)
periodicTorsionBond
.
setPeriodic
(
extractBoxVectors
(
context
));
bondForce
.
calculateForce
(
posData
,
torsionParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
periodicTorsionBond
);
return
energy
;
}
...
...
@@ -407,6 +463,7 @@ void CpuCalcRBTorsionForceKernel::initialize(const System& system, const RBTorsi
torsionParamArray
[
i
][
5
]
=
(
RealOpenMM
)
c5
;
}
bondForce
.
initialize
(
system
.
getNumParticles
(),
numTorsions
,
4
,
torsionIndexArray
,
data
.
threads
);
usePeriodic
=
force
.
usesPeriodicBoundaryConditions
();
}
double
CpuCalcRBTorsionForceKernel
::
execute
(
ContextImpl
&
context
,
bool
includeForces
,
bool
includeEnergy
)
{
...
...
@@ -414,6 +471,8 @@ double CpuCalcRBTorsionForceKernel::execute(ContextImpl& context, bool includeFo
vector
<
RealVec
>&
forceData
=
extractForces
(
context
);
RealOpenMM
energy
=
0
;
ReferenceRbDihedralBond
rbTorsionBond
;
if
(
usePeriodic
)
rbTorsionBond
.
setPeriodic
(
extractBoxVectors
(
context
));
bondForce
.
calculateForce
(
posData
,
torsionParamArray
,
forceData
,
includeEnergy
?
&
energy
:
NULL
,
rbTorsionBond
);
return
energy
;
}
...
...
@@ -464,15 +523,11 @@ CpuNonbondedForce* createCpuNonbondedForceVec4();
CpuNonbondedForce
*
createCpuNonbondedForceVec8
();
CpuCalcNonbondedForceKernel
::
CpuCalcNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
),
neighborList
(
NULL
),
nonbonded
(
NULL
)
{
if
(
isVec8Supported
())
{
neighborList
=
new
CpuNeighborList
(
8
);
data
(
data
),
bonded14IndexArray
(
NULL
),
bonded14ParamArray
(
NULL
),
hasInitializedPme
(
false
),
nonbonded
(
NULL
)
{
if
(
isVec8Supported
())
nonbonded
=
createCpuNonbondedForceVec8
();
}
else
{
neighborList
=
new
CpuNeighborList
(
4
);
else
nonbonded
=
createCpuNonbondedForceVec4
();
}
}
CpuCalcNonbondedForceKernel
::~
CpuCalcNonbondedForceKernel
()
{
...
...
@@ -486,8 +541,6 @@ CpuCalcNonbondedForceKernel::~CpuCalcNonbondedForceKernel() {
}
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
if
(
neighborList
!=
NULL
)
delete
neighborList
;
}
void
CpuCalcNonbondedForceKernel
::
initialize
(
const
System
&
system
,
const
NonbondedForce
&
force
)
{
...
...
@@ -547,6 +600,7 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
if
(
nonbondedMethod
==
NoCutoff
)
useSwitchingFunction
=
false
;
else
{
data
.
requestNeighborList
(
nonbondedCutoff
,
0.25
*
nonbondedCutoff
,
true
,
exclusions
);
useSwitchingFunction
=
force
.
getUseSwitchingFunction
();
switchingDistance
=
force
.
getSwitchingDistance
();
}
...
...
@@ -569,7 +623,6 @@ void CpuCalcNonbondedForceKernel::initialize(const System& system, const Nonbond
dispersionCoefficient
=
NonbondedForceImpl
::
calcDispersionCorrection
(
system
,
force
);
else
dispersionCoefficient
=
0.0
;
lastPositions
.
resize
(
numParticles
,
Vec3
(
1e10
,
1e10
,
1e10
));
data
.
isPeriodic
=
(
nonbondedMethod
==
CutoffPeriodic
||
nonbondedMethod
==
Ewald
||
nonbondedMethod
==
PME
);
}
...
...
@@ -596,53 +649,8 @@ double CpuCalcNonbondedForceKernel::execute(ContextImpl& context, bool includeFo
double
energy
=
(
includeReciprocal
?
ewaldSelfEnergy
:
0.0
);
bool
ewald
=
(
nonbondedMethod
==
Ewald
);
bool
pme
=
(
nonbondedMethod
==
PME
);
if
(
nonbondedMethod
!=
NoCutoff
)
{
// Determine whether we need to recompute the neighbor list.
double
padding
=
0.25
*
nonbondedCutoff
;
bool
needRecompute
=
false
;
double
closeCutoff2
=
0.25
*
padding
*
padding
;
double
farCutoff2
=
0.5
*
padding
*
padding
;
int
maxNumMoved
=
numParticles
/
10
;
vector
<
int
>
moved
;
for
(
int
i
=
0
;
i
<
numParticles
;
i
++
)
{
RealVec
delta
=
posData
[
i
]
-
lastPositions
[
i
];
double
dist2
=
delta
.
dot
(
delta
);
if
(
dist2
>
closeCutoff2
)
{
moved
.
push_back
(
i
);
if
(
dist2
>
farCutoff2
||
moved
.
size
()
>
maxNumMoved
)
{
needRecompute
=
true
;
break
;
}
}
}
if
(
!
needRecompute
&&
moved
.
size
()
>
0
)
{
// Some particles have moved further than half the padding distance. Look for pairs
// that are missing from the neighbor list.
int
numMoved
=
moved
.
size
();
double
cutoff2
=
nonbondedCutoff
*
nonbondedCutoff
;
double
paddedCutoff2
=
(
nonbondedCutoff
+
padding
)
*
(
nonbondedCutoff
+
padding
);
for
(
int
i
=
1
;
i
<
numMoved
&&
!
needRecompute
;
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
RealVec
delta
=
posData
[
moved
[
i
]]
-
posData
[
moved
[
j
]];
if
(
delta
.
dot
(
delta
)
<
cutoff2
)
{
// These particles should interact. See if they are in the neighbor list.
RealVec
oldDelta
=
lastPositions
[
moved
[
i
]]
-
lastPositions
[
moved
[
j
]];
if
(
oldDelta
.
dot
(
oldDelta
)
>
paddedCutoff2
)
{
needRecompute
=
true
;
break
;
}
}
}
}
if
(
needRecompute
)
{
neighborList
->
computeNeighborList
(
numParticles
,
posq
,
exclusions
,
boxVectors
,
data
.
isPeriodic
,
nonbondedCutoff
+
padding
,
data
.
threads
);
lastPositions
=
posData
;
}
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
,
rfDielectric
);
}
if
(
nonbondedMethod
!=
NoCutoff
)
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
data
.
neighborList
,
rfDielectric
);
if
(
data
.
isPeriodic
)
{
RealVec
*
boxVectors
=
extractBoxVectors
(
context
);
double
minAllowedSize
=
1.999999
*
nonbondedCutoff
;
...
...
@@ -739,7 +747,7 @@ void CpuCalcNonbondedForceKernel::getPMEParameters(double& alpha, int& nx, int&
}
CpuCalcCustomNonbondedForceKernel
::
CpuCalcCustomNonbondedForceKernel
(
string
name
,
const
Platform
&
platform
,
CpuPlatform
::
PlatformData
&
data
)
:
CalcCustomNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
forceCopy
(
NULL
),
neighborList
(
NULL
),
nonbonded
(
NULL
)
{
CalcCustomNonbondedForceKernel
(
name
,
platform
),
data
(
data
),
forceCopy
(
NULL
),
nonbonded
(
NULL
)
{
}
CpuCalcCustomNonbondedForceKernel
::~
CpuCalcCustomNonbondedForceKernel
()
{
...
...
@@ -748,8 +756,6 @@ CpuCalcCustomNonbondedForceKernel::~CpuCalcCustomNonbondedForceKernel() {
delete
[]
particleParamArray
[
i
];
delete
[]
particleParamArray
;
}
if
(
neighborList
!=
NULL
)
delete
neighborList
;
if
(
nonbonded
!=
NULL
)
delete
nonbonded
;
if
(
forceCopy
!=
NULL
)
...
...
@@ -786,7 +792,7 @@ void CpuCalcCustomNonbondedForceKernel::initialize(const System& system, const C
if
(
nonbondedMethod
==
NoCutoff
)
useSwitchingFunction
=
false
;
else
{
neighborList
=
new
CpuNeighborList
(
4
);
data
.
requestNeighborList
(
nonbondedCutoff
,
0.25
*
nonbondedCutoff
,
true
,
exclusions
);
useSwitchingFunction
=
force
.
getUseSwitchingFunction
();
switchingDistance
=
force
.
getSwitchingDistance
();
}
...
...
@@ -852,10 +858,8 @@ double CpuCalcCustomNonbondedForceKernel::execute(ContextImpl& context, bool inc
RealVec
*
boxVectors
=
extractBoxVectors
(
context
);
double
energy
=
0
;
bool
periodic
=
(
nonbondedMethod
==
CutoffPeriodic
);
if
(
nonbondedMethod
!=
NoCutoff
)
{
neighborList
->
computeNeighborList
(
numParticles
,
data
.
posq
,
exclusions
,
boxVectors
,
data
.
isPeriodic
,
nonbondedCutoff
,
data
.
threads
);
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
);
}
if
(
nonbondedMethod
!=
NoCutoff
)
nonbonded
->
setUseCutoff
(
nonbondedCutoff
,
*
data
.
neighborList
);
if
(
periodic
)
{
double
minAllowedSize
=
2
*
nonbondedCutoff
;
if
(
boxVectors
[
0
][
0
]
<
minAllowedSize
||
boxVectors
[
1
][
1
]
<
minAllowedSize
||
boxVectors
[
2
][
2
]
<
minAllowedSize
)
...
...
@@ -963,8 +967,6 @@ CpuCalcCustomGBForceKernel::~CpuCalcCustomGBForceKernel() {
delete
[]
particleParamArray
[
i
];
delete
[]
particleParamArray
;
}
if
(
neighborList
!=
NULL
)
delete
neighborList
;
if
(
ixn
!=
NULL
)
delete
ixn
;
}
...
...
@@ -1012,10 +1014,8 @@ void CpuCalcCustomGBForceKernel::initialize(const System& system, const CustomGB
globalParameterNames
.
push_back
(
force
.
getGlobalParameterName
(
i
));
nonbondedMethod
=
CalcCustomGBForceKernel
::
NonbondedMethod
(
force
.
getNonbondedMethod
());
nonbondedCutoff
=
(
RealOpenMM
)
force
.
getCutoffDistance
();
if
(
nonbondedMethod
==
NoCutoff
)
neighborList
=
NULL
;
else
neighborList
=
new
CpuNeighborList
(
4
);
if
(
nonbondedMethod
!=
NoCutoff
)
data
.
requestNeighborList
(
nonbondedCutoff
,
0.25
*
nonbondedCutoff
,
true
,
exclusions
);
// Create custom functions for the tabulated functions.
...
...
@@ -1112,8 +1112,7 @@ double CpuCalcCustomGBForceKernel::execute(ContextImpl& context, bool includeFor
ixn
->
setPeriodic
(
extractBoxSize
(
context
));
if
(
nonbondedMethod
!=
NoCutoff
)
{
vector
<
set
<
int
>
>
noExclusions
(
numParticles
);
neighborList
->
computeNeighborList
(
numParticles
,
data
.
posq
,
exclusions
,
boxVectors
,
data
.
isPeriodic
,
nonbondedCutoff
,
data
.
threads
);
ixn
->
setUseCutoff
(
nonbondedCutoff
,
*
neighborList
);
ixn
->
setUseCutoff
(
nonbondedCutoff
,
*
data
.
neighborList
);
}
map
<
string
,
double
>
globalParameters
;
for
(
int
i
=
0
;
i
<
(
int
)
globalParameterNames
.
size
();
i
++
)
...
...
platforms/cpu/src/CpuNeighborList.cpp
View file @
2e9c418a
...
...
@@ -225,38 +225,78 @@ public:
if
(
usePeriodic
)
voxelIndex
.
y
=
(
y
<
0
?
y
+
ny
:
(
y
>=
ny
?
y
-
ny
:
y
));
float
boxy
=
floor
((
float
)
y
/
ny
);
float
xoffset
=
(
float
)
(
usePeriodic
?
boxy
*
periodicBoxVectors
[
1
][
0
]
+
boxz
*
periodicBoxVectors
[
2
][
0
]
:
0
);
// Identify the range of atoms within this bin we need to search. When using periodic boundary
// conditions, there may be two separate ranges.
float
minx
=
centerPos
[
0
];
float
maxx
=
centerPos
[
0
];
fvec4
offset
(
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
),
0
);
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
const
float
*
atomPos
=
&
sortedPositions
[
4
*
(
blockSize
*
blockIndex
+
k
)];
fvec4
posVec
(
atomPos
);
fvec4
delta1
=
offset
-
posVec
;
fvec4
delta2
=
delta1
+
fvec4
(
0
,
voxelSizeY
,
voxelSizeZ
,
0
);
if
(
usePeriodic
)
{
delta1
-=
round
(
delta1
*
invBoxSize
)
*
boxSize
;
delta2
-=
round
(
delta2
*
invBoxSize
)
*
boxSize
;
if
(
usePeriodic
&&
triclinic
)
{
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
const
float
*
atomPos
=
&
sortedPositions
[
4
*
(
blockSize
*
blockIndex
+
k
)];
fvec4
delta1
(
0
,
voxelSizeY
*
voxelIndex
.
y
-
atomPos
[
1
],
voxelSizeZ
*
voxelIndex
.
z
-
atomPos
[
2
],
0
);
fvec4
delta2
=
delta1
+
fvec4
(
0
,
voxelSizeY
,
0
,
0
);
fvec4
delta3
=
delta1
+
fvec4
(
0
,
0
,
voxelSizeZ
,
0
);
fvec4
delta4
=
delta1
+
fvec4
(
0
,
voxelSizeY
,
voxelSizeZ
,
0
);
delta1
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta1
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta1
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta1
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
delta1
-=
periodicBoxVec4
[
0
]
*
floorf
(
delta1
[
0
]
*
recipBoxSize
[
0
]
+
0.5
f
);
delta2
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta2
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta2
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta2
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
delta2
-=
periodicBoxVec4
[
0
]
*
floorf
(
delta2
[
0
]
*
recipBoxSize
[
0
]
+
0.5
f
);
delta3
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta3
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta3
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta3
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
delta3
-=
periodicBoxVec4
[
0
]
*
floorf
(
delta3
[
0
]
*
recipBoxSize
[
0
]
+
0.5
f
);
delta4
-=
periodicBoxVec4
[
2
]
*
floorf
(
delta4
[
2
]
*
recipBoxSize
[
2
]
+
0.5
f
);
delta4
-=
periodicBoxVec4
[
1
]
*
floorf
(
delta4
[
1
]
*
recipBoxSize
[
1
]
+
0.5
f
);
delta4
-=
periodicBoxVec4
[
0
]
*
floorf
(
delta4
[
0
]
*
recipBoxSize
[
0
]
+
0.5
f
);
if
(
delta1
[
1
]
<
0
&&
delta1
[
1
]
+
voxelSizeY
>
0
)
delta1
=
fvec4
(
delta1
[
0
],
0
,
delta1
[
2
],
0
);
if
(
delta1
[
2
]
<
0
&&
delta1
[
2
]
+
voxelSizeZ
>
0
)
delta1
=
fvec4
(
delta1
[
0
],
delta1
[
1
],
0
,
0
);
if
(
delta3
[
1
]
<
0
&&
delta3
[
1
]
+
voxelSizeY
>
0
)
delta3
=
fvec4
(
delta3
[
0
],
0
,
delta3
[
2
],
0
);
if
(
delta2
[
2
]
<
0
&&
delta2
[
2
]
+
voxelSizeZ
>
0
)
delta2
=
fvec4
(
delta2
[
0
],
delta2
[
1
],
0
,
0
);
fvec4
delta
=
min
(
min
(
min
(
abs
(
delta1
),
abs
(
delta2
)),
abs
(
delta3
)),
abs
(
delta4
));
float
dy
=
(
voxelIndex
.
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
float
dz
=
(
voxelIndex
.
z
==
atomVoxelIndex
[
k
].
z
?
0.0
f
:
delta
[
2
]);
float
dist2
=
maxDistanceSquared
-
dy
*
dy
-
dz
*
dz
;
if
(
dist2
>
0
)
{
float
dist
=
sqrtf
(
dist2
);
minx
=
min
(
minx
,
atomPos
[
0
]
-
dist
-
max
(
max
(
max
(
delta1
[
0
],
delta2
[
0
]),
delta3
[
0
]),
delta4
[
0
]));
maxx
=
max
(
maxx
,
atomPos
[
0
]
+
dist
-
min
(
min
(
min
(
delta1
[
0
],
delta2
[
0
]),
delta3
[
0
]),
delta4
[
0
]));
}
}
fvec4
delta
=
min
(
abs
(
delta1
),
abs
(
delta2
));
float
dy
=
(
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
float
dz
=
(
z
==
atomVoxelIndex
[
k
].
z
?
0.0
f
:
delta
[
2
]);
float
dist2
=
maxDistanceSquared
-
dy
*
dy
-
dz
*
dz
;
if
(
dist2
>
0
)
{
float
dist
=
sqrtf
(
dist2
);
minx
=
min
(
minx
,
atomPos
[
0
]
-
dist
-
xoffset
);
maxx
=
max
(
maxx
,
atomPos
[
0
]
+
dist
-
xoffset
);
}
else
{
float
xoffset
=
(
float
)
(
usePeriodic
?
boxy
*
periodicBoxVectors
[
1
][
0
]
+
boxz
*
periodicBoxVectors
[
2
][
0
]
:
0
);
fvec4
offset
(
-
xoffset
,
-
yoffset
+
voxelSizeY
*
y
+
(
usePeriodic
?
0.0
f
:
miny
),
voxelSizeZ
*
z
+
(
usePeriodic
?
0.0
f
:
minz
),
0
);
for
(
int
k
=
0
;
k
<
(
int
)
blockAtoms
.
size
();
k
++
)
{
const
float
*
atomPos
=
&
sortedPositions
[
4
*
(
blockSize
*
blockIndex
+
k
)];
fvec4
posVec
(
atomPos
);
fvec4
delta1
=
offset
-
posVec
;
fvec4
delta2
=
delta1
+
fvec4
(
0
,
voxelSizeY
,
voxelSizeZ
,
0
);
if
(
usePeriodic
)
{
delta1
-=
round
(
delta1
*
invBoxSize
)
*
boxSize
;
delta2
-=
round
(
delta2
*
invBoxSize
)
*
boxSize
;
}
fvec4
delta
=
min
(
abs
(
delta1
),
abs
(
delta2
));
float
dy
=
(
y
==
atomVoxelIndex
[
k
].
y
?
0.0
f
:
delta
[
1
]);
float
dz
=
(
z
==
atomVoxelIndex
[
k
].
z
?
0.0
f
:
delta
[
2
]);
float
dist2
=
maxDistanceSquared
-
dy
*
dy
-
dz
*
dz
;
if
(
dist2
>
0
)
{
float
dist
=
sqrtf
(
dist2
);
minx
=
min
(
minx
,
atomPos
[
0
]
-
dist
-
xoffset
);
maxx
=
max
(
maxx
,
atomPos
[
0
]
+
dist
-
xoffset
);
}
}
}
if
(
minx
==
maxx
)
continue
;
bool
needPeriodic
=
(
centerPos
[
1
]
-
blockWidth
[
1
]
<
maxDistance
||
centerPos
[
1
]
+
blockWidth
[
1
]
>
periodicBoxSize
[
1
]
-
maxDistance
||
centerPos
[
2
]
-
blockWidth
[
2
]
<
maxDistance
||
centerPos
[
2
]
+
blockWidth
[
2
]
>
periodicBoxSize
[
2
]
-
maxDistance
||
minx
<
0.0
f
||
maxx
>
periodicBoxVectors
[
0
][
0
]);
bool
needPeriodic
=
usePeriodic
&&
(
centerPos
[
1
]
-
blockWidth
[
1
]
<
maxDistance
||
centerPos
[
1
]
+
blockWidth
[
1
]
>
periodicBoxSize
[
1
]
-
maxDistance
||
centerPos
[
2
]
-
blockWidth
[
2
]
<
maxDistance
||
centerPos
[
2
]
+
blockWidth
[
2
]
>
periodicBoxSize
[
2
]
-
maxDistance
||
minx
<
0.0
f
||
maxx
>
periodicBoxVectors
[
0
][
0
]);
int
numRanges
;
int
rangeStart
[
2
];
int
rangeEnd
[
2
];
...
...
@@ -294,7 +334,7 @@ public:
continue
;
fvec4
atomPos
(
&
sortedPositions
[
4
*
sortedIndex
]);
fvec4
delta
=
atomPos
-
c
enter
Pos
;
fvec4
delta
=
atomPos
-
blockC
enter
;
if
(
periodicRectangular
)
delta
-=
round
(
delta
*
invBoxSize
)
*
boxSize
;
else
if
(
needPeriodic
)
{
...
...
@@ -468,6 +508,10 @@ int CpuNeighborList::getNumBlocks() const {
return
sortedAtoms
.
size
()
/
blockSize
;
}
int
CpuNeighborList
::
getBlockSize
()
const
{
return
blockSize
;
}
const
std
::
vector
<
int
>&
CpuNeighborList
::
getSortedAtoms
()
const
{
return
sortedAtoms
;
}
...
...
platforms/cpu/src/CpuPlatform.cpp
View file @
2e9c418a
...
...
@@ -34,6 +34,7 @@
#include "CpuKernels.h"
#include "CpuSETTLE.h"
#include "ReferenceConstraints.h"
#include "openmm/OpenMMException.h"
#include "openmm/internal/hardware.h"
#include "openmm/internal/vectorize.h"
#include <sstream>
...
...
@@ -59,6 +60,7 @@ extern "C" OPENMM_EXPORT_CPU void registerPlatforms() {
map
<
const
ContextImpl
*
,
CpuPlatform
::
PlatformData
*>
CpuPlatform
::
contextData
;
CpuPlatform
::
CpuPlatform
()
{
deprecatedPropertyReplacements
[
"CpuThreads"
]
=
CpuThreads
();
CpuKernelFactory
*
factory
=
new
CpuKernelFactory
();
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
CalcHarmonicAngleForceKernel
::
Name
(),
factory
);
...
...
@@ -84,7 +86,10 @@ CpuPlatform::CpuPlatform() {
const
string
&
CpuPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
PlatformData
&
data
=
getPlatformData
(
impl
);
map
<
string
,
string
>::
const_iterator
value
=
data
.
propertyValues
.
find
(
property
);
string
propertyName
=
property
;
if
(
deprecatedPropertyReplacements
.
find
(
property
)
!=
deprecatedPropertyReplacements
.
end
())
propertyName
=
deprecatedPropertyReplacements
.
find
(
property
)
->
second
;
map
<
string
,
string
>::
const_iterator
value
=
data
.
propertyValues
.
find
(
propertyName
);
if
(
value
!=
data
.
propertyValues
.
end
())
return
value
->
second
;
return
ReferencePlatform
::
getPropertyValue
(
context
,
property
);
...
...
@@ -132,7 +137,8 @@ const CpuPlatform::PlatformData& CpuPlatform::getPlatformData(const ContextImpl&
return
*
contextData
[
&
context
];
}
CpuPlatform
::
PlatformData
::
PlatformData
(
int
numParticles
,
int
numThreads
)
:
posq
(
4
*
numParticles
),
threads
(
numThreads
)
{
CpuPlatform
::
PlatformData
::
PlatformData
(
int
numParticles
,
int
numThreads
)
:
posq
(
4
*
numParticles
),
threads
(
numThreads
),
neighborList
(
NULL
),
cutoff
(
0.0
),
paddedCutoff
(
0.0
),
anyExclusions
(
false
)
{
numThreads
=
threads
.
getNumThreads
();
threadForce
.
resize
(
numThreads
);
for
(
int
i
=
0
;
i
<
numThreads
;
i
++
)
...
...
@@ -142,3 +148,27 @@ CpuPlatform::PlatformData::PlatformData(int numParticles, int numThreads) : posq
threadsProperty
<<
numThreads
;
propertyValues
[
CpuThreads
()]
=
threadsProperty
.
str
();
}
CpuPlatform
::
PlatformData
::~
PlatformData
()
{
if
(
neighborList
!=
NULL
)
delete
neighborList
;
}
bool
isVec8Supported
();
void
CpuPlatform
::
PlatformData
::
requestNeighborList
(
double
cutoffDistance
,
double
padding
,
bool
useExclusions
,
vector
<
set
<
int
>
>&
exclusionList
)
{
if
(
neighborList
==
NULL
)
neighborList
=
new
CpuNeighborList
(
isVec8Supported
()
?
8
:
4
);
if
(
cutoffDistance
>
cutoff
)
cutoff
=
cutoffDistance
;
if
(
cutoffDistance
+
padding
>
paddedCutoff
)
paddedCutoff
=
cutoffDistance
+
padding
;
if
(
useExclusions
)
{
if
(
anyExclusions
&&
exclusions
!=
exclusionList
)
throw
OpenMMException
(
"All Forces must have identical exclusions"
);
else
{
exclusions
=
exclusionList
;
anyExclusions
=
true
;
}
}
}
platforms/cpu/tests/TestCpuNeighborList.cpp
View file @
2e9c418a
...
...
@@ -53,14 +53,14 @@ void testNeighborList(bool periodic, bool triclinic) {
const
float
cutoff
=
2.0
f
;
RealVec
boxVectors
[
3
];
if
(
triclinic
)
{
boxVectors
[
0
]
=
RealVec
(
2
0
,
0
,
0
);
boxVectors
[
1
]
=
RealVec
(
5
,
15
,
0
);
boxVectors
[
2
]
=
RealVec
(
-
3
,
-
7
,
22
);
boxVectors
[
0
]
=
RealVec
(
1
0
,
0
,
0
);
boxVectors
[
1
]
=
RealVec
(
4
,
9
,
0
);
boxVectors
[
2
]
=
RealVec
(
-
3
,
-
3.5
,
11
);
}
else
{
boxVectors
[
0
]
=
RealVec
(
2
0
,
0
,
0
);
boxVectors
[
1
]
=
RealVec
(
0
,
15
,
0
);
boxVectors
[
2
]
=
RealVec
(
0
,
0
,
22
);
boxVectors
[
0
]
=
RealVec
(
1
0
,
0
,
0
);
boxVectors
[
1
]
=
RealVec
(
0
,
9
,
0
);
boxVectors
[
2
]
=
RealVec
(
0
,
0
,
11
);
}
const
float
boxSize
[
3
]
=
{(
float
)
boxVectors
[
0
][
0
],
(
float
)
boxVectors
[
1
][
1
],
(
float
)
boxVectors
[
2
][
2
]};
const
int
blockSize
=
8
;
...
...
platforms/cuda/include/CudaKernels.h
View file @
2e9c418a
...
...
@@ -178,7 +178,7 @@ public:
* @param b the vector defining the second edge of the periodic box
* @param c the vector defining the third edge of the periodic box
*/
void
setPeriodicBoxVectors
(
ContextImpl
&
context
,
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
)
const
;
void
setPeriodicBoxVectors
(
ContextImpl
&
context
,
const
Vec3
&
a
,
const
Vec3
&
b
,
const
Vec3
&
c
);
/**
* Create a checkpoint recording the current state of the Context.
*
...
...
@@ -718,7 +718,7 @@ public:
*/
void
copyParametersToContext
(
ContextImpl
&
context
,
const
CustomNonbondedForce
&
force
);
private:
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
);
void
initInteractionGroups
(
const
CustomNonbondedForce
&
force
,
const
std
::
string
&
interactionSource
,
const
std
::
vector
<
std
::
string
>&
tableTypes
);
CudaContext
&
cu
;
CudaParameterSet
*
params
;
CudaArray
*
globals
;
...
...
platforms/cuda/include/CudaPlatform.h
View file @
2e9c418a
...
...
@@ -58,35 +58,35 @@ public:
* This is the name of the parameter for selecting which CUDA device or devices to use.
*/
static
const
std
::
string
&
CudaDeviceIndex
()
{
static
const
std
::
string
key
=
"
Cuda
DeviceIndex"
;
static
const
std
::
string
key
=
"DeviceIndex"
;
return
key
;
}
/**
* This is the name of the parameter that reports the CUDA device or devices being used.
*/
static
const
std
::
string
&
CudaDeviceName
()
{
static
const
std
::
string
key
=
"
Cuda
DeviceName"
;
static
const
std
::
string
key
=
"DeviceName"
;
return
key
;
}
/**
* This is the name of the parameter for selecting whether CUDA should sync or spin loop while waiting for results.
*/
static
const
std
::
string
&
CudaUseBlockingSync
()
{
static
const
std
::
string
key
=
"
Cuda
UseBlockingSync"
;
static
const
std
::
string
key
=
"UseBlockingSync"
;
return
key
;
}
/**
* This is the name of the parameter for selecting what numerical precision to use.
*/
static
const
std
::
string
&
CudaPrecision
()
{
static
const
std
::
string
key
=
"
Cuda
Precision"
;
static
const
std
::
string
key
=
"Precision"
;
return
key
;
}
/**
* This is the name of the parameter for selecting whether to use the CPU based PME calculation.
*/
static
const
std
::
string
&
CudaUseCpuPme
()
{
static
const
std
::
string
key
=
"
Cuda
UseCpuPme"
;
static
const
std
::
string
key
=
"UseCpuPme"
;
return
key
;
}
/**
...
...
@@ -107,14 +107,21 @@ public:
* This is the name of the parameter for specifying the path to the directory for creating temporary files.
*/
static
const
std
::
string
&
CudaTempDirectory
()
{
static
const
std
::
string
key
=
"
Cuda
TempDirectory"
;
static
const
std
::
string
key
=
"TempDirectory"
;
return
key
;
}
/**
* This is the name of the parameter for selecting whether to disable use of a separate stream for PME.
*/
static
const
std
::
string
&
CudaDisablePmeStream
()
{
static
const
std
::
string
key
=
"CudaDisablePmeStream"
;
static
const
std
::
string
key
=
"DisablePmeStream"
;
return
key
;
}
/**
* This is the name of the parameter for requesting that force computations be fully deterministic.
*/
static
const
std
::
string
&
CudaDeterministicForces
()
{
static
const
std
::
string
key
=
"DeterministicForces"
;
return
key
;
}
};
...
...
@@ -123,14 +130,14 @@ class OPENMM_EXPORT_CUDA CudaPlatform::PlatformData {
public:
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
std
::
string
&
deviceIndexProperty
,
const
std
::
string
&
blockingProperty
,
const
std
::
string
&
precisionProperty
,
const
std
::
string
&
cpuPmeProperty
,
const
std
::
string
&
compilerProperty
,
const
std
::
string
&
tempProperty
,
const
std
::
string
&
hostCompilerProperty
,
const
std
::
string
&
pmeStreamProperty
,
int
numThreads
);
const
std
::
string
&
pmeStreamProperty
,
const
std
::
string
&
deterministicForcesProperty
,
int
numThreads
);
~
PlatformData
();
void
initializeContexts
(
const
System
&
system
);
void
syncContexts
();
ContextImpl
*
context
;
std
::
vector
<
CudaContext
*>
contexts
;
std
::
vector
<
double
>
contextEnergy
;
bool
hasInitializedContexts
,
removeCM
,
peerAccessSupported
,
useCpuPme
,
disablePmeStream
;
bool
hasInitializedContexts
,
removeCM
,
peerAccessSupported
,
useCpuPme
,
disablePmeStream
,
deterministicForces
;
int
cmMotionFrequency
;
int
stepCount
,
computeForceCount
;
double
time
;
...
...
platforms/cuda/src/CudaContext.cpp
View file @
2e9c418a
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -106,7 +106,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
useMixedPrecision
=
false
;
}
else
throw
OpenMMException
(
"Illegal value for
Cuda
Precision: "
+
precision
);
throw
OpenMMException
(
"Illegal value for Precision: "
+
precision
);
char
*
cacheVariable
=
getenv
(
"OPENMM_CACHE_DIR"
);
cacheDir
=
(
cacheVariable
==
NULL
?
tempDir
:
string
(
cacheVariable
));
#ifdef WIN32
...
...
@@ -121,7 +121,7 @@ CudaContext::CudaContext(const System& system, int deviceIndex, bool useBlocking
string
errorMessage
=
"Error initializing Context"
;
CHECK_RESULT
(
cuDeviceGetCount
(
&
numDevices
));
if
(
deviceIndex
<
-
1
||
deviceIndex
>=
numDevices
)
throw
OpenMMException
(
"Illegal value for
Cuda
DeviceIndex: "
+
intToString
(
deviceIndex
));
throw
OpenMMException
(
"Illegal value for DeviceIndex: "
+
intToString
(
deviceIndex
));
vector
<
int
>
devicePrecedence
;
if
(
deviceIndex
==
-
1
)
{
...
...
platforms/cuda/src/CudaExpressionUtilities.cpp
View file @
2e9c418a
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009-201
5
Stanford University and the Authors. *
* Portions copyright (c) 2009-201
6
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -254,7 +254,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
for
(
int
k
=
3
;
k
>=
0
;
k
--
)
for
(
int
m
=
0
;
m
<
4
;
m
++
)
{
int
base
=
4
*
m
;
string
suffix
=
suffixes
[
m
];
string
suffix
=
suffixes
[
k
];
out
<<
"derivy["
<<
m
<<
"] = da*derivy["
<<
m
<<
"] + (3*c["
<<
(
base
+
3
)
<<
"]"
<<
suffix
<<
"*db + 2*c["
<<
(
base
+
2
)
<<
"]"
<<
suffix
<<
")*db + c["
<<
(
base
+
1
)
<<
"]"
<<
suffix
<<
";
\n
"
;
}
out
<<
nodeNames
[
j
]
<<
" = derivy[0] + dc*(derivy[1] + dc*(derivy[2] + dc*derivy[3]));
\n
"
;
...
...
@@ -271,7 +271,7 @@ void CudaExpressionUtilities::processExpression(stringstream& out, const Express
out
<<
nodeNames
[
j
]
<<
" *= "
<<
paramsFloat
[
11
]
<<
";
\n
"
;
}
else
throw
OpenMMException
(
"Unsupported derivative order for Continuous
2
DFunction"
);
throw
OpenMMException
(
"Unsupported derivative order for Continuous
3
DFunction"
);
}
out
<<
"}
\n
"
;
}
...
...
platforms/cuda/src/CudaKernels.cpp
View file @
2e9c418a
This diff is collapsed.
Click to expand it.
platforms/cuda/src/CudaPlatform.cpp
View file @
2e9c418a
...
...
@@ -62,6 +62,14 @@ extern "C" OPENMM_EXPORT_CUDA void registerPlatforms() {
#endif
CudaPlatform
::
CudaPlatform
()
{
deprecatedPropertyReplacements
[
"CudaDeviceIndex"
]
=
CudaDeviceIndex
();
deprecatedPropertyReplacements
[
"CudaDeviceName"
]
=
CudaDeviceName
();
deprecatedPropertyReplacements
[
"CudaUseBlockingSync"
]
=
CudaUseBlockingSync
();
deprecatedPropertyReplacements
[
"CudaPrecision"
]
=
CudaPrecision
();
deprecatedPropertyReplacements
[
"CudaUseCpuPme"
]
=
CudaUseCpuPme
();
deprecatedPropertyReplacements
[
"CudaTempDirectory"
]
=
CudaTempDirectory
();
deprecatedPropertyReplacements
[
"CudaDisablePmeStream"
]
=
CudaDisablePmeStream
();
deprecatedPropertyReplacements
[
"CudaDeterministicForces"
]
=
CudaDeterministicForces
();
CudaKernelFactory
*
factory
=
new
CudaKernelFactory
();
registerKernelFactory
(
CalcForcesAndEnergyKernel
::
Name
(),
factory
);
registerKernelFactory
(
UpdateStateDataKernel
::
Name
(),
factory
);
...
...
@@ -102,12 +110,14 @@ CudaPlatform::CudaPlatform() {
platformProperties
.
push_back
(
CudaTempDirectory
());
platformProperties
.
push_back
(
CudaHostCompiler
());
platformProperties
.
push_back
(
CudaDisablePmeStream
());
platformProperties
.
push_back
(
CudaDeterministicForces
());
setPropertyDefaultValue
(
CudaDeviceIndex
(),
""
);
setPropertyDefaultValue
(
CudaDeviceName
(),
""
);
setPropertyDefaultValue
(
CudaUseBlockingSync
(),
"true"
);
setPropertyDefaultValue
(
CudaPrecision
(),
"single"
);
setPropertyDefaultValue
(
CudaUseCpuPme
(),
"false"
);
setPropertyDefaultValue
(
CudaDisablePmeStream
(),
"false"
);
setPropertyDefaultValue
(
CudaDeterministicForces
(),
"false"
);
#ifdef _MSC_VER
char
*
bindir
=
getenv
(
"CUDA_BIN_PATH"
);
string
nvcc
=
(
bindir
==
NULL
?
"nvcc.exe"
:
string
(
bindir
)
+
"
\\
nvcc.exe"
);
...
...
@@ -142,7 +152,10 @@ bool CudaPlatform::supportsDoublePrecision() const {
const
string
&
CudaPlatform
::
getPropertyValue
(
const
Context
&
context
,
const
string
&
property
)
const
{
const
ContextImpl
&
impl
=
getContextImpl
(
context
);
const
PlatformData
*
data
=
reinterpret_cast
<
const
PlatformData
*>
(
impl
.
getPlatformData
());
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
property
);
string
propertyName
=
property
;
if
(
deprecatedPropertyReplacements
.
find
(
property
)
!=
deprecatedPropertyReplacements
.
end
())
propertyName
=
deprecatedPropertyReplacements
.
find
(
property
)
->
second
;
map
<
string
,
string
>::
const_iterator
value
=
data
->
propertyValues
.
find
(
propertyName
);
if
(
value
!=
data
->
propertyValues
.
end
())
return
value
->
second
;
return
Platform
::
getPropertyValue
(
context
,
property
);
...
...
@@ -168,10 +181,13 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
getPropertyDefaultValue
(
CudaHostCompiler
())
:
properties
.
find
(
CudaHostCompiler
())
->
second
);
string
pmeStreamPropValue
=
(
properties
.
find
(
CudaDisablePmeStream
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDisablePmeStream
())
:
properties
.
find
(
CudaDisablePmeStream
())
->
second
);
string
deterministicForcesValue
=
(
properties
.
find
(
CudaDeterministicForces
())
==
properties
.
end
()
?
getPropertyDefaultValue
(
CudaDeterministicForces
())
:
properties
.
find
(
CudaDeterministicForces
())
->
second
);
transform
(
blockingPropValue
.
begin
(),
blockingPropValue
.
end
(),
blockingPropValue
.
begin
(),
::
tolower
);
transform
(
precisionPropValue
.
begin
(),
precisionPropValue
.
end
(),
precisionPropValue
.
begin
(),
::
tolower
);
transform
(
cpuPmePropValue
.
begin
(),
cpuPmePropValue
.
end
(),
cpuPmePropValue
.
begin
(),
::
tolower
);
transform
(
pmeStreamPropValue
.
begin
(),
pmeStreamPropValue
.
end
(),
pmeStreamPropValue
.
begin
(),
::
tolower
);
transform
(
deterministicForcesValue
.
begin
(),
deterministicForcesValue
.
end
(),
deterministicForcesValue
.
begin
(),
::
tolower
);
vector
<
string
>
pmeKernelName
;
pmeKernelName
.
push_back
(
CalcPmeReciprocalForceKernel
::
Name
());
if
(
!
supportsKernels
(
pmeKernelName
))
...
...
@@ -180,7 +196,8 @@ void CudaPlatform::contextCreated(ContextImpl& context, const map<string, string
char
*
threadsEnv
=
getenv
(
"OPENMM_CPU_THREADS"
);
if
(
threadsEnv
!=
NULL
)
stringstream
(
threadsEnv
)
>>
threads
;
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
threads
));
context
.
setPlatformData
(
new
PlatformData
(
&
context
,
context
.
getSystem
(),
devicePropValue
,
blockingPropValue
,
precisionPropValue
,
cpuPmePropValue
,
compilerPropValue
,
tempPropValue
,
hostCompilerPropValue
,
pmeStreamPropValue
,
deterministicForcesValue
,
threads
));
}
void
CudaPlatform
::
contextDestroyed
(
ContextImpl
&
context
)
const
{
...
...
@@ -189,7 +206,8 @@ void CudaPlatform::contextDestroyed(ContextImpl& context) const {
}
CudaPlatform
::
PlatformData
::
PlatformData
(
ContextImpl
*
context
,
const
System
&
system
,
const
string
&
deviceIndexProperty
,
const
string
&
blockingProperty
,
const
string
&
precisionProperty
,
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
int
numThreads
)
:
const
string
&
cpuPmeProperty
,
const
string
&
compilerProperty
,
const
string
&
tempProperty
,
const
string
&
hostCompilerProperty
,
const
string
&
pmeStreamProperty
,
const
string
&
deterministicForcesProperty
,
int
numThreads
)
:
context
(
context
),
removeCM
(
false
),
stepCount
(
0
),
computeForceCount
(
0
),
time
(
0.0
),
hasInitializedContexts
(
false
),
threads
(
numThreads
)
{
bool
blocking
=
(
blockingProperty
==
"true"
);
vector
<
string
>
devices
;
...
...
@@ -230,6 +248,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
}
useCpuPme
=
(
cpuPmeProperty
==
"true"
&&
!
contexts
[
0
]
->
getUseDoublePrecision
());
disablePmeStream
=
(
pmeStreamProperty
==
"true"
);
deterministicForces
=
(
deterministicForcesProperty
==
"true"
);
propertyValues
[
CudaPlatform
::
CudaDeviceIndex
()]
=
deviceIndex
.
str
();
propertyValues
[
CudaPlatform
::
CudaDeviceName
()]
=
deviceName
.
str
();
propertyValues
[
CudaPlatform
::
CudaUseBlockingSync
()]
=
blocking
?
"true"
:
"false"
;
...
...
@@ -239,6 +258,7 @@ CudaPlatform::PlatformData::PlatformData(ContextImpl* context, const System& sys
propertyValues
[
CudaPlatform
::
CudaTempDirectory
()]
=
tempProperty
;
propertyValues
[
CudaPlatform
::
CudaHostCompiler
()]
=
hostCompilerProperty
;
propertyValues
[
CudaPlatform
::
CudaDisablePmeStream
()]
=
disablePmeStream
?
"true"
:
"false"
;
propertyValues
[
CudaPlatform
::
CudaDeterministicForces
()]
=
deterministicForces
?
"true"
:
"false"
;
contextEnergy
.
resize
(
contexts
.
size
());
// Determine whether peer-to-peer copying is supported, and enable it if so.
...
...
platforms/cuda/src/kernels/angleForce.cu
View file @
2e9c418a
real3
v0
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
real3
v1
=
make_real3
(
pos2
.
x
-
pos3
.
x
,
pos2
.
y
-
pos3
.
y
,
pos2
.
z
-
pos3
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
#endif
real3
cp
=
cross
(
v0
,
v1
);
real
rp
=
cp
.
x
*
cp
.
x
+
cp
.
y
*
cp
.
y
+
cp
.
z
*
cp
.
z
;
rp
=
max
(
SQRT
(
rp
),
(
real
)
1.0e-06
f
);
...
...
platforms/cuda/src/kernels/bondForce.cu
View file @
2e9c418a
real3
delta
=
make_real3
(
pos2
.
x
-
pos1
.
x
,
pos2
.
y
-
pos1
.
y
,
pos2
.
z
-
pos1
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
delta
)
#endif
real
r
=
SQRT
(
delta
.
x
*
delta
.
x
+
delta
.
y
*
delta
.
y
+
delta
.
z
*
delta
.
z
);
COMPUTE_FORCE
dEdR
=
(
r
>
0
)
?
(
dEdR
/
r
)
:
0
;
...
...
platforms/cuda/src/kernels/cmapTorsionForce.cu
View file @
2e9c418a
...
...
@@ -5,6 +5,11 @@ const real PI = (real) 3.14159265358979323846;
real3
v0a
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v1a
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v2a
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0a
)
APPLY_PERIODIC_TO_DELTA
(
v1a
)
APPLY_PERIODIC_TO_DELTA
(
v2a
)
#endif
real3
cp0a
=
cross
(
v0a
,
v1a
);
real3
cp1a
=
cross
(
v1a
,
v2a
);
real
cosangle
=
dot
(
normalize
(
cp0a
),
normalize
(
cp1a
));
...
...
@@ -28,6 +33,11 @@ angleA = fmod(angleA+2.0f*PI, 2.0f*PI);
real3
v0b
=
make_real3
(
pos5
.
x
-
pos6
.
x
,
pos5
.
y
-
pos6
.
y
,
pos5
.
z
-
pos6
.
z
);
real3
v1b
=
make_real3
(
pos7
.
x
-
pos6
.
x
,
pos7
.
y
-
pos6
.
y
,
pos7
.
z
-
pos6
.
z
);
real3
v2b
=
make_real3
(
pos7
.
x
-
pos8
.
x
,
pos7
.
y
-
pos8
.
y
,
pos7
.
z
-
pos8
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0b
)
APPLY_PERIODIC_TO_DELTA
(
v1b
)
APPLY_PERIODIC_TO_DELTA
(
v2b
)
#endif
real3
cp0b
=
cross
(
v0b
,
v1b
);
real3
cp1b
=
cross
(
v1b
,
v2b
);
cosangle
=
dot
(
normalize
(
cp0b
),
normalize
(
cp1b
));
...
...
platforms/cuda/src/kernels/customCentroidBond.cu
View file @
2e9c418a
...
...
@@ -66,8 +66,11 @@ inline __device__ real3 trim(real4 v) {
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
)
{
inline
__device__
real4
delta
(
real4
vec1
,
real4
vec2
,
bool
periodic
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
}
...
...
@@ -105,7 +108,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
* Compute the forces on groups based on the bonds.
*/
extern
"C"
__global__
void
computeGroupForces
(
unsigned
long
long
*
__restrict__
groupForce
,
mixed
*
__restrict__
energyBuffer
,
const
real4
*
__restrict__
centerPositions
,
const
int
*
__restrict__
bondGroups
const
int
*
__restrict__
bondGroups
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
EXTRA_ARGS
)
{
mixed
energy
=
0
;
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
NUM_BONDS
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
...
...
platforms/cuda/src/kernels/customCompoundBond.cu
View file @
2e9c418a
...
...
@@ -8,8 +8,11 @@ inline __device__ real3 ccb_trim(real4 v) {
/**
* Compute the difference between two vectors, setting the fourth component to the squared magnitude.
*/
inline
__device__
real4
ccb_delta
(
real4
vec1
,
real4
vec2
)
{
inline
__device__
real4
ccb_delta
(
real4
vec1
,
real4
vec2
,
bool
periodic
,
real4
periodicBoxSize
,
real4
invPeriodicBoxSize
,
real4
periodicBoxVecX
,
real4
periodicBoxVecY
,
real4
periodicBoxVecZ
)
{
real4
result
=
make_real4
(
vec1
.
x
-
vec2
.
x
,
vec1
.
y
-
vec2
.
y
,
vec1
.
z
-
vec2
.
z
,
0
);
if
(
periodic
)
APPLY_PERIODIC_TO_DELTA
(
result
);
result
.
w
=
result
.
x
*
result
.
x
+
result
.
y
*
result
.
y
+
result
.
z
*
result
.
z
;
return
result
;
}
...
...
platforms/cuda/src/kernels/customManyParticle.cu
View file @
2e9c418a
...
...
@@ -59,7 +59,7 @@ inline __device__ real4 computeCross(real4 vec1, real4 vec2) {
/**
* Determine whether a particular interaction is in the list of exclusions.
*/
inline
__device__
bool
isInteractionExcluded
(
int
atom1
,
int
atom2
,
int
*
__restrict__
exclusions
,
int
*
__restrict__
exclusionStartIndex
)
{
inline
__device__
bool
isInteractionExcluded
(
int
atom1
,
int
atom2
,
const
int
*
__restrict__
exclusions
,
const
int
*
__restrict__
exclusionStartIndex
)
{
int
first
=
exclusionStartIndex
[
atom1
];
int
last
=
exclusionStartIndex
[
atom1
+
1
];
for
(
int
i
=
last
-
1
;
i
>=
first
;
i
--
)
{
...
...
@@ -180,7 +180,7 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
const
real4
*
__restrict__
posq
,
const
real4
*
__restrict__
blockCenter
,
const
real4
*
__restrict__
blockBoundingBox
,
int2
*
__restrict__
neighborPairs
,
int
*
__restrict__
numNeighborPairs
,
int
*
__restrict__
numNeighborsForAtom
,
int
maxNeighborPairs
#ifdef USE_EXCLUSIONS
,
int
*
__restrict__
exclusions
,
int
*
__restrict__
exclusionStartIndex
,
const
int
*
__restrict__
exclusions
,
const
int
*
__restrict__
exclusionStartIndex
#endif
)
{
__shared__
real3
positionCache
[
FIND_NEIGHBORS_WORKGROUP_SIZE
];
...
...
@@ -265,7 +265,8 @@ extern "C" __global__ void findNeighbors(real4 periodicBoxSize, real4 invPeriodi
}
}
}
numNeighborsForAtom
[
atom1
]
=
totalNeighborsForAtom1
;
if
(
atom1
<
NUM_ATOMS
)
numNeighborsForAtom
[
atom1
]
=
totalNeighborsForAtom1
;
}
}
...
...
@@ -308,6 +309,7 @@ extern "C" __global__ void computeNeighborStartIndices(int* __restrict__ numNeig
numNeighborsForAtom
[
globalIndex
]
=
0
;
// Clear this so the next kernel can use it as a counter
}
globalOffset
+=
posBuffer
[
blockDim
.
x
-
1
];
__syncthreads
();
}
if
(
threadIdx
.
x
==
0
)
neighborStartIndex
[
0
]
=
0
;
...
...
platforms/cuda/src/kernels/pme.cu
View file @
2e9c418a
...
...
@@ -83,7 +83,7 @@ extern "C" __global__ void gridSpreadCharge(const real4* __restrict__ posq, real
#ifdef USE_DOUBLE_PRECISION
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
atomicAdd
(
&
ulonglong_p
[
index
],
static_cast
<
unsigned
long
long
>
((
long
long
)
(
add
*
0x100000000
)));
#elif __CUDA_ARCH__ < 200
#elif __CUDA_ARCH__ < 200
|| defined(USE_DETERMINISTIC_FORCES)
unsigned
long
long
*
ulonglong_p
=
(
unsigned
long
long
*
)
originalPmeGrid
;
int
gridIndex
=
index
;
gridIndex
=
(
gridIndex
%
2
==
0
?
gridIndex
/
2
:
(
gridIndex
+
GRID_SIZE_X
*
GRID_SIZE_Y
*
GRID_SIZE_Z
)
/
2
);
...
...
platforms/cuda/src/kernels/torsionForce.cu
View file @
2e9c418a
...
...
@@ -2,6 +2,11 @@ const real PI = (real) 3.14159265358979323846;
real3
v0
=
make_real3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
real3
v1
=
make_real3
(
pos3
.
x
-
pos2
.
x
,
pos3
.
y
-
pos2
.
y
,
pos3
.
z
-
pos2
.
z
);
real3
v2
=
make_real3
(
pos3
.
x
-
pos4
.
x
,
pos3
.
y
-
pos4
.
y
,
pos3
.
z
-
pos4
.
z
);
#if APPLY_PERIODIC
APPLY_PERIODIC_TO_DELTA
(
v0
)
APPLY_PERIODIC_TO_DELTA
(
v1
)
APPLY_PERIODIC_TO_DELTA
(
v2
)
#endif
real3
cp0
=
cross
(
v0
,
v1
);
real3
cp1
=
cross
(
v1
,
v2
);
real
cosangle
=
dot
(
normalize
(
cp0
),
normalize
(
cp1
));
...
...
platforms/cuda/tests/CudaTests.h
View file @
2e9c418a
...
...
@@ -6,7 +6,7 @@
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2015 Stanford University and the Authors.
*
* Portions copyright (c) 2015
-2016
Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
...
...
@@ -39,5 +39,5 @@ OpenMM::CudaPlatform platform;
void
initializeTests
(
int
argc
,
char
*
argv
[])
{
if
(
argc
>
1
)
platform
.
setPropertyDefaultValue
(
"
Cuda
Precision"
,
std
::
string
(
argv
[
1
]));
platform
.
setPropertyDefaultValue
(
"Precision"
,
std
::
string
(
argv
[
1
]));
}
Prev
1
2
3
4
5
6
7
8
…
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment