Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
f329a047
Commit
f329a047
authored
May 13, 2009
by
Peter Eastman
Browse files
Completed Cuda implementation of CSHAKE algorithm
parent
4ef9536e
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
467 additions
and
41 deletions
+467
-41
platforms/cuda/sharedTarget/CMakeLists.txt
platforms/cuda/sharedTarget/CMakeLists.txt
+1
-0
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+7
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+123
-12
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+5
-0
platforms/cuda/src/kernels/kCShake.cu
platforms/cuda/src/kernels/kCShake.cu
+317
-0
platforms/cuda/staticTarget/CMakeLists.txt
platforms/cuda/staticTarget/CMakeLists.txt
+1
-0
platforms/reference/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
...rence/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
+13
-29
No files found.
platforms/cuda/sharedTarget/CMakeLists.txt
View file @
f329a047
...
@@ -13,6 +13,7 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
...
@@ -13,6 +13,7 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/include
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/include
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/src
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/src
)
ENDFOREACH
(
subdir
)
ENDFOREACH
(
subdir
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/jama/include
)
CUDA_ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
CUDA_ADD_LIBRARY
(
${
SHARED_TARGET
}
SHARED
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
...
...
platforms/cuda/src/kernels/cudatypes.h
View file @
f329a047
...
@@ -356,6 +356,9 @@ struct cudaGmxSimulation {
...
@@ -356,6 +356,9 @@ struct cudaGmxSimulation {
unsigned
int
ShakeConstraints
;
// Total number of Shake constraints
unsigned
int
ShakeConstraints
;
// Total number of Shake constraints
unsigned
int
settleConstraints
;
// Total number of Settle constraints
unsigned
int
settleConstraints
;
// Total number of Settle constraints
unsigned
int
lincsConstraints
;
// Total number of LINCS constraints.
unsigned
int
lincsConstraints
;
// Total number of LINCS constraints.
unsigned
int
rigidClusters
;
// Total number of rigid clusters
unsigned
int
maxRigidClusterSize
;
// The size of the largest rigid cluster
unsigned
int
clusterShakeBlockSize
;
// The number of threads to process each rigid cluster
unsigned
int
NonShakeConstraints
;
// Total number of NonShake atoms
unsigned
int
NonShakeConstraints
;
// Total number of NonShake atoms
unsigned
int
maxShakeIterations
;
// Maximum shake iterations
unsigned
int
maxShakeIterations
;
// Maximum shake iterations
unsigned
int
degreesOfFreedom
;
// Number of degrees of freedom in system
unsigned
int
degreesOfFreedom
;
// Number of degrees of freedom in system
...
@@ -391,6 +394,10 @@ struct cudaGmxSimulation {
...
@@ -391,6 +394,10 @@ struct cudaGmxSimulation {
short
*
pSyncCounter
;
// Used for global thread synchronization
short
*
pSyncCounter
;
// Used for global thread synchronization
unsigned
int
*
pRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
unsigned
int
*
pRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
float
*
pShakeReducedMass
;
// The reduced mass for each SHAKE constraint
float
*
pShakeReducedMass
;
// The reduced mass for each SHAKE constraint
int
*
pRigidClusterConstraints
;
// The constraints in each rigid cluster
float
*
pRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
unsigned
int
*
pRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
unsigned
int
*
pRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
// Mutable stuff
// Mutable stuff
float4
*
pPosq
;
// Pointer to atom positions and charges
float4
*
pPosq
;
// Pointer to atom positions and charges
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
f329a047
...
@@ -40,6 +40,7 @@
...
@@ -40,6 +40,7 @@
#include <sstream>
#include <sstream>
#include <cmath>
#include <cmath>
#include <map>
#include <map>
#include <set>
#include <algorithm>
#include <algorithm>
#ifdef WIN32
#ifdef WIN32
#include <windows.h>
#include <windows.h>
...
@@ -121,8 +122,6 @@ static const float BOLTZ = (RGAS / KILO); // (k
...
@@ -121,8 +122,6 @@ static const float BOLTZ = (RGAS / KILO); // (k
#define DUMP_PARAMETERS 0
#define DUMP_PARAMETERS 0
#define DeltaShake
extern
"C"
extern
"C"
void
gpuSetBondParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
length
,
const
vector
<
float
>&
k
)
void
gpuSetBondParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
length
,
const
vector
<
float
>&
k
)
{
{
...
@@ -458,7 +457,7 @@ void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDie
...
@@ -458,7 +457,7 @@ void gpuSetObcParameters(gpuContext gpu, float innerDielectric, float solventDie
gpu
->
sim
.
preFactor
=
2.0
f
*
electricConstant
*
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
))
*
gpu
->
sim
.
forceConversionFactor
;
gpu
->
sim
.
preFactor
=
2.0
f
*
electricConstant
*
((
1.0
f
/
innerDielectric
)
-
(
1.0
f
/
solventDielectric
))
*
gpu
->
sim
.
forceConversionFactor
;
}
}
void
markShakeClusterInvalid
(
ShakeCluster
&
cluster
,
map
<
int
,
ShakeCluster
>&
allClusters
,
vector
<
bool
>&
invalidForShake
)
static
void
markShakeClusterInvalid
(
ShakeCluster
&
cluster
,
map
<
int
,
ShakeCluster
>&
allClusters
,
vector
<
bool
>&
invalidForShake
)
{
{
cluster
.
valid
=
false
;
cluster
.
valid
=
false
;
invalidForShake
[
cluster
.
centralID
]
=
true
;
invalidForShake
[
cluster
.
centralID
]
=
true
;
...
@@ -470,6 +469,116 @@ void markShakeClusterInvalid(ShakeCluster& cluster, map<int, ShakeCluster>& allC
...
@@ -470,6 +469,116 @@ void markShakeClusterInvalid(ShakeCluster& cluster, map<int, ShakeCluster>& allC
}
}
}
}
static
void
findRigidClusters
(
gpuContext
gpu
,
const
vector
<
int
>&
firstAtom
,
const
vector
<
int
>&
secondAtom
,
const
vector
<
int
>&
constraintIndices
)
{
vector
<
map
<
int
,
int
>
>
atomConstraints
(
firstAtom
.
size
());
for
(
int
i
=
0
;
i
<
constraintIndices
.
size
();
i
++
)
{
atomConstraints
[
firstAtom
[
i
]][
secondAtom
[
i
]]
=
constraintIndices
[
i
];
atomConstraints
[
secondAtom
[
i
]][
firstAtom
[
i
]]
=
constraintIndices
[
i
];
}
vector
<
vector
<
int
>
>
rigidClusters
;
int
totalConstraints
=
0
;
int
totalMatrixElements
=
0
;
for
(
int
i
=
0
;
i
<
firstAtom
.
size
();
i
++
)
{
if
(
atomConstraints
[
i
].
size
()
<
2
)
continue
;
// Begin by looking for a triangle this atom is part of.
set
<
int
>
atoms
;
atoms
.
insert
(
i
);
for
(
map
<
int
,
int
>::
const_iterator
atom1
=
atomConstraints
[
i
].
begin
();
atom1
!=
atomConstraints
[
i
].
end
()
&&
atoms
.
size
()
==
1
;
++
atom1
)
{
for
(
map
<
int
,
int
>::
const_iterator
atom2
=
atomConstraints
[
atom1
->
first
].
begin
();
atom2
!=
atomConstraints
[
atom1
->
first
].
end
();
++
atom2
)
{
if
(
atomConstraints
[
i
].
count
(
atom2
->
first
)
!=
0
)
{
atoms
.
insert
(
atom1
->
first
);
atoms
.
insert
(
atom2
->
first
);
break
;
}
}
}
if
(
atoms
.
size
()
==
1
)
continue
;
// We have three atoms that are part of a cluster, so look for other atoms we can add.
bool
done
=
false
;
while
(
!
done
)
{
done
=
true
;
for
(
set
<
int
>::
const_iterator
atom1
=
atoms
.
begin
();
atom1
!=
atoms
.
end
();
++
atom1
)
{
for
(
map
<
int
,
int
>::
const_iterator
atom2
=
atomConstraints
[
*
atom1
].
begin
();
atom2
!=
atomConstraints
[
*
atom1
].
end
();
++
atom2
)
{
if
(
atoms
.
find
(
atom2
->
first
)
!=
atoms
.
end
())
continue
;
// This atom is already in the cluster.
// See if this atom is linked to three other atoms in the cluster.
int
linkCount
=
0
;
for
(
map
<
int
,
int
>::
const_iterator
atom3
=
atomConstraints
[
atom2
->
first
].
begin
();
atom3
!=
atomConstraints
[
atom2
->
first
].
end
();
++
atom3
)
if
(
atoms
.
find
(
atom3
->
first
)
!=
atoms
.
end
())
linkCount
++
;
if
(
linkCount
>
2
)
{
atoms
.
insert
(
atom2
->
first
);
done
=
false
;
}
}
}
}
// Record the cluster.
vector
<
int
>
constraints
;
for
(
set
<
int
>::
const_iterator
atom1
=
atoms
.
begin
();
atom1
!=
atoms
.
end
();
++
atom1
)
{
for
(
map
<
int
,
int
>::
const_iterator
atom2
=
atomConstraints
[
*
atom1
].
begin
();
atom2
!=
atomConstraints
[
*
atom1
].
end
();
++
atom2
)
{
if
(
*
atom1
<
atom2
->
first
&&
atoms
.
find
(
atom2
->
first
)
!=
atoms
.
end
())
constraints
.
push_back
(
atom2
->
second
);
}
}
rigidClusters
.
push_back
(
constraints
);
totalConstraints
+=
constraints
.
size
();
totalMatrixElements
+=
constraints
.
size
()
*
constraints
.
size
();
for
(
set
<
int
>::
const_iterator
atom1
=
atoms
.
begin
();
atom1
!=
atoms
.
end
();
++
atom1
)
{
for
(
map
<
int
,
int
>::
const_iterator
atom2
=
atomConstraints
[
*
atom1
].
begin
();
atom2
!=
atomConstraints
[
*
atom1
].
end
();
++
atom2
)
atomConstraints
[
atom2
->
first
].
erase
(
*
atom1
);
atomConstraints
[
*
atom1
].
clear
();
}
}
// Build the CUDA streams.
CUDAStream
<
int
>*
psRigidClusterConstraints
=
new
CUDAStream
<
int
>
(
totalConstraints
,
1
,
"RigidClusterConstraints"
);
gpu
->
psRigidClusterConstraints
=
psRigidClusterConstraints
;
gpu
->
sim
.
pRigidClusterConstraints
=
psRigidClusterConstraints
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
=
new
CUDAStream
<
unsigned
int
>
((
int
)
rigidClusters
.
size
()
+
1
,
1
,
"RigidClusterConstraintIndex"
);
gpu
->
psRigidClusterConstraintIndex
=
psRigidClusterConstraintIndex
;
gpu
->
sim
.
pRigidClusterConstraintIndex
=
psRigidClusterConstraintIndex
->
_pDevData
;
CUDAStream
<
float
>*
psRigidClusterMatrix
=
new
CUDAStream
<
float
>
(
totalMatrixElements
,
1
,
"RigidClusterMatrix"
);
gpu
->
psRigidClusterMatrix
=
psRigidClusterMatrix
;
gpu
->
sim
.
pRigidClusterMatrix
=
psRigidClusterMatrix
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
=
new
CUDAStream
<
unsigned
int
>
((
int
)
rigidClusters
.
size
()
+
1
,
1
,
"RigidClusterMatrixIndex"
);
gpu
->
psRigidClusterMatrixIndex
=
psRigidClusterMatrixIndex
;
gpu
->
sim
.
pRigidClusterMatrixIndex
=
psRigidClusterMatrixIndex
->
_pDevData
;
unsigned
int
constraintIndex
=
0
;
unsigned
int
maxClusterSize
=
0
;
for
(
unsigned
int
i
=
0
;
i
<
rigidClusters
.
size
();
i
++
)
{
vector
<
int
>&
cluster
=
rigidClusters
[
i
];
(
*
psRigidClusterConstraintIndex
)[
i
]
=
constraintIndex
;
for
(
unsigned
int
j
=
0
;
j
<
cluster
.
size
();
j
++
)
(
*
psRigidClusterConstraints
)[
constraintIndex
++
]
=
cluster
[
j
];
if
(
cluster
.
size
()
>
maxClusterSize
)
maxClusterSize
=
cluster
.
size
();
}
(
*
psRigidClusterConstraintIndex
)[
rigidClusters
.
size
()]
=
constraintIndex
;
gpu
->
sim
.
rigidClusters
=
rigidClusters
.
size
();
gpu
->
sim
.
maxRigidClusterSize
=
maxClusterSize
;
gpu
->
sim
.
clusterShakeBlockSize
=
1
;
while
(
gpu
->
sim
.
clusterShakeBlockSize
<
maxClusterSize
)
gpu
->
sim
.
clusterShakeBlockSize
*=
2
;
if
(
gpu
->
sim
.
lincs_threads_per_block
%
gpu
->
sim
.
clusterShakeBlockSize
!=
0
)
gpu
->
sim
.
lincs_threads_per_block
+=
gpu
->
sim
.
clusterShakeBlockSize
-
gpu
->
sim
.
lincs_threads_per_block
%
gpu
->
sim
.
clusterShakeBlockSize
;
psRigidClusterConstraints
->
Upload
();
psRigidClusterConstraintIndex
->
Upload
();
gpu
->
hasInitializedRigidClusters
=
false
;
}
extern
"C"
extern
"C"
void
gpuSetConstraintParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
distance
,
void
gpuSetConstraintParameters
(
gpuContext
gpu
,
const
vector
<
int
>&
atom1
,
const
vector
<
int
>&
atom2
,
const
vector
<
float
>&
distance
,
const
vector
<
float
>&
invMass1
,
const
vector
<
float
>&
invMass2
,
float
shakeTolerance
,
unsigned
int
lincsTerms
)
const
vector
<
float
>&
invMass1
,
const
vector
<
float
>&
invMass2
,
float
shakeTolerance
,
unsigned
int
lincsTerms
)
...
@@ -730,7 +839,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -730,7 +839,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
CUDAStream
<
float
>*
psLincsSolution
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
CUDAStream
<
float
>*
psLincsSolution
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
gpu
->
psLincsSolution
=
psLincsSolution
;
gpu
->
psLincsSolution
=
psLincsSolution
;
gpu
->
sim
.
pLincsSolution
=
psLincsSolution
->
_pDevData
;
gpu
->
sim
.
pLincsSolution
=
psLincsSolution
->
_pDevData
;
CUDAStream
<
short
>*
psSyncCounter
=
new
CUDAStream
<
short
>
(
2
*
gpu
->
sim
.
blocks
,
1
,
"SyncCounter"
);
CUDAStream
<
short
>*
psSyncCounter
=
new
CUDAStream
<
short
>
(
3
*
gpu
->
sim
.
blocks
,
1
,
"SyncCounter"
);
gpu
->
psSyncCounter
=
psSyncCounter
;
gpu
->
psSyncCounter
=
psSyncCounter
;
gpu
->
sim
.
pSyncCounter
=
psSyncCounter
->
_pDevData
;
gpu
->
sim
.
pSyncCounter
=
psSyncCounter
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psRequiredIterations
=
new
CUDAStream
<
unsigned
int
>
(
1
,
1
,
"RequiredIterations"
);
CUDAStream
<
unsigned
int
>*
psRequiredIterations
=
new
CUDAStream
<
unsigned
int
>
(
1
,
1
,
"RequiredIterations"
);
...
@@ -774,14 +883,9 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -774,14 +883,9 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
if
(
gpu
->
sim
.
lincs_threads_per_block
<
gpu
->
sim
.
blocks
)
if
(
gpu
->
sim
.
lincs_threads_per_block
<
gpu
->
sim
.
blocks
)
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
blocks
;
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
blocks
;
// Identify rigid clusters of atoms.
gpu
->
psLincsNumConnections
->
Download
();
findRigidClusters
(
gpu
,
atom1
,
atom2
,
lincsConstraints
);
gpu
->
psLincsConnections
->
Download
();
gpu
->
psLincsCoupling
->
Download
();
#ifdef DeltaShake
// count number of atoms w/o constraint
// count number of atoms w/o constraint
...
@@ -820,7 +924,6 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -820,7 +924,6 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
}
else
{
}
else
{
gpu
->
sim
.
nonshake_threads_per_block
=
0
;
gpu
->
sim
.
nonshake_threads_per_block
=
0
;
}
}
#endif
}
}
extern
"C"
extern
"C"
...
@@ -1152,6 +1255,10 @@ void* gpuInit(int numAtoms)
...
@@ -1152,6 +1255,10 @@ void* gpuInit(int numAtoms)
gpu
->
psSyncCounter
=
NULL
;
gpu
->
psSyncCounter
=
NULL
;
gpu
->
psRequiredIterations
=
NULL
;
gpu
->
psRequiredIterations
=
NULL
;
gpu
->
psShakeReducedMass
=
NULL
;
gpu
->
psShakeReducedMass
=
NULL
;
gpu
->
psRigidClusterConstraints
=
NULL
;
gpu
->
psRigidClusterConstraintIndex
=
NULL
;
gpu
->
psRigidClusterMatrix
=
NULL
;
gpu
->
psRigidClusterMatrixIndex
=
NULL
;
// Initialize output buffer before reading parameters
// Initialize output buffer before reading parameters
gpu
->
pOutputBufferCounter
=
new
unsigned
int
[
gpu
->
sim
.
paddedNumberOfAtoms
];
gpu
->
pOutputBufferCounter
=
new
unsigned
int
[
gpu
->
sim
.
paddedNumberOfAtoms
];
...
@@ -1304,6 +1411,10 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1304,6 +1411,10 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psSyncCounter
;
delete
gpu
->
psSyncCounter
;
delete
gpu
->
psRequiredIterations
;
delete
gpu
->
psRequiredIterations
;
delete
gpu
->
psShakeReducedMass
;
delete
gpu
->
psShakeReducedMass
;
delete
gpu
->
psRigidClusterConstraints
;
delete
gpu
->
psRigidClusterConstraintIndex
;
delete
gpu
->
psRigidClusterMatrix
;
delete
gpu
->
psRigidClusterMatrixIndex
;
if
(
gpu
->
cudpp
!=
0
)
if
(
gpu
->
cudpp
!=
0
)
cudppDestroyPlan
(
gpu
->
cudpp
);
cudppDestroyPlan
(
gpu
->
cudpp
);
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
f329a047
...
@@ -80,6 +80,7 @@ struct _gpuContext {
...
@@ -80,6 +80,7 @@ struct _gpuContext {
bool
bRecalculateBornRadii
;
bool
bRecalculateBornRadii
;
bool
bOutputBufferPerWarp
;
bool
bOutputBufferPerWarp
;
bool
bIncludeGBSA
;
bool
bIncludeGBSA
;
bool
hasInitializedRigidClusters
;
unsigned
long
seed
;
unsigned
long
seed
;
SM_VERSION
sm_version
;
SM_VERSION
sm_version
;
CUDPPHandle
cudpp
;
CUDPPHandle
cudpp
;
...
@@ -143,6 +144,10 @@ struct _gpuContext {
...
@@ -143,6 +144,10 @@ struct _gpuContext {
CUDAStream
<
short
>*
psSyncCounter
;
// Used for global thread synchronization
CUDAStream
<
short
>*
psSyncCounter
;
// Used for global thread synchronization
CUDAStream
<
unsigned
int
>*
psRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
CUDAStream
<
unsigned
int
>*
psRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
CUDAStream
<
float
>*
psShakeReducedMass
;
// The reduced mass for each SHAKE constraint
CUDAStream
<
float
>*
psShakeReducedMass
;
// The reduced mass for each SHAKE constraint
CUDAStream
<
int
>*
psRigidClusterConstraints
;
// The constraints in each rigid cluster
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
};
};
typedef
struct
_gpuContext
*
gpuContext
;
typedef
struct
_gpuContext
*
gpuContext
;
...
...
platforms/cuda/src/kernels/kCShake.cu
0 → 100644
View file @
f329a047
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2009 Stanford University and the Authors. *
* Authors: Scott Le Grand, Peter Eastman *
* Contributors: *
* *
* Permission is hereby granted, free of charge, to any person obtaining a *
* copy of this software and associated documentation files (the "Software"), *
* to deal in the Software without restriction, including without limitation *
* the rights to use, copy, modify, merge, publish, distribute, sublicense, *
* and/or sell copies of the Software, and to permit persons to whom the *
* Software is furnished to do so, subject to the following conditions: *
* *
* The above copyright notice and this permission notice shall be included in *
* all copies or substantial portions of the Software. *
* *
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR *
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, *
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL *
* THE AUTHORS, CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, *
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR *
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE *
* USE OR OTHER DEALINGS IN THE SOFTWARE. *
* -------------------------------------------------------------------------- */
#include <cuda.h>
#include <vector_functions.h>
#include <vector>
#include "jama_svd.h"
#include "gputypes.h"
using
namespace
std
;
using
TNT
::
Array2D
;
using
JAMA
::
SVD
;
static
__constant__
cudaGmxSimulation
cSim
;
void
SetCShakeSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyToSymbol
(
cSim
,
&
gpu
->
sim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyToSymbol: SetSim copy to cSim failed"
);
}
void
GetCShakeSim
(
gpuContext
gpu
)
{
cudaError_t
status
;
status
=
cudaMemcpyFromSymbol
(
&
gpu
->
sim
,
cSim
,
sizeof
(
cudaGmxSimulation
));
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
/**
* Synchronize all threads across all blocks.
*/
__device__
void
kSyncAllThreads_kernel
(
short
*
syncCounter
,
short
newCount
)
{
__syncthreads
();
if
(
threadIdx
.
x
==
0
)
syncCounter
[
blockIdx
.
x
]
=
newCount
;
if
(
threadIdx
.
x
<
gridDim
.
x
)
{
volatile
short
*
counter
=
&
syncCounter
[
threadIdx
.
x
];
do
{
}
while
(
*
counter
!=
newCount
);
}
__syncthreads
();
}
__global__
void
kApplyCShake_kernel
(
float4
*
atomPositions
,
bool
addOldPosition
)
{
extern
__shared__
float
temp
[];
// Initialize counters used for monitoring convergence and doing global thread synchronization.
__shared__
unsigned
int
requiredIterations
;
if
(
threadIdx
.
x
==
0
)
{
requiredIterations
=
0
;
cSim
.
pSyncCounter
[
gridDim
.
x
+
blockIdx
.
x
]
=
-
1
;
cSim
.
pSyncCounter
[
2
*
gridDim
.
x
+
blockIdx
.
x
]
=
-
1
;
cSim
.
pRequiredIterations
[
0
]
=
0
;
}
// Calculate the direction of each constraint.
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
lincsConstraints
)
{
int2
atoms
=
cSim
.
pLincsAtoms
[
pos
];
float4
dir
=
cSim
.
pLincsDistance
[
pos
];
float4
oldPos1
=
cSim
.
pOldPosq
[
atoms
.
x
];
float4
oldPos2
=
cSim
.
pOldPosq
[
atoms
.
y
];
dir
.
x
=
oldPos1
.
x
-
oldPos2
.
x
;
dir
.
y
=
oldPos1
.
y
-
oldPos2
.
y
;
dir
.
z
=
oldPos1
.
z
-
oldPos2
.
z
;
cSim
.
pLincsDistance
[
pos
]
=
dir
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
__syncthreads
();
// Iteratively update the atom positions.
unsigned
int
maxIterations
=
150
;
float
lowerTol
=
1.0
f
-
2.0
f
*
cSim
.
shakeTolerance
+
cSim
.
shakeTolerance
*
cSim
.
shakeTolerance
;
float
upperTol
=
1.0
f
+
2.0
f
*
cSim
.
shakeTolerance
+
cSim
.
shakeTolerance
*
cSim
.
shakeTolerance
;
for
(
unsigned
int
iteration
=
0
;
iteration
<
maxIterations
&&
iteration
==
requiredIterations
;
iteration
++
)
{
// Calculate the constraint force for each constraint.
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
lincsConstraints
)
{
int2
atoms
=
cSim
.
pLincsAtoms
[
pos
];
float4
delta1
=
atomPositions
[
atoms
.
x
];
float4
delta2
=
atomPositions
[
atoms
.
y
];
float4
dir
=
cSim
.
pLincsDistance
[
pos
];
float3
rp_ij
=
make_float3
(
delta1
.
x
-
delta2
.
x
,
delta1
.
y
-
delta2
.
y
,
delta1
.
z
-
delta2
.
z
);
if
(
addOldPosition
)
{
rp_ij
.
x
+=
dir
.
x
;
rp_ij
.
y
+=
dir
.
y
;
rp_ij
.
z
+=
dir
.
z
;
}
float
rp2
=
rp_ij
.
x
*
rp_ij
.
x
+
rp_ij
.
y
*
rp_ij
.
y
+
rp_ij
.
z
*
rp_ij
.
z
;
float
dist2
=
dir
.
w
*
dir
.
w
;
float
diff
=
dist2
-
rp2
;
float
rrpr
=
rp_ij
.
x
*
dir
.
x
+
rp_ij
.
y
*
dir
.
y
+
rp_ij
.
z
*
dir
.
z
;
float
d_ij2
=
dir
.
x
*
dir
.
x
+
dir
.
y
*
dir
.
y
+
dir
.
z
*
dir
.
z
;
float
reducedMass
=
cSim
.
pShakeReducedMass
[
pos
];
cSim
.
pLincsSolution
[
pos
]
=
(
rrpr
>
d_ij2
*
1e-6
f
?
reducedMass
*
diff
/
rrpr
:
0.0
f
);
if
(
requiredIterations
==
iteration
&&
(
rp2
<
lowerTol
*
dist2
||
rp2
>
upperTol
*
dist2
))
requiredIterations
=
iteration
+
1
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
kSyncAllThreads_kernel
(
cSim
.
pSyncCounter
,
iteration
);
if
(
threadIdx
.
x
==
0
&&
requiredIterations
>
iteration
)
cSim
.
pRequiredIterations
[
0
]
=
requiredIterations
;
// Multiply by the inverse constraint matrix for each rigid cluster.
if
(
cSim
.
rigidClusters
>
0
)
{
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
block
=
pos
/
cSim
.
clusterShakeBlockSize
;
unsigned
int
indexInBlock
=
pos
-
block
*
cSim
.
clusterShakeBlockSize
;
while
(
block
<
cSim
.
rigidClusters
)
{
unsigned
int
firstIndex
=
cSim
.
pRigidClusterConstraintIndex
[
block
];
unsigned
int
blockSize
=
cSim
.
pRigidClusterConstraintIndex
[
block
+
1
]
-
firstIndex
;
if
(
indexInBlock
<
blockSize
)
{
// Load the constraint forces and matrix.
unsigned
int
constraint
=
cSim
.
pRigidClusterConstraints
[
firstIndex
+
indexInBlock
];
temp
[
threadIdx
.
x
]
=
cSim
.
pLincsSolution
[
constraint
];
unsigned
int
firstMatrixIndex
=
cSim
.
pRigidClusterMatrixIndex
[
block
];
// Multiply by the matrix.
float
sum
=
0.0
f
;
for
(
unsigned
int
i
=
0
;
i
<
blockSize
;
i
++
)
sum
+=
temp
[
threadIdx
.
x
-
indexInBlock
+
i
]
*
cSim
.
pRigidClusterMatrix
[
firstMatrixIndex
+
i
*
blockSize
+
indexInBlock
];
cSim
.
pLincsSolution
[
constraint
]
=
sum
;
}
block
+=
(
blockDim
.
x
*
gridDim
.
x
)
/
cSim
.
clusterShakeBlockSize
;
}
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
gridDim
.
x
],
iteration
);
}
// Update the position of each atom.
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
atoms
)
{
float4
atomPos
=
atomPositions
[
pos
];
float
invMass
=
cSim
.
pVelm4
[
pos
].
w
;
int
num
=
cSim
.
pLincsNumAtomConstraints
[
pos
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
int
index
=
pos
+
i
*
cSim
.
atoms
;
int
constraint
=
cSim
.
pLincsAtomConstraints
[
index
];
float
constraintForce
=
invMass
*
cSim
.
pLincsSolution
[
constraint
];
constraintForce
=
(
cSim
.
pLincsAtoms
[
constraint
].
x
==
pos
?
constraintForce
:
-
constraintForce
);
float4
dir
=
cSim
.
pLincsDistance
[
constraint
];
atomPos
.
x
+=
constraintForce
*
dir
.
x
;
atomPos
.
y
+=
constraintForce
*
dir
.
y
;
atomPos
.
z
+=
constraintForce
*
dir
.
z
;
}
atomPositions
[
pos
]
=
atomPos
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
2
*
gridDim
.
x
],
iteration
);
requiredIterations
=
cSim
.
pRequiredIterations
[
0
];
}
// Reset the initial sync counter to be ready for the next call.
if
(
threadIdx
.
x
==
0
)
cSim
.
pSyncCounter
[
blockIdx
.
x
]
=
-
1
;
}
static
void
initInverseMatrices
(
gpuContext
gpu
)
{
// Build the inverse constraint matrix for each cluster.
gpu
->
psPosq4
->
Download
();
gpu
->
psVelm4
->
Download
();
unsigned
int
elementIndex
=
0
;
for
(
unsigned
int
i
=
0
;
i
<
gpu
->
sim
.
rigidClusters
;
i
++
)
{
// Compute the constraint coupling matrix for this cluster.
unsigned
int
startIndex
=
(
*
gpu
->
psRigidClusterConstraintIndex
)[
i
];
unsigned
int
endIndex
=
(
*
gpu
->
psRigidClusterConstraintIndex
)[
i
+
1
];
unsigned
int
size
=
endIndex
-
startIndex
;
vector
<
float3
>
r
(
size
);
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
int2
atoms
=
(
*
gpu
->
psLincsAtoms
)[(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
j
]];
float4
pos1
=
(
*
gpu
->
psPosq4
)[
atoms
.
x
];
float4
pos2
=
(
*
gpu
->
psPosq4
)[
atoms
.
y
];
r
[
j
]
=
make_float3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
float
invLength
=
1.0
f
/
sqrt
(
r
[
j
].
x
*
r
[
j
].
x
+
r
[
j
].
y
*
r
[
j
].
y
+
r
[
j
].
z
*
r
[
j
].
z
);
r
[
j
].
x
*=
invLength
;
r
[
j
].
y
*=
invLength
;
r
[
j
].
z
*=
invLength
;
}
Array2D
<
double
>
matrix
(
size
,
size
);
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
int
constraintj
=
(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
j
];
int2
atomsj
=
(
*
gpu
->
psLincsAtoms
)[
constraintj
];
for
(
unsigned
int
k
=
0
;
k
<
size
;
k
++
)
{
int
constraintk
=
(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
k
];
int2
atomsk
=
(
*
gpu
->
psLincsAtoms
)[
constraintk
];
float
invMassj0
=
(
*
gpu
->
psVelm4
)[
atomsj
.
x
].
w
;
float
invMassj1
=
(
*
gpu
->
psVelm4
)[
atomsj
.
y
].
w
;
double
dot
=
r
[
j
].
x
*
r
[
k
].
x
+
r
[
j
].
y
*
r
[
k
].
y
+
r
[
j
].
z
*
r
[
k
].
z
;
if
(
atomsj
.
x
==
atomsk
.
x
)
dot
*=
invMassj0
/
(
invMassj0
+
invMassj1
);
else
if
(
atomsj
.
y
==
atomsk
.
y
)
dot
*=
invMassj1
/
(
invMassj0
+
invMassj1
);
else
if
(
atomsj
.
x
==
atomsk
.
y
)
dot
*=
-
invMassj0
/
(
invMassj0
+
invMassj1
);
else
if
(
atomsj
.
y
==
atomsk
.
x
)
dot
*=
-
invMassj1
/
(
invMassj0
+
invMassj1
);
else
dot
=
0.0
;
matrix
[
j
][
k
]
=
dot
;
}
matrix
[
j
][
j
]
=
1.0
;
}
// Invert it using SVD.
Array2D
<
double
>
u
,
v
;
Array1D
<
double
>
w
;
SVD
<
double
>
svd
(
matrix
);
svd
.
getU
(
u
);
svd
.
getV
(
v
);
svd
.
getSingularValues
(
w
);
double
singularValueCutoff
=
0.01
*
w
[
0
];
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
w
[
j
]
=
(
w
[
j
]
<
singularValueCutoff
?
0.0
:
1.0
/
w
[
j
]);
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
for
(
unsigned
int
k
=
0
;
k
<
size
;
k
++
)
{
matrix
[
j
][
k
]
=
0.0
;
for
(
unsigned
int
m
=
0
;
m
<
size
;
m
++
)
matrix
[
j
][
k
]
+=
v
[
j
][
m
]
*
w
[
m
]
*
u
[
k
][
m
];
}
}
// Record the inverted matrix.
(
*
gpu
->
psRigidClusterMatrixIndex
)[
i
]
=
elementIndex
;
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
float
distance1
=
(
*
gpu
->
psLincsDistance
)[(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
j
]].
w
;
for
(
unsigned
int
k
=
0
;
k
<
size
;
k
++
)
{
float
distance2
=
(
*
gpu
->
psLincsDistance
)[(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
k
]].
w
;
(
*
gpu
->
psRigidClusterMatrix
)[
elementIndex
++
]
=
matrix
[
k
][
j
]
*
distance1
/
distance2
;
}
}
}
(
*
gpu
->
psRigidClusterMatrixIndex
)[
gpu
->
sim
.
rigidClusters
]
=
elementIndex
;
gpu
->
psRigidClusterMatrix
->
Upload
();
gpu
->
psRigidClusterMatrixIndex
->
Upload
();
gpu
->
hasInitializedRigidClusters
=
true
;
}
void
kApplyFirstCShake
(
gpuContext
gpu
)
{
// printf("kApplyFirstCShake\n");
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
{
if
(
!
gpu
->
hasInitializedRigidClusters
)
initInverseMatrices
(
gpu
);
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
,
4
*
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosqP
,
true
);
LAUNCHERROR
(
"kApplyCShake"
);
}
}
void
kApplySecondCShake
(
gpuContext
gpu
)
{
// printf("kApplySecondCShake\n");
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
{
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
,
4
*
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosq
,
false
);
LAUNCHERROR
(
"kApplyCShake"
);
}
}
platforms/cuda/staticTarget/CMakeLists.txt
View file @
f329a047
...
@@ -13,6 +13,7 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
...
@@ -13,6 +13,7 @@ FOREACH(subdir ${OPENMM_SOURCE_SUBDIRS})
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/include
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/include
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/src
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/platforms/cuda/
${
subdir
}
/src
)
ENDFOREACH
(
subdir
)
ENDFOREACH
(
subdir
)
CUDA_INCLUDE_DIRECTORIES
(
BEFORE
${
CMAKE_SOURCE_DIR
}
/jama/include
)
CUDA_ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
CUDA_ADD_LIBRARY
(
${
STATIC_TARGET
}
STATIC
${
SOURCE_FILES
}
${
SOURCE_INCLUDE_FILES
}
${
API_ABS_INCLUDE_FILES
}
)
...
...
platforms/reference/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
View file @
f329a047
...
@@ -317,8 +317,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
...
@@ -317,8 +317,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
static
const
RealOpenMM
zero
=
0.0
;
static
const
RealOpenMM
zero
=
0.0
;
static
const
RealOpenMM
one
=
1.0
;
static
const
RealOpenMM
one
=
1.0
;
static
const
RealOpenMM
two
=
2.0
;
static
const
RealOpenMM
two
=
2.0
;
static
const
RealOpenMM
three
=
3.0
;
static
const
RealOpenMM
oneM
=
-
1.0
;
static
const
RealOpenMM
half
=
0.5
;
static
const
RealOpenMM
half
=
0.5
;
static
const
RealOpenMM
epsilon6
=
(
RealOpenMM
)
1.0e-06
;
static
const
RealOpenMM
epsilon6
=
(
RealOpenMM
)
1.0e-06
;
...
@@ -327,8 +325,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
...
@@ -327,8 +325,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
// ---------------------------------------------------------------------------------------
// ---------------------------------------------------------------------------------------
int
numberOfConstraints
=
getNumberOfConstraints
();
// temp arrays
// temp arrays
RealOpenMM
**
r_ij
=
_r_ij
;
RealOpenMM
**
r_ij
=
_r_ij
;
...
@@ -345,7 +341,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
...
@@ -345,7 +341,6 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
int
atomJ
=
_atomIndices
[
ii
][
1
];
int
atomJ
=
_atomIndices
[
ii
][
1
];
reducedMasses
[
ii
]
=
half
/
(
inverseMasses
[
atomI
]
+
inverseMasses
[
atomJ
]
);
reducedMasses
[
ii
]
=
half
/
(
inverseMasses
[
atomI
]
+
inverseMasses
[
atomJ
]
);
}
}
vector
<
double
>
temp
;
for
(
unsigned
int
i
=
0
;
i
<
_rigidClusters
.
size
();
i
++
)
{
for
(
unsigned
int
i
=
0
;
i
<
_rigidClusters
.
size
();
i
++
)
{
// Compute the constraint coupling matrix for this cluster.
// Compute the constraint coupling matrix for this cluster.
...
@@ -364,8 +359,8 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
...
@@ -364,8 +359,8 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
r
[
j
][
2
]
*=
invLength
;
r
[
j
][
2
]
*=
invLength
;
}
}
Array2D
<
double
>
matrix
(
size
,
size
);
Array2D
<
double
>
matrix
(
size
,
size
);
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
{
for
(
unsigned
int
k
=
0
;
k
<
size
;
k
++
)
{
double
dot
;
double
dot
;
int
atomj0
=
_atomIndices
[
cluster
[
j
]][
0
];
int
atomj0
=
_atomIndices
[
cluster
[
j
]][
0
];
int
atomj1
=
_atomIndices
[
cluster
[
j
]][
1
];
int
atomj1
=
_atomIndices
[
cluster
[
j
]][
1
];
...
@@ -395,23 +390,21 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
...
@@ -395,23 +390,21 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
svd
.
getV
(
v
);
svd
.
getV
(
v
);
svd
.
getSingularValues
(
w
);
svd
.
getSingularValues
(
w
);
double
singularValueCutoff
=
0.01
*
w
[
0
];
double
singularValueCutoff
=
0.01
*
w
[
0
];
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
w
[
j
]
=
(
w
[
j
]
<
singularValueCutoff
?
0.0
:
1.0
/
w
[
j
]);
w
[
j
]
=
(
w
[
j
]
<
singularValueCutoff
?
0.0
:
1.0
/
w
[
j
]);
if
(
temp
.
size
()
<
size
)
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
temp
.
resize
(
size
);
for
(
unsigned
int
k
=
0
;
k
<
size
;
k
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
{
matrix
[
j
][
k
]
=
0.0
;
matrix
[
j
][
k
]
=
0.0
;
for
(
int
m
=
0
;
m
<
(
int
)
size
;
m
++
)
for
(
unsigned
int
m
=
0
;
m
<
size
;
m
++
)
matrix
[
j
][
k
]
+=
v
[
j
][
m
]
*
w
[
m
]
*
u
[
k
][
m
];
matrix
[
j
][
k
]
+=
v
[
j
][
m
]
*
w
[
m
]
*
u
[
k
][
m
];
}
}
}
}
// Record the inverted matrix.
// Record the inverted matrix.
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
for
(
unsigned
int
k
=
0
;
k
<
size
;
k
++
)
_matrices
[
i
][
j
][
k
]
=
(
RealOpenMM
)
matrix
[
j
][
k
];
_matrices
[
i
][
j
][
k
]
=
(
RealOpenMM
)
matrix
[
j
][
k
]
*
_distance
[
cluster
[
k
]]
/
_distance
[
cluster
[
j
]]
;
}
}
}
}
...
@@ -478,12 +471,12 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
...
@@ -478,12 +471,12 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
unsigned
int
size
=
cluster
.
size
();
unsigned
int
size
=
cluster
.
size
();
if
(
size
>
tempForce
.
size
())
if
(
size
>
tempForce
.
size
())
tempForce
.
resize
(
size
);
tempForce
.
resize
(
size
);
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
tempForce
[
j
]
=
zero
;
tempForce
[
j
]
=
zero
;
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
for
(
unsigned
int
k
=
0
;
k
<
size
;
k
++
)
tempForce
[
j
]
+=
matrix
[
j
][
k
]
*
constraintForce
[
cluster
[
k
]]
*
_distance
[
cluster
[
k
]]
/
_distance
[
cluster
[
j
]]
;
tempForce
[
j
]
+=
matrix
[
j
][
k
]
*
constraintForce
[
cluster
[
k
]];
}
}
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
constraintForce
[
cluster
[
j
]]
=
tempForce
[
j
];
constraintForce
[
cluster
[
j
]]
=
tempForce
[
j
];
}
}
...
@@ -499,15 +492,6 @@ RealOpenMM damping = one;//(RealOpenMM) (iterations%2 == 0 ? 0.5 : 1.0);
...
@@ -499,15 +492,6 @@ RealOpenMM damping = one;//(RealOpenMM) (iterations%2 == 0 ? 0.5 : 1.0);
}
}
}
}
}
}
// static int sum = 0;
// static int count = 0;
// sum += iterations;
// count++;
// if (count == 100) {
// printf("%d iterations\n", sum);
// sum = 0;
// count = 0;
// }
// diagnostics
// diagnostics
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment