Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
47a6fb1e
"vscode:/vscode.git/clone" did not exist on "2b8ad70394f38d296497215da757aa31ac36b87b"
Commit
47a6fb1e
authored
Jun 03, 2009
by
Peter Eastman
Browse files
Created initial CUDA implementation of new constraint algorithm
parent
fa66c669
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
209 additions
and
45 deletions
+209
-45
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+2
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+163
-19
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+2
-0
platforms/cuda/src/kernels/kCShake.cu
platforms/cuda/src/kernels/kCShake.cu
+40
-24
platforms/reference/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
...rence/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
+2
-2
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
47a6fb1e
...
@@ -402,6 +402,8 @@ struct cudaGmxSimulation {
...
@@ -402,6 +402,8 @@ struct cudaGmxSimulation {
float
*
pRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
float
*
pRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
unsigned
int
*
pRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
unsigned
int
*
pRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
unsigned
int
*
pRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
unsigned
int
*
pRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
unsigned
int
*
pConstraintMatrixColumn
;
// The column of each element in the constraint matrix.
float
*
pConstraintMatrixValue
;
// The value of each element in the constraint matrix.
// Mutable stuff
// Mutable stuff
float4
*
pPosq
;
// Pointer to atom positions and charges
float4
*
pPosq
;
// Pointer to atom positions and charges
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
47a6fb1e
...
@@ -49,6 +49,7 @@ using namespace std;
...
@@ -49,6 +49,7 @@ using namespace std;
#include "hilbert.h"
#include "hilbert.h"
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "jama_svd.h"
#include "jama_svd.h"
#include "quern.h"
using
OpenMM
::
OpenMMException
;
using
OpenMM
::
OpenMMException
;
using
TNT
::
Array2D
;
using
TNT
::
Array2D
;
...
@@ -562,22 +563,22 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
...
@@ -562,22 +563,22 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
// Reorder the constraints so those in a cluster are sequential.
// Reorder the constraints so those in a cluster are sequential.
vector
<
int
>
constraintOrder
(
constraintIndices
.
size
());
//
vector<int> constraintOrder(constraintIndices.size());
vector
<
int
>
clusterStartIndex
(
rigidClusters
.
size
());
//
vector<int> clusterStartIndex(rigidClusters.size());
set
<
int
>
inCluster
;
//
set<int> inCluster;
int
nextIndex
=
0
;
//
int nextIndex = 0;
for
(
int
i
=
0
;
i
<
(
int
)
rigidClusters
.
size
();
++
i
)
{
//
for (int i = 0; i < (int) rigidClusters.size(); ++i) {
clusterStartIndex
[
i
]
=
nextIndex
;
//
clusterStartIndex[i] = nextIndex;
for
(
int
j
=
0
;
j
<
(
int
)
rigidClusters
[
i
].
size
();
++
j
)
{
//
for (int j = 0; j < (int) rigidClusters[i].size(); ++j) {
int
constraint
=
rigidClusters
[
i
][
j
];
//
int constraint = rigidClusters[i][j];
constraintOrder
[
nextIndex
++
]
=
constraint
;
//
constraintOrder[nextIndex++] = constraint;
inCluster
.
insert
(
constraint
);
//
inCluster.insert(constraint);
}
//
}
}
//
}
for
(
int
i
=
0
;
i
<
(
int
)
constraintIndices
.
size
();
++
i
)
//
for (int i = 0; i < (int) constraintIndices.size(); ++i)
if
(
inCluster
.
find
(
constraintIndices
[
i
])
==
inCluster
.
end
())
//
if (inCluster.find(constraintIndices[i]) == inCluster.end())
constraintOrder
[
nextIndex
++
]
=
constraintIndices
[
i
];
//
constraintOrder[nextIndex++] = constraintIndices[i];
constraintIndices
=
constraintOrder
;
//
constraintIndices = constraintOrder;
// Build the CUDA streams.
// Build the CUDA streams.
...
@@ -931,6 +932,132 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -931,6 +932,132 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Compute the constraint coupling matrix
vector
<
vector
<
int
>
>
atomAngles
(
gpu
->
natoms
);
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
bond_angles
;
i
++
)
atomAngles
[(
*
gpu
->
psBondAngleID1
)[
i
].
y
].
push_back
(
i
);
vector
<
vector
<
pair
<
int
,
double
>
>
>
matrix
(
numLincs
);
if
(
numLincs
>
0
)
{
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
{
for
(
int
k
=
0
;
k
<
numLincs
;
k
++
)
{
if
(
j
==
k
)
{
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
j
,
1.0
));
continue
;
}
double
scale
;
int
atomj0
=
atom1
[
j
];
int
atomj1
=
atom2
[
j
];
int
atomk0
=
atom1
[
k
];
int
atomk1
=
atom2
[
k
];
int
atoma
,
atomb
,
atomc
;
if
(
atomj0
==
atomk0
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk1
;
scale
=
invMass1
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
if
(
atomj1
==
atomk1
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk0
;
scale
=
invMass2
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
if
(
atomj0
==
atomk1
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk0
;
scale
=
invMass1
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
if
(
atomj1
==
atomk0
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk1
;
scale
=
invMass2
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
continue
;
// These constraints are not connected.
// Look for a third constraint forming a triangle with these two.
bool
foundConstraint
=
false
;
for
(
int
other
=
0
;
other
<
numLincs
;
other
++
)
{
if
((
atom1
[
other
]
==
atoma
&&
atom2
[
other
]
==
atomc
)
||
(
atom1
[
other
]
==
atomc
&&
atom2
[
other
]
==
atoma
))
{
double
d1
=
distance
[
j
];
double
d2
=
distance
[
k
];
double
d3
=
distance
[
other
];
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
(
d1
*
d1
+
d2
*
d2
-
d3
*
d3
)
/
(
2.0
*
d1
*
d2
)));
foundConstraint
=
true
;
break
;
}
}
if
(
!
foundConstraint
)
{
// We didn't find one, so look for an angle force field term.
const
vector
<
int
>&
angleCandidates
=
atomAngles
[
atomb
];
for
(
vector
<
int
>::
const_iterator
iter
=
angleCandidates
.
begin
();
iter
!=
angleCandidates
.
end
();
iter
++
)
{
int4
atoms
=
(
*
gpu
->
psBondAngleID1
)[
*
iter
];
if
((
atoms
.
x
==
atoma
&&
atoms
.
z
==
atomc
)
||
(
atoms
.
z
==
atoma
&&
atoms
.
x
==
atomc
))
{
double
angle
=
(
*
gpu
->
psBondAngleParameter
)[
*
iter
].
x
;
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
cos
(
angle
*
PI
/
180.0
)));
break
;
}
}
}
}
}
// Invert it using QR.
vector
<
int
>
matrixRowStart
;
vector
<
int
>
matrixColIndex
;
vector
<
double
>
matrixValue
;
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
matrixRowStart
.
push_back
(
matrixValue
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
matrix
[
i
].
size
();
j
++
)
{
pair
<
int
,
double
>
element
=
matrix
[
i
][
j
];
matrixColIndex
.
push_back
(
element
.
first
);
matrixValue
.
push_back
(
element
.
second
);
}
}
matrixRowStart
.
push_back
(
matrixValue
.
size
());
int
*
qRowStart
,
*
qColIndex
,
*
rRowStart
,
*
rColIndex
;
double
*
qValue
,
*
rValue
;
int
result
=
QUERN_compute_qr
(
numLincs
,
numLincs
,
&
matrixRowStart
[
0
],
&
matrixColIndex
[
0
],
&
matrixValue
[
0
],
NULL
,
&
qRowStart
,
&
qColIndex
,
&
qValue
,
&
rRowStart
,
&
rColIndex
,
&
rValue
);
vector
<
double
>
rhs
(
numLincs
);
matrix
.
clear
();
matrix
.
resize
(
numLincs
);
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
// Extract column i of the inverse matrix.
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
rhs
[
j
]
=
(
i
==
j
?
1.0
:
0.0
);
result
=
QUERN_multiply_with_q_transpose
(
numLincs
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
result
=
QUERN_solve_with_r
(
numLincs
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
{
double
value
=
rhs
[
j
]
*
distance
[
i
]
/
distance
[
j
];
if
(
abs
(
value
)
>
0.02
)
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
i
,
value
));
}
}
QUERN_free_result
(
qRowStart
,
qColIndex
,
qValue
);
QUERN_free_result
(
rRowStart
,
rColIndex
,
rValue
);
}
int
maxRowElements
=
0
;
for
(
unsigned
i
=
0
;
i
<
matrix
.
size
();
i
++
)
maxRowElements
=
max
(
maxRowElements
,
(
int
)
matrix
[
i
].
size
());
maxRowElements
++
;
// Fill in the CUDA streams.
// Fill in the CUDA streams.
CUDAStream
<
int2
>*
psLincsAtoms
=
new
CUDAStream
<
int2
>
(
numLincs
,
1
,
"LincsAtoms"
);
CUDAStream
<
int2
>*
psLincsAtoms
=
new
CUDAStream
<
int2
>
(
numLincs
,
1
,
"LincsAtoms"
);
...
@@ -975,6 +1102,12 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -975,6 +1102,12 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
CUDAStream
<
float
>*
psShakeReducedMass
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
CUDAStream
<
float
>*
psShakeReducedMass
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
gpu
->
psShakeReducedMass
=
psShakeReducedMass
;
gpu
->
psShakeReducedMass
=
psShakeReducedMass
;
gpu
->
sim
.
pShakeReducedMass
=
psShakeReducedMass
->
_pDevData
;
gpu
->
sim
.
pShakeReducedMass
=
psShakeReducedMass
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psConstraintMatrixColumn
=
new
CUDAStream
<
unsigned
int
>
(
numLincs
*
maxRowElements
,
1
,
"ConstraintMatrixColumn"
);
gpu
->
psConstraintMatrixColumn
=
psConstraintMatrixColumn
;
gpu
->
sim
.
pConstraintMatrixColumn
=
psConstraintMatrixColumn
->
_pDevData
;
CUDAStream
<
float
>*
psConstraintMatrixValue
=
new
CUDAStream
<
float
>
(
numLincs
*
maxRowElements
,
1
,
"ConstraintMatrixValue"
);
gpu
->
psConstraintMatrixValue
=
psConstraintMatrixValue
;
gpu
->
sim
.
pConstraintMatrixValue
=
psConstraintMatrixValue
->
_pDevData
;
gpu
->
sim
.
lincsConstraints
=
numLincs
;
gpu
->
sim
.
lincsConstraints
=
numLincs
;
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
int
c
=
lincsConstraints
[
i
];
int
c
=
lincsConstraints
[
i
];
...
@@ -986,6 +1119,11 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -986,6 +1119,11 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(
*
psLincsNumConnections
)[
i
]
=
linkedConstraints
[
i
].
size
();
(
*
psLincsNumConnections
)[
i
]
=
linkedConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
].
size
();
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
].
size
();
j
++
)
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
][
j
];
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
][
j
];
for
(
unsigned
int
j
=
0
;
j
<
matrix
[
i
].
size
();
j
++
)
{
(
*
psConstraintMatrixColumn
)[
i
+
j
*
numLincs
]
=
matrix
[
i
][
j
].
first
;
(
*
psConstraintMatrixValue
)[
i
+
j
*
numLincs
]
=
matrix
[
i
][
j
].
second
;
}
(
*
psConstraintMatrixColumn
)[
i
+
matrix
[
i
].
size
()
*
numLincs
]
=
numLincs
;
}
}
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
(
*
psSyncCounter
)[
i
]
=
-
1
;
(
*
psSyncCounter
)[
i
]
=
-
1
;
...
@@ -1005,6 +1143,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -1005,6 +1143,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
psLincsAtomConstraints
->
Upload
();
psLincsAtomConstraints
->
Upload
();
psLincsNumAtomConstraints
->
Upload
();
psLincsNumAtomConstraints
->
Upload
();
psSyncCounter
->
Upload
();
psSyncCounter
->
Upload
();
psConstraintMatrixColumn
->
Upload
();
psConstraintMatrixValue
->
Upload
();
gpu
->
sim
.
lincsTerms
=
lincsTerms
;
gpu
->
sim
.
lincsTerms
=
lincsTerms
;
gpu
->
sim
.
lincs_threads_per_block
=
(
gpu
->
sim
.
lincsConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
gpu
->
sim
.
lincs_threads_per_block
=
(
gpu
->
sim
.
lincsConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
lincs_threads_per_block
>
gpu
->
sim
.
threads_per_block
)
if
(
gpu
->
sim
.
lincs_threads_per_block
>
gpu
->
sim
.
threads_per_block
)
...
@@ -1391,6 +1531,8 @@ void* gpuInit(int numAtoms)
...
@@ -1391,6 +1531,8 @@ void* gpuInit(int numAtoms)
gpu
->
psRigidClusterConstraintIndex
=
NULL
;
gpu
->
psRigidClusterConstraintIndex
=
NULL
;
gpu
->
psRigidClusterMatrix
=
NULL
;
gpu
->
psRigidClusterMatrix
=
NULL
;
gpu
->
psRigidClusterMatrixIndex
=
NULL
;
gpu
->
psRigidClusterMatrixIndex
=
NULL
;
gpu
->
psConstraintMatrixColumn
=
NULL
;
gpu
->
psConstraintMatrixValue
=
NULL
;
// Initialize output buffer before reading parameters
// Initialize output buffer before reading parameters
gpu
->
pOutputBufferCounter
=
new
unsigned
int
[
gpu
->
sim
.
paddedNumberOfAtoms
];
gpu
->
pOutputBufferCounter
=
new
unsigned
int
[
gpu
->
sim
.
paddedNumberOfAtoms
];
...
@@ -1492,9 +1634,9 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1492,9 +1634,9 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psxVector4
;
delete
gpu
->
psxVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psvVector4
;
delete
gpu
->
psSigEps2
;
delete
gpu
->
psSigEps2
;
delete
gpu
->
psEwaldEikr
;
delete
gpu
->
psEwaldEikr
;
delete
gpu
->
psEwaldStructureFactor
;
delete
gpu
->
psEwaldStructureFactor
;
delete
gpu
->
psEwaldCosSinSum
;
delete
gpu
->
psEwaldCosSinSum
;
delete
gpu
->
psObcData
;
delete
gpu
->
psObcData
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psObcChain
;
delete
gpu
->
psBornForce
;
delete
gpu
->
psBornForce
;
...
@@ -1549,6 +1691,8 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1549,6 +1691,8 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psRigidClusterConstraintIndex
;
delete
gpu
->
psRigidClusterConstraintIndex
;
delete
gpu
->
psRigidClusterMatrix
;
delete
gpu
->
psRigidClusterMatrix
;
delete
gpu
->
psRigidClusterMatrixIndex
;
delete
gpu
->
psRigidClusterMatrixIndex
;
delete
gpu
->
psConstraintMatrixColumn
;
delete
gpu
->
psConstraintMatrixValue
;
if
(
gpu
->
cudpp
!=
0
)
if
(
gpu
->
cudpp
!=
0
)
cudppDestroyPlan
(
gpu
->
cudpp
);
cudppDestroyPlan
(
gpu
->
cudpp
);
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
47a6fb1e
...
@@ -144,6 +144,8 @@ struct _gpuContext {
...
@@ -144,6 +144,8 @@ struct _gpuContext {
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
CUDAStream
<
unsigned
int
>*
psConstraintMatrixColumn
;
// The column of each element in the constraint matrix.
CUDAStream
<
float
>*
psConstraintMatrixValue
;
// The value of each element in the constraint matrix.
};
};
typedef
struct
_gpuContext
*
gpuContext
;
typedef
struct
_gpuContext
*
gpuContext
;
...
...
platforms/cuda/src/kernels/kCShake.cu
View file @
47a6fb1e
...
@@ -127,7 +127,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -127,7 +127,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
float
rrpr
=
rp_ij
.
x
*
dir
.
x
+
rp_ij
.
y
*
dir
.
y
+
rp_ij
.
z
*
dir
.
z
;
float
rrpr
=
rp_ij
.
x
*
dir
.
x
+
rp_ij
.
y
*
dir
.
y
+
rp_ij
.
z
*
dir
.
z
;
float
d_ij2
=
dir
.
x
*
dir
.
x
+
dir
.
y
*
dir
.
y
+
dir
.
z
*
dir
.
z
;
float
d_ij2
=
dir
.
x
*
dir
.
x
+
dir
.
y
*
dir
.
y
+
dir
.
z
*
dir
.
z
;
float
reducedMass
=
cSim
.
pShakeReducedMass
[
pos
];
float
reducedMass
=
cSim
.
pShakeReducedMass
[
pos
];
cSim
.
pLincs
Solution
[
pos
]
=
(
rrpr
>
d_ij2
*
1e-6
f
?
reducedMass
*
diff
/
rrpr
:
0.0
f
);
cSim
.
pLincs
Rhs1
[
pos
]
=
(
rrpr
>
d_ij2
*
1e-6
f
?
reducedMass
*
diff
/
rrpr
:
0.0
f
);
if
(
requiredIterations
==
iteration
&&
(
rp2
<
lowerTol
*
dist2
||
rp2
>
upperTol
*
dist2
))
if
(
requiredIterations
==
iteration
&&
(
rp2
<
lowerTol
*
dist2
||
rp2
>
upperTol
*
dist2
))
requiredIterations
=
iteration
+
1
;
requiredIterations
=
iteration
+
1
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
...
@@ -138,33 +138,49 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -138,33 +138,49 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
// Multiply by the inverse constraint matrix for each rigid cluster.
// Multiply by the inverse constraint matrix for each rigid cluster.
if
(
cSim
.
rigidClusters
>
0
)
// if (cSim.rigidClusters > 0)
// {
// pos = threadIdx.x + blockIdx.x * blockDim.x;
// unsigned int block = pos/cSim.clusterShakeBlockSize;
// unsigned int indexInBlock = pos-block*cSim.clusterShakeBlockSize;
// while (block < cSim.rigidClusters)
// {
// unsigned int firstConstraint = cSim.pRigidClusterConstraintIndex[block];
// unsigned int blockSize = cSim.pRigidClusterConstraintIndex[block+1]-firstConstraint;
// if (indexInBlock < blockSize)
// {
// // Load the constraint forces and matrix.
//
// temp[threadIdx.x] = cSim.pLincsSolution[firstConstraint+indexInBlock];
// unsigned int firstMatrixIndex = cSim.pRigidClusterMatrixIndex[block];
//
// // Multiply by the matrix.
//
// float sum = 0.0f;
// for (unsigned int i = 0; i < blockSize; i++)
// sum += temp[threadIdx.x-indexInBlock+i]*cSim.pRigidClusterMatrix[firstMatrixIndex+i*blockSize+indexInBlock];
// cSim.pLincsSolution[firstConstraint+indexInBlock] = sum;
// }
// block += (blockDim.x*gridDim.x)/cSim.clusterShakeBlockSize;
// }
// kSyncAllThreads_kernel(&cSim.pSyncCounter[gridDim.x], iteration);
// }
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
lincsConstraints
)
{
{
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
float
sum
=
0.0
f
;
unsigned
int
block
=
pos
/
cSim
.
clusterShakeBlockSize
;
for
(
unsigned
int
i
=
0
;
;
i
++
)
unsigned
int
indexInBlock
=
pos
-
block
*
cSim
.
clusterShakeBlockSize
;
while
(
block
<
cSim
.
rigidClusters
)
{
{
unsigned
int
firstConstraint
=
cSim
.
pRigidClusterConstraintIndex
[
block
];
int
index
=
pos
+
i
*
cSim
.
lincsConstraints
;
unsigned
int
blockSize
=
cSim
.
pRigidClusterConstraintIndex
[
block
+
1
]
-
firstConstraint
;
unsigned
int
column
=
cSim
.
pConstraintMatrixColumn
[
pos
+
i
*
cSim
.
lincsConstraints
];
if
(
indexInBlock
<
blockSize
)
if
(
column
>=
cSim
.
lincsConstraints
)
{
break
;
// Load the constraint forces and matrix.
sum
+=
cSim
.
pLincsRhs1
[
column
]
*
cSim
.
pConstraintMatrixValue
[
index
];
temp
[
threadIdx
.
x
]
=
cSim
.
pLincsSolution
[
firstConstraint
+
indexInBlock
];
unsigned
int
firstMatrixIndex
=
cSim
.
pRigidClusterMatrixIndex
[
block
];
// Multiply by the matrix.
float
sum
=
0.0
f
;
for
(
unsigned
int
i
=
0
;
i
<
blockSize
;
i
++
)
sum
+=
temp
[
threadIdx
.
x
-
indexInBlock
+
i
]
*
cSim
.
pRigidClusterMatrix
[
firstMatrixIndex
+
i
*
blockSize
+
indexInBlock
];
cSim
.
pLincsSolution
[
firstConstraint
+
indexInBlock
]
=
sum
;
}
block
+=
(
blockDim
.
x
*
gridDim
.
x
)
/
cSim
.
clusterShakeBlockSize
;
}
}
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
gridDim
.
x
],
iteration
);
cSim
.
pLincsSolution
[
pos
]
=
sum
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
gridDim
.
x
],
iteration
);
// Update the position of each atom.
// Update the position of each atom.
...
...
platforms/reference/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
View file @
47a6fb1e
...
@@ -201,7 +201,7 @@ ReferenceRigidShakeAlgorithm::ReferenceRigidShakeAlgorithm( int numberOfAtoms,
...
@@ -201,7 +201,7 @@ ReferenceRigidShakeAlgorithm::ReferenceRigidShakeAlgorithm( int numberOfAtoms,
QUERN_solve_with_r
(
numberOfConstraints
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
QUERN_solve_with_r
(
numberOfConstraints
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numberOfConstraints
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numberOfConstraints
;
j
++
)
{
double
value
=
rhs
[
j
]
*
_distance
[
i
]
/
_distance
[
j
];
double
value
=
rhs
[
j
]
*
_distance
[
i
]
/
_distance
[
j
];
if
(
abs
(
value
)
>
0.0
1
)
if
(
abs
(
value
)
>
0.0
2
)
_matrix
[
j
].
push_back
(
pair
<
int
,
RealOpenMM
>
(
i
,
(
RealOpenMM
)
value
));
_matrix
[
j
].
push_back
(
pair
<
int
,
RealOpenMM
>
(
i
,
(
RealOpenMM
)
value
));
}
}
}
}
...
@@ -600,7 +600,7 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
...
@@ -600,7 +600,7 @@ int ReferenceRigidShakeAlgorithm::apply( int numberOfAtoms, RealOpenMM** atomCoo
numberConverged
++
;
numberConverged
++
;
}
}
}
}
if
(
numberConverged
==
_numberOfConstraints
){
if
(
numberConverged
==
_numberOfConstraints
){
done
=
true
;
done
=
true
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment