Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
47a6fb1e
Commit
47a6fb1e
authored
Jun 03, 2009
by
Peter Eastman
Browse files
Created initial CUDA implementation of new constraint algorithm
parent
fa66c669
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
209 additions
and
45 deletions
+209
-45
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+2
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+163
-19
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+2
-0
platforms/cuda/src/kernels/kCShake.cu
platforms/cuda/src/kernels/kCShake.cu
+40
-24
platforms/reference/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
...rence/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
+2
-2
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
47a6fb1e
...
@@ -402,6 +402,8 @@ struct cudaGmxSimulation {
...
@@ -402,6 +402,8 @@ struct cudaGmxSimulation {
float
*
pRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
float
*
pRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
unsigned
int
*
pRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
unsigned
int
*
pRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
unsigned
int
*
pRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
unsigned
int
*
pRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
unsigned
int
*
pConstraintMatrixColumn
;
// The column of each element in the constraint matrix.
float
*
pConstraintMatrixValue
;
// The value of each element in the constraint matrix.
// Mutable stuff
// Mutable stuff
float4
*
pPosq
;
// Pointer to atom positions and charges
float4
*
pPosq
;
// Pointer to atom positions and charges
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
47a6fb1e
...
@@ -49,6 +49,7 @@ using namespace std;
...
@@ -49,6 +49,7 @@ using namespace std;
#include "hilbert.h"
#include "hilbert.h"
#include "openmm/OpenMMException.h"
#include "openmm/OpenMMException.h"
#include "jama_svd.h"
#include "jama_svd.h"
#include "quern.h"
using
OpenMM
::
OpenMMException
;
using
OpenMM
::
OpenMMException
;
using
TNT
::
Array2D
;
using
TNT
::
Array2D
;
...
@@ -562,22 +563,22 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
...
@@ -562,22 +563,22 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
// Reorder the constraints so those in a cluster are sequential.
// Reorder the constraints so those in a cluster are sequential.
vector
<
int
>
constraintOrder
(
constraintIndices
.
size
());
//
vector<int> constraintOrder(constraintIndices.size());
vector
<
int
>
clusterStartIndex
(
rigidClusters
.
size
());
//
vector<int> clusterStartIndex(rigidClusters.size());
set
<
int
>
inCluster
;
//
set<int> inCluster;
int
nextIndex
=
0
;
//
int nextIndex = 0;
for
(
int
i
=
0
;
i
<
(
int
)
rigidClusters
.
size
();
++
i
)
{
//
for (int i = 0; i < (int) rigidClusters.size(); ++i) {
clusterStartIndex
[
i
]
=
nextIndex
;
//
clusterStartIndex[i] = nextIndex;
for
(
int
j
=
0
;
j
<
(
int
)
rigidClusters
[
i
].
size
();
++
j
)
{
//
for (int j = 0; j < (int) rigidClusters[i].size(); ++j) {
int
constraint
=
rigidClusters
[
i
][
j
];
//
int constraint = rigidClusters[i][j];
constraintOrder
[
nextIndex
++
]
=
constraint
;
//
constraintOrder[nextIndex++] = constraint;
inCluster
.
insert
(
constraint
);
//
inCluster.insert(constraint);
}
//
}
}
//
}
for
(
int
i
=
0
;
i
<
(
int
)
constraintIndices
.
size
();
++
i
)
//
for (int i = 0; i < (int) constraintIndices.size(); ++i)
if
(
inCluster
.
find
(
constraintIndices
[
i
])
==
inCluster
.
end
())
//
if (inCluster.find(constraintIndices[i]) == inCluster.end())
constraintOrder
[
nextIndex
++
]
=
constraintIndices
[
i
];
//
constraintOrder[nextIndex++] = constraintIndices[i];
constraintIndices
=
constraintOrder
;
//
constraintIndices = constraintOrder;
// Build the CUDA streams.
// Build the CUDA streams.
...
@@ -931,6 +932,132 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -931,6 +932,132 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
for
(
unsigned
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Compute the constraint coupling matrix
vector
<
vector
<
int
>
>
atomAngles
(
gpu
->
natoms
);
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
bond_angles
;
i
++
)
atomAngles
[(
*
gpu
->
psBondAngleID1
)[
i
].
y
].
push_back
(
i
);
vector
<
vector
<
pair
<
int
,
double
>
>
>
matrix
(
numLincs
);
if
(
numLincs
>
0
)
{
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
{
for
(
int
k
=
0
;
k
<
numLincs
;
k
++
)
{
if
(
j
==
k
)
{
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
j
,
1.0
));
continue
;
}
double
scale
;
int
atomj0
=
atom1
[
j
];
int
atomj1
=
atom2
[
j
];
int
atomk0
=
atom1
[
k
];
int
atomk1
=
atom2
[
k
];
int
atoma
,
atomb
,
atomc
;
if
(
atomj0
==
atomk0
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk1
;
scale
=
invMass1
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
if
(
atomj1
==
atomk1
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk0
;
scale
=
invMass2
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
if
(
atomj0
==
atomk1
)
{
atoma
=
atomj1
;
atomb
=
atomj0
;
atomc
=
atomk0
;
scale
=
invMass1
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
if
(
atomj1
==
atomk0
)
{
atoma
=
atomj0
;
atomb
=
atomj1
;
atomc
=
atomk1
;
scale
=
invMass2
[
j
]
/
(
invMass1
[
j
]
+
invMass2
[
j
]);
}
else
continue
;
// These constraints are not connected.
// Look for a third constraint forming a triangle with these two.
bool
foundConstraint
=
false
;
for
(
int
other
=
0
;
other
<
numLincs
;
other
++
)
{
if
((
atom1
[
other
]
==
atoma
&&
atom2
[
other
]
==
atomc
)
||
(
atom1
[
other
]
==
atomc
&&
atom2
[
other
]
==
atoma
))
{
double
d1
=
distance
[
j
];
double
d2
=
distance
[
k
];
double
d3
=
distance
[
other
];
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
(
d1
*
d1
+
d2
*
d2
-
d3
*
d3
)
/
(
2.0
*
d1
*
d2
)));
foundConstraint
=
true
;
break
;
}
}
if
(
!
foundConstraint
)
{
// We didn't find one, so look for an angle force field term.
const
vector
<
int
>&
angleCandidates
=
atomAngles
[
atomb
];
for
(
vector
<
int
>::
const_iterator
iter
=
angleCandidates
.
begin
();
iter
!=
angleCandidates
.
end
();
iter
++
)
{
int4
atoms
=
(
*
gpu
->
psBondAngleID1
)[
*
iter
];
if
((
atoms
.
x
==
atoma
&&
atoms
.
z
==
atomc
)
||
(
atoms
.
z
==
atoma
&&
atoms
.
x
==
atomc
))
{
double
angle
=
(
*
gpu
->
psBondAngleParameter
)[
*
iter
].
x
;
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
k
,
scale
*
cos
(
angle
*
PI
/
180.0
)));
break
;
}
}
}
}
}
// Invert it using QR.
vector
<
int
>
matrixRowStart
;
vector
<
int
>
matrixColIndex
;
vector
<
double
>
matrixValue
;
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
matrixRowStart
.
push_back
(
matrixValue
.
size
());
for
(
int
j
=
0
;
j
<
(
int
)
matrix
[
i
].
size
();
j
++
)
{
pair
<
int
,
double
>
element
=
matrix
[
i
][
j
];
matrixColIndex
.
push_back
(
element
.
first
);
matrixValue
.
push_back
(
element
.
second
);
}
}
matrixRowStart
.
push_back
(
matrixValue
.
size
());
int
*
qRowStart
,
*
qColIndex
,
*
rRowStart
,
*
rColIndex
;
double
*
qValue
,
*
rValue
;
int
result
=
QUERN_compute_qr
(
numLincs
,
numLincs
,
&
matrixRowStart
[
0
],
&
matrixColIndex
[
0
],
&
matrixValue
[
0
],
NULL
,
&
qRowStart
,
&
qColIndex
,
&
qValue
,
&
rRowStart
,
&
rColIndex
,
&
rValue
);
vector
<
double
>
rhs
(
numLincs
);
matrix
.
clear
();
matrix
.
resize
(
numLincs
);
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
// Extract column i of the inverse matrix.
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
rhs
[
j
]
=
(
i
==
j
?
1.0
:
0.0
);
result
=
QUERN_multiply_with_q_transpose
(
numLincs
,
qRowStart
,
qColIndex
,
qValue
,
&
rhs
[
0
]);
result
=
QUERN_solve_with_r
(
numLincs
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numLincs
;
j
++
)
{
double
value
=
rhs
[
j
]
*
distance
[
i
]
/
distance
[
j
];
if
(
abs
(
value
)
>
0.02
)
matrix
[
j
].
push_back
(
pair
<
int
,
double
>
(
i
,
value
));
}
}
QUERN_free_result
(
qRowStart
,
qColIndex
,
qValue
);
QUERN_free_result
(
rRowStart
,
rColIndex
,
rValue
);
}
int
maxRowElements
=
0
;
for
(
unsigned
i
=
0
;
i
<
matrix
.
size
();
i
++
)
maxRowElements
=
max
(
maxRowElements
,
(
int
)
matrix
[
i
].
size
());
maxRowElements
++
;
// Fill in the CUDA streams.
// Fill in the CUDA streams.
CUDAStream
<
int2
>*
psLincsAtoms
=
new
CUDAStream
<
int2
>
(
numLincs
,
1
,
"LincsAtoms"
);
CUDAStream
<
int2
>*
psLincsAtoms
=
new
CUDAStream
<
int2
>
(
numLincs
,
1
,
"LincsAtoms"
);
...
@@ -975,6 +1102,12 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -975,6 +1102,12 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
CUDAStream
<
float
>*
psShakeReducedMass
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
CUDAStream
<
float
>*
psShakeReducedMass
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
gpu
->
psShakeReducedMass
=
psShakeReducedMass
;
gpu
->
psShakeReducedMass
=
psShakeReducedMass
;
gpu
->
sim
.
pShakeReducedMass
=
psShakeReducedMass
->
_pDevData
;
gpu
->
sim
.
pShakeReducedMass
=
psShakeReducedMass
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psConstraintMatrixColumn
=
new
CUDAStream
<
unsigned
int
>
(
numLincs
*
maxRowElements
,
1
,
"ConstraintMatrixColumn"
);
gpu
->
psConstraintMatrixColumn
=
psConstraintMatrixColumn
;
gpu
->
sim
.
pConstraintMatrixColumn
=
psConstraintMatrixColumn
->
_pDevData
;
CUDAStream
<
float
>*
psConstraintMatrixValue
=
new
CUDAStream
<
float
>
(
numLincs
*
maxRowElements
,
1
,
"ConstraintMatrixValue"
);
gpu
->
psConstraintMatrixValue
=
psConstraintMatrixValue
;
gpu
->
sim
.
pConstraintMatrixValue
=
psConstraintMatrixValue
->
_pDevData
;
gpu
->
sim
.
lincsConstraints
=
numLincs
;
gpu
->
sim
.
lincsConstraints
=
numLincs
;
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
int
c
=
lincsConstraints
[
i
];
int
c
=
lincsConstraints
[
i
];
...
@@ -986,6 +1119,11 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -986,6 +1119,11 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(
*
psLincsNumConnections
)[
i
]
=
linkedConstraints
[
i
].
size
();
(
*
psLincsNumConnections
)[
i
]
=
linkedConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
].
size
();
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
].
size
();
j
++
)
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
][
j
];
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
][
j
];
for
(
unsigned
int
j
=
0
;
j
<
matrix
[
i
].
size
();
j
++
)
{
(
*
psConstraintMatrixColumn
)[
i
+
j
*
numLincs
]
=
matrix
[
i
][
j
].
first
;
(
*
psConstraintMatrixValue
)[
i
+
j
*
numLincs
]
=
matrix
[
i
][
j
].
second
;
}
(
*
psConstraintMatrixColumn
)[
i
+
matrix
[
i
].
size
()
*
numLincs
]
=
numLincs
;
}
}
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
(
*
psSyncCounter
)[
i
]
=
-
1
;
(
*
psSyncCounter
)[
i
]
=
-
1
;
...
@@ -1005,6 +1143,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -1005,6 +1143,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
psLincsAtomConstraints
->
Upload
();
psLincsAtomConstraints
->
Upload
();
psLincsNumAtomConstraints
->
Upload
();
psLincsNumAtomConstraints
->
Upload
();
psSyncCounter
->
Upload
();
psSyncCounter
->
Upload
();
psConstraintMatrixColumn
->
Upload
();
psConstraintMatrixValue
->
Upload
();
gpu
->
sim
.
lincsTerms
=
lincsTerms
;
gpu
->
sim
.
lincsTerms
=
lincsTerms
;
gpu
->
sim
.
lincs_threads_per_block
=
(
gpu
->
sim
.
lincsConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
gpu
->
sim
.
lincs_threads_per_block
=
(
gpu
->
sim
.
lincsConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
lincs_threads_per_block
>
gpu
->
sim
.
threads_per_block
)
if
(
gpu
->
sim
.
lincs_threads_per_block
>
gpu
->
sim
.
threads_per_block
)
...
@@ -1391,6 +1531,8 @@ void* gpuInit(int numAtoms)
...
@@ -1391,6 +1531,8 @@ void* gpuInit(int numAtoms)
gpu
->
psRigidClusterConstraintIndex
=
NULL
;
gpu
->
psRigidClusterConstraintIndex
=
NULL
;
gpu
->
psRigidClusterMatrix
=
NULL
;
gpu
->
psRigidClusterMatrix
=
NULL
;
gpu
->
psRigidClusterMatrixIndex
=
NULL
;
gpu
->
psRigidClusterMatrixIndex
=
NULL
;
gpu
->
psConstraintMatrixColumn
=
NULL
;
gpu
->
psConstraintMatrixValue
=
NULL
;
// Initialize output buffer before reading parameters
// Initialize output buffer before reading parameters
gpu
->
pOutputBufferCounter
=
new
unsigned
int
[
gpu
->
sim
.
paddedNumberOfAtoms
];
gpu
->
pOutputBufferCounter
=
new
unsigned
int
[
gpu
->
sim
.
paddedNumberOfAtoms
];
...
@@ -1549,6 +1691,8 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1549,6 +1691,8 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psRigidClusterConstraintIndex
;
delete
gpu
->
psRigidClusterConstraintIndex
;
delete
gpu
->
psRigidClusterMatrix
;
delete
gpu
->
psRigidClusterMatrix
;
delete
gpu
->
psRigidClusterMatrixIndex
;
delete
gpu
->
psRigidClusterMatrixIndex
;
delete
gpu
->
psConstraintMatrixColumn
;
delete
gpu
->
psConstraintMatrixValue
;
if
(
gpu
->
cudpp
!=
0
)
if
(
gpu
->
cudpp
!=
0
)
cudppDestroyPlan
(
gpu
->
cudpp
);
cudppDestroyPlan
(
gpu
->
cudpp
);
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
47a6fb1e
...
@@ -144,6 +144,8 @@ struct _gpuContext {
...
@@ -144,6 +144,8 @@ struct _gpuContext {
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
CUDAStream
<
unsigned
int
>*
psConstraintMatrixColumn
;
// The column of each element in the constraint matrix.
CUDAStream
<
float
>*
psConstraintMatrixValue
;
// The value of each element in the constraint matrix.
};
};
typedef
struct
_gpuContext
*
gpuContext
;
typedef
struct
_gpuContext
*
gpuContext
;
...
...
platforms/cuda/src/kernels/kCShake.cu
View file @
47a6fb1e
...
@@ -127,7 +127,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -127,7 +127,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
float
rrpr
=
rp_ij
.
x
*
dir
.
x
+
rp_ij
.
y
*
dir
.
y
+
rp_ij
.
z
*
dir
.
z
;
float
rrpr
=
rp_ij
.
x
*
dir
.
x
+
rp_ij
.
y
*
dir
.
y
+
rp_ij
.
z
*
dir
.
z
;
float
d_ij2
=
dir
.
x
*
dir
.
x
+
dir
.
y
*
dir
.
y
+
dir
.
z
*
dir
.
z
;
float
d_ij2
=
dir
.
x
*
dir
.
x
+
dir
.
y
*
dir
.
y
+
dir
.
z
*
dir
.
z
;
float
reducedMass
=
cSim
.
pShakeReducedMass
[
pos
];
float
reducedMass
=
cSim
.
pShakeReducedMass
[
pos
];
cSim
.
pLincs
Solution
[
pos
]
=
(
rrpr
>
d_ij2
*
1e-6
f
?
reducedMass
*
diff
/
rrpr
:
0.0
f
);
cSim
.
pLincs
Rhs1
[
pos
]
=
(
rrpr
>
d_ij2
*
1e-6
f
?
reducedMass
*
diff
/
rrpr
:
0.0
f
);
if
(
requiredIterations
==
iteration
&&
(
rp2
<
lowerTol
*
dist2
||
rp2
>
upperTol
*
dist2
))
if
(
requiredIterations
==
iteration
&&
(
rp2
<
lowerTol
*
dist2
||
rp2
>
upperTol
*
dist2
))
requiredIterations
=
iteration
+
1
;
requiredIterations
=
iteration
+
1
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
...
@@ -138,33 +138,49 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -138,33 +138,49 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
// Multiply by the inverse constraint matrix for each rigid cluster.
// Multiply by the inverse constraint matrix for each rigid cluster.
if
(
cSim
.
rigidClusters
>
0
)
// if (cSim.rigidClusters > 0)
{
// {
// pos = threadIdx.x + blockIdx.x * blockDim.x;
// unsigned int block = pos/cSim.clusterShakeBlockSize;
// unsigned int indexInBlock = pos-block*cSim.clusterShakeBlockSize;
// while (block < cSim.rigidClusters)
// {
// unsigned int firstConstraint = cSim.pRigidClusterConstraintIndex[block];
// unsigned int blockSize = cSim.pRigidClusterConstraintIndex[block+1]-firstConstraint;
// if (indexInBlock < blockSize)
// {
// // Load the constraint forces and matrix.
//
// temp[threadIdx.x] = cSim.pLincsSolution[firstConstraint+indexInBlock];
// unsigned int firstMatrixIndex = cSim.pRigidClusterMatrixIndex[block];
//
// // Multiply by the matrix.
//
// float sum = 0.0f;
// for (unsigned int i = 0; i < blockSize; i++)
// sum += temp[threadIdx.x-indexInBlock+i]*cSim.pRigidClusterMatrix[firstMatrixIndex+i*blockSize+indexInBlock];
// cSim.pLincsSolution[firstConstraint+indexInBlock] = sum;
// }
// block += (blockDim.x*gridDim.x)/cSim.clusterShakeBlockSize;
// }
// kSyncAllThreads_kernel(&cSim.pSyncCounter[gridDim.x], iteration);
// }
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
unsigned
int
block
=
pos
/
cSim
.
clusterShakeBlockSize
;
while
(
pos
<
cSim
.
lincsConstraints
)
unsigned
int
indexInBlock
=
pos
-
block
*
cSim
.
clusterShakeBlockSize
;
while
(
block
<
cSim
.
rigidClusters
)
{
unsigned
int
firstConstraint
=
cSim
.
pRigidClusterConstraintIndex
[
block
];
unsigned
int
blockSize
=
cSim
.
pRigidClusterConstraintIndex
[
block
+
1
]
-
firstConstraint
;
if
(
indexInBlock
<
blockSize
)
{
{
// Load the constraint forces and matrix.
temp
[
threadIdx
.
x
]
=
cSim
.
pLincsSolution
[
firstConstraint
+
indexInBlock
];
unsigned
int
firstMatrixIndex
=
cSim
.
pRigidClusterMatrixIndex
[
block
];
// Multiply by the matrix.
float
sum
=
0.0
f
;
float
sum
=
0.0
f
;
for
(
unsigned
int
i
=
0
;
i
<
blockSize
;
i
++
)
for
(
unsigned
int
i
=
0
;
;
i
++
)
sum
+=
temp
[
threadIdx
.
x
-
indexInBlock
+
i
]
*
cSim
.
pRigidClusterMatrix
[
firstMatrixIndex
+
i
*
blockSize
+
indexInBlock
];
{
cSim
.
pLincsSolution
[
firstConstraint
+
indexInBlock
]
=
sum
;
int
index
=
pos
+
i
*
cSim
.
lincsConstraints
;
unsigned
int
column
=
cSim
.
pConstraintMatrixColumn
[
pos
+
i
*
cSim
.
lincsConstraints
];
if
(
column
>=
cSim
.
lincsConstraints
)
break
;
sum
+=
cSim
.
pLincsRhs1
[
column
]
*
cSim
.
pConstraintMatrixValue
[
index
];
}
}
block
+=
(
blockDim
.
x
*
gridDim
.
x
)
/
cSim
.
clusterShakeBlockSize
;
cSim
.
pLincsSolution
[
pos
]
=
sum
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
gridDim
.
x
],
iteration
);
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
gridDim
.
x
],
iteration
);
}
// Update the position of each atom.
// Update the position of each atom.
...
...
platforms/reference/src/SimTKReference/ReferenceRigidShakeAlgorithm.cpp
View file @
47a6fb1e
...
@@ -201,7 +201,7 @@ ReferenceRigidShakeAlgorithm::ReferenceRigidShakeAlgorithm( int numberOfAtoms,
...
@@ -201,7 +201,7 @@ ReferenceRigidShakeAlgorithm::ReferenceRigidShakeAlgorithm( int numberOfAtoms,
QUERN_solve_with_r
(
numberOfConstraints
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
QUERN_solve_with_r
(
numberOfConstraints
,
rRowStart
,
rColIndex
,
rValue
,
&
rhs
[
0
],
&
rhs
[
0
]);
for
(
int
j
=
0
;
j
<
numberOfConstraints
;
j
++
)
{
for
(
int
j
=
0
;
j
<
numberOfConstraints
;
j
++
)
{
double
value
=
rhs
[
j
]
*
_distance
[
i
]
/
_distance
[
j
];
double
value
=
rhs
[
j
]
*
_distance
[
i
]
/
_distance
[
j
];
if
(
abs
(
value
)
>
0.0
1
)
if
(
abs
(
value
)
>
0.0
2
)
_matrix
[
j
].
push_back
(
pair
<
int
,
RealOpenMM
>
(
i
,
(
RealOpenMM
)
value
));
_matrix
[
j
].
push_back
(
pair
<
int
,
RealOpenMM
>
(
i
,
(
RealOpenMM
)
value
));
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment