Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
043c7b6c
Commit
043c7b6c
authored
May 15, 2009
by
Peter Eastman
Browse files
Optimizations to CUDA C-SHAKE implementation
parent
fe1e6ffa
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
83 additions
and
39 deletions
+83
-39
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+0
-1
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+34
-17
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+0
-1
platforms/cuda/src/kernels/kCShake.cu
platforms/cuda/src/kernels/kCShake.cu
+46
-19
platforms/cuda/src/kernels/kLincs.cu
platforms/cuda/src/kernels/kLincs.cu
+3
-1
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
043c7b6c
...
@@ -389,7 +389,6 @@ struct cudaGmxSimulation {
...
@@ -389,7 +389,6 @@ struct cudaGmxSimulation {
short
*
pSyncCounter
;
// Used for global thread synchronization
short
*
pSyncCounter
;
// Used for global thread synchronization
unsigned
int
*
pRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
unsigned
int
*
pRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
float
*
pShakeReducedMass
;
// The reduced mass for each SHAKE constraint
float
*
pShakeReducedMass
;
// The reduced mass for each SHAKE constraint
int
*
pRigidClusterConstraints
;
// The constraints in each rigid cluster
float
*
pRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
float
*
pRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
unsigned
int
*
pRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
unsigned
int
*
pRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
unsigned
int
*
pRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
unsigned
int
*
pRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
043c7b6c
...
@@ -464,7 +464,7 @@ static void markShakeClusterInvalid(ShakeCluster& cluster, map<int, ShakeCluster
...
@@ -464,7 +464,7 @@ static void markShakeClusterInvalid(ShakeCluster& cluster, map<int, ShakeCluster
}
}
}
}
static
void
findRigidClusters
(
gpuContext
gpu
,
const
vector
<
int
>&
firstAtom
,
const
vector
<
int
>&
secondAtom
,
const
vector
<
int
>&
constraintIndices
)
static
void
findRigidClusters
(
gpuContext
gpu
,
const
vector
<
int
>&
firstAtom
,
const
vector
<
int
>&
secondAtom
,
vector
<
int
>&
constraintIndices
)
{
{
vector
<
map
<
int
,
int
>
>
atomConstraints
(
firstAtom
.
size
());
vector
<
map
<
int
,
int
>
>
atomConstraints
(
firstAtom
.
size
());
for
(
int
i
=
0
;
i
<
(
int
)
constraintIndices
.
size
();
i
++
)
{
for
(
int
i
=
0
;
i
<
(
int
)
constraintIndices
.
size
();
i
++
)
{
...
@@ -537,11 +537,27 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
...
@@ -537,11 +537,27 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
}
}
}
}
// Reorder the constraints so those in a cluster are sequential.
vector
<
int
>
constraintOrder
(
constraintIndices
.
size
());
vector
<
int
>
clusterStartIndex
(
rigidClusters
.
size
());
set
<
int
>
inCluster
;
int
nextIndex
=
0
;
for
(
int
i
=
0
;
i
<
(
int
)
rigidClusters
.
size
();
++
i
)
{
clusterStartIndex
[
i
]
=
nextIndex
;
for
(
int
j
=
0
;
j
<
(
int
)
rigidClusters
[
i
].
size
();
++
j
)
{
int
constraint
=
rigidClusters
[
i
][
j
];
constraintOrder
[
nextIndex
++
]
=
constraint
;
inCluster
.
insert
(
constraint
);
}
}
for
(
int
i
=
0
;
i
<
(
int
)
constraintIndices
.
size
();
++
i
)
if
(
inCluster
.
find
(
constraintIndices
[
i
])
==
inCluster
.
end
())
constraintOrder
[
nextIndex
++
]
=
constraintIndices
[
i
];
constraintIndices
=
constraintOrder
;
// Build the CUDA streams.
// Build the CUDA streams.
CUDAStream
<
int
>*
psRigidClusterConstraints
=
new
CUDAStream
<
int
>
(
totalConstraints
,
1
,
"RigidClusterConstraints"
);
gpu
->
psRigidClusterConstraints
=
psRigidClusterConstraints
;
gpu
->
sim
.
pRigidClusterConstraints
=
psRigidClusterConstraints
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
=
new
CUDAStream
<
unsigned
int
>
((
int
)
rigidClusters
.
size
()
+
1
,
1
,
"RigidClusterConstraintIndex"
);
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
=
new
CUDAStream
<
unsigned
int
>
((
int
)
rigidClusters
.
size
()
+
1
,
1
,
"RigidClusterConstraintIndex"
);
gpu
->
psRigidClusterConstraintIndex
=
psRigidClusterConstraintIndex
;
gpu
->
psRigidClusterConstraintIndex
=
psRigidClusterConstraintIndex
;
gpu
->
sim
.
pRigidClusterConstraintIndex
=
psRigidClusterConstraintIndex
->
_pDevData
;
gpu
->
sim
.
pRigidClusterConstraintIndex
=
psRigidClusterConstraintIndex
->
_pDevData
;
...
@@ -556,8 +572,7 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
...
@@ -556,8 +572,7 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
for
(
unsigned
int
i
=
0
;
i
<
rigidClusters
.
size
();
i
++
)
{
for
(
unsigned
int
i
=
0
;
i
<
rigidClusters
.
size
();
i
++
)
{
vector
<
int
>&
cluster
=
rigidClusters
[
i
];
vector
<
int
>&
cluster
=
rigidClusters
[
i
];
(
*
psRigidClusterConstraintIndex
)[
i
]
=
constraintIndex
;
(
*
psRigidClusterConstraintIndex
)[
i
]
=
constraintIndex
;
for
(
unsigned
int
j
=
0
;
j
<
cluster
.
size
();
j
++
)
constraintIndex
+=
cluster
.
size
();
(
*
psRigidClusterConstraints
)[
constraintIndex
++
]
=
cluster
[
j
];
if
(
cluster
.
size
()
>
maxClusterSize
)
if
(
cluster
.
size
()
>
maxClusterSize
)
maxClusterSize
=
cluster
.
size
();
maxClusterSize
=
cluster
.
size
();
}
}
...
@@ -567,9 +582,6 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
...
@@ -567,9 +582,6 @@ static void findRigidClusters(gpuContext gpu, const vector<int>& firstAtom, cons
gpu
->
sim
.
clusterShakeBlockSize
=
1
;
gpu
->
sim
.
clusterShakeBlockSize
=
1
;
while
(
gpu
->
sim
.
clusterShakeBlockSize
<
maxClusterSize
)
while
(
gpu
->
sim
.
clusterShakeBlockSize
<
maxClusterSize
)
gpu
->
sim
.
clusterShakeBlockSize
*=
2
;
gpu
->
sim
.
clusterShakeBlockSize
*=
2
;
if
(
gpu
->
sim
.
lincs_threads_per_block
%
gpu
->
sim
.
clusterShakeBlockSize
!=
0
)
gpu
->
sim
.
lincs_threads_per_block
+=
gpu
->
sim
.
clusterShakeBlockSize
-
gpu
->
sim
.
lincs_threads_per_block
%
gpu
->
sim
.
clusterShakeBlockSize
;
psRigidClusterConstraints
->
Upload
();
psRigidClusterConstraintIndex
->
Upload
();
psRigidClusterConstraintIndex
->
Upload
();
gpu
->
hasInitializedRigidClusters
=
false
;
gpu
->
hasInitializedRigidClusters
=
false
;
}
}
...
@@ -776,6 +788,13 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -776,6 +788,13 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
for
(
unsigned
i
=
0
;
i
<
atom1
.
size
();
i
++
)
for
(
unsigned
i
=
0
;
i
<
atom1
.
size
();
i
++
)
if
(
!
isShakeAtom
[
atom1
[
i
]])
if
(
!
isShakeAtom
[
atom1
[
i
]])
lincsConstraints
.
push_back
(
i
);
lincsConstraints
.
push_back
(
i
);
// Identify rigid clusters of atoms.
findRigidClusters
(
gpu
,
atom1
,
atom2
,
lincsConstraints
);
// Record the connections between constraints.
int
numLincs
=
(
int
)
lincsConstraints
.
size
();
int
numLincs
=
(
int
)
lincsConstraints
.
size
();
vector
<
vector
<
int
>
>
atomConstraints
(
gpu
->
natoms
);
vector
<
vector
<
int
>
>
atomConstraints
(
gpu
->
natoms
);
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
...
@@ -859,8 +878,10 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -859,8 +878,10 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(
*
psSyncCounter
)[
i
]
=
-
1
;
(
*
psSyncCounter
)[
i
]
=
-
1
;
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
(
*
psLincsNumAtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
();
(
*
psLincsNumAtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
{
(
*
psLincsAtomConstraints
)[
i
+
j
*
gpu
->
natoms
]
=
atomConstraints
[
i
][
j
];
bool
forward
=
(
atom1
[
lincsConstraints
[
atomConstraints
[
i
][
j
]]]
==
i
);
(
*
psLincsAtomConstraints
)[
i
+
j
*
gpu
->
natoms
]
=
(
forward
?
atomConstraints
[
i
][
j
]
+
1
:
-
atomConstraints
[
i
][
j
]
-
1
);
}
}
}
psLincsAtoms
->
Upload
();
psLincsAtoms
->
Upload
();
psLincsDistance
->
Upload
();
psLincsDistance
->
Upload
();
...
@@ -877,10 +898,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -877,10 +898,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
threads_per_block
;
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
threads_per_block
;
if
(
gpu
->
sim
.
lincs_threads_per_block
<
gpu
->
sim
.
blocks
)
if
(
gpu
->
sim
.
lincs_threads_per_block
<
gpu
->
sim
.
blocks
)
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
blocks
;
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
lincs_threads_per_block
%
gpu
->
sim
.
clusterShakeBlockSize
!=
0
)
// Identify rigid clusters of atoms.
gpu
->
sim
.
lincs_threads_per_block
+=
gpu
->
sim
.
clusterShakeBlockSize
-
gpu
->
sim
.
lincs_threads_per_block
%
gpu
->
sim
.
clusterShakeBlockSize
;
findRigidClusters
(
gpu
,
atom1
,
atom2
,
lincsConstraints
);
// count number of atoms w/o constraint
// count number of atoms w/o constraint
...
@@ -1250,7 +1269,6 @@ void* gpuInit(int numAtoms)
...
@@ -1250,7 +1269,6 @@ void* gpuInit(int numAtoms)
gpu
->
psSyncCounter
=
NULL
;
gpu
->
psSyncCounter
=
NULL
;
gpu
->
psRequiredIterations
=
NULL
;
gpu
->
psRequiredIterations
=
NULL
;
gpu
->
psShakeReducedMass
=
NULL
;
gpu
->
psShakeReducedMass
=
NULL
;
gpu
->
psRigidClusterConstraints
=
NULL
;
gpu
->
psRigidClusterConstraintIndex
=
NULL
;
gpu
->
psRigidClusterConstraintIndex
=
NULL
;
gpu
->
psRigidClusterMatrix
=
NULL
;
gpu
->
psRigidClusterMatrix
=
NULL
;
gpu
->
psRigidClusterMatrixIndex
=
NULL
;
gpu
->
psRigidClusterMatrixIndex
=
NULL
;
...
@@ -1406,7 +1424,6 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1406,7 +1424,6 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psSyncCounter
;
delete
gpu
->
psSyncCounter
;
delete
gpu
->
psRequiredIterations
;
delete
gpu
->
psRequiredIterations
;
delete
gpu
->
psShakeReducedMass
;
delete
gpu
->
psShakeReducedMass
;
delete
gpu
->
psRigidClusterConstraints
;
delete
gpu
->
psRigidClusterConstraintIndex
;
delete
gpu
->
psRigidClusterConstraintIndex
;
delete
gpu
->
psRigidClusterMatrix
;
delete
gpu
->
psRigidClusterMatrix
;
delete
gpu
->
psRigidClusterMatrixIndex
;
delete
gpu
->
psRigidClusterMatrixIndex
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
043c7b6c
...
@@ -139,7 +139,6 @@ struct _gpuContext {
...
@@ -139,7 +139,6 @@ struct _gpuContext {
CUDAStream
<
short
>*
psSyncCounter
;
// Used for global thread synchronization
CUDAStream
<
short
>*
psSyncCounter
;
// Used for global thread synchronization
CUDAStream
<
unsigned
int
>*
psRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
CUDAStream
<
unsigned
int
>*
psRequiredIterations
;
// Used by SHAKE to communicate whether iteration has converged
CUDAStream
<
float
>*
psShakeReducedMass
;
// The reduced mass for each SHAKE constraint
CUDAStream
<
float
>*
psShakeReducedMass
;
// The reduced mass for each SHAKE constraint
CUDAStream
<
int
>*
psRigidClusterConstraints
;
// The constraints in each rigid cluster
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
float
>*
psRigidClusterMatrix
;
// The inverse constraint matrix for each rigid cluster
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterConstraintIndex
;
// The index of each cluster in the stream containing cluster constraints.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
CUDAStream
<
unsigned
int
>*
psRigidClusterMatrixIndex
;
// The index of each cluster in the stream containing cluster matrices.
...
...
platforms/cuda/src/kernels/kCShake.cu
View file @
043c7b6c
...
@@ -148,14 +148,13 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -148,14 +148,13 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
unsigned
int
indexInBlock
=
pos
-
block
*
cSim
.
clusterShakeBlockSize
;
unsigned
int
indexInBlock
=
pos
-
block
*
cSim
.
clusterShakeBlockSize
;
while
(
block
<
cSim
.
rigidClusters
)
while
(
block
<
cSim
.
rigidClusters
)
{
{
unsigned
int
first
Index
=
cSim
.
pRigidClusterConstraintIndex
[
block
];
unsigned
int
first
Constraint
=
cSim
.
pRigidClusterConstraintIndex
[
block
];
unsigned
int
blockSize
=
cSim
.
pRigidClusterConstraintIndex
[
block
+
1
]
-
first
Index
;
unsigned
int
blockSize
=
cSim
.
pRigidClusterConstraintIndex
[
block
+
1
]
-
first
Constraint
;
if
(
indexInBlock
<
blockSize
)
if
(
indexInBlock
<
blockSize
)
{
{
// Load the constraint forces and matrix.
// Load the constraint forces and matrix.
unsigned
int
constraint
=
cSim
.
pRigidClusterConstraints
[
firstIndex
+
indexInBlock
];
temp
[
threadIdx
.
x
]
=
cSim
.
pLincsSolution
[
firstConstraint
+
indexInBlock
];
temp
[
threadIdx
.
x
]
=
cSim
.
pLincsSolution
[
constraint
];
unsigned
int
firstMatrixIndex
=
cSim
.
pRigidClusterMatrixIndex
[
block
];
unsigned
int
firstMatrixIndex
=
cSim
.
pRigidClusterMatrixIndex
[
block
];
// Multiply by the matrix.
// Multiply by the matrix.
...
@@ -163,7 +162,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -163,7 +162,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
float
sum
=
0.0
f
;
float
sum
=
0.0
f
;
for
(
unsigned
int
i
=
0
;
i
<
blockSize
;
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
blockSize
;
i
++
)
sum
+=
temp
[
threadIdx
.
x
-
indexInBlock
+
i
]
*
cSim
.
pRigidClusterMatrix
[
firstMatrixIndex
+
i
*
blockSize
+
indexInBlock
];
sum
+=
temp
[
threadIdx
.
x
-
indexInBlock
+
i
]
*
cSim
.
pRigidClusterMatrix
[
firstMatrixIndex
+
i
*
blockSize
+
indexInBlock
];
cSim
.
pLincsSolution
[
c
onstraint
]
=
sum
;
cSim
.
pLincsSolution
[
firstC
onstraint
+
indexInBlock
]
=
sum
;
}
}
block
+=
(
blockDim
.
x
*
gridDim
.
x
)
/
cSim
.
clusterShakeBlockSize
;
block
+=
(
blockDim
.
x
*
gridDim
.
x
)
/
cSim
.
clusterShakeBlockSize
;
}
}
...
@@ -173,6 +172,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -173,6 +172,7 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
// Update the position of each atom.
// Update the position of each atom.
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
float
damping
=
(
iteration
<
2
?
0.5
f
:
1.0
f
);
while
(
pos
<
cSim
.
atoms
)
while
(
pos
<
cSim
.
atoms
)
{
{
float4
atomPos
=
atomPositions
[
pos
];
float4
atomPos
=
atomPositions
[
pos
];
...
@@ -182,8 +182,10 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -182,8 +182,10 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
{
{
int
index
=
pos
+
i
*
cSim
.
atoms
;
int
index
=
pos
+
i
*
cSim
.
atoms
;
int
constraint
=
cSim
.
pLincsAtomConstraints
[
index
];
int
constraint
=
cSim
.
pLincsAtomConstraints
[
index
];
float
constraintForce
=
invMass
*
cSim
.
pLincsSolution
[
constraint
];
bool
forward
=
(
constraint
>
0
);
constraintForce
=
(
cSim
.
pLincsAtoms
[
constraint
].
x
==
pos
?
constraintForce
:
-
constraintForce
);
constraint
=
(
forward
?
constraint
-
1
:
-
constraint
-
1
);
float
constraintForce
=
damping
*
invMass
*
cSim
.
pLincsSolution
[
constraint
];
constraintForce
=
(
forward
?
constraintForce
:
-
constraintForce
);
float4
dir
=
cSim
.
pLincsDistance
[
constraint
];
float4
dir
=
cSim
.
pLincsDistance
[
constraint
];
atomPos
.
x
+=
constraintForce
*
dir
.
x
;
atomPos
.
x
+=
constraintForce
*
dir
.
x
;
atomPos
.
y
+=
constraintForce
*
dir
.
y
;
atomPos
.
y
+=
constraintForce
*
dir
.
y
;
...
@@ -202,12 +204,14 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
...
@@ -202,12 +204,14 @@ __global__ void kApplyCShake_kernel(float4* atomPositions, bool addOldPosition)
cSim
.
pSyncCounter
[
blockIdx
.
x
]
=
-
1
;
cSim
.
pSyncCounter
[
blockIdx
.
x
]
=
-
1
;
}
}
static
void
initInverseMatrices
(
gpuContext
gpu
)
static
void
initInverseMatrices
(
gpuContext
gpu
,
bool
useNewPositions
)
{
{
// Build the inverse constraint matrix for each cluster.
// Build the inverse constraint matrix for each cluster.
gpu
->
psPosq4
->
Download
();
gpu
->
psPosq4
->
Download
();
gpu
->
psVelm4
->
Download
();
gpu
->
psVelm4
->
Download
();
if
(
useNewPositions
)
gpu
->
psPosqP4
->
Download
();
unsigned
int
elementIndex
=
0
;
unsigned
int
elementIndex
=
0
;
for
(
unsigned
int
i
=
0
;
i
<
gpu
->
sim
.
rigidClusters
;
i
++
)
{
for
(
unsigned
int
i
=
0
;
i
<
gpu
->
sim
.
rigidClusters
;
i
++
)
{
// Compute the constraint coupling matrix for this cluster.
// Compute the constraint coupling matrix for this cluster.
...
@@ -217,9 +221,24 @@ static void initInverseMatrices(gpuContext gpu)
...
@@ -217,9 +221,24 @@ static void initInverseMatrices(gpuContext gpu)
unsigned
int
size
=
endIndex
-
startIndex
;
unsigned
int
size
=
endIndex
-
startIndex
;
vector
<
float3
>
r
(
size
);
vector
<
float3
>
r
(
size
);
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
for
(
unsigned
int
j
=
0
;
j
<
size
;
j
++
)
{
int2
atoms
=
(
*
gpu
->
psLincsAtoms
)[(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
j
]];
int2
atoms
=
(
*
gpu
->
psLincsAtoms
)[
startIndex
+
j
];
float4
pos1
=
(
*
gpu
->
psPosq4
)[
atoms
.
x
];
float4
pos1
,
pos2
;
float4
pos2
=
(
*
gpu
->
psPosq4
)[
atoms
.
y
];
if
(
useNewPositions
)
{
float4
oldpos1
=
(
*
gpu
->
psPosq4
)[
atoms
.
x
];
float4
oldpos2
=
(
*
gpu
->
psPosq4
)[
atoms
.
y
];
pos1
=
(
*
gpu
->
psPosqP4
)[
atoms
.
x
];
pos2
=
(
*
gpu
->
psPosqP4
)[
atoms
.
y
];
pos1
.
x
+=
oldpos1
.
x
;
pos1
.
y
+=
oldpos1
.
y
;
pos1
.
z
+=
oldpos1
.
z
;
pos2
.
x
+=
oldpos2
.
x
;
pos2
.
y
+=
oldpos2
.
y
;
pos2
.
z
+=
oldpos2
.
z
;
}
else
{
pos1
=
(
*
gpu
->
psPosq4
)[
atoms
.
x
];
pos2
=
(
*
gpu
->
psPosq4
)[
atoms
.
y
];
}
r
[
j
]
=
make_float3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
r
[
j
]
=
make_float3
(
pos1
.
x
-
pos2
.
x
,
pos1
.
y
-
pos2
.
y
,
pos1
.
z
-
pos2
.
z
);
float
invLength
=
1.0
f
/
sqrt
(
r
[
j
].
x
*
r
[
j
].
x
+
r
[
j
].
y
*
r
[
j
].
y
+
r
[
j
].
z
*
r
[
j
].
z
);
float
invLength
=
1.0
f
/
sqrt
(
r
[
j
].
x
*
r
[
j
].
x
+
r
[
j
].
y
*
r
[
j
].
y
+
r
[
j
].
z
*
r
[
j
].
z
);
r
[
j
].
x
*=
invLength
;
r
[
j
].
x
*=
invLength
;
...
@@ -228,11 +247,9 @@ static void initInverseMatrices(gpuContext gpu)
...
@@ -228,11 +247,9 @@ static void initInverseMatrices(gpuContext gpu)
}
}
Array2D
<
double
>
matrix
(
size
,
size
);
Array2D
<
double
>
matrix
(
size
,
size
);
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
{
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
{
int
constraintj
=
(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
j
];
int2
atomsj
=
(
*
gpu
->
psLincsAtoms
)[
startIndex
+
j
];
int2
atomsj
=
(
*
gpu
->
psLincsAtoms
)[
constraintj
];
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
{
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
{
int
constraintk
=
(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
k
];
int2
atomsk
=
(
*
gpu
->
psLincsAtoms
)[
startIndex
+
k
];
int2
atomsk
=
(
*
gpu
->
psLincsAtoms
)[
constraintk
];
float
invMassj0
=
(
*
gpu
->
psVelm4
)[
atomsj
.
x
].
w
;
float
invMassj0
=
(
*
gpu
->
psVelm4
)[
atomsj
.
x
].
w
;
float
invMassj1
=
(
*
gpu
->
psVelm4
)[
atomsj
.
y
].
w
;
float
invMassj1
=
(
*
gpu
->
psVelm4
)[
atomsj
.
y
].
w
;
double
dot
=
r
[
j
].
x
*
r
[
k
].
x
+
r
[
j
].
y
*
r
[
k
].
y
+
r
[
j
].
z
*
r
[
k
].
z
;
double
dot
=
r
[
j
].
x
*
r
[
k
].
x
+
r
[
j
].
y
*
r
[
k
].
y
+
r
[
j
].
z
*
r
[
k
].
z
;
...
@@ -275,10 +292,10 @@ static void initInverseMatrices(gpuContext gpu)
...
@@ -275,10 +292,10 @@ static void initInverseMatrices(gpuContext gpu)
(
*
gpu
->
psRigidClusterMatrixIndex
)[
i
]
=
elementIndex
;
(
*
gpu
->
psRigidClusterMatrixIndex
)[
i
]
=
elementIndex
;
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
for
(
int
j
=
0
;
j
<
(
int
)
size
;
j
++
)
{
{
float
distance1
=
(
*
gpu
->
psLincsDistance
)[
(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
j
]
]
.
w
;
float
distance1
=
(
*
gpu
->
psLincsDistance
)[
startIndex
+
j
].
w
;
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
for
(
int
k
=
0
;
k
<
(
int
)
size
;
k
++
)
{
{
float
distance2
=
(
*
gpu
->
psLincsDistance
)[
(
*
gpu
->
psRigidClusterConstraints
)[
startIndex
+
k
]
]
.
w
;
float
distance2
=
(
*
gpu
->
psLincsDistance
)[
startIndex
+
k
].
w
;
(
*
gpu
->
psRigidClusterMatrix
)[
elementIndex
++
]
=
(
float
)(
matrix
[
k
][
j
]
*
distance1
/
distance2
);
(
*
gpu
->
psRigidClusterMatrix
)[
elementIndex
++
]
=
(
float
)(
matrix
[
k
][
j
]
*
distance1
/
distance2
);
}
}
}
}
...
@@ -286,7 +303,6 @@ static void initInverseMatrices(gpuContext gpu)
...
@@ -286,7 +303,6 @@ static void initInverseMatrices(gpuContext gpu)
(
*
gpu
->
psRigidClusterMatrixIndex
)[
gpu
->
sim
.
rigidClusters
]
=
elementIndex
;
(
*
gpu
->
psRigidClusterMatrixIndex
)[
gpu
->
sim
.
rigidClusters
]
=
elementIndex
;
gpu
->
psRigidClusterMatrix
->
Upload
();
gpu
->
psRigidClusterMatrix
->
Upload
();
gpu
->
psRigidClusterMatrixIndex
->
Upload
();
gpu
->
psRigidClusterMatrixIndex
->
Upload
();
gpu
->
hasInitializedRigidClusters
=
true
;
}
}
void
kApplyFirstCShake
(
gpuContext
gpu
)
void
kApplyFirstCShake
(
gpuContext
gpu
)
...
@@ -295,9 +311,20 @@ void kApplyFirstCShake(gpuContext gpu)
...
@@ -295,9 +311,20 @@ void kApplyFirstCShake(gpuContext gpu)
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
if
(
gpu
->
sim
.
lincsConstraints
>
0
)
{
{
if
(
!
gpu
->
hasInitializedRigidClusters
)
if
(
!
gpu
->
hasInitializedRigidClusters
)
initInverseMatrices
(
gpu
);
{
// Build preliminary constraint matrices for use on this call.
initInverseMatrices
(
gpu
,
false
);
}
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
,
4
*
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosqP
,
true
);
kApplyCShake_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
,
4
*
gpu
->
sim
.
lincs_threads_per_block
>>>
(
gpu
->
sim
.
pPosqP
,
true
);
LAUNCHERROR
(
"kApplyCShake"
);
LAUNCHERROR
(
"kApplyCShake"
);
if
(
!
gpu
->
hasInitializedRigidClusters
)
{
// Rebuild the constraint matrices, now that we know all constraints are really satisfied.
initInverseMatrices
(
gpu
,
true
);
gpu
->
hasInitializedRigidClusters
=
true
;
}
}
}
}
}
...
...
platforms/cuda/src/kernels/kLincs.cu
View file @
043c7b6c
...
@@ -101,9 +101,11 @@ __global__ void kSolveLincsMatrix_kernel(float4* atomPositions)
...
@@ -101,9 +101,11 @@ __global__ void kSolveLincsMatrix_kernel(float4* atomPositions)
{
{
int
index
=
pos
+
i
*
cSim
.
atoms
;
int
index
=
pos
+
i
*
cSim
.
atoms
;
int
constraint
=
cSim
.
pLincsAtomConstraints
[
index
];
int
constraint
=
cSim
.
pLincsAtomConstraints
[
index
];
bool
forward
=
(
constraint
>
0
);
constraint
=
(
forward
?
constraint
-
1
:
-
constraint
-
1
);
float4
dir
=
cSim
.
pLincsDistance
[
constraint
];
float4
dir
=
cSim
.
pLincsDistance
[
constraint
];
float
c
=
invMass
*
cSim
.
pLincsS
[
constraint
]
*
cSim
.
pLincsSolution
[
constraint
];
float
c
=
invMass
*
cSim
.
pLincsS
[
constraint
]
*
cSim
.
pLincsSolution
[
constraint
];
c
=
(
cSim
.
pLincsAtoms
[
constraint
].
x
==
pos
?
-
c
:
c
);
c
=
(
forward
?
-
c
:
c
);
atomPos
.
x
+=
c
*
dir
.
x
;
atomPos
.
x
+=
c
*
dir
.
x
;
atomPos
.
y
+=
c
*
dir
.
y
;
atomPos
.
y
+=
c
*
dir
.
y
;
atomPos
.
z
+=
c
*
dir
.
z
;
atomPos
.
z
+=
c
*
dir
.
z
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment