Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
95d1c382
Commit
95d1c382
authored
Apr 10, 2009
by
Peter Eastman
Browse files
Improved LINCS performance by eliminating some uncoalesced memory access
parent
297538f7
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
54 additions
and
54 deletions
+54
-54
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+2
-2
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+36
-36
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+2
-2
platforms/cuda/src/kernels/kLincs.cu
platforms/cuda/src/kernels/kLincs.cu
+14
-14
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
95d1c382
...
@@ -380,14 +380,14 @@ struct cudaGmxSimulation {
...
@@ -380,14 +380,14 @@ struct cudaGmxSimulation {
int2
*
pLincsAtoms
;
// The atoms connected by each LINCS constraint
int2
*
pLincsAtoms
;
// The atoms connected by each LINCS constraint
float4
*
pLincsDistance
;
// The displacement vector (x, y, z) and constraint distance (w) for each LINCS constraint
float4
*
pLincsDistance
;
// The displacement vector (x, y, z) and constraint distance (w) for each LINCS constraint
int
*
pLincsConnections
;
// The indices of constraints that other constraints are connected to
int
*
pLincsConnections
;
// The indices of constraints that other constraints are connected to
int
*
pLincsConnections
Index
;
// The
index in pLincsConnections at which
the con
nections for a particular constraint start
int
*
pLincs
Num
Connections
;
// The
number of o
the
r
con
straints that each constraint is linked to
float
*
pLincsS
;
// S matrix for LINCS
float
*
pLincsS
;
// S matrix for LINCS
float
*
pLincsCoupling
;
// Coupling matrix for LINCS
float
*
pLincsCoupling
;
// Coupling matrix for LINCS
float
*
pLincsRhs1
;
// Workspace for LINCS
float
*
pLincsRhs1
;
// Workspace for LINCS
float
*
pLincsRhs2
;
// Workspace for LINCS
float
*
pLincsRhs2
;
// Workspace for LINCS
float
*
pLincsSolution
;
// Workspace for LINCS
float
*
pLincsSolution
;
// Workspace for LINCS
int
*
pLincsAtomConstraints
;
// The indices of constraints involving each atom
int
*
pLincsAtomConstraints
;
// The indices of constraints involving each atom
int
*
pLincsAtomConstraints
Index
;
// The
index in psLincsAtomConstraints at which the constraints for a particular atom start
int
*
pLincs
Num
AtomConstraints
;
// The
number of constraints involving each atom
unsigned
int
*
pSyncCounter
;
// Used for global thread synchronization
unsigned
int
*
pSyncCounter
;
// Used for global thread synchronization
// Mutable stuff
// Mutable stuff
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
95d1c382
...
@@ -670,12 +670,13 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -670,12 +670,13 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
for
(
unsigned
int
i
=
0
;
i
<
atom1
.
size
();
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
atom1
.
size
();
i
++
)
if
(
!
isShakeAtom
[
atom1
[
i
]])
if
(
!
isShakeAtom
[
atom1
[
i
]])
lincsConstraints
.
push_back
(
i
);
lincsConstraints
.
push_back
(
i
);
int
numLincs
=
(
int
)
lincsConstraints
.
size
();
vector
<
vector
<
int
>
>
atomConstraints
(
gpu
->
natoms
);
vector
<
vector
<
int
>
>
atomConstraints
(
gpu
->
natoms
);
for
(
unsigned
int
i
=
0
;
i
<
lincsConstraints
.
size
()
;
i
++
)
{
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
atomConstraints
[
atom1
[
lincsConstraints
[
i
]]].
push_back
(
i
);
atomConstraints
[
atom1
[
lincsConstraints
[
i
]]].
push_back
(
i
);
atomConstraints
[
atom2
[
lincsConstraints
[
i
]]].
push_back
(
i
);
atomConstraints
[
atom2
[
lincsConstraints
[
i
]]].
push_back
(
i
);
}
}
vector
<
vector
<
int
>
>
linkedConstraints
(
lincsConstraints
.
size
()
);
vector
<
vector
<
int
>
>
linkedConstraints
(
numLincs
);
for
(
unsigned
int
atom
=
0
;
atom
<
atomConstraints
.
size
();
atom
++
)
{
for
(
unsigned
int
atom
=
0
;
atom
<
atomConstraints
.
size
();
atom
++
)
{
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
[
atom
].
size
();
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
[
atom
].
size
();
i
++
)
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
for
(
int
j
=
0
;
j
<
i
;
j
++
)
{
...
@@ -685,77 +686,76 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -685,77 +686,76 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
linkedConstraints
[
c2
].
push_back
(
c1
);
linkedConstraints
[
c2
].
push_back
(
c1
);
}
}
}
}
int
total
Links
=
0
;
int
max
Links
=
0
;
for
(
unsigned
int
i
=
0
;
i
<
linkedConstraints
.
size
();
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
linkedConstraints
.
size
();
i
++
)
totalLinks
+=
linkedConstraints
[
i
].
size
();
maxLinks
=
max
(
maxLinks
,
(
int
)
linkedConstraints
[
i
].
size
());
int
maxAtomConstraints
=
0
;
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
maxAtomConstraints
=
max
(
maxAtomConstraints
,
(
int
)
atomConstraints
[
i
].
size
());
// Fill in the CUDA streams.
// Fill in the CUDA streams.
CUDAStream
<
int2
>*
psLincsAtoms
=
new
CUDAStream
<
int2
>
(
(
int
)
lincsConstraints
.
size
()
,
1
,
"LincsAtoms"
);
CUDAStream
<
int2
>*
psLincsAtoms
=
new
CUDAStream
<
int2
>
(
numLincs
,
1
,
"LincsAtoms"
);
gpu
->
psLincsAtoms
=
psLincsAtoms
;
gpu
->
psLincsAtoms
=
psLincsAtoms
;
gpu
->
sim
.
pLincsAtoms
=
psLincsAtoms
->
_pDevData
;
gpu
->
sim
.
pLincsAtoms
=
psLincsAtoms
->
_pDevData
;
CUDAStream
<
float4
>*
psLincsDistance
=
new
CUDAStream
<
float4
>
(
(
int
)
lincsConstraints
.
size
()
,
1
,
"LincsDistance"
);
CUDAStream
<
float4
>*
psLincsDistance
=
new
CUDAStream
<
float4
>
(
numLincs
,
1
,
"LincsDistance"
);
gpu
->
psLincsDistance
=
psLincsDistance
;
gpu
->
psLincsDistance
=
psLincsDistance
;
gpu
->
sim
.
pLincsDistance
=
psLincsDistance
->
_pDevData
;
gpu
->
sim
.
pLincsDistance
=
psLincsDistance
->
_pDevData
;
CUDAStream
<
int
>*
psLincsConnections
=
new
CUDAStream
<
int
>
(
total
Links
,
1
,
"LincsConnections"
);
CUDAStream
<
int
>*
psLincsConnections
=
new
CUDAStream
<
int
>
(
numLincs
*
max
Links
,
1
,
"LincsConnections"
);
gpu
->
psLincsConnections
=
psLincsConnections
;
gpu
->
psLincsConnections
=
psLincsConnections
;
gpu
->
sim
.
pLincsConnections
=
psLincsConnections
->
_pDevData
;
gpu
->
sim
.
pLincsConnections
=
psLincsConnections
->
_pDevData
;
CUDAStream
<
int
>*
psLincsConnections
Index
=
new
CUDAStream
<
int
>
(
(
int
)
lincsConstraints
.
size
()
+
1
,
1
,
"LincsConnectionsIndex"
);
CUDAStream
<
int
>*
psLincs
Num
Connections
=
new
CUDAStream
<
int
>
(
numLincs
,
1
,
"LincsConnectionsIndex"
);
gpu
->
psLincsConnections
Index
=
psLincsConnections
Index
;
gpu
->
psLincs
Num
Connections
=
psLincs
Num
Connections
;
gpu
->
sim
.
pLincsConnections
Index
=
psLincsConnections
Index
->
_pDevData
;
gpu
->
sim
.
pLincs
Num
Connections
=
psLincs
Num
Connections
->
_pDevData
;
CUDAStream
<
int
>*
psLincsAtomConstraints
=
new
CUDAStream
<
int
>
(
(
int
)
lincs
Constraints
.
size
()
*
2
,
1
,
"LincsAtomConstraints"
);
CUDAStream
<
int
>*
psLincsAtomConstraints
=
new
CUDAStream
<
int
>
(
gpu
->
natoms
*
maxAtom
Constraints
,
1
,
"LincsAtomConstraints"
);
gpu
->
psLincsAtomConstraints
=
psLincsAtomConstraints
;
gpu
->
psLincsAtomConstraints
=
psLincsAtomConstraints
;
gpu
->
sim
.
pLincsAtomConstraints
=
psLincsAtomConstraints
->
_pDevData
;
gpu
->
sim
.
pLincsAtomConstraints
=
psLincsAtomConstraints
->
_pDevData
;
CUDAStream
<
int
>*
psLincsAtomConstraints
Index
=
new
CUDAStream
<
int
>
(
gpu
->
natoms
+
1
,
1
,
"LincsAtomConstraintsIndex"
);
CUDAStream
<
int
>*
psLincs
Num
AtomConstraints
=
new
CUDAStream
<
int
>
(
gpu
->
natoms
,
1
,
"LincsAtomConstraintsIndex"
);
gpu
->
psLincsAtomConstraints
Index
=
psLincsAtomConstraints
Index
;
gpu
->
psLincs
Num
AtomConstraints
=
psLincs
Num
AtomConstraints
;
gpu
->
sim
.
pLincsAtomConstraints
Index
=
psLincsAtomConstraints
Index
->
_pDevData
;
gpu
->
sim
.
pLincs
Num
AtomConstraints
=
psLincs
Num
AtomConstraints
->
_pDevData
;
CUDAStream
<
float
>*
psLincsS
=
new
CUDAStream
<
float
>
(
(
int
)
lincsConstraints
.
size
()
,
1
,
"LincsS"
);
CUDAStream
<
float
>*
psLincsS
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsS"
);
gpu
->
psLincsS
=
psLincsS
;
gpu
->
psLincsS
=
psLincsS
;
gpu
->
sim
.
pLincsS
=
psLincsS
->
_pDevData
;
gpu
->
sim
.
pLincsS
=
psLincsS
->
_pDevData
;
CUDAStream
<
float
>*
psLincsCoupling
=
new
CUDAStream
<
float
>
(
total
Links
,
1
,
"LincsCoupling"
);
CUDAStream
<
float
>*
psLincsCoupling
=
new
CUDAStream
<
float
>
(
numLincs
*
max
Links
,
1
,
"LincsCoupling"
);
gpu
->
psLincsCoupling
=
psLincsCoupling
;
gpu
->
psLincsCoupling
=
psLincsCoupling
;
gpu
->
sim
.
pLincsCoupling
=
psLincsCoupling
->
_pDevData
;
gpu
->
sim
.
pLincsCoupling
=
psLincsCoupling
->
_pDevData
;
CUDAStream
<
float
>*
psLincsRhs1
=
new
CUDAStream
<
float
>
(
(
int
)
lincsConstraints
.
size
()
,
1
,
"LincsRhs1"
);
CUDAStream
<
float
>*
psLincsRhs1
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsRhs1"
);
gpu
->
psLincsRhs1
=
psLincsRhs1
;
gpu
->
psLincsRhs1
=
psLincsRhs1
;
gpu
->
sim
.
pLincsRhs1
=
psLincsRhs1
->
_pDevData
;
gpu
->
sim
.
pLincsRhs1
=
psLincsRhs1
->
_pDevData
;
CUDAStream
<
float
>*
psLincsRhs2
=
new
CUDAStream
<
float
>
(
(
int
)
lincsConstraints
.
size
()
,
1
,
"LincsRhs2"
);
CUDAStream
<
float
>*
psLincsRhs2
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsRhs2"
);
gpu
->
psLincsRhs2
=
psLincsRhs2
;
gpu
->
psLincsRhs2
=
psLincsRhs2
;
gpu
->
sim
.
pLincsRhs2
=
psLincsRhs2
->
_pDevData
;
gpu
->
sim
.
pLincsRhs2
=
psLincsRhs2
->
_pDevData
;
CUDAStream
<
float
>*
psLincsSolution
=
new
CUDAStream
<
float
>
(
(
int
)
lincsConstraints
.
size
()
,
1
,
"LincsSolution"
);
CUDAStream
<
float
>*
psLincsSolution
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
gpu
->
psLincsSolution
=
psLincsSolution
;
gpu
->
psLincsSolution
=
psLincsSolution
;
gpu
->
sim
.
pLincsSolution
=
psLincsSolution
->
_pDevData
;
gpu
->
sim
.
pLincsSolution
=
psLincsSolution
->
_pDevData
;
CUDAStream
<
unsigned
int
>*
psSyncCounter
=
new
CUDAStream
<
unsigned
int
>
(
2
*
lincsTerms
+
2
,
1
,
"SyncCounter"
);
CUDAStream
<
unsigned
int
>*
psSyncCounter
=
new
CUDAStream
<
unsigned
int
>
(
2
*
lincsTerms
+
2
,
1
,
"SyncCounter"
);
gpu
->
psSyncCounter
=
psSyncCounter
;
gpu
->
psSyncCounter
=
psSyncCounter
;
gpu
->
sim
.
pSyncCounter
=
psSyncCounter
->
_pDevData
;
gpu
->
sim
.
pSyncCounter
=
psSyncCounter
->
_pDevData
;
gpu
->
sim
.
lincsConstraints
=
lincsConstraints
.
size
();
gpu
->
sim
.
lincsConstraints
=
numLincs
;
index
=
0
;
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
for
(
unsigned
int
i
=
0
;
i
<
lincsConstraints
.
size
();
i
++
)
{
int
c
=
lincsConstraints
[
i
];
int
c
=
lincsConstraints
[
i
];
(
*
psLincsAtoms
)[
i
].
x
=
atom1
[
c
];
(
*
psLincsAtoms
)[
i
].
x
=
atom1
[
c
];
(
*
psLincsAtoms
)[
i
].
y
=
atom2
[
c
];
(
*
psLincsAtoms
)[
i
].
y
=
atom2
[
c
];
(
*
psLincsDistance
)[
i
].
w
=
distance
[
c
];
(
*
psLincsDistance
)[
i
].
w
=
distance
[
c
];
(
*
psLincsS
)[
i
]
=
1.0
f
/
sqrt
(
invMass1
[
c
]
+
invMass2
[
c
]);
(
*
psLincsS
)[
i
]
=
1.0
f
/
sqrt
(
invMass1
[
c
]
+
invMass2
[
c
]);
(
*
psLincsConnections
Index
)[
i
]
=
in
dex
;
(
*
psLincs
Num
Connections
)[
i
]
=
l
in
kedConstraints
[
i
].
size
()
;
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
].
size
();
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
linkedConstraints
[
i
].
size
();
j
++
)
(
*
psLincsConnections
)[
i
ndex
++
]
=
linkedConstraints
[
i
][
j
];
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
][
j
];
}
}
(
*
psLincsConnectionsIndex
)[
lincsConstraints
.
size
()]
=
index
;
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
(
*
psSyncCounter
)[
i
]
=
0
;
(
*
psSyncCounter
)[
i
]
=
0
;
index
=
0
;
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
(
*
psLincsAtomConstraints
Index
)[
i
]
=
index
;
(
*
psLincs
Num
AtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
()
;
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
(
*
psLincsAtomConstraints
)[
i
ndex
++
]
=
atomConstraints
[
i
][
j
];
(
*
psLincsAtomConstraints
)[
i
+
j
*
gpu
->
natoms
]
=
atomConstraints
[
i
][
j
];
}
}
(
*
psLincsAtomConstraintsIndex
)[
atomConstraints
.
size
()]
=
index
;
psLincsAtoms
->
Upload
();
psLincsAtoms
->
Upload
();
psLincsDistance
->
Upload
();
psLincsDistance
->
Upload
();
psLincsS
->
Upload
();
psLincsS
->
Upload
();
psLincsConnections
->
Upload
();
psLincsConnections
->
Upload
();
psLincsConnections
Index
->
Upload
();
psLincs
Num
Connections
->
Upload
();
psLincsAtomConstraints
->
Upload
();
psLincsAtomConstraints
->
Upload
();
psLincsAtomConstraints
Index
->
Upload
();
psLincs
Num
AtomConstraints
->
Upload
();
psSyncCounter
->
Upload
();
psSyncCounter
->
Upload
();
gpu
->
sim
.
lincsTerms
=
lincsTerms
;
gpu
->
sim
.
lincsTerms
=
lincsTerms
;
gpu
->
sim
.
lincs_threads_per_block
=
(
gpu
->
sim
.
lincsConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
gpu
->
sim
.
lincs_threads_per_block
=
(
gpu
->
sim
.
lincsConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
...
@@ -765,7 +765,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -765,7 +765,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
gpu
->
sim
.
lincs_threads_per_block
=
1
;
gpu
->
sim
.
lincs_threads_per_block
=
1
;
gpu
->
psLincsConnections
Index
->
Download
();
gpu
->
psLincs
Num
Connections
->
Download
();
gpu
->
psLincsConnections
->
Download
();
gpu
->
psLincsConnections
->
Download
();
gpu
->
psLincsCoupling
->
Download
();
gpu
->
psLincsCoupling
->
Download
();
...
@@ -1131,9 +1131,9 @@ void* gpuInit(int numAtoms)
...
@@ -1131,9 +1131,9 @@ void* gpuInit(int numAtoms)
gpu
->
psLincsAtoms
=
NULL
;
gpu
->
psLincsAtoms
=
NULL
;
gpu
->
psLincsDistance
=
NULL
;
gpu
->
psLincsDistance
=
NULL
;
gpu
->
psLincsConnections
=
NULL
;
gpu
->
psLincsConnections
=
NULL
;
gpu
->
psLincsConnections
Index
=
NULL
;
gpu
->
psLincs
Num
Connections
=
NULL
;
gpu
->
psLincsAtomConstraints
=
NULL
;
gpu
->
psLincsAtomConstraints
=
NULL
;
gpu
->
psLincsAtomConstraints
Index
=
NULL
;
gpu
->
psLincs
Num
AtomConstraints
=
NULL
;
gpu
->
psLincsS
=
NULL
;
gpu
->
psLincsS
=
NULL
;
gpu
->
psLincsCoupling
=
NULL
;
gpu
->
psLincsCoupling
=
NULL
;
gpu
->
psLincsRhs1
=
NULL
;
gpu
->
psLincsRhs1
=
NULL
;
...
@@ -1281,9 +1281,9 @@ void gpuShutDown(gpuContext gpu)
...
@@ -1281,9 +1281,9 @@ void gpuShutDown(gpuContext gpu)
delete
gpu
->
psLincsAtoms
;
delete
gpu
->
psLincsAtoms
;
delete
gpu
->
psLincsDistance
;
delete
gpu
->
psLincsDistance
;
delete
gpu
->
psLincsConnections
;
delete
gpu
->
psLincsConnections
;
delete
gpu
->
psLincsConnections
Index
;
delete
gpu
->
psLincs
Num
Connections
;
delete
gpu
->
psLincsAtomConstraints
;
delete
gpu
->
psLincsAtomConstraints
;
delete
gpu
->
psLincsAtomConstraints
Index
;
delete
gpu
->
psLincs
Num
AtomConstraints
;
delete
gpu
->
psLincsS
;
delete
gpu
->
psLincsS
;
delete
gpu
->
psLincsCoupling
;
delete
gpu
->
psLincsCoupling
;
delete
gpu
->
psLincsRhs1
;
delete
gpu
->
psLincsRhs1
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
95d1c382
...
@@ -132,9 +132,9 @@ struct _gpuContext {
...
@@ -132,9 +132,9 @@ struct _gpuContext {
CUDAStream
<
int2
>*
psLincsAtoms
;
// The atoms connected by each LINCS constraint
CUDAStream
<
int2
>*
psLincsAtoms
;
// The atoms connected by each LINCS constraint
CUDAStream
<
float4
>*
psLincsDistance
;
// The displacement vector (x, y, z) and constraint distance (w) for each LINCS constraint
CUDAStream
<
float4
>*
psLincsDistance
;
// The displacement vector (x, y, z) and constraint distance (w) for each LINCS constraint
CUDAStream
<
int
>*
psLincsConnections
;
// The indices of constraints that other constraints are connected to
CUDAStream
<
int
>*
psLincsConnections
;
// The indices of constraints that other constraints are connected to
CUDAStream
<
int
>*
psLincsConnections
Index
;
// The
index in psLincsConnections at which
the con
nections for a particular constraint start
CUDAStream
<
int
>*
psLincs
Num
Connections
;
// The
number of o
the
r
con
straints that each constraint is linked to
CUDAStream
<
int
>*
psLincsAtomConstraints
;
// The indices of constraints involving each atom
CUDAStream
<
int
>*
psLincsAtomConstraints
;
// The indices of constraints involving each atom
CUDAStream
<
int
>*
psLincsAtomConstraints
Index
;
// The
index in psLincsAtomConstraints at which the constraints for a particular atom start
CUDAStream
<
int
>*
psLincs
Num
AtomConstraints
;
// The
number of constraints involving each atom
CUDAStream
<
float
>*
psLincsS
;
// S matrix for LINCS
CUDAStream
<
float
>*
psLincsS
;
// S matrix for LINCS
CUDAStream
<
float
>*
psLincsCoupling
;
// Coupling matrix for LINCS
CUDAStream
<
float
>*
psLincsCoupling
;
// Coupling matrix for LINCS
CUDAStream
<
float
>*
psLincsRhs1
;
// Workspace for LINCS
CUDAStream
<
float
>*
psLincsRhs1
;
// Workspace for LINCS
...
...
platforms/cuda/src/kernels/kLincs.cu
View file @
95d1c382
...
@@ -61,11 +61,11 @@ __global__ void kUpdateAtomPositions_kernel(float4* atomPositions)
...
@@ -61,11 +61,11 @@ __global__ void kUpdateAtomPositions_kernel(float4* atomPositions)
{
{
float4
atomPos
=
atomPositions
[
pos
];
float4
atomPos
=
atomPositions
[
pos
];
float
invMass
=
cSim
.
pVelm4
[
pos
].
w
;
float
invMass
=
cSim
.
pVelm4
[
pos
].
w
;
int
start
=
cSim
.
pLincsAtomConstraintsIndex
[
pos
];
int
num
=
cSim
.
pLincsNumAtomConstraints
[
pos
];
int
end
=
cSim
.
pLincsAtomConstraintsIndex
[
pos
+
1
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
{
int
constraint
=
cSim
.
pLincsAtomConstraints
[
i
];
int
index
=
pos
+
i
*
cSim
.
atoms
;
int
constraint
=
cSim
.
pLincsAtomConstraints
[
index
];
float4
dir
=
cSim
.
pLincsDistance
[
constraint
];
float4
dir
=
cSim
.
pLincsDistance
[
constraint
];
float
c
=
invMass
*
cSim
.
pLincsS
[
constraint
]
*
cSim
.
pLincsSolution
[
constraint
];
float
c
=
invMass
*
cSim
.
pLincsS
[
constraint
]
*
cSim
.
pLincsSolution
[
constraint
];
c
=
(
cSim
.
pLincsAtoms
[
constraint
].
x
==
pos
?
-
c
:
c
);
c
=
(
cSim
.
pLincsAtoms
[
constraint
].
x
==
pos
?
-
c
:
c
);
...
@@ -88,12 +88,12 @@ __global__ void kIterateLincsMatrix_kernel(int iteration)
...
@@ -88,12 +88,12 @@ __global__ void kIterateLincsMatrix_kernel(int iteration)
while
(
pos
<
cSim
.
lincsConstraints
)
while
(
pos
<
cSim
.
lincsConstraints
)
{
{
float
rhs
=
0.0
f
;
float
rhs
=
0.0
f
;
int
start
=
cSim
.
pLincsConnectionsIndex
[
pos
];
int
num
=
cSim
.
pLincsNumConnections
[
pos
];
int
end
=
cSim
.
pLincsConnectionsIndex
[
pos
+
1
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
for
(
int
i
=
start
;
i
<
end
;
i
++
)
{
{
int
otherConstraint
=
cSim
.
pLincsConnections
[
i
];
int
index
=
pos
+
i
*
cSim
.
lincsConstraints
;
rhs
+=
cSim
.
pLincsCoupling
[
i
]
*
rhs1
[
otherConstraint
];
int
otherConstraint
=
cSim
.
pLincsConnections
[
index
];
rhs
+=
cSim
.
pLincsCoupling
[
index
]
*
rhs1
[
otherConstraint
];
}
}
rhs2
[
pos
]
=
rhs
;
rhs2
[
pos
]
=
rhs
;
cSim
.
pLincsSolution
[
pos
]
+=
rhs
;
cSim
.
pLincsSolution
[
pos
]
+=
rhs
;
...
@@ -147,17 +147,17 @@ __global__ void kApplyLincsPart2_kernel()
...
@@ -147,17 +147,17 @@ __global__ void kApplyLincsPart2_kernel()
{
{
float4
dir1
=
cSim
.
pLincsDistance
[
pos
];
float4
dir1
=
cSim
.
pLincsDistance
[
pos
];
int2
atoms1
=
cSim
.
pLincsAtoms
[
pos
];
int2
atoms1
=
cSim
.
pLincsAtoms
[
pos
];
int
start
=
cSim
.
pLincsConnectionsIndex
[
pos
];
int
num
=
cSim
.
pLincsNumConnections
[
pos
];
int
end
=
cSim
.
pLincsConnectionsIndex
[
pos
+
1
];
float
s
=
cSim
.
pLincsS
[
pos
];
float
s
=
cSim
.
pLincsS
[
pos
];
float
invMass
=
cSim
.
pVelm4
[
atoms1
.
x
].
w
;
float
invMass
=
cSim
.
pVelm4
[
atoms1
.
x
].
w
;
for
(
int
i
=
start
;
i
<
end
;
i
++
)
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
{
int
otherConstraint
=
cSim
.
pLincsConnections
[
i
];
int
index
=
pos
+
i
*
cSim
.
lincsConstraints
;
int
otherConstraint
=
cSim
.
pLincsConnections
[
index
];
float4
dir2
=
cSim
.
pLincsDistance
[
otherConstraint
];
float4
dir2
=
cSim
.
pLincsDistance
[
otherConstraint
];
int2
atoms2
=
cSim
.
pLincsAtoms
[
otherConstraint
];
int2
atoms2
=
cSim
.
pLincsAtoms
[
otherConstraint
];
float
signedMass
=
(
atoms1
.
x
==
atoms2
.
x
||
atoms1
.
y
==
atoms2
.
y
?
-
invMass
:
cSim
.
pVelm4
[
atoms1
.
y
].
w
);
float
signedMass
=
(
atoms1
.
x
==
atoms2
.
x
||
atoms1
.
y
==
atoms2
.
y
?
-
invMass
:
cSim
.
pVelm4
[
atoms1
.
y
].
w
);
cSim
.
pLincsCoupling
[
i
]
=
signedMass
*
s
*
(
dir1
.
x
*
dir2
.
x
+
dir1
.
y
*
dir2
.
y
+
dir1
.
z
*
dir2
.
z
)
*
cSim
.
pLincsS
[
otherConstraint
];
cSim
.
pLincsCoupling
[
i
ndex
]
=
signedMass
*
s
*
(
dir1
.
x
*
dir2
.
x
+
dir1
.
y
*
dir2
.
y
+
dir1
.
z
*
dir2
.
z
)
*
cSim
.
pLincsS
[
otherConstraint
];
}
}
pos
+=
blockDim
.
x
*
gridDim
.
x
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment