Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
afd06645
Commit
afd06645
authored
Apr 16, 2009
by
Peter Eastman
Browse files
Optimized LINCS by merging kernels and implementing a syncAllThreads() function.
parent
bf7a968c
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
61 additions
and
58 deletions
+61
-58
platforms/cuda/cuda-cmake/FindCuda.cmake
platforms/cuda/cuda-cmake/FindCuda.cmake
+1
-1
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+1
-1
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+6
-6
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+1
-1
platforms/cuda/src/kernels/kLincs.cu
platforms/cuda/src/kernels/kLincs.cu
+49
-46
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
+1
-1
platforms/cuda/tests/TestCudaLangevinIntegrator.cpp
platforms/cuda/tests/TestCudaLangevinIntegrator.cpp
+1
-1
platforms/cuda/tests/TestCudaVerletIntegrator.cpp
platforms/cuda/tests/TestCudaVerletIntegrator.cpp
+1
-1
No files found.
platforms/cuda/cuda-cmake/FindCuda.cmake
View file @
afd06645
...
...
@@ -117,7 +117,7 @@ ELSE(CUDA_BUILD_TYPE MATCHES "Emulation")
ENDIF
(
CUDA_BUILD_TYPE MATCHES
"Emulation"
)
SET
(
CUDA_BUILD_CUBIN TRUE CACHE BOOL
"Generate and parse .cubin files in Device mode."
)
SET
(
CUDA_NVCC_FLAGS
"-maxrregcount=32;-use_fast_math;-O0
;-arch=sm_11
"
CACHE STRING
"Semi-colon delimit multiple arguments."
)
SET
(
CUDA_NVCC_FLAGS
"-maxrregcount=32;-use_fast_math;-O0"
CACHE STRING
"Semi-colon delimit multiple arguments."
)
# Search for the cuda distribution.
IF
(
NOT CUDA_INSTALL_PREFIX
)
...
...
platforms/cuda/src/kernels/cudatypes.h
View file @
afd06645
...
...
@@ -388,7 +388,7 @@ struct cudaGmxSimulation {
float
*
pLincsSolution
;
// Workspace for LINCS
int
*
pLincsAtomConstraints
;
// The indices of constraints involving each atom
int
*
pLincsNumAtomConstraints
;
// The number of constraints involving each atom
unsigned
int
*
pSyncCounter
;
// Used for global thread synchronization
short
*
pSyncCounter
;
// Used for global thread synchronization
// Mutable stuff
float4
*
pPosq
;
// Pointer to atom positions and charges
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
afd06645
...
...
@@ -728,9 +728,9 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
CUDAStream
<
float
>*
psLincsSolution
=
new
CUDAStream
<
float
>
(
numLincs
,
1
,
"LincsSolution"
);
gpu
->
psLincsSolution
=
psLincsSolution
;
gpu
->
sim
.
pLincsSolution
=
psLincsSolution
->
_pDevData
;
CUDAStream
<
unsigned
in
t
>*
psSyncCounter
=
new
CUDAStream
<
unsigned
int
>
(
2
*
lincsTerms
+
2
,
1
,
"SyncCounter"
);
gpu
->
psSyncCounter
=
psSyncCounter
;
gpu
->
sim
.
pSyncCounter
=
psSyncCounter
->
_pDevData
;
CUDAStream
<
shor
t
>*
psSyncCounter
=
new
CUDAStream
<
short
>
(
2
*
gpu
->
sim
.
blocks
,
1
,
"SyncCounter"
);
gpu
->
psSyncCounter
=
psSyncCounter
;
gpu
->
sim
.
pSyncCounter
=
psSyncCounter
->
_pDevData
;
gpu
->
sim
.
lincsConstraints
=
numLincs
;
for
(
int
i
=
0
;
i
<
numLincs
;
i
++
)
{
int
c
=
lincsConstraints
[
i
];
...
...
@@ -743,7 +743,7 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
(
*
psLincsConnections
)[
i
+
j
*
numLincs
]
=
linkedConstraints
[
i
][
j
];
}
for
(
unsigned
int
i
=
0
;
i
<
psSyncCounter
->
_length
;
i
++
)
(
*
psSyncCounter
)[
i
]
=
0
;
(
*
psSyncCounter
)[
i
]
=
-
1
;
for
(
unsigned
int
i
=
0
;
i
<
atomConstraints
.
size
();
i
++
)
{
(
*
psLincsNumAtomConstraints
)[
i
]
=
atomConstraints
[
i
].
size
();
for
(
unsigned
int
j
=
0
;
j
<
atomConstraints
[
i
].
size
();
j
++
)
...
...
@@ -761,8 +761,8 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
gpu
->
sim
.
lincs_threads_per_block
=
(
gpu
->
sim
.
lincsConstraints
+
gpu
->
sim
.
blocks
-
1
)
/
gpu
->
sim
.
blocks
;
if
(
gpu
->
sim
.
lincs_threads_per_block
>
gpu
->
sim
.
max_shake_threads_per_block
)
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
max_shake_threads_per_block
;
if
(
gpu
->
sim
.
lincs_threads_per_block
<
1
)
gpu
->
sim
.
lincs_threads_per_block
=
1
;
if
(
gpu
->
sim
.
lincs_threads_per_block
<
gpu
->
sim
.
blocks
)
gpu
->
sim
.
lincs_threads_per_block
=
gpu
->
sim
.
blocks
;
gpu
->
psLincsNumConnections
->
Download
();
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
afd06645
...
...
@@ -140,7 +140,7 @@ struct _gpuContext {
CUDAStream
<
float
>*
psLincsRhs1
;
// Workspace for LINCS
CUDAStream
<
float
>*
psLincsRhs2
;
// Workspace for LINCS
CUDAStream
<
float
>*
psLincsSolution
;
// Workspace for LINCS
CUDAStream
<
unsigned
in
t
>*
psSyncCounter
;
// Used for global thread synchronization
CUDAStream
<
shor
t
>*
psSyncCounter
;
// Used for global thread synchronization
};
typedef
struct
_gpuContext
*
gpuContext
;
...
...
platforms/cuda/src/kernels/kLincs.cu
View file @
afd06645
...
...
@@ -52,8 +52,48 @@ void GetLincsSim(gpuContext gpu)
RTERROR
(
status
,
"cudaMemcpyFromSymbol: SetSim copy from cSim failed"
);
}
__global__
void
kUpdateAtomPositions_kernel
(
float4
*
atomPositions
)
/**
 * Synchronize all threads across all blocks.
 *
 * Each block publishes its arrival by having thread 0 store newCount into
 * syncCounter[blockIdx.x].  Thread t of every block (for t < gridDim.x) then
 * spins until syncCounter[t] equals newCount, so a block leaves the barrier
 * only after every block has published.
 *
 * Preconditions visible from this code:
 *   - blockDim.x must be >= gridDim.x, otherwise some counter slots are never
 *     polled and the barrier can be passed before those blocks arrive.
 *   - syncCounter must have at least gridDim.x entries.
 *   - newCount must differ from the value currently stored in every slot;
 *     a slot that already holds newCount is treated as "arrived".
 *
 * NOTE(review): a spin barrier like this presumably relies on every block of
 * the grid being resident on the device at the same time -- confirm against
 * the launch configuration.
 *
 * @param syncCounter  array of per-block arrival flags in global memory
 * @param newCount     the value that marks arrival at this barrier
 */
__device__ void kSyncAllThreads_kernel(short* syncCounter, short newCount)
{
    // short* syncCounter = &cSim.pSyncCounter[newCount%2 == 0 ? 0 : gridDim.x];

    // Order this thread's earlier global-memory writes before the arrival flag
    // written below, so that a thread in another block that observes the flag
    // also observes the data produced ahead of the barrier.
    __threadfence();

    // Wait for the whole block before thread 0 announces that it has arrived.
    __syncthreads();
    if (threadIdx.x == 0)
        syncCounter[blockIdx.x] = newCount;

    // One polling thread per block slot; volatile forces a fresh load on
    // every pass through the spin loop.
    if (threadIdx.x < gridDim.x)
    {
        volatile short* counter = &syncCounter[threadIdx.x];
        do
        {
        } while (*counter != newCount);
    }

    // Hold the rest of the block until all polling threads have seen every flag.
    __syncthreads();
}
__global__
void
kSolveLincsMatrix_kernel
(
float4
*
atomPositions
)
{
for
(
unsigned
int
iteration
=
0
;
iteration
<
cSim
.
lincsTerms
;
iteration
++
)
{
float
*
rhs1
=
(
iteration
%
2
==
0
?
cSim
.
pLincsRhs1
:
cSim
.
pLincsRhs2
);
float
*
rhs2
=
(
iteration
%
2
==
0
?
cSim
.
pLincsRhs2
:
cSim
.
pLincsRhs1
);
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
lincsConstraints
)
{
float
rhs
=
0.0
f
;
int
num
=
cSim
.
pLincsNumConnections
[
pos
];
for
(
int
i
=
0
;
i
<
num
;
i
++
)
{
int
index
=
pos
+
i
*
cSim
.
lincsConstraints
;
int
otherConstraint
=
cSim
.
pLincsConnections
[
index
];
rhs
+=
cSim
.
pLincsCoupling
[
index
]
*
rhs1
[
otherConstraint
];
}
rhs2
[
pos
]
=
rhs
;
cSim
.
pLincsSolution
[
pos
]
+=
rhs
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
kSyncAllThreads_kernel
(
&
cSim
.
pSyncCounter
[
iteration
%
2
==
0
?
0
:
gridDim
.
x
],
iteration
);
}
// Update the atom positions based on the solution to the matrix equations.
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
...
...
@@ -78,29 +118,6 @@ __global__ void kUpdateAtomPositions_kernel(float4* atomPositions)
}
}
/**
 * Perform one iteration of inverting the matrix.
 *
 * The parity of `iteration` selects which of the two right-hand-side buffers
 * is read and which is written: even iterations read pLincsRhs1 and write
 * pLincsRhs2, odd iterations do the opposite.  For every constraint handled
 * by this thread, the coupling-weighted sum over its connected constraints is
 * stored in the output buffer and also accumulated into pLincsSolution.
 *
 * @param iteration  index of the current expansion term
 */
__global__ void kIterateLincsMatrix_kernel(int iteration)
{
    // Choose the input and output buffers for this iteration.
    float* input;
    float* output;
    if (iteration%2 == 0)
    {
        input = cSim.pLincsRhs1;
        output = cSim.pLincsRhs2;
    }
    else
    {
        input = cSim.pLincsRhs2;
        output = cSim.pLincsRhs1;
    }

    // Start at this thread's global index and advance by the total thread count.
    for (unsigned int pos = threadIdx.x+blockIdx.x*blockDim.x; pos < cSim.lincsConstraints; pos += blockDim.x*gridDim.x)
    {
        const int connectionCount = cSim.pLincsNumConnections[pos];
        float sum = 0.0f;
        for (int k = 0; k < connectionCount; k++)
        {
            // Entry k for constraint pos is stored at pos + k*lincsConstraints.
            int slot = pos+k*cSim.lincsConstraints;
            int neighbor = cSim.pLincsConnections[slot];
            sum += cSim.pLincsCoupling[slot]*input[neighbor];
        }
        output[pos] = sum;
        cSim.pLincsSolution[pos] += sum;
    }
}
__global__
void
kApplyLincsPart1_kernel
(
float4
*
atomPositions
,
bool
addOldPosition
)
{
// Calculate the direction of each constraint, along with the initial RHS and solution vectors.
...
...
@@ -136,13 +153,11 @@ __global__ void kApplyLincsPart1_kernel(float4* atomPositions, bool addOldPositi
cSim
.
pLincsSolution
[
pos
]
=
diff
;
pos
+=
blockDim
.
x
*
gridDim
.
x
;
}
}
kSyncAllThreads_kernel
(
cSim
.
pSyncCounter
,
cSim
.
lincsTerms
+
1
);
__global__
void
kApplyLincsPart2_kernel
()
{
// Build the coupling matrix.
unsigned
int
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
pos
=
threadIdx
.
x
+
blockIdx
.
x
*
blockDim
.
x
;
while
(
pos
<
cSim
.
lincsConstraints
)
{
float4
dir1
=
cSim
.
pLincsDistance
[
pos
];
...
...
@@ -163,7 +178,7 @@ __global__ void kApplyLincsPart2_kernel()
}
}
__global__
void
kApplyLincsPart
3
_kernel
(
float4
*
atomPositions
,
bool
addOldPosition
)
__global__
void
kApplyLincsPart
2
_kernel
(
float4
*
atomPositions
,
bool
addOldPosition
)
{
// Correct for rotational lengthening.
...
...
@@ -200,24 +215,12 @@ static void kApplyLincs(gpuContext gpu, float4* atomPositions, bool addOldPositi
{
kApplyLincsPart1_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
atomPositions
,
addOldPosition
);
LAUNCHERROR
(
"kApplyLincsPart1"
);
kApplyLincsPart2_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
();
kSolveLincsMatrix_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
atomPositions
);
LAUNCHERROR
(
"kSolveLincsMatrix_kernel"
);
kApplyLincsPart2_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
atomPositions
,
addOldPosition
);
LAUNCHERROR
(
"kApplyLincsPart2"
);
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
lincsTerms
;
++
i
)
{
kIterateLincsMatrix_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
i
);
LAUNCHERROR
(
"kIterateLincsMatrix_kernel"
);
}
kUpdateAtomPositions_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
atomPositions
);
LAUNCHERROR
(
"kUpdateAtomPositions"
);
kApplyLincsPart3_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
atomPositions
,
addOldPosition
);
LAUNCHERROR
(
"kApplyLincsPart3"
);
for
(
int
i
=
0
;
i
<
gpu
->
sim
.
lincsTerms
;
++
i
)
{
kIterateLincsMatrix_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
i
);
LAUNCHERROR
(
"kIterateLincsMatrix_kernel"
);
}
kUpdateAtomPositions_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
atomPositions
);
LAUNCHERROR
(
"kUpdateAtomPositions"
);
kSolveLincsMatrix_kernel
<<<
gpu
->
sim
.
blocks
,
gpu
->
sim
.
lincs_threads_per_block
>>>
(
atomPositions
);
LAUNCHERROR
(
"kSolveLincsMatrix_kernel"
);
}
void
kApplyFirstLincs
(
gpuContext
gpu
)
...
...
platforms/cuda/tests/TestCudaBrownianIntegrator.cpp
View file @
afd06645
...
...
@@ -165,7 +165,7 @@ void testConstraints() {
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
2
e-
5
);
ASSERT_EQUAL_TOL
(
distance
,
dist
,
1
e-
4
);
}
integrator
.
step
(
1
);
}
...
...
platforms/cuda/tests/TestCudaLangevinIntegrator.cpp
View file @
afd06645
...
...
@@ -170,7 +170,7 @@ void testConstraints() {
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
2
e-
5
);
ASSERT_EQUAL_TOL
(
distance
,
dist
,
1
e-
4
);
}
integrator
.
step
(
1
);
}
...
...
platforms/cuda/tests/TestCudaVerletIntegrator.cpp
View file @
afd06645
...
...
@@ -127,7 +127,7 @@ void testConstraints() {
Vec3
p1
=
state
.
getPositions
()[
particle1
];
Vec3
p2
=
state
.
getPositions
()[
particle2
];
double
dist
=
std
::
sqrt
((
p1
[
0
]
-
p2
[
0
])
*
(
p1
[
0
]
-
p2
[
0
])
+
(
p1
[
1
]
-
p2
[
1
])
*
(
p1
[
1
]
-
p2
[
1
])
+
(
p1
[
2
]
-
p2
[
2
])
*
(
p1
[
2
]
-
p2
[
2
]));
ASSERT_EQUAL_TOL
(
distance
,
dist
,
2
e-
5
);
ASSERT_EQUAL_TOL
(
distance
,
dist
,
1
e-
4
);
}
double
energy
=
state
.
getKineticEnergy
()
+
state
.
getPotentialEnergy
();
if
(
i
==
1
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment