Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a409f0e8
Commit
a409f0e8
authored
Mar 19, 2010
by
Mark Friedrichs
Browse files
Update thread counts for Fermi-board
parent
80c69c93
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
5 deletions
+51
-5
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+23
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+25
-4
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+3
-1
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
a409f0e8
...
...
@@ -216,22 +216,44 @@ T& CUDAStream<T>::operator[](int index)
static
const
unsigned
int
GRID
=
32
;
static
const
unsigned
int
GRIDBITS
=
5
;
static
const
int
G8X_BLOCKS_PER_SM
=
1
;
static
const
int
GT2XX_BLOCKS_PER_SM
=
1
;
static
const
int
GF1XX_BLOCKS_PER_SM
=
3
;
static
const
int
G8X_NONBOND_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_NONBOND_THREADS_PER_BLOCK
=
320
;
static
const
int
GF1XX_NONBOND_THREADS_PER_BLOCK
=
256
;
//static const int GF1XX_NONBOND_THREADS_PER_BLOCK = 768;
static
const
int
G8X_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
=
320
;
static
const
int
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
//static const int GF1XX_BORNFORCE2_THREADS_PER_BLOCK = 768;
static
const
int
G8X_SHAKE_THREADS_PER_BLOCK
=
128
;
static
const
int
GT2XX_SHAKE_THREADS_PER_BLOCK
=
256
;
static
const
int
GF1XX_SHAKE_THREADS_PER_BLOCK
=
512
;
static
const
int
G8X_UPDATE_THREADS_PER_BLOCK
=
192
;
static
const
int
GT2XX_UPDATE_THREADS_PER_BLOCK
=
384
;
static
const
int
GF1XX_UPDATE_THREADS_PER_BLOCK
=
768
;
static
const
int
G8X_LOCALFORCES_THREADS_PER_BLOCK
=
192
;
static
const
int
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
=
384
;
static
const
int
GF1XX_LOCALFORCES_THREADS_PER_BLOCK
=
768
;
static
const
int
G8X_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_THREADS_PER_BLOCK
=
256
;
static
const
int
GF1XX_THREADS_PER_BLOCK
=
512
;
static
const
int
G8X_RANDOM_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_RANDOM_THREADS_PER_BLOCK
=
384
;
static
const
int
GF1XX_RANDOM_THREADS_PER_BLOCK
=
768
;
static
const
int
G8X_NONBOND_WORKUNITS_PER_SM
=
220
;
static
const
int
GT2XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
int
GF1XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
unsigned
int
MAX_STACK_SIZE
=
8
;
static
const
unsigned
int
MAX_TABULATED_FUNCTIONS
=
4
;
...
...
@@ -265,6 +287,7 @@ struct cudaGmxSimulation {
unsigned
int
atoms
;
// Number of atoms
unsigned
int
paddedNumberOfAtoms
;
// Padded number of atoms
unsigned
int
blocks
;
// Number of blocks to launch across linear kernels
unsigned
int
blocksPerSM
;
// Number of blocks per SM (streaming multiprocessor)
unsigned
int
nonbond_blocks
;
// Number of blocks to launch across CDLJ and Born Force Part1
unsigned
int
bornForce2_blocks
;
// Number of blocks to launch across Born Force 2
unsigned
int
interaction_blocks
;
// Number of blocks to launch when identifying interacting tiles
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
a409f0e8
...
...
@@ -1352,9 +1352,11 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
// Find connected constraints for CCMA.
vector
<
int
>
ccmaConstraints
;
/*
for (unsigned i = 0; i < atom1.size(); i++)
if (!isShakeAtom[atom1[i]])
ccmaConstraints.push_back(i);
*/
// Record the connections between constraints.
...
...
@@ -1833,10 +1835,12 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
break
;
}
}
else
{
gpu
->
sm_version
=
SM_20
;
gpu
->
sim
.
workUnitsPerSM
=
GF1XX_NONBOND_WORKUNITS_PER_SM
;
}
gpu
->
sim
.
nonbond_blocks
=
deviceProp
.
multiProcessorCount
;
gpu
->
sim
.
bornForce2_blocks
=
deviceProp
.
multiProcessorCount
;
gpu
->
sim
.
blocks
=
deviceProp
.
multiProcessorCount
;
if
(
deviceProp
.
regsPerBlock
==
8192
)
{
gpu
->
sim
.
nonbond_threads_per_block
=
G8X_NONBOND_THREADS_PER_BLOCK
;
...
...
@@ -1846,8 +1850,9 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
sim
.
max_localForces_threads_per_block
=
G8X_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
G8X_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
G8X_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
G8X_BLOCKS_PER_SM
;
}
else
else
if
(
deviceProp
.
regsPerBlock
<=
16384
)
{
gpu
->
sim
.
nonbond_threads_per_block
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
;
...
...
@@ -1856,7 +1861,23 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
sim
.
max_localForces_threads_per_block
=
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
GT2XX_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
GT2XX_BLOCKS_PER_SM
;
}
else
{
gpu
->
sim
.
nonbond_threads_per_block
=
GF1XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_shake_threads_per_block
=
GF1XX_SHAKE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_update_threads_per_block
=
GF1XX_UPDATE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_localForces_threads_per_block
=
GF1XX_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
GF1XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
GF1XX_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
GF1XX_BLOCKS_PER_SM
;
}
gpu
->
sim
.
nonbond_blocks
=
deviceProp
.
multiProcessorCount
*
gpu
->
blocksPerSM
;
gpu
->
sim
.
bornForce2_blocks
=
deviceProp
.
multiProcessorCount
*
gpu
->
blocksPerSM
;
gpu
->
sim
.
blocks
=
deviceProp
.
multiProcessorCount
;
gpu
->
sim
.
shake_threads_per_block
=
gpu
->
sim
.
max_shake_threads_per_block
;
gpu
->
sim
.
localForces_threads_per_block
=
gpu
->
sim
.
max_localForces_threads_per_block
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
a409f0e8
...
...
@@ -55,7 +55,8 @@ enum SM_VERSION
{
SM_10
,
SM_11
,
SM_12
SM_12
,
SM_20
};
...
...
@@ -70,6 +71,7 @@ struct _gpuContext {
bool
useBlockingSync
;
gpuAtomType
*
gpAtomTable
;
int
gAtomTypes
;
unsigned
int
blocksPerSM
;
cudaGmxSimulation
sim
;
unsigned
int
*
pOutputBufferCounter
;
std
::
vector
<
std
::
vector
<
int
>
>
exclusions
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment