Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
tsoc
openmm
Commits
a409f0e8
"openmmapi/vscode:/vscode.git/clone" did not exist on "9f90999c4fd4ad0773ce908648659db7cc5302e3"
Commit
a409f0e8
authored
Mar 19, 2010
by
Mark Friedrichs
Browse files
Update thread counts for Fermi-board
parent
80c69c93
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
51 additions
and
5 deletions
+51
-5
platforms/cuda/src/kernels/cudatypes.h
platforms/cuda/src/kernels/cudatypes.h
+23
-0
platforms/cuda/src/kernels/gpu.cpp
platforms/cuda/src/kernels/gpu.cpp
+25
-4
platforms/cuda/src/kernels/gputypes.h
platforms/cuda/src/kernels/gputypes.h
+3
-1
No files found.
platforms/cuda/src/kernels/cudatypes.h
View file @
a409f0e8
...
@@ -216,22 +216,44 @@ T& CUDAStream<T>::operator[](int index)
...
@@ -216,22 +216,44 @@ T& CUDAStream<T>::operator[](int index)
static
const
unsigned
int
GRID
=
32
;
static
const
unsigned
int
GRID
=
32
;
static
const
unsigned
int
GRIDBITS
=
5
;
static
const
unsigned
int
GRIDBITS
=
5
;
static
const
int
G8X_BLOCKS_PER_SM
=
1
;
static
const
int
GT2XX_BLOCKS_PER_SM
=
1
;
static
const
int
GF1XX_BLOCKS_PER_SM
=
3
;
static
const
int
G8X_NONBOND_THREADS_PER_BLOCK
=
256
;
static
const
int
G8X_NONBOND_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_NONBOND_THREADS_PER_BLOCK
=
320
;
static
const
int
GT2XX_NONBOND_THREADS_PER_BLOCK
=
320
;
static
const
int
GF1XX_NONBOND_THREADS_PER_BLOCK
=
256
;
//static const int GF1XX_NONBOND_THREADS_PER_BLOCK = 768;
static
const
int
G8X_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
static
const
int
G8X_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
=
320
;
static
const
int
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
=
320
;
static
const
int
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
=
256
;
//static const int GF1XX_BORNFORCE2_THREADS_PER_BLOCK = 768;
static
const
int
G8X_SHAKE_THREADS_PER_BLOCK
=
128
;
static
const
int
G8X_SHAKE_THREADS_PER_BLOCK
=
128
;
static
const
int
GT2XX_SHAKE_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_SHAKE_THREADS_PER_BLOCK
=
256
;
static
const
int
GF1XX_SHAKE_THREADS_PER_BLOCK
=
512
;
static
const
int
G8X_UPDATE_THREADS_PER_BLOCK
=
192
;
static
const
int
G8X_UPDATE_THREADS_PER_BLOCK
=
192
;
static
const
int
GT2XX_UPDATE_THREADS_PER_BLOCK
=
384
;
static
const
int
GT2XX_UPDATE_THREADS_PER_BLOCK
=
384
;
static
const
int
GF1XX_UPDATE_THREADS_PER_BLOCK
=
768
;
static
const
int
G8X_LOCALFORCES_THREADS_PER_BLOCK
=
192
;
static
const
int
G8X_LOCALFORCES_THREADS_PER_BLOCK
=
192
;
static
const
int
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
=
384
;
static
const
int
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
=
384
;
static
const
int
GF1XX_LOCALFORCES_THREADS_PER_BLOCK
=
768
;
static
const
int
G8X_THREADS_PER_BLOCK
=
256
;
static
const
int
G8X_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_THREADS_PER_BLOCK
=
256
;
static
const
int
GF1XX_THREADS_PER_BLOCK
=
512
;
static
const
int
G8X_RANDOM_THREADS_PER_BLOCK
=
256
;
static
const
int
G8X_RANDOM_THREADS_PER_BLOCK
=
256
;
static
const
int
GT2XX_RANDOM_THREADS_PER_BLOCK
=
384
;
static
const
int
GT2XX_RANDOM_THREADS_PER_BLOCK
=
384
;
static
const
int
GF1XX_RANDOM_THREADS_PER_BLOCK
=
768
;
static
const
int
G8X_NONBOND_WORKUNITS_PER_SM
=
220
;
static
const
int
G8X_NONBOND_WORKUNITS_PER_SM
=
220
;
static
const
int
GT2XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
int
GT2XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
int
GF1XX_NONBOND_WORKUNITS_PER_SM
=
256
;
static
const
unsigned
int
MAX_STACK_SIZE
=
8
;
static
const
unsigned
int
MAX_STACK_SIZE
=
8
;
static
const
unsigned
int
MAX_TABULATED_FUNCTIONS
=
4
;
static
const
unsigned
int
MAX_TABULATED_FUNCTIONS
=
4
;
...
@@ -265,6 +287,7 @@ struct cudaGmxSimulation {
...
@@ -265,6 +287,7 @@ struct cudaGmxSimulation {
unsigned
int
atoms
;
// Number of atoms
unsigned
int
atoms
;
// Number of atoms
unsigned
int
paddedNumberOfAtoms
;
// Padded number of atoms
unsigned
int
paddedNumberOfAtoms
;
// Padded number of atoms
unsigned
int
blocks
;
// Number of blocks to launch across linear kernels
unsigned
int
blocks
;
// Number of blocks to launch across linear kernels
unsigned
int
blocksPerSM
;
// Number of blocks per streaming multiprocessor (SM)
unsigned
int
nonbond_blocks
;
// Number of blocks to launch across CDLJ and Born Force Part1
unsigned
int
nonbond_blocks
;
// Number of blocks to launch across CDLJ and Born Force Part1
unsigned
int
bornForce2_blocks
;
// Number of blocks to launch across Born Force 2
unsigned
int
bornForce2_blocks
;
// Number of blocks to launch across Born Force 2
unsigned
int
interaction_blocks
;
// Number of blocks to launch when identifying interacting tiles
unsigned
int
interaction_blocks
;
// Number of blocks to launch when identifying interacting tiles
...
...
platforms/cuda/src/kernels/gpu.cpp
View file @
a409f0e8
...
@@ -1352,9 +1352,11 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
...
@@ -1352,9 +1352,11 @@ void gpuSetConstraintParameters(gpuContext gpu, const vector<int>& atom1, const
// Find connected constraints for CCMA.
// Find connected constraints for CCMA.
vector
<
int
>
ccmaConstraints
;
vector
<
int
>
ccmaConstraints
;
/*
for (unsigned i = 0; i < atom1.size(); i++)
for (unsigned i = 0; i < atom1.size(); i++)
if (!isShakeAtom[atom1[i]])
if (!isShakeAtom[atom1[i]])
ccmaConstraints.push_back(i);
ccmaConstraints.push_back(i);
*/
// Record the connections between constraints.
// Record the connections between constraints.
...
@@ -1833,10 +1835,12 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
...
@@ -1833,10 +1835,12 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
break
;
break
;
}
}
}
}
else
{
gpu
->
sm_version
=
SM_20
;
gpu
->
sim
.
workUnitsPerSM
=
GF1XX_NONBOND_WORKUNITS_PER_SM
;
}
gpu
->
sim
.
nonbond_blocks
=
deviceProp
.
multiProcessorCount
;
gpu
->
sim
.
bornForce2_blocks
=
deviceProp
.
multiProcessorCount
;
gpu
->
sim
.
blocks
=
deviceProp
.
multiProcessorCount
;
if
(
deviceProp
.
regsPerBlock
==
8192
)
if
(
deviceProp
.
regsPerBlock
==
8192
)
{
{
gpu
->
sim
.
nonbond_threads_per_block
=
G8X_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
nonbond_threads_per_block
=
G8X_NONBOND_THREADS_PER_BLOCK
;
...
@@ -1846,8 +1850,9 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
...
@@ -1846,8 +1850,9 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
sim
.
max_localForces_threads_per_block
=
G8X_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_localForces_threads_per_block
=
G8X_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
G8X_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
G8X_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
G8X_RANDOM_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
G8X_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
G8X_BLOCKS_PER_SM
;
}
}
else
else
if
(
deviceProp
.
regsPerBlock
<=
16384
)
{
{
gpu
->
sim
.
nonbond_threads_per_block
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
nonbond_threads_per_block
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
GT2XX_BORNFORCE2_THREADS_PER_BLOCK
;
...
@@ -1856,7 +1861,23 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
...
@@ -1856,7 +1861,23 @@ void* gpuInit(int numAtoms, unsigned int device, bool useBlockingSync)
gpu
->
sim
.
max_localForces_threads_per_block
=
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_localForces_threads_per_block
=
GT2XX_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
GT2XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
GT2XX_RANDOM_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
GT2XX_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
GT2XX_BLOCKS_PER_SM
;
}
}
else
{
gpu
->
sim
.
nonbond_threads_per_block
=
GF1XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
bornForce2_threads_per_block
=
GF1XX_BORNFORCE2_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_shake_threads_per_block
=
GF1XX_SHAKE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_update_threads_per_block
=
GF1XX_UPDATE_THREADS_PER_BLOCK
;
gpu
->
sim
.
max_localForces_threads_per_block
=
GF1XX_LOCALFORCES_THREADS_PER_BLOCK
;
gpu
->
sim
.
threads_per_block
=
GF1XX_NONBOND_THREADS_PER_BLOCK
;
gpu
->
sim
.
random_threads_per_block
=
GF1XX_RANDOM_THREADS_PER_BLOCK
;
gpu
->
blocksPerSM
=
GF1XX_BLOCKS_PER_SM
;
}
gpu
->
sim
.
nonbond_blocks
=
deviceProp
.
multiProcessorCount
*
gpu
->
blocksPerSM
;
gpu
->
sim
.
bornForce2_blocks
=
deviceProp
.
multiProcessorCount
*
gpu
->
blocksPerSM
;
gpu
->
sim
.
blocks
=
deviceProp
.
multiProcessorCount
;
gpu
->
sim
.
shake_threads_per_block
=
gpu
->
sim
.
max_shake_threads_per_block
;
gpu
->
sim
.
shake_threads_per_block
=
gpu
->
sim
.
max_shake_threads_per_block
;
gpu
->
sim
.
localForces_threads_per_block
=
gpu
->
sim
.
max_localForces_threads_per_block
;
gpu
->
sim
.
localForces_threads_per_block
=
gpu
->
sim
.
max_localForces_threads_per_block
;
...
...
platforms/cuda/src/kernels/gputypes.h
View file @
a409f0e8
...
@@ -55,7 +55,8 @@ enum SM_VERSION
...
@@ -55,7 +55,8 @@ enum SM_VERSION
{
{
SM_10
,
SM_10
,
SM_11
,
SM_11
,
SM_12
SM_12
,
SM_20
};
};
...
@@ -70,6 +71,7 @@ struct _gpuContext {
...
@@ -70,6 +71,7 @@ struct _gpuContext {
bool
useBlockingSync
;
bool
useBlockingSync
;
gpuAtomType
*
gpAtomTable
;
gpuAtomType
*
gpAtomTable
;
int
gAtomTypes
;
int
gAtomTypes
;
unsigned
int
blocksPerSM
;
cudaGmxSimulation
sim
;
cudaGmxSimulation
sim
;
unsigned
int
*
pOutputBufferCounter
;
unsigned
int
*
pOutputBufferCounter
;
std
::
vector
<
std
::
vector
<
int
>
>
exclusions
;
std
::
vector
<
std
::
vector
<
int
>
>
exclusions
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment