Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
deepspeed
Commits
bc046dc4
"sgl-kernel/python/vscode:/vscode.git/clone" did not exist on "d40846d456ecc930c04538778ed11f67cc793c23"
Unverified
Commit
bc046dc4
authored
Jan 08, 2021
by
Jeff Rasley
Committed by
GitHub
Jan 08, 2021
Browse files
add additional validation checks in elastic config (#646)
parent
828d75ba
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
61 additions
and
2 deletions
+61
-2
deepspeed/elasticity/config.py
deepspeed/elasticity/config.py
+30
-0
deepspeed/elasticity/constants.py
deepspeed/elasticity/constants.py
+2
-2
tests/unit/test_elastic.py
tests/unit/test_elastic.py
+29
-0
No files found.
deepspeed/elasticity/config.py
View file @
bc046dc4
...
...
@@ -63,9 +63,39 @@ class ElasticityConfig:
MAX_ACCEPTABLE_BATCH_SIZE
,
MAX_ACCEPTABLE_BATCH_SIZE_DEFAULT
)
self
.
micro_batches
=
param_dict
.
get
(
MICRO_BATCHES
,
MICRO_BATCHES_DEFAULT
)
if
not
isinstance
(
self
.
micro_batches
,
list
):
raise
ElasticityConfigError
(
f
"Elasticity expected value of
{
MICRO_BATCHES
}
to be a "
f
"list of micro batches, instead is:
{
type
(
self
.
micro_batches
)
}
, containing:
{
self
.
micro_batches
}
"
)
if
not
all
(
map
(
lambda
m
:
isinstance
(
m
,
int
),
self
.
micro_batches
)):
raise
ElasticityConfigError
(
f
"Elasticity expected
{
MICRO_BATCHES
}
to only contain a list of integers, "
f
"instead contains: f
{
self
.
micro_batches
}
"
)
if
not
all
(
map
(
lambda
m
:
m
>
0
,
self
.
micro_batches
)):
raise
ElasticityConfigError
(
f
"Elasticity expected
{
MICRO_BATCHES
}
to only contain positive integers, "
f
"instead contains: f
{
self
.
micro_batches
}
"
)
self
.
min_gpus
=
param_dict
.
get
(
MIN_GPUS
,
MIN_GPUS_DEFAULT
)
self
.
max_gpus
=
param_dict
.
get
(
MAX_GPUS
,
MAX_GPUS_DEFAULT
)
if
self
.
min_gpus
<
1
or
self
.
max_gpus
<
1
:
raise
ElasticityConfigError
(
"Elasticity min/max gpus must be > 0, "
f
"given min_gpus:
{
self
.
min_gpus
}
, max_gpus:
{
self
.
max_gpus
}
"
)
if
self
.
max_gpus
<
self
.
min_gpus
:
raise
ElasticityConfigError
(
"Elasticity min_gpus cannot be greater than max_gpus, "
f
"given min_gpus:
{
self
.
min_gpus
}
, max_gpus:
{
self
.
max_gpus
}
"
)
self
.
min_time
=
param_dict
.
get
(
MIN_TIME
,
MIN_TIME_DEFAULT
)
if
self
.
min_time
<
0
:
raise
ElasticityConfigError
(
f
"Elasticity min time needs to be >= 0: given
{
self
.
min_time
}
"
)
self
.
version
=
param_dict
.
get
(
VERSION
,
VERSION_DEFAULT
)
self
.
prefer_larger_batch_size
=
param_dict
.
get
(
PREFER_LARGER_BATCH
,
PREFER_LARGER_BATCH_DEFAULT
)
...
...
deepspeed/elasticity/constants.py
View file @
bc046dc4
...
...
@@ -46,9 +46,9 @@ MIN_GPUS_DEFAULT = 1
MAX_GPUS
=
'max_gpus'
MAX_GPUS_DEFAULT
=
10000
# Minimum running time (minutes) before the scheduler will scale us
# Minimum running time (minutes) before the scheduler will scale us
, 0 implies it's unknown
MIN_TIME
=
"min_time"
MIN_TIME_DEFAULT
=
"20"
MIN_TIME_DEFAULT
=
0
# When finding a suitable batch size, attempt to find one that is closest
# to the max train batch size given.
...
...
tests/unit/test_elastic.py
View file @
bc046dc4
...
...
@@ -107,6 +107,35 @@ def test_empty_config():
target_deepspeed_version
=
ds_version
)
@pytest.mark.parametrize('key, value',
                         [('micro_batch_sizes', [1, 4, -1, 2, -10]),
                          ('min_gpus', -1),
                          ('max_gpus', -1),
                          ('micro_batch_sizes', 5),
                          ('micro_batch_sizes', ['a', None, 0.5]),
                          ('micro_batch_sizes', [2, 0.5, 4])])
def test_invalid_config_values(key, value):
    """Check that one invalid elasticity setting is rejected.

    Each parametrized case overrides a single entry of the ``elasticity``
    section with an invalid value and expects ``compute_elastic_config`` to
    raise ``ElasticityError``.

    Args:
        key: Name of the entry in the ``elasticity`` section to override.
        value: The invalid value to store under ``key``.
    """
    # Function-scope import so this block does not depend on the file's
    # import header.
    import copy

    # A shallow ``.copy()`` would share the nested 'elasticity' dict with
    # ``base_ds_config``, so the assignment below would leak the invalid
    # value into every later case and test. Deep-copy so each case starts
    # from a pristine config and fails only because of its own override.
    # NOTE(review): assumes base_ds_config is a nested dict of plain,
    # deep-copyable values -- confirm against its definition.
    ds_config = copy.deepcopy(base_ds_config)
    ds_config['elasticity'][key] = value
    with pytest.raises(deepspeed.elasticity.config.ElasticityError):
        deepspeed.elasticity.compute_elastic_config(
            ds_config=ds_config,
            target_deepspeed_version=ds_version)
def
test_proper_mbsz
():
ds_config
=
base_ds_config
.
copy
()
ds_config
[
"elasticity"
][
"max_train_batch_size"
]
=
32
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment