Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
dba7e0cf
Unverified
Commit
dba7e0cf
authored
Jun 30, 2022
by
ver217
Committed by
GitHub
Jun 30, 2022
Browse files
make AutoPlacementPolicy configurable (#1191)
parent
2053e138
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
18 additions
and
6 deletions
+18
-6
colossalai/gemini/placement_policy.py
colossalai/gemini/placement_policy.py
+18
-6
No files found.
colossalai/gemini/placement_policy.py
View file @
dba7e0cf
...
@@ -58,13 +58,13 @@ class CUDAPlacementPolicy(PlacementPolicy):
...
@@ -58,13 +58,13 @@ class CUDAPlacementPolicy(PlacementPolicy):
class
AutoPlacementPolicy
(
PlacementPolicy
):
class
AutoPlacementPolicy
(
PlacementPolicy
):
need_mem_stats
:
bool
=
True
need_mem_stats
:
bool
=
True
# model data will use 1-_warmup_non_model_data_ratio CUDA memory in warmup phase
# you can set them by AutoPlacementPolicy.set_warmup_non_model_data_ratio() and AutoPlacementPolicy.set_steady_cuda_cap_ratio()
_warmup_non_model_data_ratio
:
float
=
0.8
_steady_cuda_cap_ratio
:
float
=
0.9
def
__init__
(
self
,
chunk_manager
:
ChunkManager
,
mem_stats_collector
:
Optional
[
MemStatsCollectorV2
]
=
None
)
->
None
:
def
__init__
(
self
,
chunk_manager
:
ChunkManager
,
mem_stats_collector
:
Optional
[
MemStatsCollectorV2
]
=
None
)
->
None
:
super
().
__init__
(
chunk_manager
,
mem_stats_collector
=
mem_stats_collector
)
super
().
__init__
(
chunk_manager
,
mem_stats_collector
=
mem_stats_collector
)
# model data will use 1-self._warmup_non_model_data_ratio CUDA memory in warmup phase
# TODO(ver217): make these args configurable
self
.
_warmup_non_model_data_ratio
:
float
=
0.8
self
.
_steady_cuda_cap_ratio
:
float
=
0.9
def
evict_tensors
(
self
,
def
evict_tensors
(
self
,
can_evict_chunks
:
List
[
Chunk
],
can_evict_chunks
:
List
[
Chunk
],
...
@@ -94,11 +94,11 @@ class AutoPlacementPolicy(PlacementPolicy):
...
@@ -94,11 +94,11 @@ class AutoPlacementPolicy(PlacementPolicy):
used_cuda_model_data
=
self
.
chunk_manager
.
total_mem
[
'cuda'
]
used_cuda_model_data
=
self
.
chunk_manager
.
total_mem
[
'cuda'
]
if
warmup
:
if
warmup
:
# We designate a part of CUDA memory for model data in warmup iterations.
# We designate a part of CUDA memory for model data in warmup iterations.
max_cuda_non_model_data_per_period
=
cuda_capacity
*
self
.
_warmup_non_model_data_ratio
max_cuda_non_model_data_per_period
=
cuda_capacity
*
AutoPlacementPolicy
.
_warmup_non_model_data_ratio
else
:
else
:
# max non-model-data cuda memory consumption of this sampling moment and the next sampling moment.
# max non-model-data cuda memory consumption of this sampling moment and the next sampling moment.
max_cuda_non_model_data_per_period
=
self
.
mem_stats_collector
.
next_period_non_model_data_usage
(
'cuda'
)
max_cuda_non_model_data_per_period
=
self
.
mem_stats_collector
.
next_period_non_model_data_usage
(
'cuda'
)
cuda_capacity
*=
self
.
_steady_cuda_cap_ratio
cuda_capacity
*=
AutoPlacementPolicy
.
_steady_cuda_cap_ratio
total_cuda_model_data
=
cuda_capacity
-
max_cuda_non_model_data_per_period
total_cuda_model_data
=
cuda_capacity
-
max_cuda_non_model_data_per_period
avail_cuda_model_data
=
total_cuda_model_data
-
used_cuda_model_data
avail_cuda_model_data
=
total_cuda_model_data
-
used_cuda_model_data
freed_cuda_model_data
=
0
freed_cuda_model_data
=
0
...
@@ -133,6 +133,18 @@ class AutoPlacementPolicy(PlacementPolicy):
...
@@ -133,6 +133,18 @@ class AutoPlacementPolicy(PlacementPolicy):
next_compute_idx
=
sorted
(
next_compute_idx
.
items
(),
key
=
lambda
pair
:
pair
[
1
],
reverse
=
True
)
next_compute_idx
=
sorted
(
next_compute_idx
.
items
(),
key
=
lambda
pair
:
pair
[
1
],
reverse
=
True
)
return
[
t
for
(
t
,
idx
)
in
next_compute_idx
]
return
[
t
for
(
t
,
idx
)
in
next_compute_idx
]
@
staticmethod
def
set_warmup_non_model_data_ratio
(
ratio
:
float
)
->
None
:
ratio
=
float
(
ratio
)
assert
0.0
<
ratio
<
1.0
AutoPlacementPolicy
.
_warmup_non_model_data_ratio
=
ratio
@
staticmethod
def
set_steady_cuda_cap_ratio
(
ratio
:
float
)
->
None
:
ratio
=
float
(
ratio
)
assert
0.0
<
ratio
<
1.0
AutoPlacementPolicy
.
_steady_cuda_cap_ratio
=
ratio
class
PlacementPolicyFactory
:
class
PlacementPolicyFactory
:
policies
:
Dict
[
str
,
PlacementPolicy
]
=
{
policies
:
Dict
[
str
,
PlacementPolicy
]
=
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment