Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
9330be0f
Unverified
Commit
9330be0f
authored
Mar 24, 2022
by
Jiarui Fang
Committed by
GitHub
Mar 24, 2022
Browse files
[memory] set cuda mem frac (#506)
parent
97933b67
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
43 additions
and
11 deletions
+43
-11
colossalai/utils/commons/__init__.py
colossalai/utils/commons/__init__.py
+0
-3
colossalai/utils/memory_tracer/async_memtracer.py
colossalai/utils/memory_tracer/async_memtracer.py
+5
-0
colossalai/utils/memory_utils/bucket_tensor_copy.py
colossalai/utils/memory_utils/bucket_tensor_copy.py
+0
-0
colossalai/utils/memory_utils/utils.py
colossalai/utils/memory_utils/utils.py
+37
-7
tests/test_utils/test_bucket_tensor_copy.py
tests/test_utils/test_bucket_tensor_copy.py
+1
-1
No files found.
colossalai/utils/commons/__init__.py
deleted
100644 → 0
View file @
97933b67
from
.bucket_tensor_copy
import
BucketizedTensorCopy
__all__
=
[
'BucketizedTensorCopy'
]
colossalai/utils/memory_tracer/async_memtracer.py
View file @
9330be0f
...
@@ -29,6 +29,10 @@ class AsyncMemoryMonitor:
...
@@ -29,6 +29,10 @@ class AsyncMemoryMonitor:
An Async Memory Monitor running during computing. Sampling memory usage of the current GPU
An Async Memory Monitor running during computing. Sampling memory usage of the current GPU
at interval of 1/(10**power) sec.
at interval of 1/(10**power) sec.
The idea comes from Runtime Memory Tracer of PatrickStar
PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management
https://arxiv.org/abs/2108.05818
:param power: the power of time interval, defaults to 10
:param power: the power of time interval, defaults to 10
:type power: int
:type power: int
...
@@ -54,6 +58,7 @@ class AsyncMemoryMonitor:
...
@@ -54,6 +58,7 @@ class AsyncMemoryMonitor:
self
.
keep_measuring
=
False
self
.
keep_measuring
=
False
current_device
=
get_current_device
()
current_device
=
get_current_device
()
def
_set_cuda_device
():
def
_set_cuda_device
():
torch
.
cuda
.
set_device
(
current_device
)
torch
.
cuda
.
set_device
(
current_device
)
...
...
colossalai/utils/
common
s/bucket_tensor_copy.py
→
colossalai/utils/
memory_util
s/bucket_tensor_copy.py
View file @
9330be0f
File moved
colossalai/utils/memory_utils/utils.py
View file @
9330be0f
...
@@ -5,12 +5,27 @@ from colossalai.utils.memory_tracer.model_data_memtracer import GLOBAL_MODEL_DAT
...
@@ -5,12 +5,27 @@ from colossalai.utils.memory_tracer.model_data_memtracer import GLOBAL_MODEL_DAT
from
typing
import
Union
from
typing
import
Union
_GLOBAL_CUDA_MEM_FRACTION
=
1.0
def
colo_cuda_memory_capacity
():
def
colo_set_process_memory_fraction
(
ratio
:
float
)
->
None
:
"""colo_set_process_memory_fraction
Set how much CUDA memory may be used on the GPU belonging to the current process.
Args:
ratio (float): a ratio in the range 0.0 to 1.0.
"""
global
_GLOBAL_CUDA_MEM_FRACTION
_GLOBAL_CUDA_MEM_FRACTION
=
ratio
torch
.
cuda
.
set_per_process_memory_fraction
(
_GLOBAL_CUDA_MEM_FRACTION
,
get_current_device
())
def
colo_cuda_memory_capacity
()
->
float
:
"""
"""
Get the CUDA memory capacity of the current CUDA device.
Get the CUDA memory capacity of the current CUDA device.
"""
"""
return
torch
.
cuda
.
get_device_properties
(
get_current_device
()).
total_memory
return
torch
.
cuda
.
get_device_properties
(
get_current_device
()).
total_memory
*
_GLOBAL_CUDA_MEM_FRACTION
def
colo_model_data_tensor_move
(
src_t
:
Union
[
ShardedTensor
,
torch
.
Tensor
],
tgt_t
:
Union
[
ShardedTensor
,
def
colo_model_data_tensor_move
(
src_t
:
Union
[
ShardedTensor
,
torch
.
Tensor
],
tgt_t
:
Union
[
ShardedTensor
,
...
@@ -50,10 +65,25 @@ def colo_model_data_tensor_move(src_t: Union[ShardedTensor, torch.Tensor], tgt_t
...
@@ -50,10 +65,25 @@ def colo_model_data_tensor_move(src_t: Union[ShardedTensor, torch.Tensor], tgt_t
src_t
.
data
=
torch
.
tensor
([],
device
=
src_dev
,
dtype
=
src_t_payload
.
dtype
)
src_t
.
data
=
torch
.
tensor
([],
device
=
src_dev
,
dtype
=
src_t_payload
.
dtype
)
def
colo_model_data_move_to_cpu
(
t
:
torch
.
Tensor
):
def
colo_model_data_move_to_cpu
(
t
:
Union
[
ShardedTensor
,
torch
.
Tensor
])
->
None
:
assert
isinstance
(
t
,
torch
.
Tensor
)
"""colo_model_data_move_to_cpu
if
t
.
device
.
type
==
'cpu'
:
move a model data tensor from gpu to cpu
Args:
t (Union[ShardedTensor, torch.Tensor]): the model data tensor to move to CPU.
"""
if
isinstance
(
t
,
ShardedTensor
):
t_payload
=
t
.
payload
elif
isinstance
(
t
,
torch
.
Tensor
):
t_payload
=
t
else
:
raise
TypeError
(
'colo_model_data_move_to_cpu dose not accept type {type(t)}'
)
if
t_payload
.
device
.
type
==
'cpu'
:
return
return
GLOBAL_MODEL_DATA_TRACER
.
delete_tensor
(
t
)
# TODO() optimize the tensor moving with non-blocking
t
.
data
=
t
.
data
.
cpu
()
GLOBAL_MODEL_DATA_TRACER
.
delete_tensor
(
t_payload
)
t_payload
.
data
=
t_payload
.
data
.
cpu
()
tests/test_utils/test_bucket_tensor_copy.py
View file @
9330be0f
from
colossalai.utils.
commons
import
BucketizedTensorCopy
from
colossalai.utils.
memory_utils.bucket_tensor_copy
import
BucketizedTensorCopy
from
colossalai.zero.sharded_param
import
ShardedParamV2
from
colossalai.zero.sharded_param
import
ShardedParamV2
from
colossalai.utils
import
free_port
from
colossalai.utils
import
free_port
import
torch
import
torch
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment