Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
02b187c1
Unverified
Commit
02b187c1
authored
Apr 01, 2022
by
LuGY
Committed by
GitHub
Apr 01, 2022
Browse files
[zero] add sampling time for memstats collector (#610)
parent
9bee1191
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
3 deletions
+15
-3
colossalai/utils/memory_tracer/memstats_collector.py
colossalai/utils/memory_tracer/memstats_collector.py
+15
-3
No files found.
colossalai/utils/memory_tracer/memstats_collector.py
View file @
02b187c1
...
@@ -3,6 +3,7 @@ from colossalai.utils.memory_utils.utils import colo_device_memory_used
...
@@ -3,6 +3,7 @@ from colossalai.utils.memory_utils.utils import colo_device_memory_used
from
colossalai.utils
import
get_current_device
from
colossalai.utils
import
get_current_device
import
torch
import
torch
import
time
from
typing
import
List
from
typing
import
List
...
@@ -42,6 +43,8 @@ class MemStatsCollector:
...
@@ -42,6 +43,8 @@ class MemStatsCollector:
self
.
_model_data_cpu_list
=
[]
self
.
_model_data_cpu_list
=
[]
self
.
_overall_cpu_list
=
[]
self
.
_overall_cpu_list
=
[]
self
.
_sampling_time
=
[]
self
.
_start_flag
=
False
self
.
_start_flag
=
False
def
overall_mem_stats
(
self
,
device_type
:
str
):
def
overall_mem_stats
(
self
,
device_type
:
str
):
...
@@ -52,15 +55,15 @@ class MemStatsCollector:
...
@@ -52,15 +55,15 @@ class MemStatsCollector:
else
:
else
:
raise
TypeError
raise
TypeError
@
property
def
model_data_cuda_list
(
self
,
device_type
:
str
,
unit
:
str
=
'B'
)
->
List
[
int
]:
def
model_data_cuda_list
(
self
,
device_type
:
str
,
unit
:
str
=
'B'
)
->
List
[
int
]:
scale
=
1
if
unit
==
'GB'
:
if
unit
==
'GB'
:
scale
=
1e9
scale
=
1e9
elif
unit
==
'MB'
:
elif
unit
==
'MB'
:
scale
=
1e6
scale
=
1e6
elif
unit
==
'KB'
:
elif
unit
==
'KB'
:
scale
=
1e3
scale
=
1e3
elif
unit
==
'B'
:
scale
=
1
else
:
else
:
raise
TypeError
raise
TypeError
...
@@ -74,13 +77,16 @@ class MemStatsCollector:
...
@@ -74,13 +77,16 @@ class MemStatsCollector:
def
non_model_data_cuda_list
(
self
,
device_type
:
str
,
unit
:
str
=
'B'
)
->
List
[
int
]:
def
non_model_data_cuda_list
(
self
,
device_type
:
str
,
unit
:
str
=
'B'
)
->
List
[
int
]:
"""Non model data stats
"""Non model data stats
"""
"""
scale
=
1
if
unit
==
'GB'
:
if
unit
==
'GB'
:
scale
=
1e9
scale
=
1e9
elif
unit
==
'MB'
:
elif
unit
==
'MB'
:
scale
=
1e6
scale
=
1e6
elif
unit
==
'KB'
:
elif
unit
==
'KB'
:
scale
=
1e3
scale
=
1e3
elif
unit
==
'B'
:
scale
=
1
else
:
raise
TypeError
if
device_type
==
'cuda'
:
if
device_type
==
'cuda'
:
return
[(
v1
-
v2
)
/
scale
for
v1
,
v2
in
zip
(
self
.
_overall_cuda_list
,
self
.
_model_data_cuda_list
)]
return
[(
v1
-
v2
)
/
scale
for
v1
,
v2
in
zip
(
self
.
_overall_cuda_list
,
self
.
_model_data_cuda_list
)]
...
@@ -89,6 +95,10 @@ class MemStatsCollector:
...
@@ -89,6 +95,10 @@ class MemStatsCollector:
else
:
else
:
raise
TypeError
raise
TypeError
@
property
def
sampling_time
(
self
):
return
[
t
-
self
.
_sampling_time
[
0
]
for
t
in
self
.
_sampling_time
]
def
start_collection
(
self
):
def
start_collection
(
self
):
self
.
_start_flag
=
True
self
.
_start_flag
=
True
...
@@ -110,6 +120,8 @@ class MemStatsCollector:
...
@@ -110,6 +120,8 @@ class MemStatsCollector:
self
.
_model_data_cpu_list
.
append
(
GLOBAL_MODEL_DATA_TRACER
.
cpu_usage
)
self
.
_model_data_cpu_list
.
append
(
GLOBAL_MODEL_DATA_TRACER
.
cpu_usage
)
self
.
_overall_cpu_list
.
append
(
colo_device_memory_used
(
torch
.
device
(
f
'cpu'
)))
self
.
_overall_cpu_list
.
append
(
colo_device_memory_used
(
torch
.
device
(
f
'cpu'
)))
self
.
_sampling_time
.
append
(
time
.
time
())
self
.
_sampling_cnter
.
advance
()
self
.
_sampling_cnter
.
advance
()
def
reset_sampling_cnter
(
self
)
->
None
:
def
reset_sampling_cnter
(
self
)
->
None
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment