Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
369a288b
"examples/tutorial/vscode:/vscode.git/clone" did not exist on "554aa9592ea6568c933b38b5235ec1e8a663bd9f"
Unverified
Commit
369a288b
authored
Apr 01, 2022
by
ver217
Committed by
GitHub
Apr 01, 2022
Browse files
polish utils docstring (#620)
parent
e619a651
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
48 additions
and
54 deletions
+48
-54
colossalai/utils/checkpointing.py
colossalai/utils/checkpointing.py
+1
-1
colossalai/utils/memory_tracer/async_memtracer.py
colossalai/utils/memory_tracer/async_memtracer.py
+23
-24
colossalai/utils/profiler/mem_profiler.py
colossalai/utils/profiler/mem_profiler.py
+5
-7
colossalai/utils/profiler/prof_utils.py
colossalai/utils/profiler/prof_utils.py
+19
-22
No files found.
colossalai/utils/checkpointing.py
View file @
369a288b
...
...
@@ -175,7 +175,7 @@ def load_checkpoint(checkpoint_path: str,
If strict is True, then the keys of state_dict must exactly match the keys returned
by this module’s state_dict() function.
Args:
Args:
checkpoint_path (str): The exact and matched checkpoint_path directory to retrieve appropriate state_dict.
model (:class:`torch.nn.Module`): Model to reload parameters and buffers.
optimizer (Union[:class:`torch.optim.Optimizer`, :class:`colossalai.nn.optimizer`]): Optimizer to recuperate.
...
...
colossalai/utils/memory_tracer/async_memtracer.py
View file @
369a288b
...
...
@@ -11,32 +11,31 @@ from colossalai.utils import get_current_device
class
AsyncMemoryMonitor
:
"""
An Async Memory Monitor running during computing. Sampling memory usage of the current GPU
at interval of 1/(10**power) sec.
at interval of
`
1/(10**power)
`
sec.
The idea comes from Runtime Memory Tracer of PatrickStar
PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management
https://arxiv.org/abs/2108.05818
:param power: the power of time interval, defaults to 10
:type power: int
Usage:
::
```python
async_mem_monitor = AsyncMemoryMonitor()
input = torch.randn(2, 20).cuda()
OP1 = torch.nn.Linear(20, 30).cuda()
OP2 = torch.nn.Linear(30, 40).cuda()
async_mem_monitor.start()
output = OP1(input)
async_mem_monitor.finish()
async_mem_monitor.start()
output = OP2(output)
async_mem_monitor.finish()
async_mem_monitor.save('log.pkl')
```
`PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management`_
Usage::
async_mem_monitor = AsyncMemoryMonitor()
input = torch.randn(2, 20).cuda()
OP1 = torch.nn.Linear(20, 30).cuda()
OP2 = torch.nn.Linear(30, 40).cuda()
async_mem_monitor.start()
output = OP1(input)
async_mem_monitor.finish()
async_mem_monitor.start()
output = OP2(output)
async_mem_monitor.finish()
async_mem_monitor.save('log.pkl')
Args:
power (int, optional): the power of time interval. Defaults to 10.
.. _PatrickStar\: Parallel Training of Pre-trained Models via Chunk-based Memory Management:
https://arxiv.org/abs/2108.05818
"""
def
__init__
(
self
,
power
:
int
=
10
):
...
...
colossalai/utils/profiler/mem_profiler.py
View file @
369a288b
...
...
@@ -8,10 +8,12 @@ from colossalai.utils.profiler import BaseProfiler
class
MemProfiler
(
BaseProfiler
):
"""Wrapper of MemOpHook, used to show GPU memory usage through each iteration
To use this profiler, you need to pass an `engine` instance. And the usage is same like
CommProfiler.
Usage::
mm_prof = MemProfiler(engine)
with ProfilerContext([mm_prof]) as prof:
writer = SummaryWriter("mem")
...
...
@@ -36,15 +38,11 @@ class MemProfiler(BaseProfiler):
def
to_tensorboard
(
self
,
writer
:
SummaryWriter
)
->
None
:
stats
=
self
.
_mem_tracer
.
async_mem_monitor
.
state_dict
[
'mem_stats'
]
for
info
,
i
in
enumerate
(
stats
):
writer
.
add_scalar
(
"memory_usage/GPU"
,
info
,
i
)
writer
.
add_scalar
(
"memory_usage/GPU"
,
info
,
i
)
def
to_file
(
self
,
data_file
:
Path
)
->
None
:
self
.
_mem_tracer
.
save_results
(
data_file
)
def
show
(
self
)
->
None
:
stats
=
self
.
_mem_tracer
.
async_mem_monitor
.
state_dict
[
'mem_stats'
]
stats
=
self
.
_mem_tracer
.
async_mem_monitor
.
state_dict
[
'mem_stats'
]
print
(
stats
)
colossalai/utils/profiler/prof_utils.py
View file @
369a288b
...
...
@@ -70,29 +70,26 @@ class BaseProfiler(ABC):
class
ProfilerContext
(
object
):
"""
Profiler context manager
Usage:
::
```python
world_size = 4
inputs = torch.randn(10, 10, dtype=torch.float32, device=get_current_device())
outputs = torch.empty(world_size, 10, 10, dtype=torch.float32, device=get_current_device())
outputs_list = list(torch.chunk(outputs, chunks=world_size, dim=0))
cc_prof = CommProfiler()
with ProfilerContext([cc_prof]) as prof:
op = dist.all_reduce(inputs, async_op=True)
dist.all_gather(outputs_list, inputs)
op.wait()
dist.reduce_scatter(inputs, outputs_list)
dist.broadcast(inputs, 0)
dist.reduce(inputs, 0)
"""Profiler context manager
prof.show()
```
Usage::
world_size = 4
inputs = torch.randn(10, 10, dtype=torch.float32, device=get_current_device())
outputs = torch.empty(world_size, 10, 10, dtype=torch.float32, device=get_current_device())
outputs_list = list(torch.chunk(outputs, chunks=world_size, dim=0))
cc_prof = CommProfiler()
with ProfilerContext([cc_prof]) as prof:
op = dist.all_reduce(inputs, async_op=True)
dist.all_gather(outputs_list, inputs)
op.wait()
dist.reduce_scatter(inputs, outputs_list)
dist.broadcast(inputs, 0)
dist.reduce(inputs, 0)
prof.show()
"""
def
__init__
(
self
,
profilers
:
List
[
BaseProfiler
]
=
None
,
enable
:
bool
=
True
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment