Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
369a288b
"examples/tutorial/vscode:/vscode.git/clone" did not exist on "554aa9592ea6568c933b38b5235ec1e8a663bd9f"
Unverified
Commit
369a288b
authored
Apr 01, 2022
by
ver217
Committed by
GitHub
Apr 01, 2022
Browse files
polish utils docstring (#620)
parent
e619a651
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
48 additions
and
54 deletions
+48
-54
colossalai/utils/checkpointing.py
colossalai/utils/checkpointing.py
+1
-1
colossalai/utils/memory_tracer/async_memtracer.py
colossalai/utils/memory_tracer/async_memtracer.py
+23
-24
colossalai/utils/profiler/mem_profiler.py
colossalai/utils/profiler/mem_profiler.py
+5
-7
colossalai/utils/profiler/prof_utils.py
colossalai/utils/profiler/prof_utils.py
+19
-22
No files found.
colossalai/utils/checkpointing.py
View file @
369a288b
...
...
@@ -175,7 +175,7 @@ def load_checkpoint(checkpoint_path: str,
If strict is True, then the keys of state_dict must exactly match the keys returned
by this module’s state_dict() function.
Args:
Args:
checkpoint_path (str): The exact and matched checkpoint_path directory to retrieve appropriate state_dict.
model (:class:`torch.nn.Module`): Model to reload parameters and buffers.
optimizer (Union[:class:`torch.optim.Optimizer`, :class:`colossalai.nn.optimizer`]): Optimizer to recuperate.
...
...
colossalai/utils/memory_tracer/async_memtracer.py
View file @
369a288b
...
...
@@ -11,32 +11,31 @@ from colossalai.utils import get_current_device
class
AsyncMemoryMonitor
:
"""
An Async Memory Monitor running during computing. Sampling memory usage of the current GPU
at interval of 1/(10**power) sec.
at interval of
`
1/(10**power)
`
sec.
The idea comes from Runtime Memory Tracer of PatrickStar
PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management
https://arxiv.org/abs/2108.05818
:param power: the power of time interval, defaults to 10
:type power: int
Usage:
::
```python
async_mem_monitor = AsyncMemoryMonitor()
input = torch.randn(2, 20).cuda()
OP1 = torch.nn.Linear(20, 30).cuda()
OP2 = torch.nn.Linear(30, 40).cuda()
async_mem_monitor.start()
output = OP1(input)
async_mem_monitor.finish()
async_mem_monitor.start()
output = OP2(output)
async_mem_monitor.finish()
async_mem_monitor.save('log.pkl')
```
`PatrickStar: Parallel Training of Pre-trained Models via Chunk-based Memory Management`_
Usage::
async_mem_monitor = AsyncMemoryMonitor()
input = torch.randn(2, 20).cuda()
OP1 = torch.nn.Linear(20, 30).cuda()
OP2 = torch.nn.Linear(30, 40).cuda()
async_mem_monitor.start()
output = OP1(input)
async_mem_monitor.finish()
async_mem_monitor.start()
output = OP2(output)
async_mem_monitor.finish()
async_mem_monitor.save('log.pkl')
Args:
power (int, optional): the power of time interval. Defaults to 10.
.. _PatrickStar\: Parallel Training of Pre-trained Models via Chunk-based Memory Management:
https://arxiv.org/abs/2108.05818
"""
def
__init__
(
self
,
power
:
int
=
10
):
...
...
colossalai/utils/profiler/mem_profiler.py
View file @
369a288b
...
...
@@ -8,10 +8,12 @@ from colossalai.utils.profiler import BaseProfiler
class
MemProfiler
(
BaseProfiler
):
"""Wrapper of MemOpHook, used to show GPU memory usage through each iteration
To use this profiler, you need to pass an `engine` instance. And the usage is same like
CommProfiler.
Usage::
mm_prof = MemProfiler(engine)
with ProfilerContext([mm_prof]) as prof:
writer = SummaryWriter("mem")
...
...
@@ -36,15 +38,11 @@ class MemProfiler(BaseProfiler):
def
to_tensorboard
(
self
,
writer
:
SummaryWriter
)
->
None
:
stats
=
self
.
_mem_tracer
.
async_mem_monitor
.
state_dict
[
'mem_stats'
]
for
info
,
i
in
enumerate
(
stats
):
writer
.
add_scalar
(
"memory_usage/GPU"
,
info
,
i
)
writer
.
add_scalar
(
"memory_usage/GPU"
,
info
,
i
)
def
to_file
(
self
,
data_file
:
Path
)
->
None
:
self
.
_mem_tracer
.
save_results
(
data_file
)
def
show
(
self
)
->
None
:
stats
=
self
.
_mem_tracer
.
async_mem_monitor
.
state_dict
[
'mem_stats'
]
stats
=
self
.
_mem_tracer
.
async_mem_monitor
.
state_dict
[
'mem_stats'
]
print
(
stats
)
colossalai/utils/profiler/prof_utils.py
View file @
369a288b
...
...
@@ -70,29 +70,26 @@ class BaseProfiler(ABC):
class
ProfilerContext
(
object
):
"""
Profiler context manager
Usage:
::
```python
world_size = 4
inputs = torch.randn(10, 10, dtype=torch.float32, device=get_current_device())
outputs = torch.empty(world_size, 10, 10, dtype=torch.float32, device=get_current_device())
outputs_list = list(torch.chunk(outputs, chunks=world_size, dim=0))
cc_prof = CommProfiler()
with ProfilerContext([cc_prof]) as prof:
op = dist.all_reduce(inputs, async_op=True)
dist.all_gather(outputs_list, inputs)
op.wait()
dist.reduce_scatter(inputs, outputs_list)
dist.broadcast(inputs, 0)
dist.reduce(inputs, 0)
"""Profiler context manager
prof.show()
```
Usage::
world_size = 4
inputs = torch.randn(10, 10, dtype=torch.float32, device=get_current_device())
outputs = torch.empty(world_size, 10, 10, dtype=torch.float32, device=get_current_device())
outputs_list = list(torch.chunk(outputs, chunks=world_size, dim=0))
cc_prof = CommProfiler()
with ProfilerContext([cc_prof]) as prof:
op = dist.all_reduce(inputs, async_op=True)
dist.all_gather(outputs_list, inputs)
op.wait()
dist.reduce_scatter(inputs, outputs_list)
dist.broadcast(inputs, 0)
dist.reduce(inputs, 0)
prof.show()
"""
def
__init__
(
self
,
profilers
:
List
[
BaseProfiler
]
=
None
,
enable
:
bool
=
True
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment