Unverified Commit c7aa319b authored by Zhongkai Zhao, committed by GitHub

[test] add no master test for low level zero plugin (#4934)

parent 1f5d2e80
@@ -9,7 +9,8 @@ from .nvme_optimizer import NVMeOptimizer
 class CPUAdam(NVMeOptimizer):
-    """Implements Adam algorithm.
+    """
+    Implements Adam algorithm.
     Supports parameters updating on both GPU and CPU, depending on the device of parameters.
     But the parameters and gradients should be on the same device:
......
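
For context, a minimal usage sketch of CPUAdam with CPU-resident parameters. The class and import path are ColossalAI's; the toy model and hyperparameters below are illustrative assumptions, not part of this commit.

# Hedged sketch: CPUAdam updating parameters that live on the CPU.
import torch
from colossalai.nn.optimizer import CPUAdam

model = torch.nn.Linear(16, 4)                    # parameters are on CPU
optimizer = CPUAdam(model.parameters(), lr=1e-3)

loss = model(torch.randn(2, 16)).sum()            # inputs on the same device as the parameters
loss.backward()                                   # so gradients end up on CPU as well
optimizer.step()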
@@ -106,7 +106,8 @@ def exam_zero_1_2():
 @parameterize("dtype", [torch.float16, torch.bfloat16])
-def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype):
+@parameterize("master_weights", [True, False])
+def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype, master_weights: bool):
     """
     In this test, two pairs of model and optimizers are created.
     1. zero: use sharded optimizer and fp16 parameters
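
As background for the decorator stacking above, parameterize from colossalai.testing reruns the decorated function once per listed value, and stacked decorators cover the cross product of values. A standalone sketch with a hypothetical function (not part of the test file):

from colossalai.testing import parameterize

@parameterize("master_weights", [True, False])
@parameterize("dtype", ["fp16", "bf16"])
def run_case(master_weights, dtype):
    # Executes four times, once per (master_weights, dtype) combination.
    print(master_weights, dtype)

run_case()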
@@ -131,7 +132,11 @@ def exam_zero_1_torch_ddp(world_size, dtype: torch.dtype):
     # in `check_sharded_param_consistency.py`, we will test whether
     # level 1 and 2 will produce exactly the same results
     zero_optimizer = LowLevelZeroOptimizer(
-        zero_optimizer, overlap_communication=True, initial_scale=1, reduce_bucket_size=1024 * 1024
+        zero_optimizer,
+        overlap_communication=True,
+        initial_scale=1,
+        reduce_bucket_size=1024 * 1024,
+        master_weights=master_weights,
     )
     torch_optimizer = torch.optim.SGD(torch_model.parameters(), lr=1)
......
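
For orientation, a hedged sketch of the wrapping step this hunk parameterizes. LowLevelZeroOptimizer and its keyword arguments are taken from the diff; the helper function, model, and base optimizer are illustrative assumptions, and a distributed environment (e.g. one set up via colossalai.launch) is assumed to be initialized before the wrapper is constructed.

import torch
from colossalai.zero import LowLevelZeroOptimizer

def build_zero_optimizer(model: torch.nn.Module, master_weights: bool):
    # Hypothetical helper; assumes torch.distributed is already initialized.
    base_optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    return LowLevelZeroOptimizer(
        base_optimizer,
        overlap_communication=True,
        initial_scale=1,
        reduce_bucket_size=1024 * 1024,
        # master_weights=False exercises the "no master weights" path this commit tests:
        # parameters are updated in their working (fp16/bf16) precision, without fp32 copies.
        master_weights=master_weights,
    )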