# colossalai_zero.py — ZeRO sharded-training configuration for ColossalAI.
from colossalai.nn.optimizer import FusedAdam

# TensorShardStrategy was relocated between ColossalAI releases: try the
# original location first, then fall back to the legacy namespace used by
# newer versions.
try:
    from colossalai.zero.shard_utils import TensorShardStrategy
except ImportError:
    # colossalai > 0.2.8
    from colossalai.zero.legacy import TensorShardStrategy

# fp16 = dict(
#     mode=AMP_TYPE.TORCH,
# )

# seed = 2
# ZeRO configuration consumed by ColossalAI's engine initializer.
# `model_config` controls how parameter/gradient shards are placed and
# reduced; `optimizer_config` controls the sharded optimizer's dynamic
# loss-scaling schedule for mixed precision.
zero = dict(
    model_config={
        "shard_strategy": TensorShardStrategy(),
        # bucket size (MB) for the reduce-scatter of gradients
        "reduce_scatter_bucket_size_mb": 25,
        "fp32_reduce_scatter": False,
        # keep shards on GPU rather than offloading to CPU/NVMe
        "tensor_placement_policy": "cuda",
        "gradient_predivide_factor": 1.0,
        "reuse_fp16_shard": False,
    },
    optimizer_config={
        # fraction of spare GPU memory the optimizer may claim
        "gpu_margin_mem_ratio": 0.8,
        # dynamic loss-scale schedule: start at 2**5, grow x2 every
        # 1000 overflow-free steps, back off x0.5 on overflow
        "initial_scale": 2**5,
        "min_scale": 1,
        "growth_factor": 2,
        "backoff_factor": 0.5,
        "growth_interval": 1000,
        "hysteresis": 2,
        "max_scale": 2**32,
    },
)

# gradient_accumulation = 4
# Clip the global gradient norm to 1.0 to stabilise training.
clip_grad_norm = 1.0

# Optimizer spec: fused CUDA Adam kernel with the recipe's base LR and
# decoupled weight decay.
optimizer = {
    "type": FusedAdam,
    "lr": 1.5e-4,
    "weight_decay": 1e-2,
}
