# config.py
from colossalai.legacy.amp import AMP_TYPE
2
3
4
5

# Hyperparameters
# BATCH_SIZE is the per-GPU batch size:
# global batch size = BATCH_SIZE x data parallel size
BATCH_SIZE = 4
LEARNING_RATE = 3e-3
WEIGHT_DECAY = 0.3
NUM_EPOCHS = 2
# NOTE(review): presumably the number of learning-rate warmup epochs out of
# NUM_EPOCHS; the consumer of this value is not visible in this file.
WARMUP_EPOCHS = 1
11
12
13
14

# Model config
IMG_SIZE = 224
PATCH_SIZE = 16
HIDDEN_SIZE = 128
DEPTH = 4
NUM_HEADS = 4
MLP_RATIO = 2
NUM_CLASSES = 10
CHECKPOINT = False
# Number of patches per image, (IMG_SIZE // PATCH_SIZE) ** 2, plus 1 for the
# cls token. With the values above: 14 ** 2 + 1 = 197.
SEQ_LENGTH = (IMG_SIZE // PATCH_SIZE) ** 2 + 1
22
23
24

# Parallel setting
TENSOR_PARALLEL_SIZE = 2
TENSOR_PARALLEL_MODE = "1d"

# Parallelism layout: a pipeline size of 2, plus a tensor-parallel entry built
# from the two constants above.
parallel = dict(
    pipeline=2,
    tensor=dict(mode=TENSOR_PARALLEL_MODE, size=TENSOR_PARALLEL_SIZE),
)

# Mixed-precision setting: selects the AMP_TYPE.NAIVE mode.
fp16 = {"mode": AMP_TYPE.NAIVE}
# NOTE(review): presumably the maximum gradient norm used for clipping; the
# consumer of this value is not visible in this file.
clip_grad_norm = 1.0

# Pipeline config
# The micro-batch count is tied to the pipeline size declared in `parallel`.
NUM_MICRO_BATCHES = parallel["pipeline"]