Commit 6a10c7bf authored by unknown
Browse files

提交Swin-Transformer代码

parents
# Config for swin_large_patch4_window7_224_22kto1k_finetune.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN,
# TRAIN as a top-level sibling) — confirm against the consumer's schema.
MODEL:
  TYPE: swin
  NAME: swin_large_patch4_window7_224_22kto1k_finetune
  DROP_PATH_RATE: 0.2
  SWIN:
    EMBED_DIM: 192
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [6, 12, 24, 48]
    WINDOW_SIZE: 7
TRAIN:
  EPOCHS: 30
  WARMUP_EPOCHS: 5
  # Written with an explicit ".0": YAML 1.1 loaders (e.g. PyYAML) resolve
  # dot-less forms such as "1e-8" as strings, not floats.
  WEIGHT_DECAY: 1.0e-8
  BASE_LR: 2.0e-05
  WARMUP_LR: 2.0e-08
  MIN_LR: 2.0e-07
\ No newline at end of file
# Config for swin_small_patch4_window7_224.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN)
# — confirm against the consumer's schema.
MODEL:
  TYPE: swin
  NAME: swin_small_patch4_window7_224
  DROP_PATH_RATE: 0.3
  SWIN:
    EMBED_DIM: 96
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 7
\ No newline at end of file
# Config for swin_small_patch4_window7_224_22k.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN;
# DATA and TRAIN as top-level siblings) — confirm against the consumer.
DATA:
  DATASET: imagenet22K
MODEL:
  TYPE: swin
  NAME: swin_small_patch4_window7_224_22k
  DROP_PATH_RATE: 0.2
  SWIN:
    EMBED_DIM: 96
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 7
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 5
  WEIGHT_DECAY: 0.05
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
\ No newline at end of file
# Config for swin_small_patch4_window7_224_22kto1k_finetune.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN,
# TRAIN as a top-level sibling) — confirm against the consumer's schema.
MODEL:
  TYPE: swin
  NAME: swin_small_patch4_window7_224_22kto1k_finetune
  DROP_PATH_RATE: 0.2
  SWIN:
    EMBED_DIM: 96
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 7
TRAIN:
  EPOCHS: 30
  WARMUP_EPOCHS: 5
  # Written with an explicit ".0": YAML 1.1 loaders (e.g. PyYAML) resolve
  # dot-less forms such as "1e-8" as strings, not floats.
  WEIGHT_DECAY: 1.0e-8
  BASE_LR: 2.0e-05
  WARMUP_LR: 2.0e-08
  MIN_LR: 2.0e-07
\ No newline at end of file
# Config for swin_tiny_c24_patch4_window8_256.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN,
# DATA as a top-level sibling) — confirm against the consumer's schema.
DATA:
  IMG_SIZE: 256
MODEL:
  TYPE: swin
  NAME: swin_tiny_c24_patch4_window8_256
  DROP_PATH_RATE: 0.2
  SWIN:
    EMBED_DIM: 96
    DEPTHS: [2, 2, 6, 2]
    # 96 / 4 = 24, matching the "c24" in NAME — presumably channels per
    # head; TODO confirm.
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 8
\ No newline at end of file
# Config for swin_tiny_patch4_window7_224.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN,
# NUM_CLASSES assumed to sit directly under MODEL) — confirm against the
# consumer's schema.
MODEL:
  # The only config in this set that overrides the class count.
  NUM_CLASSES: 200
  TYPE: swin
  NAME: swin_tiny_patch4_window7_224
  DROP_PATH_RATE: 0.2
  SWIN:
    EMBED_DIM: 96
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 6, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 7
# Config for swin_tiny_patch4_window7_224_22k.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN;
# DATA and TRAIN as top-level siblings) — confirm against the consumer.
DATA:
  DATASET: imagenet22K
MODEL:
  TYPE: swin
  NAME: swin_tiny_patch4_window7_224_22k
  DROP_PATH_RATE: 0.1
  SWIN:
    EMBED_DIM: 96
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 6, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 7
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 5
  WEIGHT_DECAY: 0.05
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
\ No newline at end of file
# Config for swin_tiny_patch4_window7_224_22kto1k_finetune.
# NOTE(review): nesting reconstructed from a flattened scrape (MODEL > SWIN,
# TRAIN as a top-level sibling) — confirm against the consumer's schema.
MODEL:
  TYPE: swin
  NAME: swin_tiny_patch4_window7_224_22kto1k_finetune
  DROP_PATH_RATE: 0.1
  SWIN:
    EMBED_DIM: 96
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 6, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 7
TRAIN:
  EPOCHS: 30
  WARMUP_EPOCHS: 5
  # Written with an explicit ".0": YAML 1.1 loaders (e.g. PyYAML) resolve
  # dot-less forms such as "1e-8" as strings, not floats.
  WEIGHT_DECAY: 1.0e-8
  BASE_LR: 2.0e-05
  WARMUP_LR: 2.0e-08
  MIN_LR: 2.0e-07
\ No newline at end of file
# Config for swin_mlp_base_patch4_window7_224.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MLP) — confirm against the consumer's schema.
MODEL:
  TYPE: swin_mlp
  NAME: swin_mlp_base_patch4_window7_224
  DROP_PATH_RATE: 0.5
  SWIN_MLP:
    EMBED_DIM: 128
    # One entry per stage; NUM_HEADS has the same length as DEPTHS.
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 7
# Config for swin_mlp_tiny_c12_patch4_window8_256.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MLP, DATA as a top-level sibling) — confirm against the
# consumer's schema.
DATA:
  IMG_SIZE: 256
MODEL:
  TYPE: swin_mlp
  NAME: swin_mlp_tiny_c12_patch4_window8_256
  DROP_PATH_RATE: 0.2
  SWIN_MLP:
    EMBED_DIM: 96
    DEPTHS: [2, 2, 6, 2]
    # 96 / 8 = 12, matching the "c12" in NAME — presumably channels per
    # head; TODO confirm.
    NUM_HEADS: [8, 16, 32, 64]
    WINDOW_SIZE: 8
\ No newline at end of file
# Config for swin_mlp_tiny_c24_patch4_window8_256.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MLP, DATA as a top-level sibling) — confirm against the
# consumer's schema.
DATA:
  IMG_SIZE: 256
MODEL:
  TYPE: swin_mlp
  NAME: swin_mlp_tiny_c24_patch4_window8_256
  DROP_PATH_RATE: 0.2
  SWIN_MLP:
    EMBED_DIM: 96
    DEPTHS: [2, 2, 6, 2]
    # 96 / 4 = 24, matching the "c24" in NAME — presumably channels per
    # head; TODO confirm.
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 8
\ No newline at end of file
# Config for swin_mlp_tiny_c6_patch4_window8_256.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MLP, DATA as a top-level sibling) — confirm against the
# consumer's schema.
DATA:
  IMG_SIZE: 256
MODEL:
  TYPE: swin_mlp
  NAME: swin_mlp_tiny_c6_patch4_window8_256
  DROP_PATH_RATE: 0.2
  SWIN_MLP:
    EMBED_DIM: 96
    DEPTHS: [2, 2, 6, 2]
    # 96 / 16 = 6, matching the "c6" in NAME — presumably channels per
    # head; TODO confirm.
    NUM_HEADS: [16, 32, 64, 128]
    WINDOW_SIZE: 8
\ No newline at end of file
# Config for swin_moe_base_patch4_window12_192_16expert_32gpu_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE; DATA, TRAIN and TEST as top-level siblings) — confirm
# against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_base_patch4_window12_192_16expert_32gpu_22k
  DROP_PATH_RATE: 0.3
  SWIN_MOE:
    EMBED_DIM: 128
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    INIT_STD: 0.005
    # One list per entry in DEPTHS; presumably the block indices that use
    # MoE, with -1 meaning none — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [1, 3, 5, 7, 9, 11, 13, 15, 17], [1]]
    # NOTE(review): a negative value presumably means one expert shared by
    # 2 GPUs (32 GPUs / 2 = 16 experts, matching NAME) — confirm.
    NUM_LOCAL_EXPERTS: -2
    TOP_VALUE: 1
    CAPACITY_FACTOR: 1.25
    IS_GSHARD_LOSS: false
    MOE_DROP: 0.1
    AUX_LOSS_WEIGHT: 0.01
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
TEST:
  SHUFFLE: true
\ No newline at end of file
# Config for swin_moe_base_patch4_window12_192_32expert_32gpu_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE; DATA, TRAIN and TEST as top-level siblings) — confirm
# against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_base_patch4_window12_192_32expert_32gpu_22k
  DROP_PATH_RATE: 0.3
  SWIN_MOE:
    EMBED_DIM: 128
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    INIT_STD: 0.005
    # One list per entry in DEPTHS; presumably the block indices that use
    # MoE, with -1 meaning none — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [1, 3, 5, 7, 9, 11, 13, 15, 17], [1]]
    # Presumably one expert per GPU (32 experts on 32 GPUs per NAME) —
    # TODO confirm.
    NUM_LOCAL_EXPERTS: 1
    TOP_VALUE: 1
    CAPACITY_FACTOR: 1.25
    IS_GSHARD_LOSS: false
    MOE_DROP: 0.1
    AUX_LOSS_WEIGHT: 0.01
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
TEST:
  SHUFFLE: true
\ No newline at end of file
# Config for swin_moe_base_patch4_window12_192_8expert_32gpu_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE; DATA, TRAIN and TEST as top-level siblings) — confirm
# against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_base_patch4_window12_192_8expert_32gpu_22k
  DROP_PATH_RATE: 0.3
  SWIN_MOE:
    EMBED_DIM: 128
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    INIT_STD: 0.005
    # One list per entry in DEPTHS; presumably the block indices that use
    # MoE, with -1 meaning none — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [1, 3, 5, 7, 9, 11, 13, 15, 17], [1]]
    # NOTE(review): a negative value presumably means one expert shared by
    # 4 GPUs (32 GPUs / 4 = 8 experts, matching NAME) — confirm.
    NUM_LOCAL_EXPERTS: -4
    TOP_VALUE: 1
    CAPACITY_FACTOR: 1.25
    IS_GSHARD_LOSS: false
    MOE_DROP: 0.1
    AUX_LOSS_WEIGHT: 0.01
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
TEST:
  SHUFFLE: true
\ No newline at end of file
# Config for
# swin_moe_base_patch4_window12_192_cosine_router_32expert_32gpu_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE; DATA, TRAIN and TEST as top-level siblings) — confirm
# against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_base_patch4_window12_192_cosine_router_32expert_32gpu_22k
  DROP_PATH_RATE: 0.3
  SWIN_MOE:
    EMBED_DIM: 128
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    INIT_STD: 0.005
    # One list per entry in DEPTHS; presumably the block indices that use
    # MoE, with -1 meaning none — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [1, 3, 5, 7, 9, 11, 13, 15, 17], [1]]
    # Presumably one expert per GPU (32 experts on 32 GPUs per NAME) —
    # TODO confirm.
    NUM_LOCAL_EXPERTS: 1
    TOP_VALUE: 1
    CAPACITY_FACTOR: 1.25
    # The key that sets this config apart from the plain 32-expert one.
    COSINE_ROUTER: true
    IS_GSHARD_LOSS: false
    MOE_DROP: 0.1
    AUX_LOSS_WEIGHT: 0.01
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
TEST:
  SHUFFLE: true
\ No newline at end of file
# Config for swin_moe_base_patch4_window12_192_densebaseline_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE, TRAIN > MOE; DATA, TRAIN and TEST as top-level
# siblings) — confirm against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_base_patch4_window12_192_densebaseline_22k
  DROP_PATH_RATE: 0.2
  SWIN_MOE:
    EMBED_DIM: 128
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [4, 8, 16, 32]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    # Every stage lists only -1; presumably no block uses MoE, which
    # would match "densebaseline" in NAME — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [-1], [-1]]
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
  # NOTE(review): MOE is assumed to nest under TRAIN — confirm.
  MOE:
    SAVE_MASTER: true
TEST:
  SHUFFLE: true
\ No newline at end of file
# Config for swin_moe_small_patch4_window12_192_16expert_32gpu_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE; DATA, TRAIN and TEST as top-level siblings) — confirm
# against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_small_patch4_window12_192_16expert_32gpu_22k
  DROP_PATH_RATE: 0.2
  SWIN_MOE:
    EMBED_DIM: 96
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    INIT_STD: 0.005
    # One list per entry in DEPTHS; presumably the block indices that use
    # MoE, with -1 meaning none — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [1, 3, 5, 7, 9, 11, 13, 15, 17], [1]]
    # NOTE(review): a negative value presumably means one expert shared by
    # 2 GPUs (32 GPUs / 2 = 16 experts, matching NAME) — confirm.
    NUM_LOCAL_EXPERTS: -2
    TOP_VALUE: 1
    CAPACITY_FACTOR: 1.25
    IS_GSHARD_LOSS: false
    MOE_DROP: 0.1
    AUX_LOSS_WEIGHT: 0.01
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
TEST:
  SHUFFLE: true
\ No newline at end of file
# Config for swin_moe_small_patch4_window12_192_32expert_32gpu_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE; DATA, TRAIN and TEST as top-level siblings) — confirm
# against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_small_patch4_window12_192_32expert_32gpu_22k
  DROP_PATH_RATE: 0.2
  SWIN_MOE:
    EMBED_DIM: 96
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    INIT_STD: 0.005
    # One list per entry in DEPTHS; presumably the block indices that use
    # MoE, with -1 meaning none — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [1, 3, 5, 7, 9, 11, 13, 15, 17], [1]]
    # Presumably one expert per GPU (32 experts on 32 GPUs per NAME) —
    # TODO confirm.
    NUM_LOCAL_EXPERTS: 1
    TOP_VALUE: 1
    CAPACITY_FACTOR: 1.25
    IS_GSHARD_LOSS: false
    MOE_DROP: 0.1
    AUX_LOSS_WEIGHT: 0.01
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
TEST:
  SHUFFLE: true
\ No newline at end of file
# Config for swin_moe_small_patch4_window12_192_64expert_64gpu_22k.
# NOTE(review): nesting reconstructed from a flattened scrape
# (MODEL > SWIN_MOE; DATA, TRAIN and TEST as top-level siblings) — confirm
# against the consumer's schema.
DATA:
  DATASET: imagenet22K
  IMG_SIZE: 192
MODEL:
  TYPE: swin_moe
  NAME: swin_moe_small_patch4_window12_192_64expert_64gpu_22k
  DROP_PATH_RATE: 0.2
  SWIN_MOE:
    EMBED_DIM: 96
    DEPTHS: [2, 2, 18, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 12
    MLP_FC2_BIAS: false
    INIT_STD: 0.005
    # One list per entry in DEPTHS; presumably the block indices that use
    # MoE, with -1 meaning none — TODO confirm.
    MOE_BLOCKS: [[-1], [-1], [1, 3, 5, 7, 9, 11, 13, 15, 17], [1]]
    # Presumably one expert per GPU (64 experts on 64 GPUs per NAME) —
    # TODO confirm.
    NUM_LOCAL_EXPERTS: 1
    TOP_VALUE: 1
    CAPACITY_FACTOR: 1.25
    IS_GSHARD_LOSS: false
    MOE_DROP: 0.1
    AUX_LOSS_WEIGHT: 0.01
TRAIN:
  EPOCHS: 90
  WARMUP_EPOCHS: 10
  WEIGHT_DECAY: 0.1
  BASE_LR: 1.25e-4  # 4096 batch-size
  WARMUP_LR: 1.25e-7
  MIN_LR: 1.25e-6
  CLIP_GRAD: 3.0
TEST:
  SHUFFLE: true
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment