Commit f7df4b3c authored by Zeqiang Lai, committed by zhe chen
Browse files

clarify the classification configs

parent 5dcd922a
......@@ -182,13 +182,13 @@ InternImage, the visual backbone network of "INTERN-2.5", has a parameter size o
## ImageNet-1K Image Classification
| name | pretrain | resolution | acc@1 | #param | FLOPs | download |
| :------------: | :----------: | :--------: | :---: | :----: | :---: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-T | ImageNet-1K | 224x224 | 83.5 | 30M | 5G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/internimage_t_1k_224.yaml) |
| InternImage-S | ImageNet-1K | 224x224 | 84.2 | 50M | 8G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/internimage_s_1k_224.yaml) |
| InternImage-B | ImageNet-1K | 224x224 | 84.9 | 97M | 16G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/internimage_b_1k_224.yaml) |
| InternImage-L | ImageNet-22K | 384x384 | 87.7 | 223M | 108G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/internimage_l_22kto1k_384.yaml) |
| InternImage-XL | ImageNet-22K | 384x384 | 88.0 | 335M | 163G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/internimage_xl_22kto1k_384.yaml) |
| InternImage-H | Joint 427M | 640x640 | 89.6 | 1.08B | 1478G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/internimage_h_22kto1k_640.yaml) |
| InternImage-G | - | 512x512 | 90.1 | 3B | 2700G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/internimage_g_22kto1k_512.yaml) |
| InternImage-T | ImageNet-1K | 224x224 | 83.5 | 30M | 5G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/without_lr_decay/internimage_t_1k_224.yaml) |
| InternImage-S | ImageNet-1K | 224x224 | 84.2 | 50M | 8G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/without_lr_decay/internimage_s_1k_224.yaml) |
| InternImage-B | ImageNet-1K | 224x224 | 84.9 | 97M | 16G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/without_lr_decay/internimage_b_1k_224.yaml) |
| InternImage-L | ImageNet-22K | 384x384 | 87.7 | 223M | 108G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/without_lr_decay/internimage_l_22kto1k_384.yaml) |
| InternImage-XL | ImageNet-22K | 384x384 | 88.0 | 335M | 163G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/without_lr_decay/internimage_xl_22kto1k_384.yaml) |
| InternImage-H | Joint 427M | 640x640 | 89.6 | 1.08B | 1478G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/without_lr_decay/internimage_h_22kto1k_640.yaml) |
| InternImage-G | - | 512x512 | 90.1 | 3B | 2700G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/without_lr_decay/internimage_g_22kto1k_512.yaml) |
## COCO Object Detection and Instance Segmentation
......
......@@ -179,13 +179,13 @@
## ImageNet-1K图像分类
| name | pretrain | resolution | acc@1 | #param | FLOPs | download |
| :------------: | :----------: | :--------: | :---: | :----: | :---: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| InternImage-T | ImageNet-1K | 224x224 | 83.5 | 30M | 5G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/internimage_t_1k_224.yaml) |
| InternImage-S | ImageNet-1K | 224x224 | 84.2 | 50M | 8G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/internimage_s_1k_224.yaml) |
| InternImage-B | ImageNet-1K | 224x224 | 84.9 | 97M | 16G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/internimage_b_1k_224.yaml) |
| InternImage-L | ImageNet-22K | 384x384 | 87.7 | 223M | 108G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/internimage_l_22kto1k_384.yaml) |
| InternImage-XL | ImageNet-22K | 384x384 | 88.0 | 335M | 163G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/internimage_xl_22kto1k_384.yaml) |
| InternImage-H | Joint 427M | 640x640 | 89.6 | 1.08B | 1478G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/internimage_h_22kto1k_640.yaml) |
| InternImage-G | - | 512x512 | 90.1 | 3B | 2700G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/internimage_g_22kto1k_512.yaml) |
| InternImage-T | ImageNet-1K | 224x224 | 83.5 | 30M | 5G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth) \| [cfg](classification/configs/without_lr_decay/internimage_t_1k_224.yaml) |
| InternImage-S | ImageNet-1K | 224x224 | 84.2 | 50M | 8G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth) \| [cfg](classification/configs/without_lr_decay/internimage_s_1k_224.yaml) |
| InternImage-B | ImageNet-1K | 224x224 | 84.9 | 97M | 16G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth) \| [cfg](classification/configs/without_lr_decay/internimage_b_1k_224.yaml) |
| InternImage-L | ImageNet-22K | 384x384 | 87.7 | 223M | 108G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22kto1k_384.pth) \| [cfg](classification/configs/without_lr_decay/internimage_l_22kto1k_384.yaml) |
| InternImage-XL | ImageNet-22K | 384x384 | 88.0 | 335M | 163G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22kto1k_384.pth) \| [cfg](classification/configs/without_lr_decay/internimage_xl_22kto1k_384.yaml) |
| InternImage-H | Joint 427M | 640x640 | 89.6 | 1.08B | 1478G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_h_22kto1k_640.pth) \| [cfg](classification/configs/without_lr_decay/internimage_h_22kto1k_640.yaml) |
| InternImage-G | - | 512x512 | 90.1 | 3B | 2700G | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_g_22kto1k_512.pth) \| [cfg](classification/configs/without_lr_decay/internimage_g_22kto1k_512.yaml) |
......
......@@ -145,6 +145,8 @@ python -m torch.distributed.launch --nproc_per_node 1 --master_port 12345 main.p
### Training from Scratch on ImageNet-1K
> The paper results were obtained from models trained with configs in `configs/without_lr_decay`.
To train an `InternImage` on ImageNet from scratch, run:
```bash
......
......@@ -33,6 +33,8 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
DCN_LR_MUL: 0.1
......
......@@ -34,6 +34,8 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
DCN_LR_MUL: 0.1
......
......@@ -34,6 +34,8 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
USE_ZERO: True
......
......@@ -28,6 +28,8 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
DCN_LR_MUL: 0.1
......
......@@ -28,6 +28,8 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
DCN_LR_MUL: 0.1
......
DATA:
  IMG_SIZE: 384
  IMG_ON_MEMORY: True
AUG:
  MIXUP: 0.0
  CUTMIX: 0.0
  REPROB: 0.0
MODEL:
  TYPE: intern_image
  DROP_PATH_RATE: 0.2
  LABEL_SMOOTHING: 0.3
  INTERN_IMAGE:
    CORE_OP: 'DCNv3'
    DEPTHS: [6, 6, 32, 6]
    GROUPS: [10, 20, 40, 80]
    CHANNELS: 320
    DW_KERNEL_SIZE: 5
    LAYER_SCALE: None
    OFFSET_SCALE: 1.0
    MLP_RATIO: 4.0
    POST_NORM: False
    RES_POST_NORM: True
    LEVEL2_POST_NORM: True
    LEVEL2_POST_NORM_BLOCK_IDS: [5, 11, 17, 23, 29]
    CENTER_FEATURE_SCALE: True
    USE_CLIP_PROJECTOR: True
TRAIN:
  EMA:
    ENABLE: true
    DECAY: 0.9999
  EPOCHS: 20
  WARMUP_EPOCHS: 2
  WEIGHT_DECAY: 0.05
  BASE_LR: 2e-05 # 512
  WARMUP_LR: .0
  MIN_LR: .0
  LR_LAYER_DECAY: true
  LR_LAYER_DECAY_RATIO: 0.9
  USE_CHECKPOINT: true
  OPTIMIZER:
    DCN_LR_MUL: 0.1
AMP_OPT_LEVEL: O0
EVAL_FREQ: 1
\ No newline at end of file
......@@ -33,8 +33,6 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
DCN_LR_MUL: 0.1
......
......@@ -34,8 +34,6 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
USE_ZERO: True
......
......@@ -28,8 +28,6 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
DCN_LR_MUL: 0.1
......
......@@ -28,8 +28,6 @@ TRAIN:
BASE_LR: 2e-05 # 512
WARMUP_LR: .0
MIN_LR: .0
LR_LAYER_DECAY: true
LR_LAYER_DECAY_RATIO: 0.9
USE_CHECKPOINT: true
OPTIMIZER:
DCN_LR_MUL: 0.1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment