Commit 43e508f4 authored by Zhe Chen's avatar Zhe Chen Committed by zhe chen
Browse files

Replace github release with hugging face (#42)



* Update function descriptions

* Update README.md

* Replace GitHub release with hugging face

* Release InternImage-H segmentation model

---------
Co-authored-by: default avatarZhenhang Huang <prc_hzh@163.com>
parent 4bbef509
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_s_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_t_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -10,11 +10,11 @@ Cityscapes is a large-scale database which focuses on semantic understanding of ...@@ -10,11 +10,11 @@ Cityscapes is a large-scale database which focuses on semantic understanding of
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download | | backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:----------:|:-------:|:-----:|:----:|:----:| |:--------------:|:----------:|:------------:|:-----------:|:----------:|:-------:|:-----:|:----:|:----:|
| InternImage-T | 512x1024 | 82.58 / 83.40 | 0.32s / iter | 14.5h | 59M | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512x1024_160k_cityscapes.log.json) | | InternImage-T | 512x1024 | 82.58 / 83.40 | 0.32s / iter | 14.5h | 59M | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512x1024_160k_cityscapes.log.json) |
| InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512x1024_160k_cityscapes.log.json) | | InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512x1024_160k_cityscapes.log.json) |
| InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512x1024_160k_cityscapes.log.json) | | InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512x1024_160k_cityscapes.log.json) |
| InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_cityscapes.log.json) | | InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_cityscapes.log.json) |
| InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) | | InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |
- Training speed is measured with A100 GPU. - Training speed is measured with A100 GPU.
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues. - Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
...@@ -25,8 +25,8 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k ...@@ -25,8 +25,8 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download | | backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:| |:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:|
| InternImage-L | 512x1024 | 85.94 / 86.22 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-L | 512x1024 | 85.94 / 86.22 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 86.20 / 86.42 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-XL | 512x1024 | 86.20 / 86.42 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
### SegFormerHead + InternImage (with additional data) ### SegFormerHead + InternImage (with additional data)
...@@ -34,6 +34,13 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k ...@@ -34,6 +34,13 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download | | backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:| |:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-L | 512x1024 | 85.16 / 85.67 | 0.37s / iter | 17h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-L | 512x1024 | 85.16 / 85.67 | 0.37s / iter | 17h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 85.41 / 85.93 | 0.43s / iter | 19.5h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-XL | 512x1024 | 85.41 / 85.93 | 0.43s / iter | 19.5h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
### Mask2Former + InternImage (with additional data)
Mapillary 80k + Cityscapes (w/ coarse data) 80k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-H | 1024x1024 | 86.37 / 86.96 | TODO | TODO | TODO | TODO | [config](./mask2former_internimage_h_1024x1024_80k_mapillary2cityscapes.py) | [ckpt]() \| [log]() |
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_b_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_s_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_t_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
# Mapillary Vistas
Introduced by Neuhold et al. in [The Mapillary Vistas Dataset for Semantic Understanding of Street Scenes](http://openaccess.thecvf.com/content_ICCV_2017/papers/Neuhold_The_Mapillary_Vistas_ICCV_2017_paper.pdf)
Mapillary Vistas Dataset is a diverse street-level imagery dataset with pixel-accurate and instance-specific human annotations for understanding street scenes around the world.
We first pretrain our models on the Mapillary Vistas dataset, then finetune them on the Cityscapes dataset.
## Model Zoo
### UperNet + InternImage
| backbone | resolution | schd | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:|
| InternImage-L | 512x1024 | 80k | 0.50s / iter | 11.5h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_80k_mapillary.pth) |
| InternImage-XL | 512x1024 | 80k | 0.56s / iter | 13h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_80k_mapillary.pth) |
### SegFormerHead + InternImage
| backbone | resolution | schd | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-L | 512x1024 | 80k | 0.37s / iter | 9h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_80k_mapillary.pth) |
| InternImage-XL | 512x1024 | 80k | 0.43s / iter | 10h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_80k_mapillary.pth) |
### Mask2Former + InternImage
| backbone | resolution | schd | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-H | 1024x1024 | 80k | TODO | TODO | TODO | TODO | [config](./mask2former_internimage_h_1024x1024_80k_mapillary.py) | [ckpt]() |
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
import torch
import argparse
import math
from collections import OrderedDict
# Command-line interface: one optional positional argument giving the path
# of the checkpoint (.pth) file to convert to fp16.
parser = argparse.ArgumentParser(description='Hyperparams')
# nargs='?' makes the filename optional; default None means torch.load will
# fail later if no path is supplied — TODO confirm intended behavior.
parser.add_argument('filename', nargs='?', type=str, default=None)
args = parser.parse_args()
def convert_fl16(m):
    """Return a copy of state dict *m* with every tensor cast to float16.

    Key order is preserved; values are converted via ``Tensor.half()``.
    """
    # An OrderedDict built from a generator keeps insertion order, matching
    # the original loop-and-assign construction exactly.
    return OrderedDict((key, tensor.half()) for key, tensor in m.items())
# Load the checkpoint on CPU and pull out its parameter dictionary.
model = torch.load(args.filename, map_location=torch.device('cpu'))['state_dict']
# Cast every tensor to fp16 and re-wrap under the same top-level key.
new_model = {"state_dict": convert_fl16(model)}
# Save alongside the input, e.g. model.pth -> model_fp16.pth.
torch.save(new_model, args.filename.replace(".pth", "_fp16.pth"))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment