Commit 43e508f4 authored by Zhe Chen's avatar Zhe Chen Committed by zhe chen
Browse files

Replace github release with hugging face (#42)



* Update function descriptions

* Update README.md

* Replace GitHub release with hugging face

* Release InternImage-H segmentation model

---------
Co-authored-by: default avatarZhenhang Huang <prc_hzh@163.com>
parent 4bbef509
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_s_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_t_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/ade20k.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -10,11 +10,11 @@ Cityscapes is a large-scale database which focuses on semantic understanding of ...@@ -10,11 +10,11 @@ Cityscapes is a large-scale database which focuses on semantic understanding of
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download | | backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:----------:|:-------:|:-----:|:----:|:----:| |:--------------:|:----------:|:------------:|:-----------:|:----------:|:-------:|:-----:|:----:|:----:|
| InternImage-T | 512x1024 | 82.58 / 83.40 | 0.32s / iter | 14.5h | 59M | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_t_512x1024_160k_cityscapes.log.json) | | InternImage-T | 512x1024 | 82.58 / 83.40 | 0.32s / iter | 14.5h | 59M | 1889G | [config](./upernet_internimage_t_512x1024_160k_cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_t_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_t_512x1024_160k_cityscapes.log.json) |
| InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_s_512x1024_160k_cityscapes.log.json) | | InternImage-S | 512x1024 | 82.74 / 83.45 | 0.36s / iter | 16.5h | 80M | 2035G | [config](./upernet_internimage_s_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_s_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_s_512x1024_160k_cityscapes.log.json) |
| InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_b_512x1024_160k_cityscapes.log.json) | | InternImage-B | 512x1024 | 83.18 / 83.97 | 0.39s / iter | 17h | 128M | 2369G | [config](./upernet_internimage_b_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_b_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_b_512x1024_160k_cityscapes.log.json) |
| InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_cityscapes.log.json) | | InternImage-L | 512x1024 | 83.68 / 84.41 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_cityscapes.log.json) |
| InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) |[ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) | | InternImage-XL | 512x1024 | 83.62 / 84.28 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_cityscapes.py) |[ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_cityscapes.log.json) |
- Training speed is measured with A100 GPU. - Training speed is measured with A100 GPU.
- Please set `with_cp=True` to save memory if you meet `out-of-memory` issues. - Please set `with_cp=True` to save memory if you meet `out-of-memory` issues.
...@@ -25,8 +25,8 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k ...@@ -25,8 +25,8 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download | | backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:| |:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:|
| InternImage-L | 512x1024 | 85.94 / 86.22 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-L | 512x1024 | 85.94 / 86.22 | 0.50s / iter | 23h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 86.20 / 86.42 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-XL | 512x1024 | 86.20 / 86.42 | 0.56s / iter | 26h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/upernet_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
### SegFormerHead + InternImage (with additional data) ### SegFormerHead + InternImage (with additional data)
...@@ -34,6 +34,13 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k ...@@ -34,6 +34,13 @@ Mapillary 80k + Cityscapes (w/ coarse data) 160k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download | | backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:| |:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-L | 512x1024 | 85.16 / 85.67 | 0.37s / iter | 17h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-L | 512x1024 | 85.16 / 85.67 | 0.37s / iter | 17h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_l_512x1024_160k_mapillary2cityscapes.log.json) |
| InternImage-XL | 512x1024 | 85.41 / 85.93 | 0.43s / iter | 19.5h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) | | InternImage-XL | 512x1024 | 85.41 / 85.93 | 0.43s / iter | 19.5h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.pth) \| [log](https://huggingface.co/OpenGVLab/InternImage/raw/main/segformer_internimage_xl_512x1024_160k_mapillary2cityscapes.log.json) |
### Mask2Former + InternImage (with additional data)
Mapillary 80k + Cityscapes (w/ coarse data) 80k
| backbone | resolution | mIoU (ss/ms) | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-H | 1024x1024 | 86.37 / 86.96 | TODO | TODO | TODO | TODO | [config](./mask2former_internimage_h_1024x1024_80k_mapillary2cityscapes.py) | [ckpt]() \| [log]() |
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_l_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/segformer_internimage_xl_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_b_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_b_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_l_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_s_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_s_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_t_1k_224.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_t_1k_224.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/cityscapes_extra.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_160k.py'
] ]
load_from = 'https://github.com/OpenGVLab/InternImage/releases/download/seg_models/upernet_internimage_xl_512x1024_80k_mapillary.pth' load_from = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_80k_mapillary.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
# Mapillary Vistas
Introduced by Neuhold et al. in [The Mapillary Vistas Dataset for Semantic Understanding of Street Scenes](http://openaccess.thecvf.com/content_ICCV_2017/papers/Neuhold_The_Mapillary_Vistas_ICCV_2017_paper.pdf)
Mapillary Vistas Dataset is a diverse street-level imagery dataset with pixel-accurate and instance-specific human annotations for understanding street scenes around the world.
We first pretrain our models on the Mapillary Vistas dataset, then finetune them on the Cityscapes dataset.
## Model Zoo
### UperNet + InternImage
| backbone | resolution | schd | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:------:|:------------:|
| InternImage-L | 512x1024 | 80k | 0.50s / iter | 11.5h | 256M | 3234G | [config](./upernet_internimage_l_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_l_512x1024_80k_mapillary.pth) |
| InternImage-XL | 512x1024 | 80k | 0.56s / iter | 13h | 368M | 4022G | [config](./upernet_internimage_xl_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/upernet_internimage_xl_512x1024_80k_mapillary.pth) |
### SegFormerHead + InternImage
| backbone | resolution | schd | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-L | 512x1024 | 80k | 0.37s / iter | 9h | 220M | 1580G | [config](./segformer_internimage_l_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_l_512x1024_80k_mapillary.pth) |
| InternImage-XL | 512x1024 | 80k | 0.43s / iter | 10h | 330M | 2364G | [config](./segformer_internimage_xl_512x1024_80k_mapillary.py) | [ckpt](https://huggingface.co/OpenGVLab/InternImage/resolve/main/segformer_internimage_xl_512x1024_80k_mapillary.pth) |
### Mask2Former + InternImage
| backbone | resolution | schd | train speed | train time | #params | FLOPs | Config | Download |
|:--------------:|:----------:|:------------:|:-----------:|:-----------:|:-------:|:-----:|:-----:|:---------:|
| InternImage-H | 1024x1024 | 80k | TODO | TODO | TODO | TODO | [config](./mask2former_internimage_h_1024x1024_80k_mapillary.py) | [ckpt]() |
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py', '../_base_/models/segformer_mit-b0.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_l_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_l_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
...@@ -7,7 +7,7 @@ _base_ = [ ...@@ -7,7 +7,7 @@ _base_ = [
'../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py', '../_base_/models/upernet_r50.py', '../_base_/datasets/mapillary.py',
'../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py' '../_base_/default_runtime.py', '../_base_/schedules/schedule_80k.py'
] ]
pretrained = 'https://github.com/OpenGVLab/InternImage/releases/download/cls_model/internimage_xl_22k_192to384.pth' pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/internimage_xl_22k_192to384.pth'
model = dict( model = dict(
backbone=dict( backbone=dict(
_delete_=True, _delete_=True,
......
import torch
import argparse
import math
from collections import OrderedDict
# Command-line interface: one optional positional argument giving the path
# of the checkpoint (.pth) file to convert to fp16.
parser = argparse.ArgumentParser(description='Hyperparams')
# nargs='?' makes the filename optional; default None means torch.load will
# fail later if no path is supplied — TODO confirm intended behavior.
parser.add_argument('filename', nargs='?', type=str, default=None)
args = parser.parse_args()
def convert_fl16(m):
    """Return a copy of state dict *m* with every tensor cast to float16.

    Key order is preserved; values are converted via ``Tensor.half()``.
    """
    # An OrderedDict built from a generator keeps insertion order, matching
    # the original loop-and-assign construction exactly.
    return OrderedDict((key, tensor.half()) for key, tensor in m.items())
# Load the checkpoint on CPU and pull out its parameter dictionary.
model = torch.load(args.filename, map_location=torch.device('cpu'))['state_dict']
# Cast every tensor to fp16 and re-wrap under the same top-level key.
new_model = {"state_dict": convert_fl16(model)}
# Save alongside the input, e.g. model.pth -> model_fp16.pth.
torch.save(new_model, args.filename.replace(".pth", "_fp16.pth"))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment