# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
from core.predict_ensemble_three import predictEnsembleThree
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model prediction')
# params of prediction
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
"--config_1",
dest="cfg_1",
help="The config file.",
default=None,
type=str)
parser.add_argument(
'--model_path_1',
dest='model_path_1',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
"--config_crop",
dest="cfg_crop",
help="The config file.",
default=None,
type=str)
parser.add_argument(
'--model_path_crop',
dest='model_path_crop',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
help='The path of image, it can be a file or a directory including images',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the predicted results',
type=str,
default='./output/result')
# augment for prediction
parser.add_argument(
'--aug_pred',
dest='aug_pred',
help='Whether to use multi-scales and flip augment for prediction',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
help='Scales for augment',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
help='Whether to use horizontal flip augment',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
help='Whether to use vertical flip augment',
action='store_true')
# sliding window prediction
parser.add_argument(
'--is_slide',
dest='is_slide',
help='Whether to predict by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
help='The crop size of sliding window, the first is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
help='The stride of sliding window, the first is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:
raise FileNotFoundError(
'`--image_path` is not found. It should be an image file or a directory including images'
)
if len(image_list) == 0:
raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
val_dataset = cfg.val_dataset
cfg_1 = Config(args.cfg_1)
cfg_crop = Config(args.cfg_crop)
val_dataset_crop = cfg_crop.val_dataset
if not val_dataset:
raise RuntimeError(
'The validation dataset is not specified in the configuration file.'
)
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
model_1 = cfg_1.model
model_crop = cfg_crop.model
transforms = val_dataset.transforms
transforms_crop = val_dataset_crop.transforms
image_list, image_dir = get_image_list(args.image_path)
logger.info('Number of predict images = {}'.format(len(image_list)))
predictEnsembleThree(
model,
model_1,
model_crop,
model_path=args.model_path,
model_path_1=args.model_path_1,
model_path_crop=args.model_path_crop,
transforms=transforms,
transforms_crop=transforms_crop,
image_list=image_list,
image_dir=image_dir,
save_dir=args.save_dir,
aug_pred=args.aug_pred,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride, )
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
from collections import deque
import shutil
import paddle
import paddle.nn.functional as F
from paddleseg.utils import TimeAverager, calculate_eta, resume, logger
from core.val import evaluate
#from core.val_crop import evaluate
def check_logits_losses(logits_list, losses):
len_logits = len(logits_list)
len_losses = len(losses['types'])
if len_logits != len_losses:
raise RuntimeError(
'The length of logits_list should equal to the types of loss config: {} != {}.'
.format(len_logits, len_losses))
def loss_computation(logits_list, labels, losses, edges=None):
check_logits_losses(logits_list, losses)
loss_list = []
for i in range(len(logits_list)):
logits = logits_list[i]
loss_i = losses['types'][i]
# Whether to use edges as labels, according to the loss type.
if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
loss_list.append(losses['coef'][i] * loss_i(logits, edges))
else:
loss_list.append(losses['coef'][i] * loss_i(logits, labels))
return loss_list
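# Illustrative example (hypothetical values): with two logits and
# losses = {'types': [CrossEntropyLoss(), CrossEntropyLoss()], 'coef': [1.0, 0.4]},
# loss_computation returns [1.0 * ce(logits[0], labels), 0.4 * ce(logits[1], labels)],
# which train() below sums into a single scalar loss.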
def train(model,
train_dataset,
val_dataset=None,
aug_eval=False,
flip_horizontal_eval=False,
optimizer=None,
save_dir='output',
iters=10000,
batch_size=2,
resume_model=None,
save_interval=1000,
log_iters=10,
num_workers=0,
use_vdl=False,
losses=None,
keep_checkpoint_max=5):
"""
Launch training.
Args:
model(nn.Layer): A semantic segmentation model.
train_dataset (paddle.io.Dataset): Used to read and process training datasets.
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
aug_eval (bool, optional): Whether to use multi-scales and flip augment for evaluation. Default: False.
flip_horizontal_eval (bool, optional): Whether to use horizontal flip augment for evaluation. It is valid when `aug_eval` is True. Default: False.
optimizer (paddle.optimizer.Optimizer): The optimizer.
save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
iters (int, optional): How many iters to train the model. Default: 10000.
batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
resume_model (str, optional): The path of resume model.
save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
log_iters (int, optional): Display logging information at every log_iters. Default: 10.
num_workers (int, optional): Num workers for data loader. Default: 0.
use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
losses (dict): A dict including 'types' and 'coef'. The length of 'coef' should equal 1 or len(losses['types']).
The 'types' item is a list of loss objects from paddleseg.models.losses, while the 'coef' item is a list of the corresponding coefficients.
keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
"""
nranks = paddle.distributed.ParallelEnv().nranks
local_rank = paddle.distributed.ParallelEnv().local_rank
start_iter = 0
if resume_model is not None:
start_iter = resume(model, optimizer, resume_model)
if not os.path.isdir(save_dir):
if os.path.exists(save_dir):
os.remove(save_dir)
os.makedirs(save_dir)
if nranks > 1:
# Initialize parallel training environment.
paddle.distributed.init_parallel_env()
ddp_model = paddle.DataParallel(model)
# for item in ddp_model.named_parameters():
# if item[0].find('scale_attn')==-1:
# item[1].stop_gradient=True
batch_sampler = paddle.io.DistributedBatchSampler(
train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
loader = paddle.io.DataLoader(
train_dataset,
batch_sampler=batch_sampler,
num_workers=num_workers,
return_list=True, )
if use_vdl:
from visualdl import LogWriter
log_writer = LogWriter(save_dir)
avg_loss = 0.0
avg_loss_list = []
iters_per_epoch = len(batch_sampler)
best_mean_iou = -1.0
best_model_iter = -1
reader_cost_averager = TimeAverager()
batch_cost_averager = TimeAverager()
save_models = deque()
batch_start = time.time()
iter = start_iter
while iter < iters:
for data in loader:
iter += 1
if iter > iters:
break
reader_cost_averager.record(time.time() - batch_start)
images = data[0]
labels = data[1].astype('int64')
edges = None
if len(data) == 3:
edges = data[2].astype('int64')
if hasattr(train_dataset,
'shuffle') and iter % iters_per_epoch == 0:
train_dataset.shuffle()
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
loss = sum(loss_list)
loss.backward()
optimizer.step()
lr = optimizer.get_lr()
if isinstance(optimizer._learning_rate,
paddle.optimizer.lr.LRScheduler):
optimizer._learning_rate.step()
model.clear_gradients()
avg_loss += float(loss)
if not avg_loss_list:
avg_loss_list = [l.numpy() for l in loss_list]
else:
for i in range(len(loss_list)):
avg_loss_list[i] += loss_list[i].numpy()
batch_cost_averager.record(
time.time() - batch_start, num_samples=batch_size)
if (iter) % log_iters == 0 and local_rank == 0:
avg_loss /= log_iters
avg_loss_list = [l[0] / log_iters for l in avg_loss_list]
remain_iters = iters - iter
avg_train_batch_cost = batch_cost_averager.get_average()
avg_train_reader_cost = reader_cost_averager.get_average()
eta = calculate_eta(remain_iters, avg_train_batch_cost)
logger.info(
"[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f}, ips={:.4f} samples/sec | ETA {}"
.format((iter - 1
) // iters_per_epoch + 1, iter, iters, avg_loss,
lr, avg_train_batch_cost, avg_train_reader_cost,
batch_cost_averager.get_ips_average(), eta))
if use_vdl:
log_writer.add_scalar('Train/loss', avg_loss, iter)
# Record each loss separately when there is more than one loss.
if len(avg_loss_list) > 1:
avg_loss_dict = {}
for i, value in enumerate(avg_loss_list):
avg_loss_dict['loss_' + str(i)] = value
for key, value in avg_loss_dict.items():
log_tag = 'Train/' + key
log_writer.add_scalar(log_tag, value, iter)
log_writer.add_scalar('Train/lr', lr, iter)
log_writer.add_scalar('Train/batch_cost',
avg_train_batch_cost, iter)
log_writer.add_scalar('Train/reader_cost',
avg_train_reader_cost, iter)
avg_loss = 0.0
avg_loss_list = []
reader_cost_averager.reset()
batch_cost_averager.reset()
if (iter % save_interval == 0 or
iter == iters) and (val_dataset is not None):
num_workers = 1 if num_workers > 0 else 0
mean_iou, acc = evaluate(
model,
val_dataset,
aug_eval=aug_eval,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None,
num_workers=num_workers)
model.train()
if (iter % save_interval == 0 or iter == iters) and local_rank == 0:
current_save_dir = os.path.join(save_dir,
"iter_{}".format(iter))
if not os.path.isdir(current_save_dir):
os.makedirs(current_save_dir)
paddle.save(model.state_dict(),
os.path.join(current_save_dir, 'model.pdparams'))
paddle.save(optimizer.state_dict(),
os.path.join(current_save_dir, 'model.pdopt'))
save_models.append(current_save_dir)
if len(save_models) > keep_checkpoint_max > 0:
model_to_remove = save_models.popleft()
shutil.rmtree(model_to_remove)
if val_dataset is not None:
if mean_iou > best_mean_iou:
best_mean_iou = mean_iou
best_model_iter = iter
best_model_dir = os.path.join(save_dir, "best_model")
paddle.save(
model.state_dict(),
os.path.join(best_model_dir, 'model.pdparams'))
logger.info(
'[EVAL] The model with the best validation mIoU ({:.4f}) was saved at iter {}.'
.format(best_mean_iou, best_model_iter))
if use_vdl:
log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
log_writer.add_scalar('Evaluate/Acc', acc, iter)
batch_start = time.time()
# Calculate flops.
if local_rank == 0:
def count_syncbn(m, x, y):
x = x[0]
nelements = x.numel()
m.total_ops += int(2 * nelements)
_, c, h, w = images.shape
flops = paddle.flops(
model, [1, c, h, w],
custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})
logger.info(flops)
# Sleep for half a second to let dataloader release resources.
time.sleep(0.5)
if use_vdl:
log_writer.close()
import os
import numpy as np
import cv2
from PIL import Image
from paddleseg import utils
import xml.dom.minidom
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root.split('/')[-1], f))
else:
raise FileNotFoundError(
'`--image_path` is not found. It should be an image file or a directory including images'
)
if len(image_list) == 0:
raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
def refine_pred():
image_list, image_dir = get_image_list(
'detection_out/pseudo_color_prediction')
for ii in image_list:
name_pred = 'detection_out/pseudo_color_prediction/' + ii
name_label = 'data/IDD_Detection/Annotations/all/' + ii[:-3] + 'xml'
pred = np.array(Image.open(name_pred)).astype(np.float32)
if not os.path.exists(name_label):
pred_mask = utils.visualize.get_pseudo_color_map(pred)
pred_saved_path = 'detect_out/pred_refine/' + ii
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
continue
dom = xml.dom.minidom.parse(name_label)
root = dom.documentElement
objects = root.getElementsByTagName("object")
for item in objects:
name = item.getElementsByTagName("name")[0]
if name.firstChild.data == 'traffic sign' or name.firstChild.data == 'traffic light':
print(ii)
xmin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmin')[0].firstChild.data)
ymin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymin')[0].firstChild.data)
xmax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmax')[0].firstChild.data)
ymax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymax')[0].firstChild.data)
if name.firstChild.data == 'traffic sign':
pred[ymin:ymax, xmin:xmax] = 18
elif name.firstChild.data == 'traffic light':
pred[ymin:ymax, xmin:xmax] = 19
pred_mask = utils.visualize.get_pseudo_color_map(pred)
pred_saved_path = 'detect_out/pred_refine/' + ii
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
def test():
path = '/Users/liliulei/Downloads/IDD_Detection/JPEGImages/frontNear/'
image_list, image_dir = get_image_list(path)
for ii in image_list:
name_xml = '/Users/liliulei/Downloads/IDD_Detection/Annotations/frontNear/' + ii[:-3] + 'xml'
image = cv2.imread(path + ii)
# print(image.shape)
(h, w) = image.shape[0:2]
pred = np.zeros_like(image)
dom = xml.dom.minidom.parse(name_xml)
root = dom.documentElement
objects = root.getElementsByTagName("object")
for item in objects:
name = item.getElementsByTagName("name")[0]
print(name.firstChild.data)
if name.firstChild.data == 'traffic sign' or name.firstChild.data == 'traffic light':
xmin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmin')[0].firstChild.data)
ymin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymin')[0].firstChild.data)
xmax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmax')[0].firstChild.data)
ymax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymax')[0].firstChild.data)
if name.firstChild.data == 'traffic sign':
pred[ymin:ymax, xmin:xmax, 0] = 255
elif name.firstChild.data == 'traffic light':
pred[ymin:ymax, xmin:xmax, 1] = 255
new_im = image * 0.5 + pred * 0.5
cv2.imwrite(ii.split('/')[-1][:-3] + 'png', new_im)
refine_pred()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
import datasets, models
from scripts.train import train
def parse_args():
parser = argparse.ArgumentParser(description='Model training')
# params of training
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--iters',
dest='iters',
help='iters for training',
type=int,
default=None)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size of one gpu or cpu',
type=int,
default=None)
parser.add_argument(
'--learning_rate',
dest='learning_rate',
help='Learning rate',
type=float,
default=None)
parser.add_argument(
'--save_interval',
dest='save_interval',
help='How many iters to save a model snapshot once during training.',
type=int,
default=1000)
parser.add_argument(
'--resume_model',
dest='resume_model',
help='The path of resume model',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the model snapshot',
type=str,
default='./output')
parser.add_argument(
'--keep_checkpoint_max',
dest='keep_checkpoint_max',
help='Maximum number of checkpoints to save',
type=int,
default=5)
parser.add_argument(
'--num_workers',
dest='num_workers',
help='Num workers for data loader',
type=int,
default=0)
parser.add_argument(
'--do_eval',
dest='do_eval',
help='Eval while training',
action='store_true')
parser.add_argument(
'--log_iters',
dest='log_iters',
help='Display logging information at every log_iters',
default=10,
type=int)
parser.add_argument(
'--use_vdl',
dest='use_vdl',
help='Whether to record the data to VisualDL during training',
action='store_true')
return parser.parse_args()
def main(args):
env_info = get_sys_env()
info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
info = '\n'.join(['', format('Environment Information', '-^48s')] + info +
['-' * 48])
logger.info(info)
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(
args.cfg,
learning_rate=args.learning_rate,
iters=args.iters,
batch_size=args.batch_size)
train_dataset = cfg.train_dataset
if train_dataset is None:
raise RuntimeError(
'The training dataset is not specified in the configuration file.')
val_dataset = cfg.val_dataset if args.do_eval else None
losses = cfg.loss
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
train(
cfg.model,
train_dataset,
val_dataset=val_dataset,
aug_eval=True,
flip_horizontal_eval=False,
optimizer=cfg.optimizer,
save_dir=args.save_dir,
iters=cfg.iters,
batch_size=cfg.batch_size,
resume_model=args.resume_model,
save_interval=args.save_interval,
log_iters=args.log_iters,
num_workers=args.num_workers,
use_vdl=args.use_vdl,
losses=losses,
keep_checkpoint_max=args.keep_checkpoint_max)
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from core.val import evaluate
from paddleseg.utils import get_sys_env, logger, utils
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model evaluation')
# params of evaluate
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for evaluation',
type=str,
default=None)
parser.add_argument(
'--num_workers',
dest='num_workers',
help='Num workers for data loader',
type=int,
default=0)
# augment for evaluation
parser.add_argument(
'--aug_eval',
dest='aug_eval',
help='Whether to use multi-scales and flip augment for evaluation',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
help='Scales for augment',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
help='Whether to use horizontal flip augment',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
help='Whether to use vertical flip augment',
action='store_true')
# sliding window evaluation
parser.add_argument(
'--is_slide',
dest='is_slide',
help='Whether to evaluate by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
help='The crop size of sliding window, the first is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
help='The stride of sliding window, the first is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
val_dataset = cfg.val_dataset
if val_dataset is None:
raise RuntimeError(
'The validation dataset is not specified in the configuration file.'
)
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
utils.load_entire_model(model, args.model_path)
logger.info('Loaded trained params of model successfully')
evaluate(
model,
val_dataset,
aug_eval=args.aug_eval,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride,
num_workers=args.num_workers, )
if __name__ == '__main__':
args = parse_args()
main(args)
# Cityscapes SOTA
The implementation of Hierarchical Multi-Scale Attention based on PaddlePaddle. [[Paper]](https://arxiv.org/abs/2005.10821)<br>
Based on the above work, we made some optimizations:
- Use dice loss and bootstrapped cross entropy loss instead of plain cross entropy loss
- Learn from all fine data plus an equal amount of coarse data in each epoch
- Evaluate with an arithmetic (equal-difference) series of scales instead of a geometric (equal-ratio) series
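For example, the multi-scale validation config below evaluates at scales {0.5, 1.0, 1.5, 2.0, 2.5}, an arithmetic series, rather than a geometric series such as {0.5, 1.0, 2.0, 4.0}.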
We achieve an mIoU of **87%** on the Cityscapes validation set.
A sample result is shown below (for high-definition pictures, please click [here](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/docs/images/cityscapes.gif)).
<div align="center">
<img src=https://user-images.githubusercontent.com/30695251/144982303-51d40188-c00d-46b7-9012-41955c4e2156.gif width = "500" />
</div>
## Installation
#### step 1. Install PaddlePaddle
System Requirements:
* PaddlePaddle >= 2.0.0rc1
* Python >= 3.6
We highly recommend installing the GPU version of PaddlePaddle: segmentation models have a large memory overhead, and the CPU version may run out of memory while running them. For more detailed installation tutorials, please refer to the official website of [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/2.0/install/).
#### step 2. Install PaddleSeg
Use the *API calling* method to install PaddleSeg for flexible development:
```shell
pip install paddleseg
```
## Data Preparation
Download the following files into the `data/cityscapes` directory, then unzip them (see the sketch after the file list).
```shell
mkdir -p data/cityscapes
```
First, download these three files from the [Cityscapes dataset](https://www.cityscapes-dataset.com/downloads/):
- leftImg8bit_trainvaltest.zip (11GB)
- gtFine_trainvaltest.zip (241MB)
- leftImg8bit_trainextra.zip (44GB)
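A minimal sketch of the unzip step (paths assumed; the extra split is extracted into its own directory to match the layout shown later):
```shell
cd data/cityscapes
unzip leftImg8bit_trainvaltest.zip
unzip gtFine_trainvaltest.zip
# Keep the extra images in a separate directory, as the final structure expects.
unzip leftImg8bit_trainextra.zip -d leftImg8bit_trainextra
cd ../..
```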
Run the following commands to do the label conversion:
```shell
pip install cityscapesscripts
python ../../tools/data/convert_cityscapes.py --cityscapes_path data/cityscapes --num_workers 8
```
Here `--cityscapes_path` should be adjusted to the actual dataset path, and `--num_workers` sets the number of worker processes, which can be tuned to your machine.
Then download and uncompress the autolabelled data from [Google Drive](https://drive.google.com/file/d/1DtPo-WP-hjaOwsbj6ZxTtOo_7R_4TKRG/view?usp=sharing):
- refinement_final_v0.zip # This file is needed for autolabelled training to recreate the SOTA result
Delete the useless `tmp` directory inside the `refinement_final` directory:
```shell
rm -r tmp/
```
Convert the autolabelled data to the PaddleSeg data format:
```shell
python tools/convert_cityscapes_autolabeling.py --dataset_root data/cityscapes/
```
Finally, organize the data into the following structure.
```
cityscapes
|
|--leftImg8bit
|  |--train
|  |--val
|  |--test
|
|--gtFine
|  |--train
|  |--val
|  |--test
|
|--leftImg8bit_trainextra
|  |--leftImg8bit
|     |--train_extra
|        |--augsburg
|        |--bayreuth
|        |--...
|
|--convert_autolabelled
|  |--augsburg
|  |--bayreuth
|  |--...
```
## Evaluation
### Download Trained Model
```shell
mkdir -p saved_model && cd saved_model
wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/mscale_ocr_hrnetw48_cityscapes_autolabel_mapillary/model.pdparams
cd ..
```
### Evaluation on Cityscapes
| Model | Backbone | mIoU | mIoU (flip) | mIoU (5 scales + flip) |
|:-:|:-:|:-:|:-:|:-:|
|MscaleOCRNet|HRNet_w48|86.89%|86.99%|87.00%|
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch val.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary.yml --num_workers 3 --model_path saved_model/model.pdparams
```
The reported mIoU should be 86.89. This evaluates with scales of 0.5, 1.0, and 2.0 and requires 14.2GB of GPU memory.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch val.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary.yml --num_workers 3 --model_path saved_model/model.pdparams \
--aug_eval --flip_horizontal
```
The reported mIoU should be 86.99. This evaluates with scales of 0.5, 1.0, and 2.0 plus horizontal flipping and requires 14.2GB of GPU memory.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch val.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary_ms_val.yml --num_workers 3 --model_path saved_model/model.pdparams \
--aug_eval --flip_horizontal
```
The reported mIoU should be 87.00. This evaluates with scales of 0.5, 1.0, 1.5, 2.0, and 2.5 plus horizontal flipping and requires 21.2GB of GPU memory.
## Training
### Download Pretrained Weights
```shell
mkdir -p pretrain && cd pretrain
wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/ocrnet_hrnetw48_mapillary/pretrained.pdparams
cd ..
```
The pretrained weights were obtained by pretraining OCRNet (with an HRNet-W48 backbone) on the Mapillary dataset.
### Training on Cityscapes
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary.yml --use_vdl \
--save_dir saved_model/mscale_ocr_cityscapes_autolabel_mapillary --save_interval 2000 --num_workers 5 --do_eval
```
Note that this requires 32GB of GPU memory. You can remove the `--do_eval` argument to turn off evaluation during training, which lowers the requirement to 25GB of GPU memory.
If you run out of memory, try lowering the crop size, as sketched below.
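For example (an illustrative sketch; the exact values depend on your hardware), shrinking `RandomPaddingCrop` in the training config reduces memory use:
```yaml
train_dataset:
  transforms:
    - type: RandomPaddingCrop
      crop_size: [1024, 512]  # the provided config uses [2048, 1024]
```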
## Deploy
Run the following command to export the inference model.
```shell
python export.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary_ms_val.yml \
--save_dir ./output \
--input_shape 1 3 2048 1024
```
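As a minimal sketch of consuming the exported model with Paddle's Python inference API (file names follow the `deploy.yaml` written by `export.py`; the random input is a stand-in for properly preprocessed images):
```python
import numpy as np
from paddle.inference import Config, create_predictor

# Paths follow the files that export.py writes into --save_dir.
config = Config('output/model.pdmodel', 'output/model.pdiparams')
config.enable_use_gpu(8000, 0)  # initial GPU memory pool (MB), device id
predictor = create_predictor(config)

# A normalized NCHW float32 batch matching --input_shape 1 3 2048 1024.
# (Real inputs should go through the transforms listed in deploy.yaml.)
img = np.random.rand(1, 3, 2048, 1024).astype('float32')

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(img)
predictor.run()
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
pred = output_handle.copy_to_cpu()  # int32 label map when output_op is argmax
```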
The inference model can then be deployed with the following methods.
| Platform | Library | Tutorial |
| :----------- | :----------- | :----- |
| Python | Paddle prediction library | [e.g.](../../docs/deployment/inference/python_inference.md) |
| C++ | Paddle prediction library | [e.g.](../../docs/deployment/inference/cpp_inference.md) |
| Mobile | PaddleLite | [e.g.](../../docs/deployment/lite/lite.md) |
| Front-end | PaddleJS | [e.g.](../../docs/deployment/web/web.md) |
Other deployment documents:
* [Inference with TensorRT in C++](https://github.com/iwatake2222/play_with_tensorrt/tree/master/pj_tensorrt_seg_paddleseg_cityscapessota)
* [Inference with ONNX Runtime in Python](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/201_CityscapesSOTA/demo)
* [Inference with TensorFlow Lite in Python](https://github.com/axinc-ai/ailia-models/tree/master/image_segmentation/paddleseg)
Thanks for their contributions!
# Hierarchical multi-scale attention for semantic segmentation
## Reference
> Tao, Andrew, Karan Sapra, and Bryan Catanzaro. "Hierarchical multi-scale attention for semantic segmentation." arXiv preprint arXiv:2005.10821 (2020).
batch_size: 1
iters: 65000
model:
type: MscaleOCRNet
pretrained: pretrain/pretrained.pdparams
n_scales: [0.5,1.0,2.0]
backbone:
type: HRNet_W48_NV
num_classes: 19
backbone_indices: [0]
train_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0
- type: RandomPaddingCrop
crop_size: [2048, 1024]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.25
brightness_prob: 1
contrast_range: 0.25
contrast_prob: 1
saturation_range: 0.25
saturation_prob: 1
hue_range: 63
hue_prob: 1
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: train
val_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes
transforms:
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 0.0001
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.005
power: 2
end_lr: 0.0
loss:
types:
- type: DiceLoss
- type: DiceLoss
- type: DiceLoss
- type: DiceLoss
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
coef: [1, 0.4, 0.05, 0.05, 1, 0.4, 0.05, 0.05]
batch_size: 1
iters: 65000
model:
type: MscaleOCRNet
pretrained: pretrain/pretrained.pdparams
n_scales: [0.5,1.0,1.5,2.0,2.5] # more scales
backbone:
type: HRNet_W48_NV
num_classes: 19
backbone_indices: [0]
val_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes
transforms:
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: val
train_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes/
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0
- type: RandomPaddingCrop
crop_size: [2048, 1024]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.25
brightness_prob: 1
contrast_range: 0.25
contrast_prob: 1
saturation_range: 0.25
saturation_prob: 1
hue_range: 63
hue_prob: 1
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: train
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .cityscapes_autolabeling import CityscapesAutolabeling
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# A fixed random seed ensures that, when the dataset is shuffled each epoch during multi-GPU training, all GPUs see the same data order.
random.seed(100)
@manager.DATASETS.add_component
class CityscapesAutolabeling(paddle.io.Dataset):
"""
Cityscapes dataset with fine data, coarse data and autolabelled data.
Source: https://www.cityscapes-dataset.com/
Autolabelled-Data from [google drive](https://drive.google.com/file/d/1DtPo-WP-hjaOwsbj6ZxTtOo_7R_4TKRG/view?usp=sharing)
The folder structure is as follow:
cityscapes
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
|
|--leftImg8bit_trainextra
| |--leftImg8bit
| |--train_extra
| |--augsburg
| |--bayreuth
| |--...
|
|--convert_autolabelled
| |--augsburg
| |--bayreuth
| |--...
Make sure there are *_gtFine_labelTrainIds.png files in the gtFine directory. If not, please run the convert_cityscapes.py in tools.
Convert autolabelled data according to PaddleSeg data format:
python tools/convert_cityscapes_autolabeling.py --dataset_root data/cityscapes/
Args:
transforms (list): Transforms for image.
dataset_root (str): Cityscapes dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 19
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labelTrainIds.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labelTrainIds.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.png')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
self.num_files = len(self.file_list)
# use coarse dataset only in training
img_dir = os.path.join(self.dataset_root, 'leftImg8bit_trainextra',
'leftImg8bit', 'train_extra')
label_dir = os.path.join(self.dataset_root, 'convert_autolabelled')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The coarse dataset is not Found or the folder structure is nonconfoumance."
)
coarse_label_files = sorted(
glob.glob(os.path.join(label_dir, '*', '*_leftImg8bit.png')))
coarse_img_files = sorted(
glob.glob(os.path.join(img_dir, '*', '*_leftImg8bit.png')))
if len(coarse_img_files) != len(coarse_label_files):
raise ValueError(
"The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset."
.format(len(coarse_img_files), len(coarse_label_files)))
self.coarse_file_list = [[img_path, label_path]
for img_path, label_path in zip(
coarse_img_files, coarse_label_files)]
random.shuffle(self.coarse_file_list)
self.total_num_files = int(self.num_files * (1 + coarse_multiple))
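# One training epoch therefore spans all fine images plus coarse_multiple
# times as many coarse (autolabelled) images; __getitem__ below maps
# indices >= num_files into the shuffled coarse list.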
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
label = label[np.newaxis, :, :]
return im, label
else:
if idx >= self.num_files:
image_path, label_path = self.coarse_file_list[idx -
self.num_files]
else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
return im, label
def shuffle(self):
random.shuffle(self.coarse_file_list)
def __len__(self):
return self.total_num_files
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
import yaml
from paddleseg.cvlibs import Config
from paddleseg.utils import logger
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model export.')
parser.add_argument(
"--config", help="The config file.", type=str, required=True)
parser.add_argument(
'--model_path', help='The path of model for export', type=str)
parser.add_argument(
'--save_dir',
help='The directory for saving the exported model',
type=str,
default='./output/inference_model')
parser.add_argument(
'--output_op',
choices=['argmax', 'softmax', 'none'],
default="argmax",
help="Select which op to be appended to output result, default: argmax")
parser.add_argument(
'--without_argmax',
help='Do not add the argmax operation at the end of the network. [Deprecated]',
action='store_true')
parser.add_argument(
'--with_softmax',
help='Add the softmax operation at the end of the network. [Deprecated]',
action='store_true')
parser.add_argument(
"--input_shape",
nargs='+',
help="Export the model with fixed input shape, such as 1 3 1024 1024.",
type=int,
default=None)
return parser.parse_args()
class SavedSegmentationNet(paddle.nn.Layer):
def __init__(self, net, output_op):
super().__init__()
self.net = net
self.output_op = output_op
assert output_op in ['argmax', 'softmax'], \
"output_op should in ['argmax', 'softmax']"
def forward(self, x):
outs = self.net(x)
new_outs = []
for out in outs:
if self.output_op == 'argmax':
out = paddle.argmax(out, axis=1, dtype='int32')
elif self.output_op == 'softmax':
out = paddle.nn.functional.softmax(out, axis=1)
new_outs.append(out)
return new_outs
def main(args):
os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
cfg = Config(args.config)
cfg.check_sync_info()
net = cfg.model
if args.model_path is not None:
para_state_dict = paddle.load(args.model_path)
net.set_dict(para_state_dict)
logger.info('Loaded trained params of model successfully.')
if args.input_shape is None:
shape = [None, 3, None, None]
else:
shape = args.input_shape
output_op = args.output_op
if args.without_argmax:
logger.warning(
'--without_argmax will be deprecated, please use --output_op')
output_op = 'none'
if args.with_softmax:
logger.warning(
'--with_softmax will be deprecated, please use --output_op')
output_op = 'softmax'
new_net = net if output_op == 'none' else SavedSegmentationNet(net,
output_op)
new_net.eval()
new_net = paddle.jit.to_static(
new_net,
input_spec=[paddle.static.InputSpec(
shape=shape, dtype='float32')])
save_path = os.path.join(args.save_dir, 'model')
paddle.jit.save(new_net, save_path)
yml_file = os.path.join(args.save_dir, 'deploy.yaml')
with open(yml_file, 'w') as file:
transforms = cfg.export_config.get('transforms', [{
'type': 'Normalize'
}])
output_dtype = 'int32' if output_op == 'argmax' else 'float32'
data = {
'Deploy': {
'model': 'model.pdmodel',
'params': 'model.pdiparams',
'transforms': transforms,
'input_shape': shape,
'output_op': output_op,
'output_dtype': output_dtype
}
}
yaml.dump(data, file)
logger.info(f'The inference model is saved in {args.save_dir}')
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .backbones.hrnet_nv import *
from .mscale_ocrnet import MscaleOCRNet