Commit 0d97cc8c authored by Sugon_ldc

add new model
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
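# Illustrative sketch (assumes Resize/Padding transforms with a target_size
# attribute, as in paddleseg.transforms): for an image of shape (1080, 1920)
# passed through Resize(target_size=(512, 512)) and then
# Padding(target_size=(520, 520)), get_reverse_list returns
# [('resize', (1080, 1920)), ('padding', (512, 512))], i.e. the shape to
# restore before undoing each op, recorded in application order.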
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
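# Illustrative values: flip_combination(True, True) returns
# [(False, False), (True, False), (False, True), (True, True)], so the
# identity pass is always kept, and (True, True) is only added when both
# flips are enabled.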
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
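# Example: with flip=(True, False) only the last (width) axis of the NCHW
# tensor is reversed, i.e. a horizontal flip; (False, True) reverses the
# height axis instead.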
def slide_inference(model, im, crop_size, stride):
"""
Infer by sliding window.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        crop_size (tuple|list): The size of the sliding window, (w, h).
        stride (tuple|list): The size of the stride, (w, h).
    Returns:
        Tensor: The logits of the input image.
"""
h_im, w_im = im.shape[-2:]
w_crop, h_crop = crop_size
w_stride, h_stride = stride
    # calculate the number of crops (np.int is removed in recent NumPy; use int)
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
# prevent negative sliding rounds when imgs after scaling << crop_size
rows = 1 if h_im <= h_crop else rows
cols = 1 if w_im <= w_crop else cols
    # TODO: Paddle 'Tensor' does not support item assignment; once it does,
    # accumulate with tensors instead of numpy arrays.
final_logit = None
count = np.zeros([1, 1, h_im, w_im])
for r in range(rows):
for c in range(cols):
h1 = r * h_stride
w1 = c * w_stride
h2 = min(h1 + h_crop, h_im)
w2 = min(w1 + w_crop, w_im)
h1 = max(h2 - h_crop, 0)
w1 = max(w2 - w_crop, 0)
im_crop = im[:, :, h1:h2, w1:w2]
logits = model(im_crop)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0].numpy()
if final_logit is None:
final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
count[:, :, h1:h2, w1:w2] += 1
    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixels that were not predicted. It is possible that the '
            'stride is greater than the crop_size.')
final_logit = final_logit / count
final_logit = paddle.to_tensor(final_logit)
return final_logit
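# Worked example (illustrative numbers): with h_im = 1000, h_crop = 512 and
# h_stride = 256, rows = ceil((1000 - 512) / 256) + 1 = 3, and the window tops
# are clamped to h1 = 0, 256 and 488, so every pixel is covered and the
# overlapping regions are averaged through `count`.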
def inference(model,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
if not is_slide:
logits = model(im)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0]
else:
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
def aug_inference(model,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: Prediction of image with shape (1, 1, h, w) is returned.
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
pred = F.interpolate(pred, (ori_shape[0], ori_shape[1]), mode='nearest')
return pred
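# Hedged usage sketch (illustrative names; assumes a trained paddleseg model
# and a transforms.Compose called `eval_transforms`):
#
#   im = paddle.to_tensor(img_chw[np.newaxis, ...])  # (1, 3, H, W) float32
#   pred = aug_inference(model, im, ori_shape=[1080, 1920],
#                        transforms=eval_transforms.transforms,
#                        scales=[0.75, 1.0, 1.25], flip_horizontal=True)
#   # softmax probabilities are summed over 3 scales x 2 flips, then argmaxed
#   # into a (1, 1, 1080, 1920) int32 label map.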
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def slide_inference(model, im, crop_size, stride):
"""
Infer by sliding window.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        crop_size (tuple|list): The size of the sliding window, (w, h).
        stride (tuple|list): The size of the stride, (w, h).
    Returns:
        Tensor: The logits of the input image.
"""
h_im, w_im = im.shape[-2:]
w_crop, h_crop = crop_size
w_stride, h_stride = stride
    # calculate the number of crops (np.int is removed in recent NumPy; use int)
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
# prevent negative sliding rounds when imgs after scaling << crop_size
rows = 1 if h_im <= h_crop else rows
cols = 1 if w_im <= w_crop else cols
    # TODO: Paddle 'Tensor' does not support item assignment; once it does,
    # accumulate with tensors instead of numpy arrays.
final_logit = None
count = np.zeros([1, 1, h_im, w_im])
for r in range(rows):
for c in range(cols):
h1 = r * h_stride
w1 = c * w_stride
h2 = min(h1 + h_crop, h_im)
w2 = min(w1 + w_crop, w_im)
h1 = max(h2 - h_crop, 0)
w1 = max(w2 - w_crop, 0)
im_crop = im[:, :, h1:h2, w1:w2]
logits = model(im_crop)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0].numpy()
if final_logit is None:
final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
count[:, :, h1:h2, w1:w2] += 1
    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixels that were not predicted. It is possible that the '
            'stride is greater than the crop_size.')
final_logit = final_logit / count
final_logit = paddle.to_tensor(final_logit)
return final_logit
def inference(model,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
if not is_slide:
logits = model(im)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0]
else:
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
def aug_inference(model,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
    Returns:
        Tensor: The accumulated softmax logits with shape (1, num_classes, h, w).
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
return final_logit
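# Note: unlike the aug_inference earlier in this commit, this variant returns
# the accumulated softmax logits (1, num_classes, h_input, w_input) instead of
# a label map, so the caller is expected to argmax / resize them itself, e.g.
# when ensembling with another model's logits.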
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def inference(model,
model_hard,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model, whose softmax output is averaged with the first.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
    if not is_slide:
        # Average the softmax probabilities of the two models.
        logits = F.softmax(model(im)[0], axis=1)
        logits_hard = F.softmax(model_hard(im)[0], axis=1)
        # Disabled experiment: scatter a subset of model_hard's classes into a
        # copy of model's output instead of averaging everything, e.g.
        #   logit_hard[ii] = paddle.scatter(
        #       logit_hard[ii],
        #       paddle.to_tensor([3, 7, 12, 14, 15, 16, 18, 19, 20, 21]),
        #       logits_hard[ii][1:], overwrite=True)
        logit = (logits + logits_hard) / 2
    else:
        # This ensemble path does not implement sliding-window inference;
        # previously `logit` was left undefined when is_slide was True.
        raise NotImplementedError(
            'is_slide=True is not supported by the ensemble inference.')
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
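# Worked example (illustrative numbers): if for one pixel `model` predicts
# class probabilities [0.6, 0.4] and `model_hard` predicts [0.2, 0.8], the
# ensembled output is [(0.6 + 0.2) / 2, (0.4 + 0.8) / 2] = [0.4, 0.6], so the
# averaged argmax can differ from either model alone.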
def aug_inference(model,
model_hard,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model used for ensembling.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: Prediction of image with shape (1, 1, h, w) is returned.
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
model_hard,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
# logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
pred = F.interpolate(pred, (ori_shape[0], ori_shape[1]), mode='nearest')
return pred
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def inference(model,
model_hard,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model, whose softmax output is summed with the first.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
    if not is_slide:
        # Sum the softmax probabilities of the two models.
        logits = F.softmax(model(im)[0], axis=1)
        logits_hard = F.softmax(model_hard(im)[0], axis=1)
        logit = logits + logits_hard
    else:
        # This ensemble path does not implement sliding-window inference;
        # previously `logit` was left undefined when is_slide was True.
        raise NotImplementedError(
            'is_slide=True is not supported by the ensemble inference.')
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
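# Note: summing the two softmax outputs here (rather than averaging as in the
# variant above) leaves the per-pixel argmax unchanged; only the scale of the
# returned 'logit' differs, which is harmless because callers either argmax it
# or keep accumulating it.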
def aug_inference(model,
model_hard,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model used for ensembling.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
    Returns:
        Tensor: The accumulated softmax logits with shape (1, num_classes, h, w).
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
model_hard,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
# logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
return final_logit
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def slide_inference(model, im, crop_size, stride):
"""
Infer by sliding window.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        crop_size (tuple|list): The size of the sliding window, (w, h).
        stride (tuple|list): The size of the stride, (w, h).
    Returns:
        Tensor: The logits of the input image.
"""
h_im, w_im = im.shape[-2:]
w_crop, h_crop = crop_size
w_stride, h_stride = stride
    # calculate the number of crops (np.int is removed in recent NumPy; use int)
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
# prevent negative sliding rounds when imgs after scaling << crop_size
rows = 1 if h_im <= h_crop else rows
cols = 1 if w_im <= w_crop else cols
    # TODO: Paddle 'Tensor' does not support item assignment; once it does,
    # accumulate with tensors instead of numpy arrays.
final_logit = None
count = np.zeros([1, 1, h_im, w_im])
for r in range(rows):
for c in range(cols):
h1 = r * h_stride
w1 = c * w_stride
h2 = min(h1 + h_crop, h_im)
w2 = min(w1 + w_crop, w_im)
h1 = max(h2 - h_crop, 0)
w1 = max(w2 - w_crop, 0)
im_crop = im[:, :, h1:h2, w1:w2]
logits = model(im_crop)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0].numpy()
if final_logit is None:
final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
count[:, :, h1:h2, w1:w2] += 1
    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixels that were not predicted. It is possible that the '
            'stride is greater than the crop_size.')
final_logit = final_logit / count
final_logit = paddle.to_tensor(final_logit)
return final_logit
def inference(model,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
if not is_slide:
logits = model(im)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0]
else:
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
def aug_inference(model,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: Prediction of image with shape (1, 1, h, w) is returned.
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
    final_logit = F.softmax(final_logit, axis=1)
    # Keep only confident pixels: any pixel whose max class probability is
    # below 0.9 is set to 255 (the ignore label) in the auto-generated label.
    max_prob = paddle.max(final_logit, axis=1, keepdim=True).numpy()
    pred = paddle.argmax(
        final_logit, axis=1, keepdim=True, dtype='int32').numpy()
    pred[max_prob < 0.9] = 255
    pred = paddle.to_tensor(pred)
pred = reverse_transform(pred, ori_shape, transforms)
return pred
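# Illustrative example of the confidence filter above: a pixel with ensembled
# probabilities [0.55, 0.45] has max_prob = 0.55 < 0.9, so it becomes 255 and
# is ignored when the pseudo label is used for training, while a pixel with
# [0.95, 0.05] keeps its argmax class 0.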
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import cv2
import numpy as np
import paddle
from paddleseg import utils
import core.infer_ensemble as infer_ensemble
from paddleseg.utils import logger, progbar
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def partition_list(arr, m):
"""split the list 'arr' into m pieces"""
n = int(math.ceil(len(arr) / float(m)))
return [arr[i:i + n] for i in range(0, len(arr), n)]
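# Example: partition_list(list(range(5)), 2) -> [[0, 1, 2], [3, 4]], so each
# of the 2 ranks gets a contiguous chunk of at most ceil(5 / 2) = 3 images.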
def predictEnsemble(model,
model_hard,
model_path,
model_path_hard,
transforms,
image_list,
image_dir=None,
save_dir='output',
aug_pred=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
    Predict and visualize the images in image_list with a two-model ensemble.
    Args:
        model (nn.Layer): The first model used to predict the input image.
        model_hard (nn.Layer): The second model of the ensemble.
        model_path (str): The path of the pretrained weights for `model`.
        model_path_hard (str): The path of the pretrained weights for `model_hard`.
        transforms (transform.Compose): Preprocessing for the input image.
        image_list (list): A list of image paths to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        aug_pred (bool, optional): Whether to use multi-scale and flip augmentation for prediction. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_pred` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_pred` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_pred` is True. Default: False.
is_slide (bool, optional): Whether to predict by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
"""
utils.utils.load_entire_model(model, model_path)
model.eval()
utils.utils.load_entire_model(model_hard, model_path_hard)
model_hard.eval()
nranks = paddle.distributed.get_world_size()
local_rank = paddle.distributed.get_rank()
if nranks > 1:
img_lists = partition_list(image_list, nranks)
else:
img_lists = [image_list]
added_saved_dir = os.path.join(save_dir, 'added_prediction')
pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction')
logger.info("Start to predict...")
progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im = cv2.imread(im_path)
ori_shape = im.shape[:2]
im, _ = transforms(im)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
if aug_pred:
pred = infer_ensemble.aug_inference(
model,
model_hard,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_ensemble.inference(
model,
model_hard,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
pred = paddle.squeeze(pred)
pred = pred.numpy().astype('uint8')
# get the saved name
if image_dir is not None:
im_file = im_path.replace(image_dir, '')
else:
im_file = os.path.basename(im_path)
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save pseudo color prediction
pred_mask = utils.visualize.get_pseudo_color_map(pred)
            pred_saved_path = os.path.join(
                pred_saved_dir, im_file.rsplit(".", 1)[0] + ".png")
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import cv2
import numpy as np
import paddle
import paddle.nn.functional as F
from paddleseg import utils
import core.infer_ensemble_three as infer_ensemble
import core.infer_crop as infer_crop
from paddleseg.utils import logger, progbar
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def partition_list(arr, m):
"""split the list 'arr' into m pieces"""
n = int(math.ceil(len(arr) / float(m)))
return [arr[i:i + n] for i in range(0, len(arr), n)]
def predictEnsembleThree(model,
model_1,
model_crop,
model_path,
model_path_1,
model_path_crop,
transforms,
transforms_crop,
image_list,
image_dir=None,
save_dir='output',
aug_pred=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
    Predict and visualize the images in image_list with a three-model ensemble.
    Args:
        model (nn.Layer): The first model used to predict the input image.
        model_1 (nn.Layer): The second model of the ensemble.
        model_crop (nn.Layer): The model applied to the three overlapping 720x1280 crops.
        model_path (str): The path of the pretrained weights for `model`.
        model_path_1 (str): The path of the pretrained weights for `model_1`.
        model_path_crop (str): The path of the pretrained weights for `model_crop`.
        transforms (transform.Compose): Preprocessing for the input image.
        transforms_crop (transform.Compose): Preprocessing for the cropped regions.
        image_list (list): A list of image paths to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        aug_pred (bool, optional): Whether to use multi-scale and flip augmentation for prediction. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_pred` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_pred` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_pred` is True. Default: False.
is_slide (bool, optional): Whether to predict by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
"""
utils.utils.load_entire_model(model, model_path)
model.eval()
utils.utils.load_entire_model(model_1, model_path_1)
model_1.eval()
utils.utils.load_entire_model(model_crop, model_path_crop)
model_crop.eval()
nranks = paddle.distributed.get_world_size()
local_rank = paddle.distributed.get_rank()
if nranks > 1:
img_lists = partition_list(image_list, nranks)
else:
img_lists = [image_list]
added_saved_dir = os.path.join(save_dir, 'added_prediction')
pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction')
logger.info("Start to predict...")
progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im_origin = cv2.imread(im_path)
ori_shape = im_origin.shape[:2]
im, _ = transforms(im_origin)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
ims, _ = transforms_crop(im_origin)
im1 = ims[:, 540:540 + 720, 320:320 + 1280]
im2 = ims[:, 540:540 + 720, 960:960 + 1280]
im3 = ims[:, 540:540 + 720, 1600:1600 + 1280]
im1 = im1[np.newaxis, ...]
im1 = paddle.to_tensor(im1)
im2 = im2[np.newaxis, ...]
im2 = paddle.to_tensor(im2)
im3 = im3[np.newaxis, ...]
im3 = paddle.to_tensor(im3)
ims_ = [im1, im2, im3]
if aug_pred:
pred = infer_ensemble.aug_inference(
model,
model_1,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_ensemble.inference(
model,
model_1,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
preds = []
for ii in range(3):
im_ = ims_[ii]
if aug_pred:
pred_crop = infer_crop.aug_inference(
                        model_crop,
im_,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred_crop = infer_crop.inference(
                        model_crop,
im_,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
preds.append(pred_crop)
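            # Stitch the three 1280 px-wide crop predictions (x offsets 320,
            # 960 and 1600) back into one 2560 px-wide map: neighbouring crops
            # overlap by 640 px, so the overlapping halves are averaged before
            # concatenation.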
left_ensem = (
preds[0][:, :, :, 640:1280] + preds[1][:, :, :, 0:640]) / 2
right_ensem = (
preds[1][:, :, :, 640:1280] + preds[2][:, :, :, 0:640]) / 2
pred_ensem = paddle.concat(
[
preds[0][:, :, :, 0:640], left_ensem, right_ensem,
preds[2][:, :, :, 640:1280]
],
axis=3)
logit = F.interpolate(pred_ensem, (432, 768), mode='bilinear')
pred_logit = pred.clone()
pred_logit[:, :, 324:756, 576:1344] = logit
pred = pred + pred_logit
pred = F.interpolate(pred, ori_shape, mode='bilinear')
pred = paddle.argmax(pred, axis=1, keepdim=True, dtype='int32')
pred = paddle.squeeze(pred)
pred = pred.numpy().astype('uint8')
# get the saved name
if image_dir is not None:
im_file = im_path.replace(image_dir, '')
else:
im_file = os.path.basename(im_path)
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save pseudo color prediction
pred_mask = utils.visualize.get_pseudo_color_map(pred)
            pred_saved_path = os.path.join(
                pred_saved_dir, im_file.rsplit(".", 1)[0] + ".png")
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import cv2
import numpy as np
import paddle
from paddleseg import utils
from core import infer_generate_autolabel
from paddleseg.utils import logger, progbar
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def partition_list(arr, m):
"""split the list 'arr' into m pieces"""
n = int(math.ceil(len(arr) / float(m)))
return [arr[i:i + n] for i in range(0, len(arr), n)]
def predictAutolabel(model,
model_path,
transforms,
image_list,
image_dir=None,
save_dir='output',
aug_pred=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
    Predict and visualize the images in image_list, generating auto labels.
    Args:
        model (nn.Layer): Used to predict the input image.
        model_path (str): The path of the pretrained model.
        transforms (transform.Compose): Preprocessing for the input image.
        image_list (list): A list of image paths to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        aug_pred (bool, optional): Whether to use multi-scale and flip augmentation for prediction. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_pred` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_pred` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_pred` is True. Default: False.
is_slide (bool, optional): Whether to predict by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
"""
utils.utils.load_entire_model(model, model_path)
model.eval()
nranks = paddle.distributed.get_world_size()
local_rank = paddle.distributed.get_rank()
if nranks > 1:
img_lists = partition_list(image_list, nranks)
else:
img_lists = [image_list]
added_saved_dir = os.path.join(save_dir, 'added_prediction')
pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction')
logger.info("Start to predict...")
progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im = cv2.imread(im_path)
ori_shape = im.shape[:2]
im, _ = transforms(im)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
if aug_pred:
pred = infer_generate_autolabel.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_generate_autolabel.inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
pred = paddle.squeeze(pred)
pred = pred.numpy().astype('uint8')
# get the saved name
if image_dir is not None:
im_file = im_path.replace(image_dir, '')
else:
im_file = os.path.basename(im_path)
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save pseudo color prediction
pred_mask = utils.visualize.get_pseudo_color_map(pred)
            pred_saved_path = os.path.join(
                pred_saved_dir, im_file.rsplit(".", 1)[0] + ".png")
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import time
import paddle
import paddle.nn.functional as F
from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
from core import infer
np.set_printoptions(suppress=True)
def evaluate(model,
eval_dataset,
aug_eval=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None,
num_workers=0,
print_detail=True):
"""
    Launch evaluation.
    Args:
        model (nn.Layer): A semantic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_eval` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_eval` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_eval` is True. Default: False.
is_slide (bool, optional): Whether to evaluate by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
num_workers (int, optional): Num workers for data loader. Default: 0.
print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
Returns:
float: The mIoU of validation datasets.
float: The accuracy of validation datasets.
"""
model.eval()
nranks = paddle.distributed.ParallelEnv().nranks
local_rank = paddle.distributed.ParallelEnv().local_rank
if nranks > 1:
# Initialize parallel environment if not done.
if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
):
paddle.distributed.init_parallel_env()
batch_sampler = paddle.io.DistributedBatchSampler(
eval_dataset, batch_size=1, shuffle=False, drop_last=False)
loader = paddle.io.DataLoader(
eval_dataset,
batch_sampler=batch_sampler,
num_workers=num_workers,
return_list=True, )
total_iters = len(loader)
intersect_area_all = 0
pred_area_all = 0
label_area_all = 0
if print_detail:
logger.info("Start evaluating (total_samples={}, total_iters={})...".
format(len(eval_dataset), total_iters))
progbar_val = progbar.Progbar(target=total_iters, verbose=1)
reader_cost_averager = TimeAverager()
batch_cost_averager = TimeAverager()
batch_start = time.time()
with paddle.no_grad():
for iter, data in enumerate(loader):
(im, label) = data
reader_cost_averager.record(time.time() - batch_start)
label = label.astype('int64')
ori_shape = label.shape[-2:]
if aug_eval:
pred = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
intersect_area, pred_area, label_area = metrics.calculate_area(
pred,
label,
eval_dataset.num_classes,
ignore_index=eval_dataset.ignore_index)
# Gather from all ranks
if nranks > 1:
intersect_area_list = []
pred_area_list = []
label_area_list = []
paddle.distributed.all_gather(intersect_area_list,
intersect_area)
paddle.distributed.all_gather(pred_area_list, pred_area)
paddle.distributed.all_gather(label_area_list, label_area)
                # Some images have already been evaluated and should be
                # dropped in the last iteration.
if (iter + 1) * nranks > len(eval_dataset):
valid = len(eval_dataset) - iter * nranks
intersect_area_list = intersect_area_list[:valid]
pred_area_list = pred_area_list[:valid]
label_area_list = label_area_list[:valid]
for i in range(len(intersect_area_list)):
intersect_area_all = intersect_area_all + intersect_area_list[
i]
pred_area_all = pred_area_all + pred_area_list[i]
label_area_all = label_area_all + label_area_list[i]
else:
intersect_area_all = intersect_area_all + intersect_area
pred_area_all = pred_area_all + pred_area
label_area_all = label_area_all + label_area
batch_cost_averager.record(
time.time() - batch_start, num_samples=len(label))
batch_cost = batch_cost_averager.get_average()
reader_cost = reader_cost_averager.get_average()
if local_rank == 0 and print_detail:
progbar_val.update(iter + 1, [('batch_cost', batch_cost),
('reader cost', reader_cost)])
reader_cost_averager.reset()
batch_cost_averager.reset()
batch_start = time.time()
class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
label_area_all)
class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
if print_detail:
logger.info("[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".
format(len(eval_dataset), miou, acc, kappa))
logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
return miou, acc
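# Usage sketch (illustrative, not part of the original file): `model` and
# `eval_dataset` are assumed to be constructed elsewhere, e.g. from a config.
#
#     miou, acc = evaluate(model, eval_dataset, aug_eval=True,
#                          scales=[0.75, 1.0, 1.25], flip_horizontal=True)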
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import time
import paddle
import paddle.nn.functional as F
from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
from core import infer_crop
np.set_printoptions(suppress=True)
def evaluate(model,
eval_dataset,
aug_eval=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None,
num_workers=0,
print_detail=True):
"""
Launch evaluation.
Args:
model(nn.Layer): A semantic segmentation model.
eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
scales (list|float, optional): Scales for augmentation. It is valid when `aug_eval` is True. Default: 1.0.
flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_eval` is True. Default: True.
flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_eval` is True. Default: False.
is_slide (bool, optional): Whether to evaluate by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
num_workers (int, optional): Num workers for data loader. Default: 0.
print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
Returns:
float: The mIoU of validation datasets.
float: The accuracy of validation datasets.
"""
model.eval()
nranks = paddle.distributed.ParallelEnv().nranks
local_rank = paddle.distributed.ParallelEnv().local_rank
if nranks > 1:
# Initialize parallel environment if not done.
if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
):
paddle.distributed.init_parallel_env()
batch_sampler = paddle.io.DistributedBatchSampler(
eval_dataset, batch_size=1, shuffle=False, drop_last=False)
loader = paddle.io.DataLoader(
eval_dataset,
batch_sampler=batch_sampler,
num_workers=num_workers,
return_list=True, )
total_iters = len(loader)
intersect_area_all = 0
pred_area_all = 0
label_area_all = 0
if print_detail:
logger.info("Start evaluating (total_samples={}, total_iters={})...".
format(len(eval_dataset), total_iters))
progbar_val = progbar.Progbar(target=total_iters, verbose=1)
reader_cost_averager = TimeAverager()
batch_cost_averager = TimeAverager()
batch_start = time.time()
with paddle.no_grad():
for iter, data in enumerate(loader):
reader_cost_averager.record(time.time() - batch_start)
preds = []
label = data[3].astype('int64')
for ii in range(3):
im = data[ii]
ori_shape = im.shape[-2:]
if aug_eval:
pred = infer_crop.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_crop.inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
preds.append(pred)
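# Stitch the three overlapping 1280-wide crop predictions back into a single
# 2560-wide prediction: adjacent crops overlap by 640 px, so the two overlap
# regions are averaged and the non-overlapping flanks are kept as-is.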
left_ensem = (
preds[0][:, :, :, 640:1280] + preds[1][:, :, :, 0:640]) / 2
right_ensem = (
preds[1][:, :, :, 640:1280] + preds[2][:, :, :, 0:640]) / 2
pred_ensem = paddle.concat(
[
preds[0][:, :, :, 0:640], left_ensem, right_ensem,
preds[2][:, :, :, 640:1280]
],
axis=3)
pred = paddle.argmax(
pred_ensem, axis=1, keepdim=True, dtype='int32')
intersect_area, pred_area, label_area = metrics.calculate_area(
pred,
label,
eval_dataset.num_classes,
ignore_index=eval_dataset.ignore_index)
# Gather from all ranks
if nranks > 1:
intersect_area_list = []
pred_area_list = []
label_area_list = []
paddle.distributed.all_gather(intersect_area_list,
intersect_area)
paddle.distributed.all_gather(pred_area_list, pred_area)
paddle.distributed.all_gather(label_area_list, label_area)
# Some images have already been evaluated and should be excluded in the last iteration
if (iter + 1) * nranks > len(eval_dataset):
valid = len(eval_dataset) - iter * nranks
intersect_area_list = intersect_area_list[:valid]
pred_area_list = pred_area_list[:valid]
label_area_list = label_area_list[:valid]
for i in range(len(intersect_area_list)):
intersect_area_all = intersect_area_all + intersect_area_list[
i]
pred_area_all = pred_area_all + pred_area_list[i]
label_area_all = label_area_all + label_area_list[i]
else:
intersect_area_all = intersect_area_all + intersect_area
pred_area_all = pred_area_all + pred_area
label_area_all = label_area_all + label_area
batch_cost_averager.record(
time.time() - batch_start, num_samples=len(label))
batch_cost = batch_cost_averager.get_average()
reader_cost = reader_cost_averager.get_average()
if local_rank == 0 and print_detail:
progbar_val.update(iter + 1, [('batch_cost', batch_cost),
('reader cost', reader_cost)])
reader_cost_averager.reset()
batch_cost_averager.reset()
batch_start = time.time()
class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
label_area_all)
class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
if print_detail:
logger.info("[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".
format(len(eval_dataset), miou, acc, kappa))
logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
return miou, acc
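# Note: this crop-ensemble `evaluate` expects each sample from `eval_dataset`
# to be a tuple (im1, im2, im3, label) of three overlapping crops plus the
# matching label band, as produced by the AutoNueCrop dataset below in 'val' mode.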
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .auto_nue import AutoNue
from .auto_nue_autolabel import AutoNueAutolabel
from .auto_nue_crop import AutoNueCrop
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import cv2
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# The random seed is fixed so that, after shuffling the dataset each epoch during multi-GPU training, the data sequences on all GPUs stay consistent.
random.seed(100)
@manager.DATASETS.add_component
class AutoNue(paddle.io.Dataset):
"""
You need to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5)
following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
Then organize the data following the structure below.
IDD_Segmentation
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Args:
transforms (list): Transforms for image.
dataset_root (str): The IDD_Segmentation dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 26
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
for ii in range(len(self.file_list)):
print(self.file_list[ii])
print(len(self.file_list))
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
# use coarse dataset only in training
# img_dir = os.path.join('data/IDD_Detection/JPEGImages/all')
# label_dir = os.path.join('data/IDD_Detection/AutoLabel/pred_refine')
# if self.dataset_root is None or not os.path.isdir(
# self.dataset_root) or not os.path.isdir(
# img_dir) or not os.path.isdir(label_dir):
# raise ValueError(
# "The coarse dataset is not Found or the folder structure is nonconfoumance."
# )
# coarse_label_files = sorted(
# glob.glob(os.path.join(label_dir, '*', '*')))
# coarse_img_files = sorted(
# glob.glob(os.path.join(img_dir, '*', '*')))
# if len(coarse_img_files) != len(coarse_label_files):
# raise ValueError(
# "The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset."
# .format(len(coarse_img_files), len(coarse_label_files)))
# self.coarse_file_list = [[img_path, label_path]
# for img_path, label_path in zip(
# coarse_img_files, coarse_label_files)]
# random.shuffle(self.coarse_file_list)
# self.total_num_files = int(self.num_files * (1 + coarse_multiple))
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
# label = cv2.resize(label, (1280, 720), interpolation=cv2.INTER_NEAREST)
label = label[np.newaxis, :, :]
return im, label
else:
if idx >= self.num_files:
image_path, label_path = self.coarse_file_list[idx -
self.num_files]
else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
return im, label
def __len__(self):
return self.total_num_files
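# Instantiation sketch (illustrative; the transform list and path are
# assumptions, not taken from this repo's configs):
#
#     import paddleseg.transforms as T
#     val_set = AutoNue(
#         transforms=[T.Normalize()],
#         dataset_root='data/IDD_Segmentation',
#         mode='val')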
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# The random seed is fixed so that, after shuffling the dataset each epoch during multi-GPU training, the data sequences on all GPUs stay consistent.
random.seed(100)
@manager.DATASETS.add_component
class AutoNueAutolabel(paddle.io.Dataset):
"""
You need to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5)
following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
Then organize the data following the structure below.
IDD_Segmentation
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Args:
transforms (list): Transforms for image.
dataset_root (str): The IDD_Segmentation dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 26
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
for ii in range(len(self.file_list)):
print(self.file_list[ii])
print(len(self.file_list))
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
# use coarse dataset only in training
img_dir = os.path.join('data/IDD_Detection/JPEGImages')
label_dir = os.path.join('data/IDD_Detection/pred_refine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The coarse dataset is not Found or the folder structure is nonconfoumance."
)
coarse_label_files = sorted(
glob.glob(os.path.join(label_dir, '*', '*')))
coarse_img_files = sorted(
glob.glob(os.path.join(img_dir, '*', '*')))
if len(coarse_img_files) != len(coarse_label_files):
raise ValueError(
"The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset."
.format(len(coarse_img_files), len(coarse_label_files)))
self.coarse_file_list = [[img_path, label_path]
for img_path, label_path in zip(
coarse_img_files, coarse_label_files)]
random.shuffle(self.coarse_file_list)
self.file_list = self.coarse_file_list
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
print(self.num_files)
# self.total_num_files = int(self.num_files * (1 + coarse_multiple))
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
label = label[np.newaxis, :, :]
return im, label
else:
# if idx >= self.num_files:
# image_path, label_path = self.coarse_file_list[idx -
# self.num_files]
# else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
return im, label
def __len__(self):
return self.total_num_files
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# The random seed is fixed so that, after shuffling the dataset each epoch during multi-GPU training, the data sequences on all GPUs stay consistent.
random.seed(100)
@manager.DATASETS.add_component
class AutoNueCrop(paddle.io.Dataset):
"""
You need to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5)
following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
Then organize the data following the structure below.
IDD_Segmentation
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Args:
transforms (list): Transforms for image.
dataset_root (str): The IDD_Segmentation dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 26
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
for ii in range(len(self.file_list)):
print(self.file_list[ii])
print(len(self.file_list))
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
(h, w) = im.shape[1:]
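# Cut three 720x1280 crops from the same horizontal band (rows 540:1260);
# neighboring crops start 640 px apart, so each pair overlaps by 640 px and
# together they cover columns 320:2880.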
im1 = im[:, 540:540 + 720, 320:320 + 1280]
im2 = im[:, 540:540 + 720, 960:960 + 1280]
im3 = im[:, 540:540 + 720, 1600:1600 + 1280]
return im1, im2, im3, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
(h, w) = im.shape[1:]
im1 = im[:, 540:540 + 720, 320:320 + 1280]
im2 = im[:, 540:540 + 720, 960:960 + 1280]
im3 = im[:, 540:540 + 720, 1600:1600 + 1280]
label = label[540:540 + 720, 320:1600 + 1280]
return im1, im2, im3, label
else:
if idx >= self.num_files:
image_path, label_path = self.coarse_file_list[idx -
self.num_files]
else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
(h, w) = im.shape[1:]
start_w = np.linspace(320, 1600, 5).tolist()
np.random.shuffle(start_w)
start = int(start_w[0])
crop_im = im[:, 540:540 + 720, start:(start + 1280)]
crop_label = label[540:540 + 720, start:(start + 1280)]
return crop_im, crop_label
def __len__(self):
return self.total_num_files
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .backbones.hrnet_nv import *
from .mscale_ocrnet import MscaleOCRNet
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
from paddleseg.utils import utils
__all__ = [
"HRNet_W18_NV_Small_V1", "HRNet_W18_NV_Small_V2", "HRNet_W18_NV",
"HRNet_W30_NV", "HRNet_W32_NV", "HRNet_W40_NV", "HRNet_W44_NV",
"HRNet_W48_NV", "HRNet_W60_NV", "HRNet_W64_NV"
]
class HRNetNV(nn.Layer):
"""
The HRNet implementation based on PaddlePaddle.
The difference from the HRNet at paddleseg/models/backbones/hrnet.py is:
1. The padding parameter of the convolutions is different.
The original article refers to
Jingdong Wang, et al. "HRNet: Deep High-Resolution Representation Learning for Visual Recognition"
(https://arxiv.org/pdf/1908.07919.pdf).
Args:
pretrained (str): The path of pretrained model.
stage1_num_modules (int): Number of modules for stage1. Default 1.
stage1_num_blocks (list): Number of blocks per module for stage1. Default [4].
stage1_num_channels (list): Number of channels per branch for stage1. Default [64].
stage2_num_modules (int): Number of modules for stage2. Default 1.
stage2_num_blocks (list): Number of blocks per module for stage2. Default [4, 4]
stage2_num_channels (list): Number of channels per branch for stage2. Default [18, 36].
stage3_num_modules (int): Number of modules for stage3. Default 4.
stage3_num_blocks (list): Number of blocks per module for stage3. Default [4, 4, 4]
stage3_num_channels (list): Number of channels per branch for stage3. Default [18, 36, 72].
stage4_num_modules (int): Number of modules for stage4. Default 3.
stage4_num_blocks (list): Number of blocks per module for stage4. Default [4, 4, 4, 4]
stage4_num_channels (list): Number of channels per branch for stage4. Default [18, 36, 72, 144].
has_se (bool): Whether to use Squeeze-and-Excitation module. Default False.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
"""
def __init__(self,
pretrained=None,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
has_se=False,
align_corners=False):
super(HRNetNV, self).__init__()
self.pretrained = pretrained
self.stage1_num_modules = stage1_num_modules
self.stage1_num_blocks = stage1_num_blocks
self.stage1_num_channels = stage1_num_channels
self.stage2_num_modules = stage2_num_modules
self.stage2_num_blocks = stage2_num_blocks
self.stage2_num_channels = stage2_num_channels
self.stage3_num_modules = stage3_num_modules
self.stage3_num_blocks = stage3_num_blocks
self.stage3_num_channels = stage3_num_channels
self.stage4_num_modules = stage4_num_modules
self.stage4_num_blocks = stage4_num_blocks
self.stage4_num_channels = stage4_num_channels
self.has_se = has_se
self.align_corners = align_corners
self.feat_channels = [sum(stage4_num_channels)]
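# The fused output width is the sum of the stage-4 branch channels,
# e.g. 18 + 36 + 72 + 144 = 270 for the default (W18) configuration.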
self.conv_layer1_1 = layers.ConvBNReLU(
in_channels=3,
out_channels=64,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False)
self.conv_layer1_2 = layers.ConvBNReLU(
in_channels=64,
out_channels=64,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False)
self.la1 = Layer1(
num_channels=64,
num_blocks=self.stage1_num_blocks[0],
num_filters=self.stage1_num_channels[0],
has_se=has_se,
name="layer2")
self.tr1 = TransitionLayer(
in_channels=[self.stage1_num_channels[0] * 4],
out_channels=self.stage2_num_channels,
name="tr1")
self.st2 = Stage(
num_channels=self.stage2_num_channels,
num_modules=self.stage2_num_modules,
num_blocks=self.stage2_num_blocks,
num_filters=self.stage2_num_channels,
has_se=self.has_se,
name="st2",
align_corners=align_corners)
self.tr2 = TransitionLayer(
in_channels=self.stage2_num_channels,
out_channels=self.stage3_num_channels,
name="tr2")
self.st3 = Stage(
num_channels=self.stage3_num_channels,
num_modules=self.stage3_num_modules,
num_blocks=self.stage3_num_blocks,
num_filters=self.stage3_num_channels,
has_se=self.has_se,
name="st3",
align_corners=align_corners)
self.tr3 = TransitionLayer(
in_channels=self.stage3_num_channels,
out_channels=self.stage4_num_channels,
name="tr3")
self.st4 = Stage(
num_channels=self.stage4_num_channels,
num_modules=self.stage4_num_modules,
num_blocks=self.stage4_num_blocks,
num_filters=self.stage4_num_channels,
has_se=self.has_se,
name="st4",
align_corners=align_corners)
self.init_weight()
def forward(self, x):
conv1 = self.conv_layer1_1(x)
conv2 = self.conv_layer1_2(conv1)
la1 = self.la1(conv2)
tr1 = self.tr1([la1])
st2 = self.st2(tr1)
tr2 = self.tr2(st2)
st3 = self.st3(tr2)
tr3 = self.tr3(st3)
st4 = self.st4(tr3)
x0_h, x0_w = st4[0].shape[2:]
x1 = F.interpolate(
st4[1], (x0_h, x0_w),
mode='bilinear',
align_corners=self.align_corners)
x2 = F.interpolate(
st4[2], (x0_h, x0_w),
mode='bilinear',
align_corners=self.align_corners)
x3 = F.interpolate(
st4[3], (x0_h, x0_w),
mode='bilinear',
align_corners=self.align_corners)
x = paddle.concat([st4[0], x1, x2, x3], axis=1)
return [x]
def init_weight(self):
for layer in self.sublayers():
if isinstance(layer, nn.Conv2D):
param_init.normal_init(layer.weight, std=0.001)
elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
param_init.constant_init(layer.weight, value=1.0)
param_init.constant_init(layer.bias, value=0.0)
if self.pretrained is not None:
utils.load_pretrained_model(self, self.pretrained)
class Layer1(nn.Layer):
def __init__(self,
num_channels,
num_filters,
num_blocks,
has_se=False,
name=None):
super(Layer1, self).__init__()
self.bottleneck_block_list = []
for i in range(num_blocks):
bottleneck_block = self.add_sublayer(
"bb_{}_{}".format(name, i + 1),
BottleneckBlock(
num_channels=num_channels if i == 0 else num_filters * 4,
num_filters=num_filters,
has_se=has_se,
stride=1,
downsample=True if i == 0 else False,
name=name + '_' + str(i + 1)))
self.bottleneck_block_list.append(bottleneck_block)
def forward(self, x):
conv = x
for block_func in self.bottleneck_block_list:
conv = block_func(conv)
return conv
class TransitionLayer(nn.Layer):
def __init__(self, in_channels, out_channels, name=None):
super(TransitionLayer, self).__init__()
num_in = len(in_channels)
num_out = len(out_channels)
self.conv_bn_func_list = []
for i in range(num_out):
residual = None
if i < num_in:
if in_channels[i] != out_channels[i]:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
layers.ConvBNReLU(
in_channels=in_channels[i],
out_channels=out_channels[i],
kernel_size=3,
padding=1,
bias_attr=False))
else:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
layers.ConvBNReLU(
in_channels=in_channels[-1],
out_channels=out_channels[i],
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
self.conv_bn_func_list.append(residual)
def forward(self, x):
outs = []
for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
if conv_bn_func is None:
outs.append(x[idx])
else:
if idx < len(x):
outs.append(conv_bn_func(x[idx]))
else:
outs.append(conv_bn_func(x[-1]))
return outs
class Branches(nn.Layer):
def __init__(self,
num_blocks,
in_channels,
out_channels,
has_se=False,
name=None):
super(Branches, self).__init__()
self.basic_block_list = []
for i in range(len(out_channels)):
self.basic_block_list.append([])
for j in range(num_blocks[i]):
in_ch = in_channels[i] if j == 0 else out_channels[i]
basic_block_func = self.add_sublayer(
"bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
BasicBlock(
num_channels=in_ch,
num_filters=out_channels[i],
has_se=has_se,
name=name + '_branch_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.basic_block_list[i].append(basic_block_func)
def forward(self, x):
outs = []
for idx, input in enumerate(x):
conv = input
for basic_block_func in self.basic_block_list[idx]:
conv = basic_block_func(conv)
outs.append(conv)
return outs
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
has_se,
stride=1,
downsample=False,
name=None):
super(BottleneckBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = layers.ConvBNReLU(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=1,
padding=0,
bias_attr=False)
self.conv2 = layers.ConvBNReLU(
in_channels=num_filters,
out_channels=num_filters,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.conv3 = layers.ConvBN(
in_channels=num_filters,
out_channels=num_filters * 4,
kernel_size=1,
padding=0,
bias_attr=False)
if self.downsample:
self.conv_down = layers.ConvBN(
in_channels=num_channels,
out_channels=num_filters * 4,
kernel_size=1,
bias_attr=False)
if self.has_se:
self.se = SELayer(
num_channels=num_filters * 4,
num_filters=num_filters * 4,
reduction_ratio=16,
name=name + '_fc')
def forward(self, x):
residual = x
conv1 = self.conv1(x)
conv2 = self.conv2(conv1)
conv3 = self.conv3(conv2)
if self.downsample:
residual = self.conv_down(x)
if self.has_se:
conv3 = self.se(conv3)
y = conv3 + residual
y = F.relu(y)
return y
class BasicBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride=1,
has_se=False,
downsample=False,
name=None):
super(BasicBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = layers.ConvBNReLU(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.conv2 = layers.ConvBN(
in_channels=num_filters,
out_channels=num_filters,
kernel_size=3,
padding=1,
bias_attr=False)
if self.downsample:
self.conv_down = layers.ConvBNReLU(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=1,
padding=0,
bias_attr=False)
if self.has_se:
self.se = SELayer(
num_channels=num_filters,
num_filters=num_filters,
reduction_ratio=16,
name=name + '_fc')
def forward(self, x):
residual = x
conv1 = self.conv1(x)
conv2 = self.conv2(conv1)
if self.downsample:
residual = self.conv_down(x)
if self.has_se:
conv2 = self.se(conv2)
y = conv2 + residual
y = F.relu(y)
return y
class SELayer(nn.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = nn.AdaptiveAvgPool2D(1)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = nn.Linear(
num_channels,
med_ch,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Uniform(-stdv, stdv)))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = nn.Linear(
med_ch,
num_filters,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Uniform(-stdv, stdv)))
def forward(self, x):
pool = self.pool2d_gap(x)
pool = paddle.reshape(pool, shape=[-1, self._num_channels])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = F.sigmoid(excitation)
excitation = paddle.reshape(
excitation, shape=[-1, self._num_channels, 1, 1])
out = x * excitation
return out
class Stage(nn.Layer):
def __init__(self,
num_channels,
num_modules,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None,
align_corners=False):
super(Stage, self).__init__()
self._num_modules = num_modules
self.stage_func_list = []
for i in range(num_modules):
if i == num_modules - 1 and not multi_scale_output:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
multi_scale_output=False,
name=name + '_' + str(i + 1),
align_corners=align_corners))
else:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
name=name + '_' + str(i + 1),
align_corners=align_corners))
self.stage_func_list.append(stage_func)
def forward(self, x):
out = x
for idx in range(self._num_modules):
out = self.stage_func_list[idx](out)
return out
class HighResolutionModule(nn.Layer):
def __init__(self,
num_channels,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None,
align_corners=False):
super(HighResolutionModule, self).__init__()
self.branches_func = Branches(
num_blocks=num_blocks,
in_channels=num_channels,
out_channels=num_filters,
has_se=has_se,
name=name)
self.fuse_func = FuseLayers(
in_channels=num_filters,
out_channels=num_filters,
multi_scale_output=multi_scale_output,
name=name,
align_corners=align_corners)
def forward(self, x):
out = self.branches_func(x)
out = self.fuse_func(out)
return out
class FuseLayers(nn.Layer):
def __init__(self,
in_channels,
out_channels,
multi_scale_output=True,
name=None,
align_corners=False):
super(FuseLayers, self).__init__()
self._actual_ch = len(in_channels) if multi_scale_output else 1
self._in_channels = in_channels
self.align_corners = align_corners
self.residual_func_list = []
for i in range(self._actual_ch):
for j in range(len(in_channels)):
if j > i:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
layers.ConvBN(
in_channels=in_channels[j],
out_channels=out_channels[i],
kernel_size=1,
padding=0,
bias_attr=False))
self.residual_func_list.append(residual_func)
elif j < i:
pre_num_filters = in_channels[j]
for k in range(i - j):
if k == i - j - 1:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
layers.ConvBN(
in_channels=pre_num_filters,
out_channels=out_channels[i],
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
pre_num_filters = out_channels[i]
else:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
layers.ConvBNReLU(
in_channels=pre_num_filters,
out_channels=out_channels[j],
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
pre_num_filters = out_channels[j]
self.residual_func_list.append(residual_func)
def forward(self, x):
outs = []
residual_func_idx = 0
for i in range(self._actual_ch):
residual = x[i]
residual_shape = residual.shape[-2:]
for j in range(len(self._in_channels)):
if j > i:
y = self.residual_func_list[residual_func_idx](x[j])
residual_func_idx += 1
y = F.interpolate(
y,
residual_shape,
mode='bilinear',
align_corners=self.align_corners)
residual = residual + y
elif j < i:
y = x[j]
for k in range(i - j):
y = self.residual_func_list[residual_func_idx](y)
residual_func_idx += 1
residual = residual + y
residual = F.relu(residual)
outs.append(residual)
return outs
@manager.BACKBONES.add_component
def HRNet_W18_NV_Small_V1(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[1],
stage1_num_channels=[32],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[16, 32],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[16, 32, 64],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[16, 32, 64, 128],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W18_NV_Small_V2(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[2],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[18, 36],
stage3_num_modules=3,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=2,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[18, 36, 72, 144],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W18_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W30_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[30, 60],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[30, 60, 120],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[30, 60, 120, 240],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W32_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[32, 64],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[32, 64, 128],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[32, 64, 128, 256],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W40_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[40, 80],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[40, 80, 160],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[40, 80, 160, 320],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W44_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[44, 88],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[44, 88, 176],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[44, 88, 176, 352],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W48_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[48, 96],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[48, 96, 192],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[48, 96, 192, 384],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W60_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[60, 120],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[60, 120, 240],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[60, 120, 240, 480],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W64_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[64, 128],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[64, 128, 256],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[64, 128, 256, 512],
**kwargs)
return model
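# Backbone sketch (illustrative, not part of the original file): HRNet_W48_NV
# returns one concatenated feature map with 48 + 96 + 192 + 384 = 720 channels
# at 1/4 of the input resolution.
#
#     backbone = HRNet_W48_NV(pretrained=None)
#     # backbone.feat_channels == [720]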
import math
import paddle
import paddle.nn as nn
from paddleseg.cvlibs import manager, param_init
from paddleseg.utils import utils
from paddleseg.models import layers
from .ocrnet_nv import OCRNetNV
@manager.MODELS.add_component
class MscaleOCRNet(nn.Layer):
def __init__(self,
num_classes,
backbone,
backbone_indices,
n_scales=[0.5, 1.0, 2.0],
ocr_mid_channels=512,
ocr_key_channels=256,
align_corners=False,
pretrained=None):
super().__init__()
self.ocrnet = OCRNetNV(
num_classes,
backbone,
backbone_indices,
ocr_mid_channels=ocr_mid_channels,
ocr_key_channels=ocr_key_channels,
align_corners=align_corners,
ms_attention=True)
self.scale_attn = AttenHead(in_ch=ocr_mid_channels, out_ch=1)
self.n_scales = n_scales
self.pretrained = pretrained
self.align_corners = align_corners
if self.pretrained is not None:
utils.load_pretrained_model(self, self.pretrained)
# backbone.init_weight()
def forward(self, x):
if self.training:
return self.one_scale_forward(x)
else:
return self.nscale_forward(x, self.n_scales)
def one_scale_forward(self, x):
x_size = x.shape[2:]
cls_out, aux_out, _ = self.ocrnet(x)
cls_out = nn.functional.interpolate(
cls_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
aux_out = nn.functional.interpolate(
aux_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
output = [cls_out, aux_out]
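# The (cls_out, aux_out) pair is duplicated so the returned list lines up
# with a four-term loss configuration (inferred from the matching pattern in
# two_scale_forward; the loss config itself is not in this file).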
output.extend(output)
return output
def one_scale_val(self, x):
x_size = x.shape[2:]
cls_out, aux_out, _ = self.ocrnet(x)
cls_out = nn.functional.interpolate(
cls_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
output = [cls_out]
return output
def two_scale_forward(self, x_1x):
"""
Do we supervise both aux outputs, low and high scale?
Should attention be used to combine the aux outputs?
Normally we only supervise the combined 1x output.
If we use attention to combine the aux outputs, then
we can use normal weighting for aux vs. cls outputs.
"""
x_lo = nn.functional.interpolate(
x_1x,
scale_factor=0.5,
align_corners=self.align_corners,
mode='bilinear')
lo_outs = self.single_scale_forward(x_lo)
pred_05x = lo_outs['cls_out']
p_lo = pred_05x
aux_lo = lo_outs['aux_out']
logit_attn = lo_outs['logit_attn']
hi_outs = self.single_scale_forward(x_1x)
pred_10x = hi_outs['cls_out']
p_1x = pred_10x
aux_1x = hi_outs['aux_out']
p_lo = p_lo * logit_attn
aux_lo = aux_lo * logit_attn
p_lo = scale_as(p_lo, p_1x)
aux_lo = scale_as(aux_lo, p_1x)
logit_attn = scale_as(logit_attn, p_1x)
# combine lo and hi predictions with attention
joint_pred = p_lo + p_1x * (1 - logit_attn)
joint_aux = aux_lo + aux_1x * (1 - logit_attn)
output = [joint_pred, joint_aux]
# Optionally, apply supervision to the multi-scale predictions
# directly.
scaled_pred_05x = scale_as(pred_05x, p_1x)
output.extend([scaled_pred_05x, pred_10x])
output.extend(output)
return output
def two_scale_forward_high(self, x_1x):
"""
Do we supervise both aux outputs, low and high scale?
Should attention be used to combine the aux outputs?
Normally we only supervise the combined 1x output.
If we use attention to combine the aux outputs, then
we can use normal weighting for aux vs. cls outputs.
"""
x_hi = nn.functional.interpolate(
x_1x,
scale_factor=1.5,
align_corners=self.align_corners,
mode='bilinear')
lo_outs = self.single_scale_forward(x_1x)
pred_10x = lo_outs['cls_out']
p_lo = pred_10x
aux_lo = lo_outs['aux_out']
logit_attn = lo_outs['logit_attn']
hi_outs = self.single_scale_forward(x_hi)
pred_15x = hi_outs['cls_out']
p_hi = pred_15x
aux_hi = hi_outs['aux_out']
p_lo = p_lo * logit_attn
aux_lo = aux_lo * logit_attn
p_hi = scale_as(p_hi, p_lo)
aux_hi = scale_as(aux_hi, aux_lo)
# combine lo and hi predictions with attention
joint_pred = p_lo + p_hi * (1 - logit_attn)
joint_aux = aux_lo + aux_hi * (1 - logit_attn)
output = [joint_pred, joint_aux]
# Optionally, apply supervision to the multi-scale predictions
# directly.
scaled_pred_15x = scale_as(pred_15x, p_lo)
output.extend([scaled_pred_15x, pred_10x])
output.extend(output)
return output
def nscale_forward(self, x_1x, scales):
"""
Hierarchical attention, primarily used for getting best inference
results.
We use attention at multiple scales, giving priority to the lower
resolutions. For example, if we have 4 scales {0.5, 1.0, 1.5, 2.0},
then evaluation is done as follows:
p_joint = attn_1.5 * p_1.5 + (1 - attn_1.5) * down(p_2.0)
p_joint = attn_1.0 * p_1.0 + (1 - attn_1.0) * down(p_joint)
p_joint = up(attn_0.5 * p_0.5) + (1 - up(attn_0.5)) * p_joint
The target scale is always 1.0, and 1.0 is expected to be part of the
list of scales. When predictions are done at greater than 1.0 scale,
the predictions are downsampled before combining with the next lower
scale.
Args:
x_1x - the input image batch at 1.0x scale
scales - a list of scales to evaluate
Output:
A list containing the fused prediction.
"""
assert 1.0 in scales, 'expected 1.0 to be the target scale'
# Lower resolution provides attention for higher rez predictions,
# so we evaluate in order: high to low
scales = sorted(scales, reverse=True)
pred = None
for s in scales:
x = nn.functional.interpolate(
x_1x,
scale_factor=s,
align_corners=self.align_corners,
mode='bilinear')
outs = self.single_scale_forward(x)
cls_out = outs['cls_out']
attn_out = outs['logit_attn']
if pred is None:
pred = cls_out
elif s >= 1.0:
# downscale previous
pred = scale_as(pred, cls_out, self.align_corners)
pred = cls_out * attn_out + pred * (1 - attn_out)
else:
# s < 1.0: upscale current
cls_out = cls_out * attn_out
cls_out = scale_as(cls_out, pred, self.align_corners)
attn_out = scale_as(attn_out, pred, self.align_corners)
pred = cls_out + pred * (1 - attn_out)
return [pred]
def single_scale_forward(self, x):
x_size = x.shape[2:]
cls_out, aux_out, ocr_mid_feats = self.ocrnet(x)
attn = self.scale_attn(ocr_mid_feats)
cls_out = nn.functional.interpolate(
cls_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
aux_out = nn.functional.interpolate(
aux_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
attn = nn.functional.interpolate(
attn,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
return {'cls_out': cls_out, 'aux_out': aux_out, 'logit_attn': attn}
class AttenHead(nn.Layer):
def __init__(self, in_ch, out_ch):
super(AttenHead, self).__init__()
# bottleneck channels for seg and attn heads
bot_ch = 256
self.atten_head = nn.Sequential(
layers.ConvBNReLU(
in_ch, bot_ch, 3, padding=1, bias_attr=False),
layers.ConvBNReLU(
bot_ch, bot_ch, 3, padding=1, bias_attr=False),
nn.Conv2D(
bot_ch, out_ch, kernel_size=(1, 1), bias_attr=False),
nn.Sigmoid())
def forward(self, x):
return self.atten_head(x)
def scale_as(x, y, align_corners=False):
'''
scale x to the same size as y
'''
y_size = y.shape[2], y.shape[3]
x_scaled = nn.functional.interpolate(
x, size=y_size, mode='bilinear', align_corners=align_corners)
return x_scaled
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
@manager.MODELS.add_component
class OCRNetNV(nn.Layer):
"""
The OCRNet implementation based on PaddlePaddle.
The differences from the OCRNet at paddleseg/models/ocrnet.py are:
1. The convolution bias is set to False.
2. The dropout rate in SpatialOCRModule is 0.05.
3. OCRHead also returns the `ocr` feature.
4. The logit list is not resized to the original size when ms_attention=True.
The original article refers to
Yuan, Yuhui, et al. "Object-Contextual Representations for Semantic Segmentation"
(https://arxiv.org/pdf/1909.11065.pdf)
Args:
num_classes (int): The unique number of target classes.
backbone (Paddle.nn.Layer): Backbone network.
backbone_indices (tuple): A tuple indicates the indices of output of backbone.
It can be either one or two values, if two values, the first index will be taken as
a deep-supervision feature in auxiliary layer; the second one will be taken as
input of pixel representation. If one value, it is taken by both above.
ocr_mid_channels (int, optional): The number of middle channels in OCRHead. Default: 512.
ocr_key_channels (int, optional): The number of key channels in ObjectAttentionBlock. Default: 256.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices,
ocr_mid_channels=512,
ocr_key_channels=256,
align_corners=False,
ms_attention=False):
super().__init__()
self.backbone = backbone
self.backbone_indices = backbone_indices
in_channels = [self.backbone.feat_channels[i] for i in backbone_indices]
self.head = OCRHead(
num_classes=num_classes,
in_channels=in_channels,
ocr_mid_channels=ocr_mid_channels,
ocr_key_channels=ocr_key_channels,
ms_attention=ms_attention)
self.align_corners = align_corners
self.ms_attention = ms_attention
def forward(self, x):
feats = self.backbone(x)
feats = [feats[i] for i in self.backbone_indices]
logit_list = self.head(feats)
if not self.ms_attention:
logit_list = [
F.interpolate(
logit,
x.shape[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
return logit_list
class OCRHead(nn.Layer):
"""
The Object contextual representation head.
Args:
num_classes(int): The unique number of target classes.
in_channels(tuple): The number of input channels.
ocr_mid_channels(int, optional): The number of middle channels in OCRHead. Default: 512.
ocr_key_channels(int, optional): The number of key channels in ObjectAttentionBlock. Default: 256.
"""
def __init__(self,
num_classes,
in_channels,
ocr_mid_channels=512,
ocr_key_channels=256,
ms_attention=False):
super().__init__()
self.num_classes = num_classes
self.ms_attention = ms_attention
self.spatial_gather = SpatialGatherBlock()
self.spatial_ocr = SpatialOCRModule(
ocr_mid_channels,
ocr_key_channels,
ocr_mid_channels,
dropout_rate=0.05)
self.indices = [-2, -1] if len(in_channels) > 1 else [-1, -1]
self.conv3x3_ocr = layers.ConvBNReLU(
in_channels[self.indices[1]], ocr_mid_channels, 3, padding=1)
self.cls_head = nn.Conv2D(ocr_mid_channels, self.num_classes, 1)
self.aux_head = nn.Sequential(
layers.ConvBNReLU(in_channels[self.indices[0]],
in_channels[self.indices[0]], 1),
nn.Conv2D(in_channels[self.indices[0]], self.num_classes, 1))
self.init_weight()
def forward(self, feat_list):
feat_shallow, feat_deep = feat_list[self.indices[0]], feat_list[
self.indices[1]]
soft_regions = self.aux_head(feat_shallow)
pixels = self.conv3x3_ocr(feat_deep)
object_regions = self.spatial_gather(pixels, soft_regions)
ocr = self.spatial_ocr(pixels, object_regions)
logit = self.cls_head(ocr)
if self.ms_attention:
return [logit, soft_regions, ocr]
return [logit, soft_regions]
def init_weight(self):
"""Initialize the parameters of model parts."""
for sublayer in self.sublayers():
if isinstance(sublayer, nn.Conv2D):
param_init.normal_init(sublayer.weight, std=0.001)
elif isinstance(sublayer, (nn.BatchNorm, nn.SyncBatchNorm)):
param_init.constant_init(sublayer.weight, value=1.0)
param_init.constant_init(sublayer.bias, value=0.0)
class SpatialGatherBlock(nn.Layer):
"""Aggregation layer to compute the pixel-region representation."""
def forward(self, pixels, regions):
n, c, h, w = pixels.shape
_, k, _, _ = regions.shape
# pixels: from (n, c, h, w) to (n, h*w, c)
pixels = paddle.reshape(pixels, (n, c, h * w))
pixels = paddle.transpose(pixels, (0, 2, 1))
# regions: from (n, k, h, w) to (n, k, h*w)
regions = paddle.reshape(regions, (n, k, h * w))
regions = F.softmax(regions, axis=2)
# feats: from (n, k, c) to (n, c, k, 1)
feats = paddle.bmm(regions, pixels)
feats = paddle.transpose(feats, (0, 2, 1))
feats = paddle.unsqueeze(feats, axis=-1)
return feats
class SpatialOCRModule(nn.Layer):
"""Aggregate the global object representation to update the representation for each pixel."""
def __init__(self,
in_channels,
key_channels,
out_channels,
dropout_rate=0.1):
super().__init__()
self.attention_block = ObjectAttentionBlock(in_channels, key_channels)
self.conv1x1 = nn.Sequential(
layers.ConvBNReLU(
2 * in_channels, out_channels, 1, bias_attr=False),
nn.Dropout2D(dropout_rate))
def forward(self, pixels, regions):
context = self.attention_block(pixels, regions)
feats = paddle.concat([context, pixels], axis=1)
feats = self.conv1x1(feats)
return feats
class ObjectAttentionBlock(nn.Layer):
"""A self-attention module."""
def __init__(self, in_channels, key_channels):
super().__init__()
self.in_channels = in_channels
self.key_channels = key_channels
self.f_pixel = nn.Sequential(
layers.ConvBNReLU(
in_channels, key_channels, 1, bias_attr=False),
layers.ConvBNReLU(
key_channels, key_channels, 1, bias_attr=False))
self.f_object = nn.Sequential(
layers.ConvBNReLU(
in_channels, key_channels, 1, bias_attr=False),
layers.ConvBNReLU(
key_channels, key_channels, 1, bias_attr=False))
self.f_down = layers.ConvBNReLU(
in_channels, key_channels, 1, bias_attr=False)
self.f_up = layers.ConvBNReLU(
key_channels, in_channels, 1, bias_attr=False)
def forward(self, x, proxy):
n, _, h, w = x.shape
# query : from (n, c1, h1, w1) to (n, h1*w1, key_channels)
query = self.f_pixel(x)
query = paddle.reshape(query, (n, self.key_channels, -1))
query = paddle.transpose(query, (0, 2, 1))
# key : from (n, c2, h2, w2) to (n, key_channels, h2*w2)
key = self.f_object(proxy)
key = paddle.reshape(key, (n, self.key_channels, -1))
# value : from (n, c2, h2, w2) to (n, h2*w2, key_channels)
value = self.f_down(proxy)
value = paddle.reshape(value, (n, self.key_channels, -1))
value = paddle.transpose(value, (0, 2, 1))
# sim_map (n, h1*w1, h2*w2)
sim_map = paddle.bmm(query, key)
sim_map = (self.key_channels**-.5) * sim_map
sim_map = F.softmax(sim_map, axis=-1)
        # context: from (n, h1*w1, key_channels) to (n, in_channels, h1, w1)
context = paddle.bmm(sim_map, value)
context = paddle.transpose(context, (0, 2, 1))
context = paddle.reshape(context, (n, self.key_channels, h, w))
context = self.f_up(context)
return context
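# Note (added commentary): ObjectAttentionBlock is scaled dot-product
# attention with pixel features as queries and region descriptors as
# keys/values:
#     context = softmax(Q @ K^T / sqrt(key_channels)) @ V
# where Q = f_pixel(x), K = f_object(proxy), V = f_down(proxy); f_up then
# projects the attended context back to in_channels.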
class ProjectionHead(nn.Layer):
    """Project features to a lower-dimensional, L2-normalized embedding."""

    def __init__(self, dim_in, proj_dim=256, proj='convmlp'):
        super().__init__()
        if proj == 'linear':
            # Paddle's conv layer is nn.Conv2D, not PyTorch's nn.Conv2d.
            self.proj = nn.Conv2D(dim_in, proj_dim, kernel_size=1)
        elif proj == 'convmlp':
            self.proj = nn.Sequential(
                nn.Conv2D(dim_in, dim_in, kernel_size=1),
                nn.SyncBatchNorm(dim_in),
                nn.ReLU(),
                nn.Conv2D(dim_in, proj_dim, kernel_size=1))
        else:
            raise ValueError(
                "proj must be 'linear' or 'convmlp', but got {}".format(proj))

    def forward(self, x):
        # Paddle's F.normalize takes `axis`, not PyTorch's `dim`.
        return F.normalize(self.proj(x), p=2, axis=1)
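# Usage sketch (added commentary; dim_in=720 is a hypothetical backbone
# width): the head maps features to an embedding whose channel vectors have
# unit L2 norm, as typically consumed by a pixel-wise contrastive loss.
def _demo_projection_head():
    head = ProjectionHead(dim_in=720, proj_dim=256)
    emb = head(paddle.rand([2, 720, 32, 32]))
    # emb: [2, 256, 32, 32]; each spatial position's channel vector has
    # unit L2 norm.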
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
from paddleseg.core import predict
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model prediction')
# params of prediction
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
        help='The path of the model for prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
        help='The path of the image; it can be a file or a directory containing images',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the predicted results',
type=str,
default='./output/result')
# augment for prediction
parser.add_argument(
'--aug_pred',
dest='aug_pred',
        help='Whether to use multi-scale and flip augmentation for prediction',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
        help='Scales for augmentation',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
        help='Whether to use horizontal flip augmentation',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
        help='Whether to use vertical flip augmentation',
action='store_true')
# sliding window prediction
parser.add_argument(
'--is_slide',
dest='is_slide',
        help='Whether to predict by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
        help='The crop size of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
        help='The stride of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:
raise FileNotFoundError(
            '`--image_path` is not found. It should be an image file or a directory containing images'
)
if len(image_list) == 0:
        raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
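# Usage note (added commentary): for a directory input, the walk is recursive
# and `image_dir` is returned alongside the list, so the caller can preserve
# the relative directory structure when saving results, e.g.
#     image_list, image_dir = get_image_list('data/test_images')  # hypothetical path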
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
val_dataset = cfg.val_dataset
if not val_dataset:
        raise RuntimeError(
            'The validation dataset is not specified in the configuration file.'
        )
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
transforms = val_dataset.transforms
image_list, image_dir = get_image_list(args.image_path)
logger.info('Number of predict images = {}'.format(len(image_list)))
predict(
model,
model_path=args.model_path,
transforms=transforms,
image_list=image_list,
image_dir=image_dir,
save_dir=args.save_dir,
aug_pred=args.aug_pred,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride, )
if __name__ == '__main__':
args = parse_args()
main(args)
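# Example invocation (added commentary; the script file name, config name and
# all paths are placeholders, not from the original repo):
#   python predict.py \
#       --config configs/my_config.yml \
#       --model_path output/best_model/model.pdparams \
#       --image_path data/test_images \
#       --save_dir output/result \
#       --aug_pred --scales 0.75 1.0 1.25 --flip_horizontal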
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
from core import predictEnsemble
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model prediction')
# params of prediction
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
        help='The path of the model for prediction',
type=str,
default=None)
parser.add_argument(
"--config_hard",
dest="cfg_hard",
        help="The config file of the second (hard) model.",
default=None,
type=str)
parser.add_argument(
'--model_path_hard',
dest='model_path_hard',
        help='The path of the second (hard) model for ensemble prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
        help='The path of the image; it can be a file or a directory containing images',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the predicted results',
type=str,
default='./output/result')
# augment for prediction
parser.add_argument(
'--aug_pred',
dest='aug_pred',
        help='Whether to use multi-scale and flip augmentation for prediction',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
        help='Scales for augmentation',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
        help='Whether to use horizontal flip augmentation',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
        help='Whether to use vertical flip augmentation',
action='store_true')
# sliding window prediction
parser.add_argument(
'--is_slide',
dest='is_slide',
        help='Whether to predict by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
        help='The crop size of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
        help='The stride of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:
raise FileNotFoundError(
            '`--image_path` is not found. It should be an image file or a directory containing images'
)
if len(image_list) == 0:
        raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
    if not args.cfg:
        raise RuntimeError('No configuration file specified.')
    if not args.cfg_hard:
        raise RuntimeError(
            'No configuration file specified for the second (hard) model.')
    cfg = Config(args.cfg)
    val_dataset = cfg.val_dataset
    cfg_hard = Config(args.cfg_hard)
if not val_dataset:
        raise RuntimeError(
            'The validation dataset is not specified in the configuration file.'
        )
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
model_hard = cfg_hard.model
transforms = val_dataset.transforms
image_list, image_dir = get_image_list(args.image_path)
logger.info('Number of predict images = {}'.format(len(image_list)))
predictEnsemble(
model,
model_hard,
model_path=args.model_path,
model_path_hard=args.model_path_hard,
transforms=transforms,
image_list=image_list,
image_dir=image_dir,
save_dir=args.save_dir,
aug_pred=args.aug_pred,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride, )
if __name__ == '__main__':
args = parse_args()
main(args)
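# Example invocation (added commentary; the script file name, config names and
# all paths are placeholders, not from the original repo):
#   python predict_ensemble.py \
#       --config configs/base_model.yml \
#       --model_path output/base/model.pdparams \
#       --config_hard configs/hard_class_model.yml \
#       --model_path_hard output/hard/model.pdparams \
#       --image_path data/test_images \
#       --save_dir output/ensemble_result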