Commit 0d97cc8c authored by Sugon_ldc

add new model
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
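# Illustrative sketch (assumes Resize/Padding transforms with a target_size
# attribute, as in paddleseg.transforms): for an image of shape (1080, 1920)
# passed through Resize(target_size=(512, 512)) and then
# Padding(target_size=(520, 520)), get_reverse_list returns
# [('resize', (1080, 1920)), ('padding', (512, 512))], i.e. the shape to
# restore before undoing each op, recorded in application order.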
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
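# Illustrative values: flip_combination(True, True) returns
# [(False, False), (True, False), (False, True), (True, True)], so the
# identity pass is always kept, and (True, True) is only added when both
# flips are enabled.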
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
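# Example: with flip=(True, False) only the last (width) axis of the NCHW
# tensor is reversed, i.e. a horizontal flip; (False, True) reverses the
# height axis instead.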
def slide_inference(model, im, crop_size, stride):
"""
Infer by sliding window.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        crop_size (tuple|list): The size of the sliding window, (w, h).
        stride (tuple|list): The size of the stride, (w, h).
    Returns:
        Tensor: The logits of the input image.
"""
h_im, w_im = im.shape[-2:]
w_crop, h_crop = crop_size
w_stride, h_stride = stride
    # calculate the number of crops (np.int is removed in recent NumPy; use int)
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
# prevent negative sliding rounds when imgs after scaling << crop_size
rows = 1 if h_im <= h_crop else rows
cols = 1 if w_im <= w_crop else cols
    # TODO: Paddle 'Tensor' does not support item assignment; once it does,
    # accumulate with tensors instead of numpy arrays.
final_logit = None
count = np.zeros([1, 1, h_im, w_im])
for r in range(rows):
for c in range(cols):
h1 = r * h_stride
w1 = c * w_stride
h2 = min(h1 + h_crop, h_im)
w2 = min(w1 + w_crop, w_im)
h1 = max(h2 - h_crop, 0)
w1 = max(w2 - w_crop, 0)
im_crop = im[:, :, h1:h2, w1:w2]
logits = model(im_crop)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0].numpy()
if final_logit is None:
final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
count[:, :, h1:h2, w1:w2] += 1
    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixels that were not predicted. It is possible that the '
            'stride is greater than the crop_size.')
final_logit = final_logit / count
final_logit = paddle.to_tensor(final_logit)
return final_logit
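# Worked example (illustrative numbers): with h_im = 1000, h_crop = 512 and
# h_stride = 256, rows = ceil((1000 - 512) / 256) + 1 = 3, and the window tops
# are clamped to h1 = 0, 256 and 488, so every pixel is covered and the
# overlapping regions are averaged through `count`.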
def inference(model,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
if not is_slide:
logits = model(im)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0]
else:
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
def aug_inference(model,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: Prediction of image with shape (1, 1, h, w) is returned.
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
pred = F.interpolate(pred, (ori_shape[0], ori_shape[1]), mode='nearest')
return pred
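# Hedged usage sketch (illustrative names; assumes a trained paddleseg model
# and a transforms.Compose called `eval_transforms`):
#
#   im = paddle.to_tensor(img_chw[np.newaxis, ...])  # (1, 3, H, W) float32
#   pred = aug_inference(model, im, ori_shape=[1080, 1920],
#                        transforms=eval_transforms.transforms,
#                        scales=[0.75, 1.0, 1.25], flip_horizontal=True)
#   # softmax probabilities are summed over 3 scales x 2 flips, then argmaxed
#   # into a (1, 1, 1080, 1920) int32 label map.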
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def slide_inference(model, im, crop_size, stride):
"""
Infer by sliding window.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        crop_size (tuple|list): The size of the sliding window, (w, h).
        stride (tuple|list): The size of the stride, (w, h).
    Returns:
        Tensor: The logits of the input image.
"""
h_im, w_im = im.shape[-2:]
w_crop, h_crop = crop_size
w_stride, h_stride = stride
    # calculate the number of crops (np.int is removed in recent NumPy; use int)
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
# prevent negative sliding rounds when imgs after scaling << crop_size
rows = 1 if h_im <= h_crop else rows
cols = 1 if w_im <= w_crop else cols
    # TODO: Paddle 'Tensor' does not support item assignment; once it does,
    # accumulate with tensors instead of numpy arrays.
final_logit = None
count = np.zeros([1, 1, h_im, w_im])
for r in range(rows):
for c in range(cols):
h1 = r * h_stride
w1 = c * w_stride
h2 = min(h1 + h_crop, h_im)
w2 = min(w1 + w_crop, w_im)
h1 = max(h2 - h_crop, 0)
w1 = max(w2 - w_crop, 0)
im_crop = im[:, :, h1:h2, w1:w2]
logits = model(im_crop)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0].numpy()
if final_logit is None:
final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
count[:, :, h1:h2, w1:w2] += 1
    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixels that were not predicted. It is possible that the '
            'stride is greater than the crop_size.')
final_logit = final_logit / count
final_logit = paddle.to_tensor(final_logit)
return final_logit
def inference(model,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
if not is_slide:
logits = model(im)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0]
else:
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
def aug_inference(model,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
    Returns:
        Tensor: The accumulated softmax logits with shape (1, num_classes, h, w).
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
return final_logit
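# Note: unlike the aug_inference earlier in this commit, this variant returns
# the accumulated softmax logits (1, num_classes, h_input, w_input) instead of
# a label map, so the caller is expected to argmax / resize them itself, e.g.
# when ensembling with another model's logits.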
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def inference(model,
model_hard,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model, whose softmax output is averaged with the first.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
    if not is_slide:
        # Average the softmax probabilities of the two models.
        logits = F.softmax(model(im)[0], axis=1)
        logits_hard = F.softmax(model_hard(im)[0], axis=1)
        # Disabled experiment: scatter a subset of model_hard's classes into a
        # copy of model's output instead of averaging everything, e.g.
        #   logit_hard[ii] = paddle.scatter(
        #       logit_hard[ii],
        #       paddle.to_tensor([3, 7, 12, 14, 15, 16, 18, 19, 20, 21]),
        #       logits_hard[ii][1:], overwrite=True)
        logit = (logits + logits_hard) / 2
    else:
        # This ensemble path does not implement sliding-window inference;
        # previously `logit` was left undefined when is_slide was True.
        raise NotImplementedError(
            'is_slide=True is not supported by the ensemble inference.')
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
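# Worked example (illustrative numbers): if for one pixel `model` predicts
# class probabilities [0.6, 0.4] and `model_hard` predicts [0.2, 0.8], the
# ensembled output is [(0.6 + 0.2) / 2, (0.4 + 0.8) / 2] = [0.4, 0.6], so the
# averaged argmax can differ from either model alone.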
def aug_inference(model,
model_hard,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model used for ensembling.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: Prediction of image with shape (1, 1, h, w) is returned.
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
model_hard,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
# logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
pred = paddle.argmax(final_logit, axis=1, keepdim=True, dtype='int32')
pred = F.interpolate(pred, (ori_shape[0], ori_shape[1]), mode='nearest')
return pred
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def inference(model,
model_hard,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model, whose softmax output is summed with the first.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
    if not is_slide:
        # Sum the softmax probabilities of the two models.
        logits = F.softmax(model(im)[0], axis=1)
        logits_hard = F.softmax(model_hard(im)[0], axis=1)
        logit = logits + logits_hard
    else:
        # This ensemble path does not implement sliding-window inference;
        # previously `logit` was left undefined when is_slide was True.
        raise NotImplementedError(
            'is_slide=True is not supported by the ensemble inference.')
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
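# Note: summing the two softmax outputs here (rather than averaging as in the
# variant above) leaves the per-pixel argmax unchanged; only the scale of the
# returned 'logit' differs, which is harmless because callers either argmax it
# or keep accumulating it.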
def aug_inference(model,
model_hard,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The first model used to get the logits of the image.
        model_hard (paddle.nn.Layer): The second model used for ensembling.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
    Returns:
        Tensor: The accumulated softmax logits with shape (1, num_classes, h, w).
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
model_hard,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
# logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
return final_logit
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections.abc
import numpy as np
import paddle
import paddle.nn.functional as F
def get_reverse_list(ori_shape, transforms):
"""
    Get the reverse list of transforms.
    Args:
        ori_shape (list): Original shape of the image.
        transforms (list): List of transforms.
    Returns:
        list: List of tuples in two formats:
            ('resize', (h, w)): the image shape before resizing.
            ('padding', (h, w)): the image shape before padding.
"""
reverse_list = []
h, w = ori_shape[0], ori_shape[1]
for op in transforms:
if op.__class__.__name__ in ['Resize', 'ResizeByLong']:
reverse_list.append(('resize', (h, w)))
h, w = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['Padding']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
if op.__class__.__name__ in ['LimitLong']:
long_edge = max(h, w)
short_edge = min(h, w)
            # Rescale the short edge before overwriting long_edge; otherwise
            # the ratio op.max_long / long_edge degenerates to 1.
            if ((op.max_long is not None) and (long_edge > op.max_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.max_long / long_edge))
                long_edge = op.max_long
            elif ((op.min_long is not None) and (long_edge < op.min_long)):
                reverse_list.append(('resize', (h, w)))
                short_edge = int(round(short_edge * op.min_long / long_edge))
                long_edge = op.min_long
if h > w:
h = long_edge
w = short_edge
else:
w = long_edge
h = short_edge
return reverse_list
def reverse_transform(pred, ori_shape, transforms):
"""recover pred to origin shape"""
reverse_list = get_reverse_list(ori_shape, transforms)
for item in reverse_list[::-1]:
if item[0] == 'resize':
h, w = item[1][0], item[1][1]
pred = F.interpolate(pred, (h, w), mode='nearest')
elif item[0] == 'padding':
h, w = item[1][0], item[1][1]
pred = pred[:, :, 0:h, 0:w]
else:
raise Exception("Unexpected info '{}' in im_info".format(item[0]))
return pred
def flip_combination(flip_horizontal=False, flip_vertical=False):
"""
Get flip combination.
Args:
flip_horizontal (bool): Whether to flip horizontally. Default: False.
flip_vertical (bool): Whether to flip vertically. Default: False.
Returns:
list: List of tuple. The first element of tuple is whether to flip horizontally,
and the second is whether to flip vertically.
"""
flip_comb = [(False, False)]
if flip_horizontal:
flip_comb.append((True, False))
if flip_vertical:
flip_comb.append((False, True))
if flip_horizontal:
flip_comb.append((True, True))
return flip_comb
def tensor_flip(x, flip):
"""Flip tensor according directions"""
if flip[0]:
x = x[:, :, :, ::-1]
if flip[1]:
x = x[:, :, ::-1, :]
return x
def slide_inference(model, im, crop_size, stride):
"""
Infer by sliding window.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        crop_size (tuple|list): The size of the sliding window, (w, h).
        stride (tuple|list): The size of the stride, (w, h).
    Returns:
        Tensor: The logits of the input image.
"""
h_im, w_im = im.shape[-2:]
w_crop, h_crop = crop_size
w_stride, h_stride = stride
    # calculate the number of crops (np.int is removed in recent NumPy; use int)
    rows = int(np.ceil(1.0 * (h_im - h_crop) / h_stride)) + 1
    cols = int(np.ceil(1.0 * (w_im - w_crop) / w_stride)) + 1
# prevent negative sliding rounds when imgs after scaling << crop_size
rows = 1 if h_im <= h_crop else rows
cols = 1 if w_im <= w_crop else cols
    # TODO: Paddle 'Tensor' does not support item assignment; once it does,
    # accumulate with tensors instead of numpy arrays.
final_logit = None
count = np.zeros([1, 1, h_im, w_im])
for r in range(rows):
for c in range(cols):
h1 = r * h_stride
w1 = c * w_stride
h2 = min(h1 + h_crop, h_im)
w2 = min(w1 + w_crop, w_im)
h1 = max(h2 - h_crop, 0)
w1 = max(w2 - w_crop, 0)
im_crop = im[:, :, h1:h2, w1:w2]
logits = model(im_crop)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0].numpy()
if final_logit is None:
final_logit = np.zeros([1, logit.shape[1], h_im, w_im])
final_logit[:, :, h1:h2, w1:w2] += logit[:, :, :h2 - h1, :w2 - w1]
count[:, :, h1:h2, w1:w2] += 1
    if np.sum(count == 0) != 0:
        raise RuntimeError(
            'There are pixels that were not predicted. It is possible that the '
            'stride is greater than the crop_size.')
final_logit = final_logit / count
final_logit = paddle.to_tensor(final_logit)
return final_logit
def inference(model,
im,
ori_shape=None,
transforms=None,
is_slide=False,
stride=None,
crop_size=None):
"""
Inference for image.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: If ori_shape is not None, a prediction with shape (1, 1, h, w) is returned.
If ori_shape is None, a logit with shape (1, num_classes, h, w) is returned.
"""
if not is_slide:
logits = model(im)
if not isinstance(logits, collections.abc.Sequence):
raise TypeError(
"The type of logits must be one of collections.abc.Sequence, e.g. list, tuple. But received {}"
.format(type(logits)))
logit = logits[0]
else:
logit = slide_inference(model, im, crop_size=crop_size, stride=stride)
if ori_shape is not None:
pred = paddle.argmax(logit, axis=1, keepdim=True, dtype='int32')
pred = reverse_transform(pred, ori_shape, transforms)
return pred
else:
return logit
def aug_inference(model,
im,
ori_shape,
transforms,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
Infer with augmentation.
Args:
        model (paddle.nn.Layer): The model used to get the logits of the image.
        im (Tensor): The input image.
        ori_shape (list): Original shape of the image.
        transforms (list): Transforms for the image.
        scales (float|tuple|list): Scales for resizing. Default: 1.0.
        flip_horizontal (bool): Whether to flip horizontally. Default: False.
        flip_vertical (bool): Whether to flip vertically. Default: False.
        is_slide (bool): Whether to infer by sliding window. Default: False.
        crop_size (tuple|list): The size of the sliding window, (w, h). It should be provided if is_slide is True.
        stride (tuple|list): The size of the stride, (w, h). It should be provided if is_slide is True.
Returns:
Tensor: Prediction of image with shape (1, 1, h, w) is returned.
"""
if isinstance(scales, float):
scales = [scales]
elif not isinstance(scales, (tuple, list)):
raise TypeError(
'`scales` expects float/tuple/list type, but received {}'.format(
type(scales)))
final_logit = 0
h_input, w_input = im.shape[-2], im.shape[-1]
flip_comb = flip_combination(flip_horizontal, flip_vertical)
for scale in scales:
h = int(h_input * scale + 0.5)
w = int(w_input * scale + 0.5)
        # Resize from the original input at each scale so repeated resizing
        # does not compound interpolation loss across scales.
        im_scale = F.interpolate(im, (h, w), mode='bilinear')
        for flip in flip_comb:
            im_flip = tensor_flip(im_scale, flip)
logit = inference(
model,
im_flip,
is_slide=is_slide,
crop_size=crop_size,
stride=stride)
logit = tensor_flip(logit, flip)
logit = F.interpolate(logit, (h_input, w_input), mode='bilinear')
logit = F.softmax(logit, axis=1)
final_logit = final_logit + logit
    final_logit = F.softmax(final_logit, axis=1)
    # Keep only confident pixels: any pixel whose max class probability is
    # below 0.9 is set to 255 (the ignore label) in the auto-generated label.
    max_prob = paddle.max(final_logit, axis=1, keepdim=True).numpy()
    pred = paddle.argmax(
        final_logit, axis=1, keepdim=True, dtype='int32').numpy()
    pred[max_prob < 0.9] = 255
    pred = paddle.to_tensor(pred)
pred = reverse_transform(pred, ori_shape, transforms)
return pred
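# Illustrative example of the confidence filter above: a pixel with ensembled
# probabilities [0.55, 0.45] has max_prob = 0.55 < 0.9, so it becomes 255 and
# is ignored when the pseudo label is used for training, while a pixel with
# [0.95, 0.05] keeps its argmax class 0.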
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import cv2
import numpy as np
import paddle
from paddleseg import utils
import core.infer_ensemble as infer_ensemble
from paddleseg.utils import logger, progbar
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def partition_list(arr, m):
"""split the list 'arr' into m pieces"""
n = int(math.ceil(len(arr) / float(m)))
return [arr[i:i + n] for i in range(0, len(arr), n)]
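# Example: partition_list(list(range(5)), 2) -> [[0, 1, 2], [3, 4]], so each
# of the 2 ranks gets a contiguous chunk of at most ceil(5 / 2) = 3 images.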
def predictEnsemble(model,
model_hard,
model_path,
model_path_hard,
transforms,
image_list,
image_dir=None,
save_dir='output',
aug_pred=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
    Predict and visualize the images in image_list with a two-model ensemble.
    Args:
        model (nn.Layer): The first model used to predict the input image.
        model_hard (nn.Layer): The second model of the ensemble.
        model_path (str): The path of the pretrained weights for `model`.
        model_path_hard (str): The path of the pretrained weights for `model_hard`.
        transforms (transform.Compose): Preprocessing for the input image.
        image_list (list): A list of image paths to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        aug_pred (bool, optional): Whether to use multi-scale and flip augmentation for prediction. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_pred` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_pred` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_pred` is True. Default: False.
is_slide (bool, optional): Whether to predict by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
"""
utils.utils.load_entire_model(model, model_path)
model.eval()
utils.utils.load_entire_model(model_hard, model_path_hard)
model_hard.eval()
nranks = paddle.distributed.get_world_size()
local_rank = paddle.distributed.get_rank()
if nranks > 1:
img_lists = partition_list(image_list, nranks)
else:
img_lists = [image_list]
added_saved_dir = os.path.join(save_dir, 'added_prediction')
pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction')
logger.info("Start to predict...")
progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im = cv2.imread(im_path)
ori_shape = im.shape[:2]
im, _ = transforms(im)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
if aug_pred:
pred = infer_ensemble.aug_inference(
model,
model_hard,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_ensemble.inference(
model,
model_hard,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
pred = paddle.squeeze(pred)
pred = pred.numpy().astype('uint8')
# get the saved name
if image_dir is not None:
im_file = im_path.replace(image_dir, '')
else:
im_file = os.path.basename(im_path)
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save pseudo color prediction
pred_mask = utils.visualize.get_pseudo_color_map(pred)
            pred_saved_path = os.path.join(
                pred_saved_dir, im_file.rsplit(".", 1)[0] + ".png")
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import cv2
import numpy as np
import paddle
import paddle.nn.functional as F
from paddleseg import utils
import core.infer_ensemble_three as infer_ensemble
import core.infer_crop as infer_crop
from paddleseg.utils import logger, progbar
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def partition_list(arr, m):
"""split the list 'arr' into m pieces"""
n = int(math.ceil(len(arr) / float(m)))
return [arr[i:i + n] for i in range(0, len(arr), n)]
def predictEnsembleThree(model,
model_1,
model_crop,
model_path,
model_path_1,
model_path_crop,
transforms,
transforms_crop,
image_list,
image_dir=None,
save_dir='output',
aug_pred=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
    Predict and visualize the images in image_list with a three-model ensemble.
    Args:
        model (nn.Layer): The first model used to predict the input image.
        model_1 (nn.Layer): The second model of the ensemble.
        model_crop (nn.Layer): The model applied to the three overlapping 720x1280 crops.
        model_path (str): The path of the pretrained weights for `model`.
        model_path_1 (str): The path of the pretrained weights for `model_1`.
        model_path_crop (str): The path of the pretrained weights for `model_crop`.
        transforms (transform.Compose): Preprocessing for the input image.
        transforms_crop (transform.Compose): Preprocessing for the cropped regions.
        image_list (list): A list of image paths to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        aug_pred (bool, optional): Whether to use multi-scale and flip augmentation for prediction. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_pred` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_pred` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_pred` is True. Default: False.
is_slide (bool, optional): Whether to predict by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
"""
utils.utils.load_entire_model(model, model_path)
model.eval()
utils.utils.load_entire_model(model_1, model_path_1)
model_1.eval()
utils.utils.load_entire_model(model_crop, model_path_crop)
model_crop.eval()
nranks = paddle.distributed.get_world_size()
local_rank = paddle.distributed.get_rank()
if nranks > 1:
img_lists = partition_list(image_list, nranks)
else:
img_lists = [image_list]
added_saved_dir = os.path.join(save_dir, 'added_prediction')
pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction')
logger.info("Start to predict...")
progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im_origin = cv2.imread(im_path)
ori_shape = im_origin.shape[:2]
im, _ = transforms(im_origin)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
ims, _ = transforms_crop(im_origin)
im1 = ims[:, 540:540 + 720, 320:320 + 1280]
im2 = ims[:, 540:540 + 720, 960:960 + 1280]
im3 = ims[:, 540:540 + 720, 1600:1600 + 1280]
im1 = im1[np.newaxis, ...]
im1 = paddle.to_tensor(im1)
im2 = im2[np.newaxis, ...]
im2 = paddle.to_tensor(im2)
im3 = im3[np.newaxis, ...]
im3 = paddle.to_tensor(im3)
ims_ = [im1, im2, im3]
if aug_pred:
pred = infer_ensemble.aug_inference(
model,
model_1,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_ensemble.inference(
model,
model_1,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
preds = []
for ii in range(3):
im_ = ims_[ii]
if aug_pred:
pred_crop = infer_crop.aug_inference(
                        model_crop,
im_,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred_crop = infer_crop.inference(
                        model_crop,
im_,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
preds.append(pred_crop)
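            # Stitch the three 1280 px-wide crop predictions (x offsets 320,
            # 960 and 1600) back into one 2560 px-wide map: neighbouring crops
            # overlap by 640 px, so the overlapping halves are averaged before
            # concatenation.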
left_ensem = (
preds[0][:, :, :, 640:1280] + preds[1][:, :, :, 0:640]) / 2
right_ensem = (
preds[1][:, :, :, 640:1280] + preds[2][:, :, :, 0:640]) / 2
pred_ensem = paddle.concat(
[
preds[0][:, :, :, 0:640], left_ensem, right_ensem,
preds[2][:, :, :, 640:1280]
],
axis=3)
logit = F.interpolate(pred_ensem, (432, 768), mode='bilinear')
pred_logit = pred.clone()
pred_logit[:, :, 324:756, 576:1344] = logit
pred = pred + pred_logit
pred = F.interpolate(pred, ori_shape, mode='bilinear')
pred = paddle.argmax(pred, axis=1, keepdim=True, dtype='int32')
pred = paddle.squeeze(pred)
pred = pred.numpy().astype('uint8')
# get the saved name
if image_dir is not None:
im_file = im_path.replace(image_dir, '')
else:
im_file = os.path.basename(im_path)
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save pseudo color prediction
pred_mask = utils.visualize.get_pseudo_color_map(pred)
            pred_saved_path = os.path.join(
                pred_saved_dir, im_file.rsplit(".", 1)[0] + ".png")
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import math
import cv2
import numpy as np
import paddle
from paddleseg import utils
from core import infer_generate_autolabel
from paddleseg.utils import logger, progbar
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def partition_list(arr, m):
"""split the list 'arr' into m pieces"""
n = int(math.ceil(len(arr) / float(m)))
return [arr[i:i + n] for i in range(0, len(arr), n)]
def predictAutolabel(model,
model_path,
transforms,
image_list,
image_dir=None,
save_dir='output',
aug_pred=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None):
"""
    Predict and visualize the images in image_list, generating auto labels.
    Args:
        model (nn.Layer): Used to predict the input image.
        model_path (str): The path of the pretrained model.
        transforms (transform.Compose): Preprocessing for the input image.
        image_list (list): A list of image paths to be predicted.
        image_dir (str, optional): The root directory of the images predicted. Default: None.
        save_dir (str, optional): The directory to save the visualized results. Default: 'output'.
        aug_pred (bool, optional): Whether to use multi-scale and flip augmentation for prediction. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_pred` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_pred` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_pred` is True. Default: False.
is_slide (bool, optional): Whether to predict by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
"""
utils.utils.load_entire_model(model, model_path)
model.eval()
nranks = paddle.distributed.get_world_size()
local_rank = paddle.distributed.get_rank()
if nranks > 1:
img_lists = partition_list(image_list, nranks)
else:
img_lists = [image_list]
added_saved_dir = os.path.join(save_dir, 'added_prediction')
pred_saved_dir = os.path.join(save_dir, 'pseudo_color_prediction')
logger.info("Start to predict...")
progbar_pred = progbar.Progbar(target=len(img_lists[0]), verbose=1)
with paddle.no_grad():
for i, im_path in enumerate(img_lists[local_rank]):
im = cv2.imread(im_path)
ori_shape = im.shape[:2]
im, _ = transforms(im)
im = im[np.newaxis, ...]
im = paddle.to_tensor(im)
if aug_pred:
pred = infer_generate_autolabel.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_generate_autolabel.inference(
model,
im,
ori_shape=ori_shape,
transforms=transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
pred = paddle.squeeze(pred)
pred = pred.numpy().astype('uint8')
# get the saved name
if image_dir is not None:
im_file = im_path.replace(image_dir, '')
else:
im_file = os.path.basename(im_path)
if im_file[0] == '/':
im_file = im_file[1:]
# save added image
added_image = utils.visualize.visualize(im_path, pred, weight=0.6)
added_image_path = os.path.join(added_saved_dir, im_file)
mkdir(added_image_path)
cv2.imwrite(added_image_path, added_image)
# save pseudo color prediction
pred_mask = utils.visualize.get_pseudo_color_map(pred)
            pred_saved_path = os.path.join(
                pred_saved_dir, im_file.rsplit(".", 1)[0] + ".png")
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
# pred_im = utils.visualize(im_path, pred, weight=0.0)
# pred_saved_path = os.path.join(pred_saved_dir, im_file)
# mkdir(pred_saved_path)
# cv2.imwrite(pred_saved_path, pred_im)
progbar_pred.update(i + 1)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import time
import paddle
import paddle.nn.functional as F
from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
from core import infer
np.set_printoptions(suppress=True)
def evaluate(model,
eval_dataset,
aug_eval=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None,
num_workers=0,
print_detail=True):
"""
    Launch evaluation.
    Args:
        model (nn.Layer): A semantic segmentation model.
        eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
        aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
        scales (list|float, optional): Scales for augmentation. It is valid when `aug_eval` is True. Default: 1.0.
        flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_eval` is True. Default: True.
        flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_eval` is True. Default: False.
is_slide (bool, optional): Whether to evaluate by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
num_workers (int, optional): Num workers for data loader. Default: 0.
print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
Returns:
float: The mIoU of validation datasets.
float: The accuracy of validation datasets.
"""
model.eval()
nranks = paddle.distributed.ParallelEnv().nranks
local_rank = paddle.distributed.ParallelEnv().local_rank
if nranks > 1:
# Initialize parallel environment if not done.
if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
):
paddle.distributed.init_parallel_env()
batch_sampler = paddle.io.DistributedBatchSampler(
eval_dataset, batch_size=1, shuffle=False, drop_last=False)
loader = paddle.io.DataLoader(
eval_dataset,
batch_sampler=batch_sampler,
num_workers=num_workers,
return_list=True, )
total_iters = len(loader)
intersect_area_all = 0
pred_area_all = 0
label_area_all = 0
if print_detail:
logger.info("Start evaluating (total_samples={}, total_iters={})...".
format(len(eval_dataset), total_iters))
progbar_val = progbar.Progbar(target=total_iters, verbose=1)
reader_cost_averager = TimeAverager()
batch_cost_averager = TimeAverager()
batch_start = time.time()
with paddle.no_grad():
for iter, data in enumerate(loader):
(im, label) = data
reader_cost_averager.record(time.time() - batch_start)
label = label.astype('int64')
ori_shape = label.shape[-2:]
if aug_eval:
pred = infer.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer.inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
intersect_area, pred_area, label_area = metrics.calculate_area(
pred,
label,
eval_dataset.num_classes,
ignore_index=eval_dataset.ignore_index)
# Gather from all ranks
if nranks > 1:
intersect_area_list = []
pred_area_list = []
label_area_list = []
paddle.distributed.all_gather(intersect_area_list,
intersect_area)
paddle.distributed.all_gather(pred_area_list, pred_area)
paddle.distributed.all_gather(label_area_list, label_area)
                # Some images have already been evaluated and should be
                # dropped in the last iteration.
if (iter + 1) * nranks > len(eval_dataset):
valid = len(eval_dataset) - iter * nranks
intersect_area_list = intersect_area_list[:valid]
pred_area_list = pred_area_list[:valid]
label_area_list = label_area_list[:valid]
for i in range(len(intersect_area_list)):
intersect_area_all = intersect_area_all + intersect_area_list[
i]
pred_area_all = pred_area_all + pred_area_list[i]
label_area_all = label_area_all + label_area_list[i]
else:
intersect_area_all = intersect_area_all + intersect_area
pred_area_all = pred_area_all + pred_area
label_area_all = label_area_all + label_area
batch_cost_averager.record(
time.time() - batch_start, num_samples=len(label))
batch_cost = batch_cost_averager.get_average()
reader_cost = reader_cost_averager.get_average()
if local_rank == 0 and print_detail:
progbar_val.update(iter + 1, [('batch_cost', batch_cost),
('reader cost', reader_cost)])
reader_cost_averager.reset()
batch_cost_averager.reset()
batch_start = time.time()
class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
label_area_all)
class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
if print_detail:
logger.info("[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".
format(len(eval_dataset), miou, acc, kappa))
logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
return miou, acc
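# Usage sketch (illustrative, not part of the original file): `model` and
# `eval_dataset` are assumed to be constructed elsewhere, e.g. from a config.
#
#     miou, acc = evaluate(model, eval_dataset, aug_eval=True,
#                          scales=[0.75, 1.0, 1.25], flip_horizontal=True)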
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import time
import paddle
import paddle.nn.functional as F
from paddleseg.utils import metrics, TimeAverager, calculate_eta, logger, progbar
from core import infer_crop
np.set_printoptions(suppress=True)
def evaluate(model,
eval_dataset,
aug_eval=False,
scales=1.0,
flip_horizontal=True,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None,
num_workers=0,
print_detail=True):
"""
Launch evaluation.
Args:
model(nn.Layer): A semantic segmentation model.
eval_dataset (paddle.io.Dataset): Used to read and process validation datasets.
aug_eval (bool, optional): Whether to use multi-scale and flip augmentation for evaluation. Default: False.
scales (list|float, optional): Scales for augmentation. It is valid when `aug_eval` is True. Default: 1.0.
flip_horizontal (bool, optional): Whether to use horizontal flip augmentation. It is valid when `aug_eval` is True. Default: True.
flip_vertical (bool, optional): Whether to use vertical flip augmentation. It is valid when `aug_eval` is True. Default: False.
is_slide (bool, optional): Whether to evaluate by sliding window. Default: False.
stride (tuple|list, optional): The stride of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
crop_size (tuple|list, optional): The crop size of sliding window, the first is width and the second is height.
It should be provided when `is_slide` is True.
num_workers (int, optional): Num workers for data loader. Default: 0.
print_detail (bool, optional): Whether to print detailed information about the evaluation process. Default: True.
Returns:
float: The mIoU of validation datasets.
float: The accuracy of validation datasets.
"""
model.eval()
nranks = paddle.distributed.ParallelEnv().nranks
local_rank = paddle.distributed.ParallelEnv().local_rank
if nranks > 1:
# Initialize parallel environment if not done.
if not paddle.distributed.parallel.parallel_helper._is_parallel_ctx_initialized(
):
paddle.distributed.init_parallel_env()
batch_sampler = paddle.io.DistributedBatchSampler(
eval_dataset, batch_size=1, shuffle=False, drop_last=False)
loader = paddle.io.DataLoader(
eval_dataset,
batch_sampler=batch_sampler,
num_workers=num_workers,
return_list=True, )
total_iters = len(loader)
intersect_area_all = 0
pred_area_all = 0
label_area_all = 0
if print_detail:
logger.info("Start evaluating (total_samples={}, total_iters={})...".
format(len(eval_dataset), total_iters))
progbar_val = progbar.Progbar(target=total_iters, verbose=1)
reader_cost_averager = TimeAverager()
batch_cost_averager = TimeAverager()
batch_start = time.time()
with paddle.no_grad():
for iter, data in enumerate(loader):
reader_cost_averager.record(time.time() - batch_start)
preds = []
label = data[3].astype('int64')
for ii in range(3):
im = data[ii]
ori_shape = im.shape[-2:]
if aug_eval:
pred = infer_crop.aug_inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
scales=scales,
flip_horizontal=flip_horizontal,
flip_vertical=flip_vertical,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
else:
pred = infer_crop.inference(
model,
im,
ori_shape=ori_shape,
transforms=eval_dataset.transforms.transforms,
is_slide=is_slide,
stride=stride,
crop_size=crop_size)
preds.append(pred)
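# Stitch the three overlapping 1280-wide crop predictions back into a single
# 2560-wide prediction: adjacent crops overlap by 640 px, so the two overlap
# regions are averaged and the non-overlapping flanks are kept as-is.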
left_ensem = (
preds[0][:, :, :, 640:1280] + preds[1][:, :, :, 0:640]) / 2
right_ensem = (
preds[1][:, :, :, 640:1280] + preds[2][:, :, :, 0:640]) / 2
pred_ensem = paddle.concat(
[
preds[0][:, :, :, 0:640], left_ensem, right_ensem,
preds[2][:, :, :, 640:1280]
],
axis=3)
pred = paddle.argmax(
pred_ensem, axis=1, keepdim=True, dtype='int32')
intersect_area, pred_area, label_area = metrics.calculate_area(
pred,
label,
eval_dataset.num_classes,
ignore_index=eval_dataset.ignore_index)
# Gather from all ranks
if nranks > 1:
intersect_area_list = []
pred_area_list = []
label_area_list = []
paddle.distributed.all_gather(intersect_area_list,
intersect_area)
paddle.distributed.all_gather(pred_area_list, pred_area)
paddle.distributed.all_gather(label_area_list, label_area)
# Some images have already been evaluated and should be excluded in the last iteration
if (iter + 1) * nranks > len(eval_dataset):
valid = len(eval_dataset) - iter * nranks
intersect_area_list = intersect_area_list[:valid]
pred_area_list = pred_area_list[:valid]
label_area_list = label_area_list[:valid]
for i in range(len(intersect_area_list)):
intersect_area_all = intersect_area_all + intersect_area_list[
i]
pred_area_all = pred_area_all + pred_area_list[i]
label_area_all = label_area_all + label_area_list[i]
else:
intersect_area_all = intersect_area_all + intersect_area
pred_area_all = pred_area_all + pred_area
label_area_all = label_area_all + label_area
batch_cost_averager.record(
time.time() - batch_start, num_samples=len(label))
batch_cost = batch_cost_averager.get_average()
reader_cost = reader_cost_averager.get_average()
if local_rank == 0 and print_detail:
progbar_val.update(iter + 1, [('batch_cost', batch_cost),
('reader cost', reader_cost)])
reader_cost_averager.reset()
batch_cost_averager.reset()
batch_start = time.time()
class_iou, miou = metrics.mean_iou(intersect_area_all, pred_area_all,
label_area_all)
class_acc, acc = metrics.accuracy(intersect_area_all, pred_area_all)
kappa = metrics.kappa(intersect_area_all, pred_area_all, label_area_all)
if print_detail:
logger.info("[EVAL] #Images={} mIoU={:.4f} Acc={:.4f} Kappa={:.4f} ".
format(len(eval_dataset), miou, acc, kappa))
logger.info("[EVAL] Class IoU: \n" + str(np.round(class_iou, 4)))
logger.info("[EVAL] Class Acc: \n" + str(np.round(class_acc, 4)))
return miou, acc
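# Note: this crop-ensemble `evaluate` expects each sample from `eval_dataset`
# to be a tuple (im1, im2, im3, label) of three overlapping crops plus the
# matching label band, as produced by the AutoNueCrop dataset below in 'val' mode.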
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .auto_nue import AutoNue
from .auto_nue_autolabel import AutoNueAutolabel
from .auto_nue_crop import AutoNueCrop
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import cv2
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# The random seed is fixed so that, after shuffling the dataset each epoch during multi-GPU training, the data sequences on all GPUs stay consistent.
random.seed(100)
@manager.DATASETS.add_component
class AutoNue(paddle.io.Dataset):
"""
You need to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5)
following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
Then organize the data following the structure below.
IDD_Segmentation
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Args:
transforms (list): Transforms for image.
dataset_root (str): The IDD_Segmentation dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 26
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
for ii in range(len(self.file_list)):
print(self.file_list[ii])
print(len(self.file_list))
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
# use coarse dataset only in training
# img_dir = os.path.join('data/IDD_Detection/JPEGImages/all')
# label_dir = os.path.join('data/IDD_Detection/AutoLabel/pred_refine')
# if self.dataset_root is None or not os.path.isdir(
# self.dataset_root) or not os.path.isdir(
# img_dir) or not os.path.isdir(label_dir):
# raise ValueError(
# "The coarse dataset is not Found or the folder structure is nonconfoumance."
# )
# coarse_label_files = sorted(
# glob.glob(os.path.join(label_dir, '*', '*')))
# coarse_img_files = sorted(
# glob.glob(os.path.join(img_dir, '*', '*')))
# if len(coarse_img_files) != len(coarse_label_files):
# raise ValueError(
# "The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset."
# .format(len(coarse_img_files), len(coarse_label_files)))
# self.coarse_file_list = [[img_path, label_path]
# for img_path, label_path in zip(
# coarse_img_files, coarse_label_files)]
# random.shuffle(self.coarse_file_list)
# self.total_num_files = int(self.num_files * (1 + coarse_multiple))
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
# label = cv2.resize(label, (1280, 720), interpolation=cv2.INTER_NEAREST)
label = label[np.newaxis, :, :]
return im, label
else:
if idx >= self.num_files:
image_path, label_path = self.coarse_file_list[idx -
self.num_files]
else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
return im, label
def __len__(self):
return self.total_num_files
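# Instantiation sketch (illustrative; the transform list and path are
# assumptions, not taken from this repo's configs):
#
#     import paddleseg.transforms as T
#     val_set = AutoNue(
#         transforms=[T.Normalize()],
#         dataset_root='data/IDD_Segmentation',
#         mode='val')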
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# The random seed is fixed so that, after shuffling the dataset each epoch during multi-GPU training, the data sequences on all GPUs stay consistent.
random.seed(100)
@manager.DATASETS.add_component
class AutoNueAutolabel(paddle.io.Dataset):
"""
You need to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5)
following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
Then organize the data following the structure below.
IDD_Segmentation
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Args:
transforms (list): Transforms for image.
dataset_root (str): The IDD_Segmentation dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 26
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
for ii in range(len(self.file_list)):
print(self.file_list[ii])
print(len(self.file_list))
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
# use coarse dataset only in training
img_dir = os.path.join('data/IDD_Detection/JPEGImages')
label_dir = os.path.join('data/IDD_Detection/pred_refine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The coarse dataset is not Found or the folder structure is nonconfoumance."
)
coarse_label_files = sorted(
glob.glob(os.path.join(label_dir, '*', '*')))
coarse_img_files = sorted(
glob.glob(os.path.join(img_dir, '*', '*')))
if len(coarse_img_files) != len(coarse_label_files):
raise ValueError(
"The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset."
.format(len(coarse_img_files), len(coarse_label_files)))
self.coarse_file_list = [[img_path, label_path]
for img_path, label_path in zip(
coarse_img_files, coarse_label_files)]
random.shuffle(self.coarse_file_list)
self.file_list = self.coarse_file_list
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
print(self.num_files)
# self.total_num_files = int(self.num_files * (1 + coarse_multiple))
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
label = label[np.newaxis, :, :]
return im, label
else:
# if idx >= self.num_files:
# image_path, label_path = self.coarse_file_list[idx -
# self.num_files]
# else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
return im, label
def __len__(self):
return self.total_num_files
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# The random seed is fixed so that, after shuffling the dataset each epoch during multi-GPU training, the data sequences on all GPUs stay consistent.
random.seed(100)
@manager.DATASETS.add_component
class AutoNueCrop(paddle.io.Dataset):
"""
You need to download and convert the [India Driving Dataset](https://idd.insaan.iiit.ac.in/evaluation/autonue21/#bm5)
following the instructions of the Segmentation Track. The IDD_Detection dataset is also needed for pseudo-labeling.
Then organize the data following the structure below.
IDD_Segmentation
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
Args:
transforms (list): Transforms for image.
dataset_root (str): The IDD_Segmentation dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 26
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.*')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labellevel3Ids.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.*')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
for ii in range(len(self.file_list)):
print(self.file_list[ii])
print(len(self.file_list))
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
(h, w) = im.shape[1:]
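# Cut three 720x1280 crops from the same horizontal band (rows 540:1260);
# neighboring crops start 640 px apart, so each pair overlaps by 640 px and
# together they cover columns 320:2880.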
im1 = im[:, 540:540 + 720, 320:320 + 1280]
im2 = im[:, 540:540 + 720, 960:960 + 1280]
im3 = im[:, 540:540 + 720, 1600:1600 + 1280]
return im1, im2, im3, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
(h, w) = im.shape[1:]
im1 = im[:, 540:540 + 720, 320:320 + 1280]
im2 = im[:, 540:540 + 720, 960:960 + 1280]
im3 = im[:, 540:540 + 720, 1600:1600 + 1280]
label = label[540:540 + 720, 320:1600 + 1280]
return im1, im2, im3, label
else:
if idx >= self.num_files:
image_path, label_path = self.coarse_file_list[idx -
self.num_files]
else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
(h, w) = im.shape[1:]
start_w = np.linspace(320, 1600, 5).tolist()
np.random.shuffle(start_w)
start = int(start_w[0])
crop_im = im[:, 540:540 + 720, start:(start + 1280)]
crop_label = label[540:540 + 720, start:(start + 1280)]
return crop_im, crop_label
def __len__(self):
return self.total_num_files
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .backbones.hrnet_nv import *
from .mscale_ocrnet import MscaleOCRNet
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
from paddleseg.utils import utils
__all__ = [
"HRNet_W18_NV_Small_V1", "HRNet_W18_NV_Small_V2", "HRNet_W18_NV",
"HRNet_W30_NV", "HRNet_W32_NV", "HRNet_W40_NV", "HRNet_W44_NV",
"HRNet_W48_NV", "HRNet_W60_NV", "HRNet_W64_NV"
]
class HRNetNV(nn.Layer):
"""
The HRNet implementation based on PaddlePaddle.
The difference from the HRNet at paddleseg/models/backbones/hrnet.py is:
1. The padding parameter of the convolutions is different.
The original article refers to
Jingdong Wang, et al. "HRNet: Deep High-Resolution Representation Learning for Visual Recognition"
(https://arxiv.org/pdf/1908.07919.pdf).
Args:
pretrained (str): The path of pretrained model.
stage1_num_modules (int): Number of modules for stage1. Default 1.
stage1_num_blocks (list): Number of blocks per module for stage1. Default [4].
stage1_num_channels (list): Number of channels per branch for stage1. Default [64].
stage2_num_modules (int): Number of modules for stage2. Default 1.
stage2_num_blocks (list): Number of blocks per module for stage2. Default [4, 4]
stage2_num_channels (list): Number of channels per branch for stage2. Default [18, 36].
stage3_num_modules (int): Number of modules for stage3. Default 4.
stage3_num_blocks (list): Number of blocks per module for stage3. Default [4, 4, 4]
stage3_num_channels (list): Number of channels per branch for stage3. Default [18, 36, 72].
stage4_num_modules (int): Number of modules for stage4. Default 3.
stage4_num_blocks (list): Number of blocks per module for stage4. Default [4, 4, 4, 4]
stage4_num_channels (list): Number of channels per branch for stage4. Default [18, 36, 72, 144].
has_se (bool): Whether to use Squeeze-and-Excitation module. Default False.
align_corners (bool, optional): An argument of F.interpolate. It should be set to False when the feature size is even,
e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
"""
def __init__(self,
pretrained=None,
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
has_se=False,
align_corners=False):
super(HRNetNV, self).__init__()
self.pretrained = pretrained
self.stage1_num_modules = stage1_num_modules
self.stage1_num_blocks = stage1_num_blocks
self.stage1_num_channels = stage1_num_channels
self.stage2_num_modules = stage2_num_modules
self.stage2_num_blocks = stage2_num_blocks
self.stage2_num_channels = stage2_num_channels
self.stage3_num_modules = stage3_num_modules
self.stage3_num_blocks = stage3_num_blocks
self.stage3_num_channels = stage3_num_channels
self.stage4_num_modules = stage4_num_modules
self.stage4_num_blocks = stage4_num_blocks
self.stage4_num_channels = stage4_num_channels
self.has_se = has_se
self.align_corners = align_corners
self.feat_channels = [sum(stage4_num_channels)]
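# The fused output width is the sum of the stage-4 branch channels,
# e.g. 18 + 36 + 72 + 144 = 270 for the default (W18) configuration.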
self.conv_layer1_1 = layers.ConvBNReLU(
in_channels=3,
out_channels=64,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False)
self.conv_layer1_2 = layers.ConvBNReLU(
in_channels=64,
out_channels=64,
kernel_size=3,
stride=2,
padding=1,
bias_attr=False)
self.la1 = Layer1(
num_channels=64,
num_blocks=self.stage1_num_blocks[0],
num_filters=self.stage1_num_channels[0],
has_se=has_se,
name="layer2")
self.tr1 = TransitionLayer(
in_channels=[self.stage1_num_channels[0] * 4],
out_channels=self.stage2_num_channels,
name="tr1")
self.st2 = Stage(
num_channels=self.stage2_num_channels,
num_modules=self.stage2_num_modules,
num_blocks=self.stage2_num_blocks,
num_filters=self.stage2_num_channels,
has_se=self.has_se,
name="st2",
align_corners=align_corners)
self.tr2 = TransitionLayer(
in_channels=self.stage2_num_channels,
out_channels=self.stage3_num_channels,
name="tr2")
self.st3 = Stage(
num_channels=self.stage3_num_channels,
num_modules=self.stage3_num_modules,
num_blocks=self.stage3_num_blocks,
num_filters=self.stage3_num_channels,
has_se=self.has_se,
name="st3",
align_corners=align_corners)
self.tr3 = TransitionLayer(
in_channels=self.stage3_num_channels,
out_channels=self.stage4_num_channels,
name="tr3")
self.st4 = Stage(
num_channels=self.stage4_num_channels,
num_modules=self.stage4_num_modules,
num_blocks=self.stage4_num_blocks,
num_filters=self.stage4_num_channels,
has_se=self.has_se,
name="st4",
align_corners=align_corners)
self.init_weight()
def forward(self, x):
conv1 = self.conv_layer1_1(x)
conv2 = self.conv_layer1_2(conv1)
la1 = self.la1(conv2)
tr1 = self.tr1([la1])
st2 = self.st2(tr1)
tr2 = self.tr2(st2)
st3 = self.st3(tr2)
tr3 = self.tr3(st3)
st4 = self.st4(tr3)
x0_h, x0_w = st4[0].shape[2:]
x1 = F.interpolate(
st4[1], (x0_h, x0_w),
mode='bilinear',
align_corners=self.align_corners)
x2 = F.interpolate(
st4[2], (x0_h, x0_w),
mode='bilinear',
align_corners=self.align_corners)
x3 = F.interpolate(
st4[3], (x0_h, x0_w),
mode='bilinear',
align_corners=self.align_corners)
x = paddle.concat([st4[0], x1, x2, x3], axis=1)
return [x]
def init_weight(self):
for layer in self.sublayers():
if isinstance(layer, nn.Conv2D):
param_init.normal_init(layer.weight, std=0.001)
elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
param_init.constant_init(layer.weight, value=1.0)
param_init.constant_init(layer.bias, value=0.0)
if self.pretrained is not None:
utils.load_pretrained_model(self, self.pretrained)
class Layer1(nn.Layer):
def __init__(self,
num_channels,
num_filters,
num_blocks,
has_se=False,
name=None):
super(Layer1, self).__init__()
self.bottleneck_block_list = []
for i in range(num_blocks):
bottleneck_block = self.add_sublayer(
"bb_{}_{}".format(name, i + 1),
BottleneckBlock(
num_channels=num_channels if i == 0 else num_filters * 4,
num_filters=num_filters,
has_se=has_se,
stride=1,
downsample=True if i == 0 else False,
name=name + '_' + str(i + 1)))
self.bottleneck_block_list.append(bottleneck_block)
def forward(self, x):
conv = x
for block_func in self.bottleneck_block_list:
conv = block_func(conv)
return conv
class TransitionLayer(nn.Layer):
def __init__(self, in_channels, out_channels, name=None):
super(TransitionLayer, self).__init__()
num_in = len(in_channels)
num_out = len(out_channels)
self.conv_bn_func_list = []
for i in range(num_out):
residual = None
if i < num_in:
if in_channels[i] != out_channels[i]:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
layers.ConvBNReLU(
in_channels=in_channels[i],
out_channels=out_channels[i],
kernel_size=3,
padding=1,
bias_attr=False))
else:
residual = self.add_sublayer(
"transition_{}_layer_{}".format(name, i + 1),
layers.ConvBNReLU(
in_channels=in_channels[-1],
out_channels=out_channels[i],
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
self.conv_bn_func_list.append(residual)
def forward(self, x):
outs = []
for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
if conv_bn_func is None:
outs.append(x[idx])
else:
if idx < len(x):
outs.append(conv_bn_func(x[idx]))
else:
outs.append(conv_bn_func(x[-1]))
return outs
class Branches(nn.Layer):
def __init__(self,
num_blocks,
in_channels,
out_channels,
has_se=False,
name=None):
super(Branches, self).__init__()
self.basic_block_list = []
for i in range(len(out_channels)):
self.basic_block_list.append([])
for j in range(num_blocks[i]):
in_ch = in_channels[i] if j == 0 else out_channels[i]
basic_block_func = self.add_sublayer(
"bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
BasicBlock(
num_channels=in_ch,
num_filters=out_channels[i],
has_se=has_se,
name=name + '_branch_layer_' + str(i + 1) + '_' +
str(j + 1)))
self.basic_block_list[i].append(basic_block_func)
def forward(self, x):
outs = []
for idx, input in enumerate(x):
conv = input
for basic_block_func in self.basic_block_list[idx]:
conv = basic_block_func(conv)
outs.append(conv)
return outs
class BottleneckBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
has_se,
stride=1,
downsample=False,
name=None):
super(BottleneckBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = layers.ConvBNReLU(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=1,
padding=0,
bias_attr=False)
self.conv2 = layers.ConvBNReLU(
in_channels=num_filters,
out_channels=num_filters,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.conv3 = layers.ConvBN(
in_channels=num_filters,
out_channels=num_filters * 4,
kernel_size=1,
padding=0,
bias_attr=False)
if self.downsample:
self.conv_down = layers.ConvBN(
in_channels=num_channels,
out_channels=num_filters * 4,
kernel_size=1,
bias_attr=False)
if self.has_se:
self.se = SELayer(
num_channels=num_filters * 4,
num_filters=num_filters * 4,
reduction_ratio=16,
name=name + '_fc')
def forward(self, x):
residual = x
conv1 = self.conv1(x)
conv2 = self.conv2(conv1)
conv3 = self.conv3(conv2)
if self.downsample:
residual = self.conv_down(x)
if self.has_se:
conv3 = self.se(conv3)
y = conv3 + residual
y = F.relu(y)
return y
class BasicBlock(nn.Layer):
def __init__(self,
num_channels,
num_filters,
stride=1,
has_se=False,
downsample=False,
name=None):
super(BasicBlock, self).__init__()
self.has_se = has_se
self.downsample = downsample
self.conv1 = layers.ConvBNReLU(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=3,
stride=stride,
padding=1,
bias_attr=False)
self.conv2 = layers.ConvBN(
in_channels=num_filters,
out_channels=num_filters,
kernel_size=3,
padding=1,
bias_attr=False)
if self.downsample:
self.conv_down = layers.ConvBNReLU(
in_channels=num_channels,
out_channels=num_filters,
kernel_size=1,
padding=0,
bias_attr=False)
if self.has_se:
self.se = SELayer(
num_channels=num_filters,
num_filters=num_filters,
reduction_ratio=16,
name=name + '_fc')
def forward(self, x):
residual = x
conv1 = self.conv1(x)
conv2 = self.conv2(conv1)
if self.downsample:
residual = self.conv_down(x)
if self.has_se:
conv2 = self.se(conv2)
y = conv2 + residual
y = F.relu(y)
return y
class SELayer(nn.Layer):
def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
super(SELayer, self).__init__()
self.pool2d_gap = nn.AdaptiveAvgPool2D(1)
self._num_channels = num_channels
med_ch = int(num_channels / reduction_ratio)
stdv = 1.0 / math.sqrt(num_channels * 1.0)
self.squeeze = nn.Linear(
num_channels,
med_ch,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Uniform(-stdv, stdv)))
stdv = 1.0 / math.sqrt(med_ch * 1.0)
self.excitation = nn.Linear(
med_ch,
num_filters,
weight_attr=paddle.ParamAttr(
initializer=nn.initializer.Uniform(-stdv, stdv)))
def forward(self, x):
pool = self.pool2d_gap(x)
pool = paddle.reshape(pool, shape=[-1, self._num_channels])
squeeze = self.squeeze(pool)
squeeze = F.relu(squeeze)
excitation = self.excitation(squeeze)
excitation = F.sigmoid(excitation)
excitation = paddle.reshape(
excitation, shape=[-1, self._num_channels, 1, 1])
out = x * excitation
return out
class Stage(nn.Layer):
def __init__(self,
num_channels,
num_modules,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None,
align_corners=False):
super(Stage, self).__init__()
self._num_modules = num_modules
self.stage_func_list = []
for i in range(num_modules):
if i == num_modules - 1 and not multi_scale_output:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
multi_scale_output=False,
name=name + '_' + str(i + 1),
align_corners=align_corners))
else:
stage_func = self.add_sublayer(
"stage_{}_{}".format(name, i + 1),
HighResolutionModule(
num_channels=num_channels,
num_blocks=num_blocks,
num_filters=num_filters,
has_se=has_se,
name=name + '_' + str(i + 1),
align_corners=align_corners))
self.stage_func_list.append(stage_func)
def forward(self, x):
out = x
for idx in range(self._num_modules):
out = self.stage_func_list[idx](out)
return out
class HighResolutionModule(nn.Layer):
def __init__(self,
num_channels,
num_blocks,
num_filters,
has_se=False,
multi_scale_output=True,
name=None,
align_corners=False):
super(HighResolutionModule, self).__init__()
self.branches_func = Branches(
num_blocks=num_blocks,
in_channels=num_channels,
out_channels=num_filters,
has_se=has_se,
name=name)
self.fuse_func = FuseLayers(
in_channels=num_filters,
out_channels=num_filters,
multi_scale_output=multi_scale_output,
name=name,
align_corners=align_corners)
def forward(self, x):
out = self.branches_func(x)
out = self.fuse_func(out)
return out
class FuseLayers(nn.Layer):
def __init__(self,
in_channels,
out_channels,
multi_scale_output=True,
name=None,
align_corners=False):
super(FuseLayers, self).__init__()
self._actual_ch = len(in_channels) if multi_scale_output else 1
self._in_channels = in_channels
self.align_corners = align_corners
self.residual_func_list = []
for i in range(self._actual_ch):
for j in range(len(in_channels)):
if j > i:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
layers.ConvBN(
in_channels=in_channels[j],
out_channels=out_channels[i],
kernel_size=1,
padding=0,
bias_attr=False))
self.residual_func_list.append(residual_func)
elif j < i:
pre_num_filters = in_channels[j]
for k in range(i - j):
if k == i - j - 1:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
layers.ConvBN(
in_channels=pre_num_filters,
out_channels=out_channels[i],
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
pre_num_filters = out_channels[i]
else:
residual_func = self.add_sublayer(
"residual_{}_layer_{}_{}_{}".format(
name, i + 1, j + 1, k + 1),
layers.ConvBNReLU(
in_channels=pre_num_filters,
out_channels=out_channels[j],
kernel_size=3,
stride=2,
padding=1,
bias_attr=False))
pre_num_filters = out_channels[j]
self.residual_func_list.append(residual_func)
def forward(self, x):
outs = []
residual_func_idx = 0
for i in range(self._actual_ch):
residual = x[i]
residual_shape = residual.shape[-2:]
for j in range(len(self._in_channels)):
if j > i:
y = self.residual_func_list[residual_func_idx](x[j])
residual_func_idx += 1
y = F.interpolate(
y,
residual_shape,
mode='bilinear',
align_corners=self.align_corners)
residual = residual + y
elif j < i:
y = x[j]
for k in range(i - j):
y = self.residual_func_list[residual_func_idx](y)
residual_func_idx += 1
residual = residual + y
residual = F.relu(residual)
outs.append(residual)
return outs
@manager.BACKBONES.add_component
def HRNet_W18_NV_Small_V1(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[1],
stage1_num_channels=[32],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[16, 32],
stage3_num_modules=1,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[16, 32, 64],
stage4_num_modules=1,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[16, 32, 64, 128],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W18_NV_Small_V2(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[2],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[2, 2],
stage2_num_channels=[18, 36],
stage3_num_modules=3,
stage3_num_blocks=[2, 2, 2],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=2,
stage4_num_blocks=[2, 2, 2, 2],
stage4_num_channels=[18, 36, 72, 144],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W18_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[18, 36],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[18, 36, 72],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[18, 36, 72, 144],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W30_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[30, 60],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[30, 60, 120],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[30, 60, 120, 240],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W32_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[32, 64],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[32, 64, 128],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[32, 64, 128, 256],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W40_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[40, 80],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[40, 80, 160],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[40, 80, 160, 320],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W44_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[44, 88],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[44, 88, 176],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[44, 88, 176, 352],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W48_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[48, 96],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[48, 96, 192],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[48, 96, 192, 384],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W60_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[60, 120],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[60, 120, 240],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[60, 120, 240, 480],
**kwargs)
return model
@manager.BACKBONES.add_component
def HRNet_W64_NV(**kwargs):
model = HRNetNV(
stage1_num_modules=1,
stage1_num_blocks=[4],
stage1_num_channels=[64],
stage2_num_modules=1,
stage2_num_blocks=[4, 4],
stage2_num_channels=[64, 128],
stage3_num_modules=4,
stage3_num_blocks=[4, 4, 4],
stage3_num_channels=[64, 128, 256],
stage4_num_modules=3,
stage4_num_blocks=[4, 4, 4, 4],
stage4_num_channels=[64, 128, 256, 512],
**kwargs)
return model
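# Backbone sketch (illustrative, not part of the original file): HRNet_W48_NV
# returns one concatenated feature map with 48 + 96 + 192 + 384 = 720 channels
# at 1/4 of the input resolution.
#
#     backbone = HRNet_W48_NV(pretrained=None)
#     # backbone.feat_channels == [720]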
import math
import paddle
import paddle.nn as nn
from paddleseg.cvlibs import manager, param_init
from paddleseg.utils import utils
from paddleseg.models import layers
from .ocrnet_nv import OCRNetNV
@manager.MODELS.add_component
class MscaleOCRNet(nn.Layer):
def __init__(self,
num_classes,
backbone,
backbone_indices,
n_scales=[0.5, 1.0, 2.0],
ocr_mid_channels=512,
ocr_key_channels=256,
align_corners=False,
pretrained=None):
super().__init__()
self.ocrnet = OCRNetNV(
num_classes,
backbone,
backbone_indices,
ocr_mid_channels=ocr_mid_channels,
ocr_key_channels=ocr_key_channels,
align_corners=align_corners,
ms_attention=True)
self.scale_attn = AttenHead(in_ch=ocr_mid_channels, out_ch=1)
self.n_scales = n_scales
self.pretrained = pretrained
self.align_corners = align_corners
if self.pretrained is not None:
utils.load_pretrained_model(self, self.pretrained)
# backbone.init_weight()
def forward(self, x):
if self.training:
return self.one_scale_forward(x)
else:
return self.nscale_forward(x, self.n_scales)
def one_scale_forward(self, x):
x_size = x.shape[2:]
cls_out, aux_out, _ = self.ocrnet(x)
cls_out = nn.functional.interpolate(
cls_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
aux_out = nn.functional.interpolate(
aux_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
output = [cls_out, aux_out]
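# The (cls_out, aux_out) pair is duplicated so the returned list lines up
# with a four-term loss configuration (inferred from the matching pattern in
# two_scale_forward; the loss config itself is not in this file).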
output.extend(output)
return output
def one_scale_val(self, x):
x_size = x.shape[2:]
cls_out, aux_out, _ = self.ocrnet(x)
cls_out = nn.functional.interpolate(
cls_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
output = [cls_out]
return output
def two_scale_forward(self, x_1x):
"""
Do we supervise both aux outputs, low and high scale?
Should attention be used to combine the aux outputs?
Normally we only supervise the combined 1x output.
If we use attention to combine the aux outputs, then
we can use normal weighting for aux vs. cls outputs.
"""
x_lo = nn.functional.interpolate(
x_1x,
scale_factor=0.5,
align_corners=self.align_corners,
mode='bilinear')
lo_outs = self.single_scale_forward(x_lo)
pred_05x = lo_outs['cls_out']
p_lo = pred_05x
aux_lo = lo_outs['aux_out']
logit_attn = lo_outs['logit_attn']
hi_outs = self.single_scale_forward(x_1x)
pred_10x = hi_outs['cls_out']
p_1x = pred_10x
aux_1x = hi_outs['aux_out']
p_lo = p_lo * logit_attn
aux_lo = aux_lo * logit_attn
p_lo = scale_as(p_lo, p_1x)
aux_lo = scale_as(aux_lo, p_1x)
logit_attn = scale_as(logit_attn, p_1x)
# combine lo and hi predictions with attention
joint_pred = p_lo + p_1x * (1 - logit_attn)
joint_aux = aux_lo + aux_1x * (1 - logit_attn)
output = [joint_pred, joint_aux]
# Optionally, apply supervision to the multi-scale predictions
# directly.
scaled_pred_05x = scale_as(pred_05x, p_1x)
output.extend([scaled_pred_05x, pred_10x])
output.extend(output)
return output
def two_scale_forward_high(self, x_1x):
"""
Do we supervise both aux outputs, low and high scale?
Should attention be used to combine the aux outputs?
Normally we only supervise the combined 1x output.
If we use attention to combine the aux outputs, then
we can use normal weighting for aux vs. cls outputs.
"""
x_hi = nn.functional.interpolate(
x_1x,
scale_factor=1.5,
align_corners=self.align_corners,
mode='bilinear')
lo_outs = self.single_scale_forward(x_1x)
pred_10x = lo_outs['cls_out']
p_lo = pred_10x
aux_lo = lo_outs['aux_out']
logit_attn = lo_outs['logit_attn']
hi_outs = self.single_scale_forward(x_hi)
pred_15x = hi_outs['cls_out']
p_hi = pred_15x
aux_hi = hi_outs['aux_out']
p_lo = p_lo * logit_attn
aux_lo = aux_lo * logit_attn
p_hi = scale_as(p_hi, p_lo)
aux_hi = scale_as(aux_hi, aux_lo)
# combine lo and hi predictions with attention
joint_pred = p_lo + p_hi * (1 - logit_attn)
joint_aux = aux_lo + aux_hi * (1 - logit_attn)
output = [joint_pred, joint_aux]
# Optionally, apply supervision to the multi-scale predictions
# directly.
scaled_pred_15x = scale_as(pred_15x, p_lo)
output.extend([scaled_pred_15x, pred_10x])
output.extend(output)
return output
def nscale_forward(self, x_1x, scales):
"""
Hierarchical attention, primarily used for getting best inference
results.
We use attention at multiple scales, giving priority to the lower
resolutions. For example, if we have 4 scales {0.5, 1.0, 1.5, 2.0},
then evaluation is done as follows:
p_joint = attn_1.5 * p_1.5 + (1 - attn_1.5) * down(p_2.0)
p_joint = attn_1.0 * p_1.0 + (1 - attn_1.0) * down(p_joint)
p_joint = up(attn_0.5 * p_0.5) + (1 - up(attn_0.5)) * p_joint
The target scale is always 1.0, and 1.0 is expected to be part of the
list of scales. When predictions are done at greater than 1.0 scale,
the predictions are downsampled before combining with the next lower
scale.
Args:
x_1x - the input image batch at 1.0x scale
scales - a list of scales to evaluate
Output:
A list containing the fused prediction.
"""
assert 1.0 in scales, 'expected 1.0 to be the target scale'
# Lower resolution provides attention for higher rez predictions,
# so we evaluate in order: high to low
scales = sorted(scales, reverse=True)
pred = None
for s in scales:
x = nn.functional.interpolate(
x_1x,
scale_factor=s,
align_corners=self.align_corners,
mode='bilinear')
outs = self.single_scale_forward(x)
cls_out = outs['cls_out']
attn_out = outs['logit_attn']
if pred is None:
pred = cls_out
elif s >= 1.0:
# downscale previous
pred = scale_as(pred, cls_out, self.align_corners)
pred = cls_out * attn_out + pred * (1 - attn_out)
else:
# s < 1.0: upscale current
cls_out = cls_out * attn_out
cls_out = scale_as(cls_out, pred, self.align_corners)
attn_out = scale_as(attn_out, pred, self.align_corners)
pred = cls_out + pred * (1 - attn_out)
return [pred]
def single_scale_forward(self, x):
x_size = x.shape[2:]
cls_out, aux_out, ocr_mid_feats = self.ocrnet(x)
attn = self.scale_attn(ocr_mid_feats)
cls_out = nn.functional.interpolate(
cls_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
aux_out = nn.functional.interpolate(
aux_out,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
attn = nn.functional.interpolate(
attn,
size=x_size,
mode='bilinear',
align_corners=self.align_corners)
return {'cls_out': cls_out, 'aux_out': aux_out, 'logit_attn': attn}
class AttenHead(nn.Layer):
def __init__(self, in_ch, out_ch):
super(AttenHead, self).__init__()
# bottleneck channels for seg and attn heads
bot_ch = 256
self.atten_head = nn.Sequential(
layers.ConvBNReLU(
in_ch, bot_ch, 3, padding=1, bias_attr=False),
layers.ConvBNReLU(
bot_ch, bot_ch, 3, padding=1, bias_attr=False),
nn.Conv2D(
bot_ch, out_ch, kernel_size=(1, 1), bias_attr=False),
nn.Sigmoid())
def forward(self, x):
return self.atten_head(x)
def scale_as(x, y, align_corners=False):
'''
scale x to the same size as y
'''
y_size = y.shape[2], y.shape[3]
x_scaled = nn.functional.interpolate(
x, size=y_size, mode='bilinear', align_corners=align_corners)
return x_scaled
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager, param_init
from paddleseg.models import layers
@manager.MODELS.add_component
class OCRNetNV(nn.Layer):
"""
The OCRNet implementation based on PaddlePaddle.
The differences from the OCRNet at paddleseg/models/ocrnet.py are:
1. The convolution bias is set to False.
2. The dropout rate in SpatialOCRModule is 0.05.
3. OCRHead also returns the `ocr` feature.
4. The logit list is not resized to the original size when ms_attention=True.
The original article refers to
Yuan, Yuhui, et al. "Object-Contextual Representations for Semantic Segmentation"
(https://arxiv.org/pdf/1909.11065.pdf)
Args:
num_classes (int): The unique number of target classes.
backbone (Paddle.nn.Layer): Backbone network.
backbone_indices (tuple): A tuple indicates the indices of output of backbone.
It can be either one or two values, if two values, the first index will be taken as
a deep-supervision feature in auxiliary layer; the second one will be taken as
input of pixel representation. If one value, it is taken by both above.
ocr_mid_channels (int, optional): The number of middle channels in OCRHead. Default: 512.
ocr_key_channels (int, optional): The number of key channels in ObjectAttentionBlock. Default: 256.
align_corners (bool): An argument of F.interpolate. It should be set to False when the output size of feature
is even, e.g. 1024x512, otherwise it is True, e.g. 769x769. Default: False.
"""
def __init__(self,
num_classes,
backbone,
backbone_indices,
ocr_mid_channels=512,
ocr_key_channels=256,
align_corners=False,
ms_attention=False):
super().__init__()
self.backbone = backbone
self.backbone_indices = backbone_indices
in_channels = [self.backbone.feat_channels[i] for i in backbone_indices]
self.head = OCRHead(
num_classes=num_classes,
in_channels=in_channels,
ocr_mid_channels=ocr_mid_channels,
ocr_key_channels=ocr_key_channels,
ms_attention=ms_attention)
self.align_corners = align_corners
self.ms_attention = ms_attention
def forward(self, x):
feats = self.backbone(x)
feats = [feats[i] for i in self.backbone_indices]
logit_list = self.head(feats)
if not self.ms_attention:
logit_list = [
F.interpolate(
logit,
x.shape[2:],
mode='bilinear',
align_corners=self.align_corners) for logit in logit_list
]
return logit_list
class OCRHead(nn.Layer):
"""
The Object contextual representation head.
Args:
num_classes(int): The unique number of target classes.
in_channels(tuple): The number of input channels.
ocr_mid_channels(int, optional): The number of middle channels in OCRHead. Default: 512.
ocr_key_channels(int, optional): The number of key channels in ObjectAttentionBlock. Default: 256.
"""
def __init__(self,
num_classes,
in_channels,
ocr_mid_channels=512,
ocr_key_channels=256,
ms_attention=False):
super().__init__()
self.num_classes = num_classes
self.ms_attention = ms_attention
self.spatial_gather = SpatialGatherBlock()
self.spatial_ocr = SpatialOCRModule(
ocr_mid_channels,
ocr_key_channels,
ocr_mid_channels,
dropout_rate=0.05)
self.indices = [-2, -1] if len(in_channels) > 1 else [-1, -1]
self.conv3x3_ocr = layers.ConvBNReLU(
in_channels[self.indices[1]], ocr_mid_channels, 3, padding=1)
self.cls_head = nn.Conv2D(ocr_mid_channels, self.num_classes, 1)
self.aux_head = nn.Sequential(
layers.ConvBNReLU(in_channels[self.indices[0]],
in_channels[self.indices[0]], 1),
nn.Conv2D(in_channels[self.indices[0]], self.num_classes, 1))
self.init_weight()
def forward(self, feat_list):
feat_shallow, feat_deep = feat_list[self.indices[0]], feat_list[
self.indices[1]]
soft_regions = self.aux_head(feat_shallow)
pixels = self.conv3x3_ocr(feat_deep)
object_regions = self.spatial_gather(pixels, soft_regions)
ocr = self.spatial_ocr(pixels, object_regions)
logit = self.cls_head(ocr)
if self.ms_attention:
return [logit, soft_regions, ocr]
return [logit, soft_regions]
def init_weight(self):
"""Initialize the parameters of model parts."""
for sublayer in self.sublayers():
if isinstance(sublayer, nn.Conv2D):
param_init.normal_init(sublayer.weight, std=0.001)
elif isinstance(sublayer, (nn.BatchNorm, nn.SyncBatchNorm)):
param_init.constant_init(sublayer.weight, value=1.0)
param_init.constant_init(sublayer.bias, value=0.0)
class SpatialGatherBlock(nn.Layer):
"""Aggregation layer to compute the pixel-region representation."""
def forward(self, pixels, regions):
n, c, h, w = pixels.shape
_, k, _, _ = regions.shape
# pixels: from (n, c, h, w) to (n, h*w, c)
pixels = paddle.reshape(pixels, (n, c, h * w))
pixels = paddle.transpose(pixels, (0, 2, 1))
# regions: from (n, k, h, w) to (n, k, h*w)
regions = paddle.reshape(regions, (n, k, h * w))
regions = F.softmax(regions, axis=2)
# feats: from (n, k, c) to (n, c, k, 1)
feats = paddle.bmm(regions, pixels)
feats = paddle.transpose(feats, (0, 2, 1))
feats = paddle.unsqueeze(feats, axis=-1)
return feats
class SpatialOCRModule(nn.Layer):
"""Aggregate the global object representation to update the representation for each pixel."""
def __init__(self,
in_channels,
key_channels,
out_channels,
dropout_rate=0.1):
super().__init__()
self.attention_block = ObjectAttentionBlock(in_channels, key_channels)
self.conv1x1 = nn.Sequential(
layers.ConvBNReLU(
2 * in_channels, out_channels, 1, bias_attr=False),
nn.Dropout2D(dropout_rate))
def forward(self, pixels, regions):
context = self.attention_block(pixels, regions)
feats = paddle.concat([context, pixels], axis=1)
feats = self.conv1x1(feats)
return feats
class ObjectAttentionBlock(nn.Layer):
"""A self-attention module."""
def __init__(self, in_channels, key_channels):
super().__init__()
self.in_channels = in_channels
self.key_channels = key_channels
self.f_pixel = nn.Sequential(
layers.ConvBNReLU(
in_channels, key_channels, 1, bias_attr=False),
layers.ConvBNReLU(
key_channels, key_channels, 1, bias_attr=False))
self.f_object = nn.Sequential(
layers.ConvBNReLU(
in_channels, key_channels, 1, bias_attr=False),
layers.ConvBNReLU(
key_channels, key_channels, 1, bias_attr=False))
self.f_down = layers.ConvBNReLU(
in_channels, key_channels, 1, bias_attr=False)
self.f_up = layers.ConvBNReLU(
key_channels, in_channels, 1, bias_attr=False)
def forward(self, x, proxy):
n, _, h, w = x.shape
# query : from (n, c1, h1, w1) to (n, h1*w1, key_channels)
query = self.f_pixel(x)
query = paddle.reshape(query, (n, self.key_channels, -1))
query = paddle.transpose(query, (0, 2, 1))
# key : from (n, c2, h2, w2) to (n, key_channels, h2*w2)
key = self.f_object(proxy)
key = paddle.reshape(key, (n, self.key_channels, -1))
# value : from (n, c2, h2, w2) to (n, h2*w2, key_channels)
value = self.f_down(proxy)
value = paddle.reshape(value, (n, self.key_channels, -1))
value = paddle.transpose(value, (0, 2, 1))
# sim_map (n, h1*w1, h2*w2)
sim_map = paddle.bmm(query, key)
sim_map = (self.key_channels**-.5) * sim_map
sim_map = F.softmax(sim_map, axis=-1)
        # context: from (n, h1*w1, key_channels) to (n, in_channels, h1, w1)
context = paddle.bmm(sim_map, value)
context = paddle.transpose(context, (0, 2, 1))
context = paddle.reshape(context, (n, self.key_channels, h, w))
context = self.f_up(context)
return context
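# Note (added commentary): ObjectAttentionBlock is scaled dot-product
# attention with pixel features as queries and region descriptors as
# keys/values:
#     context = softmax(Q @ K^T / sqrt(key_channels)) @ V
# where Q = f_pixel(x), K = f_object(proxy), V = f_down(proxy); f_up then
# projects the attended context back to in_channels.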
class ProjectionHead(nn.Layer):
    """Project features to a lower-dimensional, L2-normalized embedding."""

    def __init__(self, dim_in, proj_dim=256, proj='convmlp'):
        super().__init__()
        if proj == 'linear':
            # Paddle's conv layer is nn.Conv2D, not PyTorch's nn.Conv2d.
            self.proj = nn.Conv2D(dim_in, proj_dim, kernel_size=1)
        elif proj == 'convmlp':
            self.proj = nn.Sequential(
                nn.Conv2D(dim_in, dim_in, kernel_size=1),
                nn.SyncBatchNorm(dim_in),
                nn.ReLU(),
                nn.Conv2D(dim_in, proj_dim, kernel_size=1))
        else:
            raise ValueError(
                "proj must be 'linear' or 'convmlp', but got {}".format(proj))

    def forward(self, x):
        # Paddle's F.normalize takes `axis`, not PyTorch's `dim`.
        return F.normalize(self.proj(x), p=2, axis=1)
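# Usage sketch (added commentary; dim_in=720 is a hypothetical backbone
# width): the head maps features to an embedding whose channel vectors have
# unit L2 norm, as typically consumed by a pixel-wise contrastive loss.
def _demo_projection_head():
    head = ProjectionHead(dim_in=720, proj_dim=256)
    emb = head(paddle.rand([2, 720, 32, 32]))
    # emb: [2, 256, 32, 32]; each spatial position's channel vector has
    # unit L2 norm.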
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
from paddleseg.core import predict
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model prediction')
# params of prediction
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
        help='The path of the model for prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
        help='The path of the image; it can be a file or a directory containing images',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the predicted results',
type=str,
default='./output/result')
# augment for prediction
parser.add_argument(
'--aug_pred',
dest='aug_pred',
        help='Whether to use multi-scale and flip augmentation for prediction',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
        help='Scales for augmentation',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
        help='Whether to use horizontal flip augmentation',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
        help='Whether to use vertical flip augmentation',
action='store_true')
# sliding window prediction
parser.add_argument(
'--is_slide',
dest='is_slide',
        help='Whether to predict by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
        help='The crop size of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
        help='The stride of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:
raise FileNotFoundError(
            '`--image_path` is not found. It should be an image file or a directory containing images'
)
if len(image_list) == 0:
        raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
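# Usage note (added commentary): for a directory input, the walk is recursive
# and `image_dir` is returned alongside the list, so the caller can preserve
# the relative directory structure when saving results, e.g.
#     image_list, image_dir = get_image_list('data/test_images')  # hypothetical path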
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
val_dataset = cfg.val_dataset
if not val_dataset:
        raise RuntimeError(
            'The validation dataset is not specified in the configuration file.'
        )
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
transforms = val_dataset.transforms
image_list, image_dir = get_image_list(args.image_path)
logger.info('Number of predict images = {}'.format(len(image_list)))
predict(
model,
model_path=args.model_path,
transforms=transforms,
image_list=image_list,
image_dir=image_dir,
save_dir=args.save_dir,
aug_pred=args.aug_pred,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride, )
if __name__ == '__main__':
args = parse_args()
main(args)
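# Example invocation (added commentary; the script file name, config name and
# all paths are placeholders, not from the original repo):
#   python predict.py \
#       --config configs/my_config.yml \
#       --model_path output/best_model/model.pdparams \
#       --image_path data/test_images \
#       --save_dir output/result \
#       --aug_pred --scales 0.75 1.0 1.25 --flip_horizontal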
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
from core import predictEnsemble
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model prediction')
# params of prediction
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
        help='The path of the model for prediction',
type=str,
default=None)
parser.add_argument(
"--config_hard",
dest="cfg_hard",
        help="The config file of the second (hard) model.",
default=None,
type=str)
parser.add_argument(
'--model_path_hard',
dest='model_path_hard',
        help='The path of the second (hard) model for ensemble prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
        help='The path of the image; it can be a file or a directory containing images',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the predicted results',
type=str,
default='./output/result')
# augment for prediction
parser.add_argument(
'--aug_pred',
dest='aug_pred',
        help='Whether to use multi-scale and flip augmentation for prediction',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
        help='Scales for augmentation',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
        help='Whether to use horizontal flip augmentation',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
        help='Whether to use vertical flip augmentation',
action='store_true')
# sliding window prediction
parser.add_argument(
'--is_slide',
dest='is_slide',
        help='Whether to predict by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
        help='The crop size of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
        help='The stride of the sliding window; the first value is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:
raise FileNotFoundError(
            '`--image_path` is not found. It should be an image file or a directory containing images'
)
if len(image_list) == 0:
        raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
    if not args.cfg:
        raise RuntimeError('No configuration file specified.')
    if not args.cfg_hard:
        raise RuntimeError(
            'No configuration file specified for the second (hard) model.')
    cfg = Config(args.cfg)
    val_dataset = cfg.val_dataset
    cfg_hard = Config(args.cfg_hard)
if not val_dataset:
        raise RuntimeError(
            'The validation dataset is not specified in the configuration file.'
        )
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
model_hard = cfg_hard.model
transforms = val_dataset.transforms
image_list, image_dir = get_image_list(args.image_path)
logger.info('Number of predict images = {}'.format(len(image_list)))
predictEnsemble(
model,
model_hard,
model_path=args.model_path,
model_path_hard=args.model_path_hard,
transforms=transforms,
image_list=image_list,
image_dir=image_dir,
save_dir=args.save_dir,
aug_pred=args.aug_pred,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride, )
if __name__ == '__main__':
args = parse_args()
main(args)
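# Example invocation (added commentary; the script file name, config names and
# all paths are placeholders, not from the original repo):
#   python predict_ensemble.py \
#       --config configs/base_model.yml \
#       --model_path output/base/model.pdparams \
#       --config_hard configs/hard_class_model.yml \
#       --model_path_hard output/hard/model.pdparams \
#       --image_path data/test_images \
#       --save_dir output/ensemble_result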