# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
from core.predict_ensemble_three import predictEnsembleThree
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model prediction')
# params of prediction
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
"--config_1",
dest="cfg_1",
help="The config file.",
default=None,
type=str)
parser.add_argument(
'--model_path_1',
dest='model_path_1',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
"--config_crop",
dest="cfg_crop",
help="The config file.",
default=None,
type=str)
parser.add_argument(
'--model_path_crop',
dest='model_path_crop',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
help='The path of image, it can be a file or a directory including images',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the predicted results',
type=str,
default='./output/result')
# augment for prediction
parser.add_argument(
'--aug_pred',
dest='aug_pred',
help='Whether to use multi-scales and flip augment for prediction',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
help='Scales for augment',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
help='Whether to use horizontal flip augment',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
help='Whether to use vertical flip augment',
action='store_true')
# sliding window prediction
parser.add_argument(
'--is_slide',
dest='is_slide',
help='Whether to predict by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
help='The crop size of sliding window, the first is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
help='The stride of sliding window, the first is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
else:
raise FileNotFoundError(
'`--image_path` is not found. It should be an image file or a directory including images'
)
if len(image_list) == 0:
raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
val_dataset = cfg.val_dataset
cfg_1 = Config(args.cfg_1)
cfg_crop = Config(args.cfg_crop)
val_dataset_crop = cfg_crop.val_dataset
if not val_dataset:
raise RuntimeError(
'The validation dataset is not specified in the configuration file.'
)
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
model_1 = cfg_1.model
model_crop = cfg_crop.model
transforms = val_dataset.transforms
transforms_crop = val_dataset_crop.transforms
image_list, image_dir = get_image_list(args.image_path)
logger.info('Number of predict images = {}'.format(len(image_list)))
predictEnsembleThree(
model,
model_1,
model_crop,
model_path=args.model_path,
model_path_1=args.model_path_1,
model_path_crop=args.model_path_crop,
transforms=transforms,
transforms_crop=transforms_crop,
image_list=image_list,
image_dir=image_dir,
save_dir=args.save_dir,
aug_pred=args.aug_pred,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride, )
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import time
from collections import deque
import shutil
import paddle
import paddle.nn.functional as F
from paddleseg.utils import TimeAverager, calculate_eta, resume, logger
from core.val import evaluate
#from core.val_crop import evaluate
def check_logits_losses(logits_list, losses):
len_logits = len(logits_list)
len_losses = len(losses['types'])
if len_logits != len_losses:
raise RuntimeError(
'The length of logits_list should equal to the types of loss config: {} != {}.'
.format(len_logits, len_losses))
def loss_computation(logits_list, labels, losses, edges=None):
check_logits_losses(logits_list, losses)
loss_list = []
for i in range(len(logits_list)):
logits = logits_list[i]
loss_i = losses['types'][i]
# Whether to use edges as labels, according to the loss type.
if loss_i.__class__.__name__ in ('BCELoss', ) and loss_i.edge_label:
loss_list.append(losses['coef'][i] * loss_i(logits, edges))
else:
loss_list.append(losses['coef'][i] * loss_i(logits, labels))
return loss_list
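# Illustrative example (hypothetical values): with two logits and
# losses = {'types': [CrossEntropyLoss(), CrossEntropyLoss()], 'coef': [1.0, 0.4]},
# loss_computation returns [1.0 * ce(logits[0], labels), 0.4 * ce(logits[1], labels)],
# which train() below sums into a single scalar loss.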
def train(model,
train_dataset,
val_dataset=None,
aug_eval=False,
flip_horizontal_eval=False,
optimizer=None,
save_dir='output',
iters=10000,
batch_size=2,
resume_model=None,
save_interval=1000,
log_iters=10,
num_workers=0,
use_vdl=False,
losses=None,
keep_checkpoint_max=5):
"""
Launch training.
Args:
model(nn.Layer): A semantic segmentation model.
train_dataset (paddle.io.Dataset): Used to read and process training datasets.
val_dataset (paddle.io.Dataset, optional): Used to read and process validation datasets.
aug_eval (bool, optional): Whether to use multi-scales and flip augment for evaluation. Default: False.
flip_horizontal_eval (bool, optional): Whether to use horizontal flip augment for evaluation. It is valid when `aug_eval` is True. Default: False.
optimizer (paddle.optimizer.Optimizer): The optimizer.
save_dir (str, optional): The directory for saving the model snapshot. Default: 'output'.
iters (int, optional): How many iters to train the model. Default: 10000.
batch_size (int, optional): Mini batch size of one gpu or cpu. Default: 2.
resume_model (str, optional): The path of resume model.
save_interval (int, optional): How many iters to save a model snapshot once during training. Default: 1000.
log_iters (int, optional): Display logging information at every log_iters. Default: 10.
num_workers (int, optional): Num workers for data loader. Default: 0.
use_vdl (bool, optional): Whether to record the data to VisualDL during training. Default: False.
losses (dict): A dict including 'types' and 'coef'. The length of 'coef' should equal 1 or len(losses['types']).
The 'types' item is a list of loss objects from paddleseg.models.losses, while the 'coef' item is a list of the corresponding coefficients.
keep_checkpoint_max (int, optional): Maximum number of checkpoints to save. Default: 5.
"""
nranks = paddle.distributed.ParallelEnv().nranks
local_rank = paddle.distributed.ParallelEnv().local_rank
start_iter = 0
if resume_model is not None:
start_iter = resume(model, optimizer, resume_model)
if not os.path.isdir(save_dir):
if os.path.exists(save_dir):
os.remove(save_dir)
os.makedirs(save_dir)
if nranks > 1:
# Initialize parallel training environment.
paddle.distributed.init_parallel_env()
ddp_model = paddle.DataParallel(model)
# for item in ddp_model.named_parameters():
# if item[0].find('scale_attn')==-1:
# item[1].stop_gradient=True
batch_sampler = paddle.io.DistributedBatchSampler(
train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
loader = paddle.io.DataLoader(
train_dataset,
batch_sampler=batch_sampler,
num_workers=num_workers,
return_list=True, )
if use_vdl:
from visualdl import LogWriter
log_writer = LogWriter(save_dir)
avg_loss = 0.0
avg_loss_list = []
iters_per_epoch = len(batch_sampler)
best_mean_iou = -1.0
best_model_iter = -1
reader_cost_averager = TimeAverager()
batch_cost_averager = TimeAverager()
save_models = deque()
batch_start = time.time()
iter = start_iter
while iter < iters:
for data in loader:
iter += 1
if iter > iters:
break
reader_cost_averager.record(time.time() - batch_start)
images = data[0]
labels = data[1].astype('int64')
edges = None
if len(data) == 3:
edges = data[2].astype('int64')
if hasattr(train_dataset,
'shuffle') and iter % iters_per_epoch == 0:
train_dataset.shuffle()
if nranks > 1:
logits_list = ddp_model(images)
else:
logits_list = model(images)
loss_list = loss_computation(
logits_list=logits_list,
labels=labels,
losses=losses,
edges=edges)
loss = sum(loss_list)
loss.backward()
optimizer.step()
lr = optimizer.get_lr()
if isinstance(optimizer._learning_rate,
paddle.optimizer.lr.LRScheduler):
optimizer._learning_rate.step()
model.clear_gradients()
avg_loss += float(loss)
if not avg_loss_list:
avg_loss_list = [l.numpy() for l in loss_list]
else:
for i in range(len(loss_list)):
avg_loss_list[i] += loss_list[i].numpy()
batch_cost_averager.record(
time.time() - batch_start, num_samples=batch_size)
if (iter) % log_iters == 0 and local_rank == 0:
avg_loss /= log_iters
avg_loss_list = [l[0] / log_iters for l in avg_loss_list]
remain_iters = iters - iter
avg_train_batch_cost = batch_cost_averager.get_average()
avg_train_reader_cost = reader_cost_averager.get_average()
eta = calculate_eta(remain_iters, avg_train_batch_cost)
logger.info(
"[TRAIN] epoch={}, iter={}/{}, loss={:.4f}, lr={:.6f}, batch_cost={:.4f}, reader_cost={:.5f}, ips={:.4f} samples/sec | ETA {}"
.format((iter - 1
) // iters_per_epoch + 1, iter, iters, avg_loss,
lr, avg_train_batch_cost, avg_train_reader_cost,
batch_cost_averager.get_ips_average(), eta))
if use_vdl:
log_writer.add_scalar('Train/loss', avg_loss, iter)
# Record each loss separately when there is more than one loss.
if len(avg_loss_list) > 1:
avg_loss_dict = {}
for i, value in enumerate(avg_loss_list):
avg_loss_dict['loss_' + str(i)] = value
for key, value in avg_loss_dict.items():
log_tag = 'Train/' + key
log_writer.add_scalar(log_tag, value, iter)
log_writer.add_scalar('Train/lr', lr, iter)
log_writer.add_scalar('Train/batch_cost',
avg_train_batch_cost, iter)
log_writer.add_scalar('Train/reader_cost',
avg_train_reader_cost, iter)
avg_loss = 0.0
avg_loss_list = []
reader_cost_averager.reset()
batch_cost_averager.reset()
if (iter % save_interval == 0 or
iter == iters) and (val_dataset is not None):
num_workers = 1 if num_workers > 0 else 0
mean_iou, acc = evaluate(
model,
val_dataset,
aug_eval=aug_eval,
scales=1.0,
flip_horizontal=False,
flip_vertical=False,
is_slide=False,
stride=None,
crop_size=None,
num_workers=num_workers)
model.train()
if (iter % save_interval == 0 or iter == iters) and local_rank == 0:
current_save_dir = os.path.join(save_dir,
"iter_{}".format(iter))
if not os.path.isdir(current_save_dir):
os.makedirs(current_save_dir)
paddle.save(model.state_dict(),
os.path.join(current_save_dir, 'model.pdparams'))
paddle.save(optimizer.state_dict(),
os.path.join(current_save_dir, 'model.pdopt'))
save_models.append(current_save_dir)
if len(save_models) > keep_checkpoint_max > 0:
model_to_remove = save_models.popleft()
shutil.rmtree(model_to_remove)
if val_dataset is not None:
if mean_iou > best_mean_iou:
best_mean_iou = mean_iou
best_model_iter = iter
best_model_dir = os.path.join(save_dir, "best_model")
paddle.save(
model.state_dict(),
os.path.join(best_model_dir, 'model.pdparams'))
logger.info(
'[EVAL] The model with the best validation mIoU ({:.4f}) was saved at iter {}.'
.format(best_mean_iou, best_model_iter))
if use_vdl:
log_writer.add_scalar('Evaluate/mIoU', mean_iou, iter)
log_writer.add_scalar('Evaluate/Acc', acc, iter)
batch_start = time.time()
# Calculate flops.
if local_rank == 0:
def count_syncbn(m, x, y):
x = x[0]
nelements = x.numel()
m.total_ops += int(2 * nelements)
_, c, h, w = images.shape
flops = paddle.flops(
model, [1, c, h, w],
custom_ops={paddle.nn.SyncBatchNorm: count_syncbn})
logger.info(flops)
# Sleep for half a second to let dataloader release resources.
time.sleep(0.5)
if use_vdl:
log_writer.close()
import os
import numpy as np
import cv2
from PIL import Image
from paddleseg import utils
import xml.dom.minidom
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root.split('/')[-1], f))
else:
raise FileNotFoundError(
'`--image_path` is not found. It should be an image file or a directory including images'
)
if len(image_list) == 0:
raise RuntimeError('There is no image file in `--image_path`')
return image_list, image_dir
def refine_pred():
image_list, image_dir = get_image_list(
'detection_out/pseudo_color_prediction')
for ii in image_list:
name_pred = 'detection_out/pseudo_color_prediction/' + ii
name_label = 'data/IDD_Detection/Annotations/all/' + ii[:-3] + 'xml'
pred = np.array(Image.open(name_pred)).astype(np.float32)
if not os.path.exists(name_label):
pred_mask = utils.visualize.get_pseudo_color_map(pred)
pred_saved_path = 'detect_out/pred_refine/' + ii
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
continue
dom = xml.dom.minidom.parse(name_label)
root = dom.documentElement
objects = root.getElementsByTagName("object")
for item in objects:
name = item.getElementsByTagName("name")[0]
if name.firstChild.data == 'traffic sign' or name.firstChild.data == 'traffic light':
print(ii)
xmin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmin')[0].firstChild.data)
ymin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymin')[0].firstChild.data)
xmax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmax')[0].firstChild.data)
ymax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymax')[0].firstChild.data)
if name.firstChild.data == 'traffic sign':
pred[ymin:ymax, xmin:xmax] = 18
elif name.firstChild.data == 'traffic light':
pred[ymin:ymax, xmin:xmax] = 19
pred_mask = utils.visualize.get_pseudo_color_map(pred)
pred_saved_path = 'detect_out/pred_refine/' + ii
mkdir(pred_saved_path)
pred_mask.save(pred_saved_path)
def test():
path = '/Users/liliulei/Downloads/IDD_Detection/JPEGImages/frontNear/'
image_list, image_dir = get_image_list(path)
for ii in image_list:
name_xml = '/Users/liliulei/Downloads/IDD_Detection/Annotations/frontNear/' + ii[:-3] + 'xml'
image = cv2.imread(path + ii)
# print(image.shape)
(h, w) = image.shape[0:2]
pred = np.zeros_like(image)
dom = xml.dom.minidom.parse(name_xml)
root = dom.documentElement
objects = root.getElementsByTagName("object")
for item in objects:
name = item.getElementsByTagName("name")[0]
print(name.firstChild.data)
if name.firstChild.data == 'traffic sign' or name.firstChild.data == 'traffic light':
xmin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmin')[0].firstChild.data)
ymin = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymin')[0].firstChild.data)
xmax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'xmax')[0].firstChild.data)
ymax = int(
item.getElementsByTagName('bndbox')[0].getElementsByTagName(
'ymax')[0].firstChild.data)
if name.firstChild.data == 'traffic sign':
pred[ymin:ymax, xmin:xmax, 0] = 255
elif name.firstChild.data == 'traffic light':
pred[ymin:ymax, xmin:xmax, 1] = 255
new_im = image * 0.5 + pred * 0.5
cv2.imwrite(ii.split('/')[-1][:-3] + 'png', new_im)
refine_pred()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
import datasets, models
from scripts.train import train
def parse_args():
parser = argparse.ArgumentParser(description='Model training')
# params of training
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--iters',
dest='iters',
help='iters for training',
type=int,
default=None)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini batch size of one gpu or cpu',
type=int,
default=None)
parser.add_argument(
'--learning_rate',
dest='learning_rate',
help='Learning rate',
type=float,
default=None)
parser.add_argument(
'--save_interval',
dest='save_interval',
help='How many iters to save a model snapshot once during training.',
type=int,
default=1000)
parser.add_argument(
'--resume_model',
dest='resume_model',
help='The path of resume model',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the model snapshot',
type=str,
default='./output')
parser.add_argument(
'--keep_checkpoint_max',
dest='keep_checkpoint_max',
help='Maximum number of checkpoints to save',
type=int,
default=5)
parser.add_argument(
'--num_workers',
dest='num_workers',
help='Num workers for data loader',
type=int,
default=0)
parser.add_argument(
'--do_eval',
dest='do_eval',
help='Eval while training',
action='store_true')
parser.add_argument(
'--log_iters',
dest='log_iters',
help='Display logging information at every log_iters',
default=10,
type=int)
parser.add_argument(
'--use_vdl',
dest='use_vdl',
help='Whether to record the data to VisualDL during training',
action='store_true')
return parser.parse_args()
def main(args):
env_info = get_sys_env()
info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
info = '\n'.join(['', format('Environment Information', '-^48s')] + info +
['-' * 48])
logger.info(info)
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(
args.cfg,
learning_rate=args.learning_rate,
iters=args.iters,
batch_size=args.batch_size)
train_dataset = cfg.train_dataset
if train_dataset is None:
raise RuntimeError(
'The training dataset is not specified in the configuration file.')
val_dataset = cfg.val_dataset if args.do_eval else None
losses = cfg.loss
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
train(
cfg.model,
train_dataset,
val_dataset=val_dataset,
aug_eval=True,
flip_horizontal_eval=False,
optimizer=cfg.optimizer,
save_dir=args.save_dir,
iters=cfg.iters,
batch_size=cfg.batch_size,
resume_model=args.resume_model,
save_interval=args.save_interval,
log_iters=args.log_iters,
num_workers=args.num_workers,
use_vdl=args.use_vdl,
losses=losses,
keep_checkpoint_max=args.keep_checkpoint_max)
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
from paddleseg.cvlibs import manager, Config
from core.val import evaluate
from paddleseg.utils import get_sys_env, logger, utils
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model evaluation')
# params of evaluate
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for evaluation',
type=str,
default=None)
parser.add_argument(
'--num_workers',
dest='num_workers',
help='Num workers for data loader',
type=int,
default=0)
# augment for evaluation
parser.add_argument(
'--aug_eval',
dest='aug_eval',
help='Whether to use multi-scales and flip augment for evaluation',
action='store_true')
parser.add_argument(
'--scales',
dest='scales',
nargs='+',
help='Scales for augment',
type=float,
default=1.0)
parser.add_argument(
'--flip_horizontal',
dest='flip_horizontal',
help='Whether to use horizontal flip augment',
action='store_true')
parser.add_argument(
'--flip_vertical',
dest='flip_vertical',
help='Whether to use vertical flip augment',
action='store_true')
# sliding window evaluation
parser.add_argument(
'--is_slide',
dest='is_slide',
help='Whether to evaluate by sliding window',
action='store_true')
parser.add_argument(
'--crop_size',
dest='crop_size',
nargs=2,
help='The crop size of sliding window, the first is width and the second is height.',
type=int,
default=None)
parser.add_argument(
'--stride',
dest='stride',
nargs=2,
help='The stride of sliding window, the first is width and the second is height.',
type=int,
default=None)
return parser.parse_args()
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
val_dataset = cfg.val_dataset
if val_dataset is None:
raise RuntimeError(
'The validation dataset is not specified in the configuration file.'
)
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
utils.load_entire_model(model, args.model_path)
logger.info('Loaded trained params of model successfully')
evaluate(
model,
val_dataset,
aug_eval=args.aug_eval,
scales=args.scales,
flip_horizontal=args.flip_horizontal,
flip_vertical=args.flip_vertical,
is_slide=args.is_slide,
crop_size=args.crop_size,
stride=args.stride,
num_workers=args.num_workers, )
if __name__ == '__main__':
args = parse_args()
main(args)
# Cityscapes SOTA
The implementation of Hierarchical Multi-Scale Attention based on PaddlePaddle. [[Paper]](https://arxiv.org/abs/2005.10821)<br>
Based on the above work, we made some optimizations:
- Use dice loss and bootstrapped cross entropy loss instead of plain cross entropy loss
- Learn from all fine data plus an equal amount of coarse data in each epoch
- Evaluate with an arithmetic (equal-difference) series of scales instead of a geometric (equal-ratio) series
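For example, the multi-scale validation config below evaluates at scales {0.5, 1.0, 1.5, 2.0, 2.5}, an arithmetic series, rather than a geometric series such as {0.5, 1.0, 2.0, 4.0}.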
We achieve an mIoU of **87%** on the Cityscapes validation set.
A sample result is shown below (for high-definition pictures, please click [here](https://github.com/PaddlePaddle/PaddleSeg/blob/release/v2.0/docs/images/cityscapes.gif)).
<div align="center">
<img src=https://user-images.githubusercontent.com/30695251/144982303-51d40188-c00d-46b7-9012-41955c4e2156.gif width = "500" />
</div>
## Installation
#### step 1. Install PaddlePaddle
System Requirements:
* PaddlePaddle >= 2.0.0rc1
* Python >= 3.6
We highly recommend installing the GPU version of PaddlePaddle: segmentation models have a large memory overhead, and the CPU version may run out of memory while running them. For more detailed installation tutorials, please refer to the official website of [PaddlePaddle](https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/2.0/install/).
#### step 2. Install PaddleSeg
Use the *API calling* method to install PaddleSeg for flexible development:
```shell
pip install paddleseg
```
## Data Preparation
Download the following files into the `data/cityscapes` directory, then unzip them (see the sketch after the file list).
```shell
mkdir -p data/cityscapes
```
First, download these three files from the [Cityscapes dataset](https://www.cityscapes-dataset.com/downloads/):
- leftImg8bit_trainvaltest.zip (11GB)
- gtFine_trainvaltest.zip (241MB)
- leftImg8bit_trainextra.zip (44GB)
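A minimal sketch of the unzip step (paths assumed; the extra split is extracted into its own directory to match the layout shown later):
```shell
cd data/cityscapes
unzip leftImg8bit_trainvaltest.zip
unzip gtFine_trainvaltest.zip
# Keep the extra images in a separate directory, as the final structure expects.
unzip leftImg8bit_trainextra.zip -d leftImg8bit_trainextra
cd ../..
```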
Run the following commands to do the label conversion:
```shell
pip install cityscapesscripts
python ../../tools/data/convert_cityscapes.py --cityscapes_path data/cityscapes --num_workers 8
```
Here `--cityscapes_path` should be adjusted to the actual dataset path, and `--num_workers` sets the number of worker processes, which can be tuned to your machine.
Then download and uncompress the autolabelled data from [Google Drive](https://drive.google.com/file/d/1DtPo-WP-hjaOwsbj6ZxTtOo_7R_4TKRG/view?usp=sharing):
- refinement_final_v0.zip # This file is needed for autolabelled training to recreate the SOTA result
Delete the useless `tmp` directory inside the `refinement_final` directory:
```shell
rm -r tmp/
```
Convert the autolabelled data to the PaddleSeg data format:
```shell
python tools/convert_cityscapes_autolabeling.py --dataset_root data/cityscapes/
```
Finally, organize the data into the following structure.
```
cityscapes
|
|--leftImg8bit
|  |--train
|  |--val
|  |--test
|
|--gtFine
|  |--train
|  |--val
|  |--test
|
|--leftImg8bit_trainextra
|  |--leftImg8bit
|     |--train_extra
|        |--augsburg
|        |--bayreuth
|        |--...
|
|--convert_autolabelled
|  |--augsburg
|  |--bayreuth
|  |--...
```
## Evaluation
### Download Trained Model
```shell
mkdir -p saved_model && cd saved_model
wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/mscale_ocr_hrnetw48_cityscapes_autolabel_mapillary/model.pdparams
cd ..
```
### Evaluation on Cityscapes
| Model | Backbone | mIoU | mIoU (flip) | mIoU (5 scales + flip) |
|:-:|:-:|:-:|:-:|:-:|
|MscaleOCRNet|HRNet_w48|86.89%|86.99%|87.00%|
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch val.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary.yml --num_workers 3 --model_path saved_model/model.pdparams
```
The reported mIoU should be 86.89. This evaluates with scales of 0.5, 1.0, and 2.0 and requires 14.2GB of GPU memory.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch val.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary.yml --num_workers 3 --model_path saved_model/model.pdparams \
--aug_eval --flip_horizontal
```
The reported mIoU should be 86.99. This evaluates with scales of 0.5, 1.0, and 2.0 plus horizontal flipping and requires 14.2GB of GPU memory.
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch val.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary_ms_val.yml --num_workers 3 --model_path saved_model/model.pdparams \
--aug_eval --flip_horizontal
```
The reported mIoU should be 87.00. This evaluates with scales of 0.5, 1.0, 1.5, 2.0, and 2.5 plus horizontal flipping and requires 21.2GB of GPU memory.
## Training
### Download Pretrained Weights
```shell
mkdir -p pretrain && cd pretrain
wget https://bj.bcebos.com/paddleseg/dygraph/cityscapes/ocrnet_hrnetw48_mapillary/pretrained.pdparams
cd ..
```
The pretrained weights were obtained by pretraining OCRNet (with an HRNet-W48 backbone) on the Mapillary dataset.
### Training on Cityscapes
```shell
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python -u -m paddle.distributed.launch train.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary.yml --use_vdl \
--save_dir saved_model/mscale_ocr_cityscapes_autolabel_mapillary --save_interval 2000 --num_workers 5 --do_eval
```
Note that this requires 32GB of GPU memory. You can remove the `--do_eval` argument to turn off evaluation during training, which lowers the requirement to 25GB of GPU memory.
If you run out of memory, try lowering the crop size, as sketched below.
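For example (an illustrative sketch; the exact values depend on your hardware), shrinking `RandomPaddingCrop` in the training config reduces memory use:
```yaml
train_dataset:
  transforms:
    - type: RandomPaddingCrop
      crop_size: [1024, 512]  # the provided config uses [2048, 1024]
```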
## Deploy
Run the following command to export the inference model.
```shell
python export.py \
--config configs/mscale_ocr_cityscapes_autolabel_mapillary_ms_val.yml \
--save_dir ./output \
--input_shape 1 3 2048 1024
```
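As a minimal sketch of consuming the exported model with Paddle's Python inference API (file names follow the `deploy.yaml` written by `export.py`; the random input is a stand-in for properly preprocessed images):
```python
import numpy as np
from paddle.inference import Config, create_predictor

# Paths follow the files that export.py writes into --save_dir.
config = Config('output/model.pdmodel', 'output/model.pdiparams')
config.enable_use_gpu(8000, 0)  # initial GPU memory pool (MB), device id
predictor = create_predictor(config)

# A normalized NCHW float32 batch matching --input_shape 1 3 2048 1024.
# (Real inputs should go through the transforms listed in deploy.yaml.)
img = np.random.rand(1, 3, 2048, 1024).astype('float32')

input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(img)
predictor.run()
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
pred = output_handle.copy_to_cpu()  # int32 label map when output_op is argmax
```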
The inference model can then be deployed with the following methods.
| Platform | Library | Tutorial |
| :----------- | :----------- | :----- |
| Python | Paddle prediction library | [e.g.](../../docs/deployment/inference/python_inference.md) |
| C++ | Paddle prediction library | [e.g.](../../docs/deployment/inference/cpp_inference.md) |
| Mobile | PaddleLite | [e.g.](../../docs/deployment/lite/lite.md) |
| Front-end | PaddleJS | [e.g.](../../docs/deployment/web/web.md) |
Other deployment documents:
* [Inference with TensorRT in C++](https://github.com/iwatake2222/play_with_tensorrt/tree/master/pj_tensorrt_seg_paddleseg_cityscapessota)
* [Inference with ONNX Runtime in Python](https://github.com/PINTO0309/PINTO_model_zoo/tree/main/201_CityscapesSOTA/demo)
* [Inference with TensorFlow Lite in Python](https://github.com/axinc-ai/ailia-models/tree/master/image_segmentation/paddleseg)
Thanks for their contributions!
# Hierarchical multi-scale attention for semantic segmentation
## Reference
> Tao, Andrew, Karan Sapra, and Bryan Catanzaro. "Hierarchical multi-scale attention for semantic segmentation." arXiv preprint arXiv:2005.10821 (2020).
batch_size: 1
iters: 65000
model:
type: MscaleOCRNet
pretrained: pretrain/pretrained.pdparams
n_scales: [0.5,1.0,2.0]
backbone:
type: HRNet_W48_NV
num_classes: 19
backbone_indices: [0]
train_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0
- type: RandomPaddingCrop
crop_size: [2048, 1024]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.25
brightness_prob: 1
contrast_range: 0.25
contrast_prob: 1
saturation_range: 0.25
saturation_prob: 1
hue_range: 63
hue_prob: 1
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: train
val_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes
transforms:
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 0.0001
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.005
power: 2
end_lr: 0.0
loss:
types:
- type: DiceLoss
- type: DiceLoss
- type: DiceLoss
- type: DiceLoss
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
- type: BootstrappedCrossEntropyLoss
min_K: 100000
loss_th: 0.05
coef: [1, 0.4, 0.05, 0.05, 1, 0.4, 0.05, 0.05]
batch_size: 1
iters: 65000
model:
type: MscaleOCRNet
pretrained: pretrain/pretrained.pdparams
n_scales: [0.5,1.0,1.5,2.0,2.5] # more scales
backbone:
type: HRNet_W48_NV
num_classes: 19
backbone_indices: [0]
val_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes
transforms:
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: val
train_dataset:
type: CityscapesAutolabeling
dataset_root: data/cityscapes/
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0
- type: RandomPaddingCrop
crop_size: [2048, 1024]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.25
brightness_prob: 1
contrast_range: 0.25
contrast_prob: 1
saturation_range: 0.25
saturation_prob: 1
hue_range: 63
hue_prob: 1
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: train
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .cityscapes_autolabeling import CityscapesAutolabeling
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import glob
import random
import paddle
import numpy as np
from PIL import Image
from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose
# A fixed random seed ensures that, when the dataset is shuffled each epoch during multi-GPU training, all GPUs see the same data order.
random.seed(100)
@manager.DATASETS.add_component
class CityscapesAutolabeling(paddle.io.Dataset):
"""
Cityscapes dataset with fine data, coarse data and autolabelled data.
Source: https://www.cityscapes-dataset.com/
Autolabelled-Data from [google drive](https://drive.google.com/file/d/1DtPo-WP-hjaOwsbj6ZxTtOo_7R_4TKRG/view?usp=sharing)
The folder structure is as follow:
cityscapes
|
|--leftImg8bit
| |--train
| |--val
| |--test
|
|--gtFine
| |--train
| |--val
| |--test
|
|--leftImg8bit_trainextra
| |--leftImg8bit
| |--train_extra
| |--augsburg
| |--bayreuth
| |--...
|
|--convert_autolabelled
| |--augsburg
| |--bayreuth
| |--...
Make sure there are *_gtFine_labelTrainIds.png files in the gtFine directory. If not, please run the convert_cityscapes.py in tools.
Convert autolabelled data according to PaddleSeg data format:
python tools/convert_cityscapes_autolabeling.py --dataset_root data/cityscapes/
Args:
transforms (list): Transforms for image.
dataset_root (str): Cityscapes dataset directory.
mode (str, optional): Which part of the dataset to use. It is one of ('train', 'val', 'test'). Default: 'train'.
coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1.
add_val (bool, optional): Whether to add the val set in training. Default: False.
"""
def __init__(self,
transforms,
dataset_root,
mode='train',
coarse_multiple=1,
add_val=False):
self.dataset_root = dataset_root
self.transforms = Compose(transforms)
self.file_list = list()
mode = mode.lower()
self.mode = mode
self.num_classes = 19
self.ignore_index = 255
self.coarse_multiple = coarse_multiple
if mode not in ['train', 'val', 'test']:
raise ValueError(
"mode should be 'train', 'val' or 'test', but got {}.".format(
mode))
if self.transforms is None:
raise ValueError("`transforms` is necessary, but it is None.")
img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
label_dir = os.path.join(self.dataset_root, 'gtFine')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The dataset is not Found or the folder structure is nonconfoumance."
)
label_files = sorted(
glob.glob(
os.path.join(label_dir, mode, '*',
'*_gtFine_labelTrainIds.png')))
img_files = sorted(
glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))
self.file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.num_files = len(self.file_list)
self.total_num_files = self.num_files
if mode == 'train':
# whether to add val set in training
if add_val:
label_files = sorted(
glob.glob(
os.path.join(label_dir, 'val', '*',
'*_gtFine_labelTrainIds.png')))
img_files = sorted(
glob.glob(
os.path.join(img_dir, 'val', '*', '*_leftImg8bit.png')))
val_file_list = [
[img_path, label_path]
for img_path, label_path in zip(img_files, label_files)
]
self.file_list.extend(val_file_list)
self.num_files = len(self.file_list)
# use coarse dataset only in training
img_dir = os.path.join(self.dataset_root, 'leftImg8bit_trainextra',
'leftImg8bit', 'train_extra')
label_dir = os.path.join(self.dataset_root, 'convert_autolabelled')
if self.dataset_root is None or not os.path.isdir(
self.dataset_root) or not os.path.isdir(
img_dir) or not os.path.isdir(label_dir):
raise ValueError(
"The coarse dataset is not Found or the folder structure is nonconfoumance."
)
coarse_label_files = sorted(
glob.glob(os.path.join(label_dir, '*', '*_leftImg8bit.png')))
coarse_img_files = sorted(
glob.glob(os.path.join(img_dir, '*', '*_leftImg8bit.png')))
if len(coarse_img_files) != len(coarse_label_files):
raise ValueError(
"The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset."
.format(len(coarse_img_files), len(coarse_label_files)))
self.coarse_file_list = [[img_path, label_path]
for img_path, label_path in zip(
coarse_img_files, coarse_label_files)]
random.shuffle(self.coarse_file_list)
self.total_num_files = int(self.num_files * (1 + coarse_multiple))
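# One training epoch therefore spans all fine images plus coarse_multiple
# times as many coarse (autolabelled) images; __getitem__ below maps
# indices >= num_files into the shuffled coarse list.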
def __getitem__(self, idx):
if self.mode == 'test':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
im = im[np.newaxis, ...]
return im, image_path
elif self.mode == 'val':
image_path, label_path = self.file_list[idx]
im, _ = self.transforms(im=image_path)
label = np.asarray(Image.open(label_path))
label = label[np.newaxis, :, :]
return im, label
else:
if idx >= self.num_files:
image_path, label_path = self.coarse_file_list[idx -
self.num_files]
else:
image_path, label_path = self.file_list[idx]
im, label = self.transforms(im=image_path, label=label_path)
return im, label
def shuffle(self):
random.shuffle(self.coarse_file_list)
def __len__(self):
return self.total_num_files
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
import yaml
from paddleseg.cvlibs import Config
from paddleseg.utils import logger
import datasets, models
def parse_args():
parser = argparse.ArgumentParser(description='Model export.')
parser.add_argument(
"--config", help="The config file.", type=str, required=True)
parser.add_argument(
'--model_path', help='The path of model for export', type=str)
parser.add_argument(
'--save_dir',
help='The directory for saving the exported model',
type=str,
default='./output/inference_model')
parser.add_argument(
'--output_op',
choices=['argmax', 'softmax', 'none'],
default="argmax",
help="Select which op to be appended to output result, default: argmax")
parser.add_argument(
'--without_argmax',
help='Do not add the argmax operation at the end of the network. [Deprecated]',
action='store_true')
parser.add_argument(
'--with_softmax',
help='Add the softmax operation at the end of the network. [Deprecated]',
action='store_true')
parser.add_argument(
"--input_shape",
nargs='+',
help="Export the model with fixed input shape, such as 1 3 1024 1024.",
type=int,
default=None)
return parser.parse_args()
class SavedSegmentationNet(paddle.nn.Layer):
def __init__(self, net, output_op):
super().__init__()
self.net = net
self.output_op = output_op
assert output_op in ['argmax', 'softmax'], \
"output_op should in ['argmax', 'softmax']"
def forward(self, x):
outs = self.net(x)
new_outs = []
for out in outs:
if self.output_op == 'argmax':
out = paddle.argmax(out, axis=1, dtype='int32')
elif self.output_op == 'softmax':
out = paddle.nn.functional.softmax(out, axis=1)
new_outs.append(out)
return new_outs
def main(args):
os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
cfg = Config(args.config)
cfg.check_sync_info()
net = cfg.model
if args.model_path is not None:
para_state_dict = paddle.load(args.model_path)
net.set_dict(para_state_dict)
logger.info('Loaded trained params of model successfully.')
if args.input_shape is None:
shape = [None, 3, None, None]
else:
shape = args.input_shape
output_op = args.output_op
if args.without_argmax:
logger.warning(
'--without_argmax will be deprecated, please use --output_op')
output_op = 'none'
if args.with_softmax:
logger.warning(
'--with_softmax will be deprecated, please use --output_op')
output_op = 'softmax'
new_net = net if output_op == 'none' else SavedSegmentationNet(net,
output_op)
new_net.eval()
new_net = paddle.jit.to_static(
new_net,
input_spec=[paddle.static.InputSpec(
shape=shape, dtype='float32')])
save_path = os.path.join(args.save_dir, 'model')
paddle.jit.save(new_net, save_path)
yml_file = os.path.join(args.save_dir, 'deploy.yaml')
with open(yml_file, 'w') as file:
transforms = cfg.export_config.get('transforms', [{
'type': 'Normalize'
}])
output_dtype = 'int32' if output_op == 'argmax' else 'float32'
data = {
'Deploy': {
'model': 'model.pdmodel',
'params': 'model.pdiparams',
'transforms': transforms,
'input_shape': shape,
'output_op': output_op,
'output_dtype': output_dtype
}
}
yaml.dump(data, file)
logger.info(f'The inference model is saved in {args.save_dir}')
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .backbones.hrnet_nv import *
from .mscale_ocrnet import MscaleOCRNet