Commit 0d97cc8c authored by Sugon_ldc

add new model
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import paddle
from urllib.parse import urlparse
from paddleseg.utils import logger, download_pretrained_model
def get_files(root_path):
res = []
for root, dirs, files in os.walk(root_path, followlinks=True):
for f in files:
if f.endswith(('.jpg', '.png', '.jpeg', '.JPG')):
res.append(os.path.join(root, f))
return res
def get_image_list(image_path):
"""Get image list"""
valid_suffix = [
'.JPEG', '.jpeg', '.JPG', '.jpg', '.BMP', '.bmp', '.PNG', '.png'
]
image_list = []
image_dir = None
if os.path.isfile(image_path):
image_dir = None
if os.path.splitext(image_path)[-1] in valid_suffix:
image_list.append(image_path)
else:
image_dir = os.path.dirname(image_path)
with open(image_path, 'r') as f:
for line in f:
line = line.strip()
if len(line.split()) > 1:
raise RuntimeError(
'There should be only one image path per line in `image_path` file. Wrong line: {}'
.format(line))
image_list.append(os.path.join(image_dir, line))
elif os.path.isdir(image_path):
image_dir = image_path
for root, dirs, files in os.walk(image_path):
for f in files:
if '.ipynb_checkpoints' in root:
continue
if os.path.splitext(f)[-1] in valid_suffix:
image_list.append(os.path.join(root, f))
image_list.sort()
else:
raise FileNotFoundError(
'`image_path` is not found. It should be an image file or a directory containing images.'
)
if len(image_list) == 0:
raise RuntimeError('There is no image file in `image_path`.')
return image_list, image_dir
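# Illustrative usage of get_image_list (hypothetical paths, not part of the original file):
#   image_list, image_dir = get_image_list('data/images')    # walk a directory
#   image_list, image_dir = get_image_list('data/demo.jpg')  # a single image file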
def mkdir(path):
sub_dir = os.path.dirname(path)
if not os.path.exists(sub_dir):
os.makedirs(sub_dir)
def load_pretrained_model(model, pretrained_model):
if pretrained_model is not None:
logger.info('Loading pretrained model from {}'.format(pretrained_model))
if urlparse(pretrained_model).netloc:
pretrained_model = download_pretrained_model(pretrained_model)
if os.path.exists(pretrained_model):
para_state_dict = paddle.load(pretrained_model)
model_state_dict = model.state_dict()
keys = model_state_dict.keys()
num_params_loaded = 0
for k in keys:
if k not in para_state_dict:
logger.warning("{} is not in pretrained model".format(k))
elif list(para_state_dict[k].shape) != list(model_state_dict[k]
.shape):
# When the input is more than 3 channels such as trimap-based method, padding zeros to load.
para_shape = list(para_state_dict[k].shape)
model_shape = list(model_state_dict[k].shape)
if 'weight' in k \
and len(para_shape) > 3 \
and para_shape[1] < model_shape[1] \
and para_shape[0] == model_shape[0] \
and para_shape[2] == model_shape[2] \
and para_shape[3] == model_shape[3]:
zeros_pad = paddle.zeros(
(para_shape[0], model_shape[1] - para_shape[1],
para_shape[2], para_shape[3]))
para_state_dict[k] = paddle.concat(
[para_state_dict[k], zeros_pad], axis=1)
model_state_dict[k] = para_state_dict[k]
num_params_loaded += 1
else:
logger.warning(
"[SKIP] Shape of pretrained params {} doesn't match.(Pretrained: {}, Actual: {})"
.format(k, para_state_dict[k].shape,
model_state_dict[k].shape))
else:
model_state_dict[k] = para_state_dict[k]
num_params_loaded += 1
model.set_dict(model_state_dict)
logger.info("There are {}/{} variables loaded into {}.".format(
num_params_loaded,
len(model_state_dict), model.__class__.__name__))
else:
raise ValueError('The pretrained model directory is not found: {}'.
format(pretrained_model))
else:
logger.info(
'No pretrained model to load, {} will be trained from scratch.'.
format(model.__class__.__name__))
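# A minimal sketch of the zero-padding rule above, with hypothetical shapes: a
# pretrained conv weight of shape [64, 3, 3, 3] loaded into a layer expecting
# [64, 4, 3, 3] becomes paddle.concat([w, paddle.zeros((64, 1, 3, 3))], axis=1),
# i.e. the extra input channel starts from zero weights.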
paddleseg >= 2.5
pymatting
scikit-image
numba
opencv-python
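# Install with: pip install -r requirements.txt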
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import cv2
import numpy as np
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(LOCAL_PATH, '..'))
manager.BACKBONES._components_dict.clear()
manager.TRANSFORMS._components_dict.clear()
import ppmatting
from ppmatting.core import predict
from ppmatting.utils import get_image_list, estimate_foreground_ml
def parse_args():
parser = argparse.ArgumentParser(
description='Background replacement using image matting')
parser.add_argument(
"--config",
dest="cfg",
help="The config file.",
default=None,
type=str,
required=True)
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
help='Image including human',
type=str,
default=None)
parser.add_argument(
'--trimap_path',
dest='trimap_path',
help='The path of trimap',
type=str,
default=None)
parser.add_argument(
'--background',
dest='background',
help='The background to composite. It is a string specifying a background color (r, g, b, or w) or a path to a background image. If not specified, a green background is used.',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the inference results',
type=str,
default='./output')
parser.add_argument(
'--fg_estimate',
default=True,
type=eval,
choices=[True, False],
help='Whether to estimate foreground when predicting.')
return parser.parse_args()
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
transforms = ppmatting.transforms.Compose(cfg.val_transforms)
alpha, fg = predict(
model,
model_path=args.model_path,
transforms=transforms,
image_list=[args.image_path],
trimap_list=[args.trimap_path],
save_dir=args.save_dir,
fg_estimate=args.fg_estimate)
img_ori = cv2.imread(args.image_path)
bg = get_bg(args.background, img_ori.shape)
alpha = alpha / 255.0
alpha = alpha[:, :, np.newaxis]
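# Standard alpha compositing: blend the estimated foreground over the background.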
com = alpha * fg + (1 - alpha) * bg
com = com.astype('uint8')
com_save_path = os.path.join(args.save_dir,
os.path.basename(args.image_path))
cv2.imwrite(com_save_path, com)
def get_bg(background, img_shape):
bg = np.zeros(img_shape)
if background == 'r':
bg[:, :, 2] = 255
elif background is None or background == 'g':
bg[:, :, 1] = 255
elif background == 'b':
bg[:, :, 0] = 255
elif background == 'w':
bg[:, :, :] = 255
elif not os.path.exists(background):
raise Exception('The --background path does not exist: {}'.format(
background))
else:
bg = cv2.imread(background)
bg = cv2.resize(bg, (img_shape[1], img_shape[0]))
return bg
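# Illustrative usage (hypothetical path): get_bg('r', img.shape) returns a solid red
# background, while get_bg('bg.jpg', img.shape) loads and resizes that image instead.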
if __name__ == "__main__":
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import paddle
import yaml
from paddleseg.cvlibs import Config, manager
from paddleseg.utils import logger
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(LOCAL_PATH, '..'))
manager.BACKBONES._components_dict.clear()
manager.TRANSFORMS._components_dict.clear()
import ppmatting
def parse_args():
parser = argparse.ArgumentParser(description='Model export.')
# params of training
parser.add_argument(
"--config",
dest="cfg",
help="The config file.",
default=None,
type=str,
required=True)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the exported model',
type=str,
default='./output')
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for export',
type=str,
default=None)
parser.add_argument(
'--trimap',
dest='trimap',
help='Whether to input trimap',
action='store_true')
parser.add_argument(
"--input_shape",
nargs='+',
help="Export the model with fixed input shape, such as 1 3 1024 1024.",
type=int,
default=None)
return parser.parse_args()
def main(args):
os.environ['PADDLESEG_EXPORT_STAGE'] = 'True'
cfg = Config(args.cfg)
net = cfg.model
net.eval()
if args.model_path:
para_state_dict = paddle.load(args.model_path)
net.set_dict(para_state_dict)
logger.info('Loaded trained params of model successfully.')
if args.input_shape is None:
shape = [None, 3, None, None]
else:
shape = args.input_shape
input_spec = [{"img": paddle.static.InputSpec(shape=shape, name='img')}]
if args.trimap:
shape[1] = 1
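# The trimap input reuses the image's spatial dims but has a single channel.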
input_spec[0]['trimap'] = paddle.static.InputSpec(
shape=shape, name='trimap')
net = paddle.jit.to_static(net, input_spec=input_spec)
save_path = os.path.join(args.save_dir, 'model')
paddle.jit.save(net, save_path)
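# paddle.jit.save writes the model.pdmodel graph and model.pdiparams weights
# referenced by the 'Deploy' section of deploy.yaml below.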
yml_file = os.path.join(args.save_dir, 'deploy.yaml')
with open(yml_file, 'w') as file:
transforms = cfg.val_dataset_config.get('transforms', [{
'type': 'Normalize'
}])
data = {
'Deploy': {
'transforms': transforms,
'model': 'model.pdmodel',
'params': 'model.pdiparams',
'input_shape': shape
}
}
yaml.dump(data, file)
logger.info(f'Model is saved in {args.save_dir}.')
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
import paddle
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(LOCAL_PATH, '..'))
manager.BACKBONES._components_dict.clear()
manager.TRANSFORMS._components_dict.clear()
import ppmatting
from ppmatting.core import predict
from ppmatting.utils import get_image_list
def parse_args():
parser = argparse.ArgumentParser(description='Model prediction')
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for prediction',
type=str,
default=None)
parser.add_argument(
'--image_path',
dest='image_path',
help='The path of image, it can be a file or a directory including images',
type=str,
default=None)
parser.add_argument(
'--trimap_path',
dest='trimap_path',
help='The path of trimap; it can be a file or a directory including images. '
'When it is a directory, the trimap file names should match the image file names.',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the model snapshot',
type=str,
default='./output/results')
parser.add_argument(
'--fg_estimate',
default=True,
type=eval,
choices=[True, False],
help='Whether to estimate foreground when predicting.')
return parser.parse_args()
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
transforms = ppmatting.transforms.Compose(cfg.val_transforms)
image_list, image_dir = get_image_list(args.image_path)
if args.trimap_path is None:
trimap_list = None
else:
trimap_list, _ = get_image_list(args.trimap_path)
logger.info('Number of predict images = {}'.format(len(image_list)))
predict(
model,
model_path=args.model_path,
transforms=transforms,
image_list=image_list,
image_dir=image_dir,
trimap_list=trimap_list,
save_dir=args.save_dir,
fg_estimate=args.fg_estimate)
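# Example invocation (hypothetical config and paths):
#   python predict.py --config configs/ppmatting.yml \
#       --model_path output/best_model/model.pdparams \
#       --image_path data/images --save_dir output/results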
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
from collections import defaultdict
import random
import numpy as np
import paddle
import paddle.nn as nn
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(LOCAL_PATH, '..'))
manager.BACKBONES._components_dict.clear()
manager.TRANSFORMS._components_dict.clear()
import ppmatting
from ppmatting.core import train
def parse_args():
parser = argparse.ArgumentParser(description='Model training')
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--iters',
dest='iters',
help='Number of iterations for training.',
type=int,
default=None)
parser.add_argument(
'--batch_size',
dest='batch_size',
help='Mini-batch size per GPU or CPU.',
type=int,
default=None)
parser.add_argument(
'--learning_rate',
dest='learning_rate',
help='Learning rate',
type=float,
default=None)
parser.add_argument(
'--save_interval',
dest='save_interval',
help='Interval (in iterations) between model snapshots saved during training.',
type=int,
default=1000)
parser.add_argument(
'--resume_model',
dest='resume_model',
help='The path of resume model',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the model snapshot',
type=str,
default='./output')
parser.add_argument(
'--keep_checkpoint_max',
dest='keep_checkpoint_max',
help='Maximum number of checkpoints to save',
type=int,
default=5)
parser.add_argument(
'--num_workers',
dest='num_workers',
help='Num workers for data loader',
type=int,
default=0)
parser.add_argument(
'--do_eval',
dest='do_eval',
help='Eval while training',
action='store_true')
parser.add_argument(
'--metrics',
dest='metrics',
nargs='+',
help='The metrics to evaluate; any combination of ("sad", "mse", "grad", "conn").',
type=str,
default='sad')
parser.add_argument(
'--log_iters',
dest='log_iters',
help='Display logging information at every log_iters',
default=10,
type=int)
parser.add_argument(
'--use_vdl',
dest='use_vdl',
help='Whether to record the data to VisualDL during training',
action='store_true')
parser.add_argument(
'--eval_begin_iters',
dest='eval_begin_iters',
help='The iteration at which evaluation begins.',
default=0,
type=int)
parser.add_argument(
'--seed',
dest='seed',
help='Set the random seed during training.',
default=None,
type=int)
parser.add_argument(
"--precision",
default="fp32",
type=str,
choices=["fp32", "fp16"],
help="Use AMP (Auto mixed precision) if precision='fp16'. If precision='fp32', the training is normal."
)
parser.add_argument(
"--amp_level",
default="O1",
type=str,
choices=["O1", "O2"],
help="Auto mixed precision level. Accepted values are “O1” and “O2”: O1 represent mixed precision, the input \
data type of each operator will be casted by white_list and black_list; O2 represent Pure fp16, all operators \
parameters and input data will be casted to fp16, except operators in black_list, don’t support fp16 kernel \
and batchnorm. Default is O1(amp)")
parser.add_argument(
'--profiler_options',
type=str,
default=None,
help='The option of train profiler. If profiler_options is not None, the train ' \
'profiler is enabled. Refer to the paddleseg/utils/train_profiler.py for details.'
)
parser.add_argument(
'--repeats',
type=int,
default=1,
help="Repeat the samples in the dataset for `repeats` times in each epoch."
)
parser.add_argument(
'--device',
dest='device',
help='Set the device type, which may be GPU, CPU or XPU.',
default='gpu',
type=str)
return parser.parse_args()
def main(args):
if args.seed is not None:
paddle.seed(args.seed)
np.random.seed(args.seed)
random.seed(args.seed)
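# Fixing the paddle, numpy and python RNG seeds makes training runs repeatable
# (up to nondeterministic GPU kernels).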
env_info = get_sys_env()
info = ['{}: {}'.format(k, v) for k, v in env_info.items()]
info = '\n'.join(['', format('Environment Information', '-^48s')] + info +
['-' * 48])
logger.info(info)
place = args.device
if place == 'gpu' and env_info['Paddle compiled with cuda'] and env_info[
'GPUs used']:
paddle.set_device('gpu')
elif place == 'xpu' and paddle.is_compiled_with_xpu():
paddle.set_device('xpu')
else:
paddle.set_device('cpu')
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(
args.cfg,
learning_rate=args.learning_rate,
iters=args.iters,
batch_size=args.batch_size)
train_dataset = cfg.train_dataset
if train_dataset is None:
raise RuntimeError(
'The training dataset is not specified in the configuration file.')
elif len(train_dataset) == 0:
raise ValueError(
'The length of train_dataset is 0. Please check if your dataset is valid'
)
if args.repeats > 1:
train_dataset.fg_bg_list *= args.repeats
val_dataset = cfg.val_dataset if args.do_eval else None
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
if place == 'gpu' and paddle.distributed.ParallelEnv().nranks > 1:
# convert bn to sync_bn
model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
train(
model,
train_dataset=train_dataset,
val_dataset=val_dataset,
optimizer=cfg.optimizer,
iters=cfg.iters,
batch_size=cfg.batch_size,
num_workers=args.num_workers,
use_vdl=args.use_vdl,
save_interval=args.save_interval,
log_iters=args.log_iters,
resume_model=args.resume_model,
save_dir=args.save_dir,
eval_begin_iters=args.eval_begin_iters,
metrics=args.metrics,
precision=args.precision,
amp_level=args.amp_level,
profiler_options=args.profiler_options)
if __name__ == '__main__':
args = parse_args()
main(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os

import paddle
def update_vgg16_params(model_path):
param_state_dict = paddle.load(model_path)
# first conv weight name: _conv_block_1._conv_1.weight, shape is [64, 3, 3, 3]
# first fc weight name: _fc1.weight, shape is [25088, 4096]
for k, v in param_state_dict.items():
print(k, v.shape)
# first conv weight: pad one zero input channel (3 -> 4) for the extra trimap channel
weight = param_state_dict['_conv_block_1._conv_1.weight'] # [64, 3,3,3]
print('ori shape: ', weight.shape)
zeros_pad = paddle.zeros((64, 1, 3, 3))
param_state_dict['_conv_block_1._conv_1.weight'] = paddle.concat(
[weight, zeros_pad], axis=1)
print('shape after padding',
param_state_dict['_conv_block_1._conv_1.weight'].shape)
# fc1
weight = param_state_dict['_fc1.weight']
weight = paddle.transpose(weight, [1, 0])
print('after transpose: ', weight.shape)
weight = paddle.reshape(weight, (4096, 512, 7, 7))
print('after reshape: ', weight.shape)
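# 25088 = 512 * 7 * 7, so each of the 4096 fc rows maps onto one (512, 7, 7)
# conv filter; the crop below keeps the first 512 filters and the central 3x3
# window of each.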
weight = weight[0:512, :, 2:5, 2:5]
print('after crop: ', weight.shape)
param_state_dict['_conv_6.weight'] = weight
del param_state_dict['_fc1.weight']
del param_state_dict['_fc1.bias']
del param_state_dict['_fc2.weight']
del param_state_dict['_fc2.bias']
del param_state_dict['_out.weight']
del param_state_dict['_out.bias']
paddle.save(param_state_dict, 'VGG16_pretrained.pdparams')
if __name__ == "__main__":
paddle.set_device('cpu')
model_path = os.path.expanduser(
'~/.paddleseg/pretrained_model/dygraph/VGG16_pretrained.pdparams')
update_vgg16_params(model_path)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import sys
LOCAL_PATH = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.join(LOCAL_PATH, '..'))
import paddle
import paddleseg
from paddleseg.cvlibs import manager, Config
from paddleseg.utils import get_sys_env, logger, utils
manager.BACKBONES._components_dict.clear()
manager.TRANSFORMS._components_dict.clear()
import ppmatting
from ppmatting.core import evaluate, evaluate_ml
def parse_args():
parser = argparse.ArgumentParser(description='Model evaluation')
parser.add_argument(
"--config", dest="cfg", help="The config file.", default=None, type=str)
parser.add_argument(
'--model_path',
dest='model_path',
help='The path of model for evaluation',
type=str,
default=None)
parser.add_argument(
'--save_dir',
dest='save_dir',
help='The directory for saving the model snapshot',
type=str,
default='./output/results')
parser.add_argument(
'--num_workers',
dest='num_workers',
help='Num workers for data loader',
type=int,
default=0)
parser.add_argument(
'--save_results',
dest='save_results',
help='Save the predicted alpha while evaluating.',
action='store_true')
parser.add_argument(
'--metrics',
dest='metrics',
nargs='+',
help='The metrics to evaluate; any combination of ("sad", "mse", "grad", "conn").',
type=str,
default='sad')
return parser.parse_args()
def main(args):
env_info = get_sys_env()
place = 'gpu' if env_info['Paddle compiled with cuda'] and env_info[
'GPUs used'] else 'cpu'
paddle.set_device(place)
if not args.cfg:
raise RuntimeError('No configuration file specified.')
cfg = Config(args.cfg)
val_dataset = cfg.val_dataset
if val_dataset is None:
raise RuntimeError(
'The validation dataset is not specified in the configuration file.'
)
elif len(val_dataset) == 0:
raise ValueError(
'The length of val_dataset is 0. Please check if your dataset is valid'
)
msg = '\n---------------Config Information---------------\n'
msg += str(cfg)
msg += '------------------------------------------------'
logger.info(msg)
model = cfg.model
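# cfg.model may be a deep network (paddle.nn.Layer) or a traditional matting
# method without trainable weights; the latter is evaluated with evaluate_ml.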
if isinstance(model, paddle.nn.Layer):
if args.model_path:
utils.load_entire_model(model, args.model_path)
logger.info('Loaded trained params of model successfully')
evaluate(
model,
val_dataset,
num_workers=args.num_workers,
save_dir=args.save_dir,
save_results=args.save_results,
metrics=args.metrics)
else:
evaluate_ml(
model,
val_dataset,
save_dir=args.save_dir,
save_results=args.save_results)
if __name__ == '__main__':
args = parse_args()
main(args)
# Training method

Run the train.sh script to start training.
The run_pretraining.sh script is used by FlagPerf.

# Original README.md

Simplified Chinese | [English](README_EN.md)
<div align="center">
<p align="center">
<img src="./docs/images/paddleseg_logo.png" align="middle" width = "500" />
</p>
**A high-performance image segmentation development kit from PaddlePaddle, covering the whole image segmentation workflow end to end, from training to deployment.**
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
[![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleSeg.svg)](https://github.com/PaddlePaddle/PaddleSeg/releases)
![python version](https://img.shields.io/badge/python-3.6+-orange.svg)
![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
![stars](https://img.shields.io/github/stars/PaddlePaddle/PaddleSeg?color=ccf)
</div>
<div align="center">
<img src="https://github.com/shiyutang/files/blob/9590ea6bfc36139982ce75b00d3b9f26713934dd/teasor.gif" width = "800" />
</div>
## <img src="./docs/images/seg_news_icon.png" width="20"/> 最新动态
* [2022-11-30] :fire: PaddleSeg 2.7版本发布!详细发版信息请参考[Release Note](https://github.com/PaddlePaddle/PaddleSeg/releases)
* 发布实时人像抠图模型[PP-MattingV2](./Matting/):推理速度提升44.6%,平均误差减小17.91%,完美超越此前SOTA模型,支持零成本开箱即用。
* 发布3D医疗影像分割方案[MedicalSegV2](./contrib/MedicalSeg/):涵盖3D医疗影像交互式标注工具EISeg-Med3D、3个高精分割模型,集成并优化前沿分割方案nnUNet-D。
* 官方发布轻量级语义分割模型[RTFormer](./configs/rtformer/):由百度提出并发表于NeurIPS 2022,在公开数据集上实现SOTA性能。
* [2022-07-20] PaddleSeg 2.6版本发布实时人像分割SOTA方案[PP-HumanSegV2](./contrib/PP-HumanSeg)、高性能智能标注工具[EISeg v1.0](./EISeg)正式版、ImageNet分割伪标签数据预训练方法PSSL,开源PP-MattingV1代码和预训练模型。
* [2022-04-20] PaddleSeg 2.5版本发布超轻量级语义分割模型[PP-LiteSeg](./configs/pp_liteseg),高精度抠图模型PP-MattingV1,3D医疗影像开发套件MedicalSegV1,交互式分割工具EISeg v0.5。
* [2022-01-20] PaddleSeg 2.4版本发布交互式分割工具EISeg v0.4,超轻量级人像分割方案PP-HumanSegV1,以及大规模视频会议数据集[PP-HumanSeg14K](./contrib/PP-HumanSeg/paper.md#pp-humanseg14k-a-large-scale-teleconferencing-video-dataset)
## <img src="https://user-images.githubusercontent.com/48054808/157795569-9fc77c85-732f-4870-9be0-99a7fe2cff27.png" width="20"/> 简介
**PaddleSeg**是基于飞桨PaddlePaddle的端到端图像分割套件,内置**45+模型算法****140+预训练模型**,支持**配置化驱动****API调用**开发方式,打通数据标注、模型开发、训练、压缩、部署的**全流程**,提供**语义分割、交互式分割、Matting、全景分割**四大分割能力,助力算法在医疗、工业、遥感、娱乐等场景落地应用。
<div align="center">
<img src="https://github.com/shiyutang/files/raw/main/teasor_new.gif" width = "800" />
</div>
## <img src="./docs/images/feature.png" width="20"/> 特性
* **高精度**:跟踪学术界的前沿分割技术,结合高精度训练的骨干网络,提供40+主流分割网络、140+的高质量预训练模型,效果优于其他开源实现。
* **高性能**:使用多进程异步I/O、多卡并行训练、评估等加速策略,结合飞桨核心框架的显存优化功能,大幅度减少分割模型的训练开销,让开发者更低成本、更高效地完成图像分割训练。
* **模块化**:源于模块化设计思想,解耦数据准备、分割模型、骨干网络、损失函数等不同组件,开发者可以基于实际应用场景出发,组装多样化的配置,满足不同性能和精度的要求。
* **全流程**:打通数据标注、模型开发、模型训练、模型压缩、模型部署全流程,经过业务落地的验证,让开发者完成一站式开发工作。
<div align="center">
<img src="https://user-images.githubusercontent.com/14087480/176379006-7f330e00-b6b0-480e-9df8-8fd1090da4cf.png" width = "800" />
</div>
## <img src="./docs/images/chat.png" width="20"/> 技术交流
* 如果大家有PaddleSeg的使用问题和功能建议, 可以通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleSeg/issues)提issue。
* **欢迎加入PaddleSeg的微信用户群👫**(扫码填写简单问卷即可入群),大家可以**领取30G重磅学习大礼包🎁**,也可以和值班同学、各界大佬直接进行交流。
* 🔥 获取深度学习视频教程、图像分割论文合集
* 🔥 获取PaddleSeg的历次直播视频,最新发版信息和直播动态
* 🔥 获取PaddleSeg自建的人像分割数据集,整理的开源数据集
* 🔥 获取PaddleSeg在垂类场景的预训练模型和应用合集,涵盖人像分割、交互式分割等等
* 🔥 获取PaddleSeg的全流程产业实操范例,包括质检缺陷分割、抠图Matting、道路分割等等
<div align="center">
<img src="https://user-images.githubusercontent.com/48433081/174770518-e6b5319b-336f-45d9-9817-da12b1961fb1.jpg" width = "200" />
</div>
## <img src="./docs/images/model.png" width="20"/> 产品矩阵
<table align="center">
<tbody>
<tr align="center" valign="bottom">
<td>
<b>Models</b>
</td>
<td colspan="2">
<b>Components</b>
</td>
<td>
<b>Featured Cases</b>
</td>
</tr>
<tr valign="top">
<td>
<ul>
<details><summary><b>Semantic Segmentation Models</b></summary>
<ul>
<li><a href="./configs/pp_liteseg">PP-LiteSeg</a> </li>
<li><a href="./configs/deeplabv3p">DeepLabV3P</a> </li>
<li><a href="./configs/ocrnet">OCRNet</a> </li>
<li><a href="./configs/mobileseg">MobileSeg</a> </li>
<li><a href="./configs/ann">ANN</a></li>
<li><a href="./configs/attention_unet">Att U-Net</a></li>
<li><a href="./configs/bisenetv1">BiSeNetV1</a></li>
<li><a href="./configs/bisenet">BiSeNetV2</a></li>
<li><a href="./configs/ccnet">CCNet</a></li>
<li><a href="./configs/danet">DANet</a></li>
<li><a href="./configs/ddrnet">DDRNet</a></li>
<li><a href="./configs/decoupled_segnet">DecoupledSeg</a></li>
<li><a href="./configs/deeplabv3">DeepLabV3</a></li>
<li><a href="./configs/dmnet">DMNet</a></li>
<li><a href="./configs/dnlnet">DNLNet</a></li>
<li><a href="./configs/emanet">EMANet</a></li>
<li><a href="./configs/encnet">ENCNet</a></li>
<li><a href="./configs/enet">ENet</a></li>
<li><a href="./configs/espnetv1">ESPNetV1</a></li>
<li><a href="./configs/espnet">ESPNetV2</a></li>
<li><a href="./configs/fastfcn">FastFCN</a></li>
<li><a href="./configs/fastscnn">Fast-SCNN</a></li>
<li><a href="./configs/gcnet">GCNet</a></li>
<li><a href="./configs/ginet">GINet</a></li>
<li><a href="./configs/glore">GloRe</a></li>
<li><a href="./configs/gscnn">GSCNN</a></li>
<li><a href="./configs/hardnet">HarDNet</a></li>
<li><a href="./configs/fcn">HRNet-FCN</a></li>
<li><a href="./configs/hrnet_w48_contrast">HRNet-Contrast</a></li>
<li><a href="./configs/isanet">ISANet</a></li>
<li><a href="./configs/pfpn">PFPNNet</a></li>
<li><a href="./configs/pointrend">PointRend</a></li>
<li><a href="./configs/portraitnet">PotraitNet</a></li>
<li><a href="./configs/pp_humanseg_lite">PP-HumanSeg-Lite</a></li>
<li><a href="./configs/pspnet">PSPNet</a></li>
<li><a href="./configs/pssl">PSSL</a></li>
<li><a href="./configs/segformer">SegFormer</a></li>
<li><a href="./configs/segmenter">SegMenter</a></li>
<li><a href="./configs/segmne">SegNet</a></li>
<li><a href="./configs/setr">SETR</a></li>
<li><a href="./configs/sfnet">SFNet</a></li>
<li><a href="./configs/stdcseg">STDCSeg</a></li>
<li><a href="./configs/u2net">U<sup>2</sup>Net</a></li>
<li><a href="./configs/unet">UNet</a></li>
<li><a href="./configs/unet_plusplus">UNet++</a></li>
<li><a href="./configs/unet_3plus">UNet3+</a></li>
<li><a href="./configs/upernet">UperNet</a></li>
<li><a href="./configs/rtformer">RTFormer</a></li>
<li><a href="./configs/uhrnet">UHRNet</a></li>
<li><a href="./configs/topformer">TopFormer</a></li>
<li><a href="./configs/mscale_ocrnet">MscaleOCRNet-PSA</a></li>
</ul>
</details>
<details><summary><b>Interactive Segmentation Models</b></summary>
<ul>
<li><a href="./EISeg">EISeg</a></li>
<li>RITM</li>
<li>EdgeFlow</li>
</ul>
</details>
<details><summary><b>Image Matting Models</b></summary>
<ul>
<li><a href="./Matting/configs/ppmattingv2">PP-MattingV2</a></li>
<li><a href="./Matting/configs/ppmatting">PP-MattingV1</a></li>
<li><a href="./Matting/configs/dim/dim-vgg16.yml">DIM</a></li>
<li><a href="./Matting/configs/modnet/modnet-hrnet_w18.yml">MODNet</a></li>
<li><a href="./Matting/configs/human_matting/human_matting-resnet34_vd.yml">PP-HumanMatting</a></li>
</ul>
</details>
<details><summary><b>Panoptic Segmentation</b></summary>
<ul>
<li><a href="./contrib/PanopticDeepLab/README_CN.md">Panoptic-DeepLab</a></li>
</ul>
</details>
</td>
<td>
<details><summary><b>Backbones</b></summary>
<ul>
<li><a href="./paddleseg/models/backbones/hrnet.py">HRNet</a></li>
<li><a href="./paddleseg/models/backbones/resnet_cd.py">ResNet</a></li>
<li><a href="./paddleseg/models/backbones/stdcnet.py">STDCNet</a></li>
<li><a href="./paddleseg/models/backbones/mobilenetv2.py">MobileNetV2</a></li>
<li><a href="./paddleseg/models/backbones/mobilenetv3.py">MobileNetV3</a></li>
<li><a href="./paddleseg/models/backbones/shufflenetv2.py">ShuffleNetV2</a></li>
<li><a href="./paddleseg/models/backbones/ghostnet.py">GhostNet</a></li>
<li><a href="./paddleseg/models/backbones/lite_hrnet.py">LiteHRNet</a></li>
<li><a href="./paddleseg/models/backbones/xception_deeplab.py">XCeption</a></li>
<li><a href="./paddleseg/models/backbones/vision_transformer.py">VIT</a></li>
<li><a href="./paddleseg/models/backbones/mix_transformer.py">MixVIT</a></li>
<li><a href="./paddleseg/models/backbones/swin_transformer.py">Swin Transformer</a></li>
</ul>
</details>
<details><summary><b>Loss Functions</b></summary>
<ul>
<li><a href="./paddleseg/models/losses/binary_cross_entropy_loss.py">Binary CE Loss</a></li>
<li><a href="./paddleseg/models/losses/bootstrapped_cross_entropy_loss.py">Bootstrapped CE Loss</a></li>
<li><a href="./paddleseg/models/losses/cross_entropy_loss.py">Cross Entropy Loss</a></li>
<li><a href="./paddleseg/models/losses/decoupledsegnet_relax_boundary_loss.py">Relax Boundary Loss</a></li>
<li><a href="./paddleseg/models/losses/detail_aggregate_loss.py">Detail Aggregate Loss</a></li>
<li><a href="./paddleseg/models/losses/dice_loss.py">Dice Loss</a></li>
<li><a href="./paddleseg/models/losses/edge_attention_loss.py">Edge Attention Loss</a></li>
<li><a href="./paddleseg/models/losses/focal_loss.py">Focal Loss</a></li>
<li><a href="./paddleseg/models/losses/binary_cross_entropy_loss.py">MultiClassFocal Loss</a></li>
<li><a href="./paddleseg/models/losses/gscnn_dual_task_loss.py">GSCNN Dual Task Loss</a></li>
<li><a href="./paddleseg/models/losses/kl_loss.py">KL Loss</a></li>
<li><a href="./paddleseg/models/losses/l1_loss.py">L1 Loss</a></li>
<li><a href="./paddleseg/models/losses/lovasz_loss.py">Lovasz Loss</a></li>
<li><a href="./paddleseg/models/losses/mean_square_error_loss.py">MSE Loss</a></li>
<li><a href="./paddleseg/models/losses/ohem_cross_entropy_loss.py">OHEM CE Loss</a></li>
<li><a href="./paddleseg/models/losses/pixel_contrast_cross_entropy_loss.py">Pixel Contrast CE Loss</a></li>
<li><a href="./paddleseg/models/losses/point_cross_entropy_loss.py">Point CE Loss</a></li>
<li><a href="./paddleseg/models/losses/rmi_loss.py">RMI Loss</a></li>
<li><a href="./paddleseg/models/losses/semantic_connectivity_loss.py">Connectivity Loss</a></li>
</ul>
</details>
<details><summary><b>Evaluation Metrics</b></summary>
<ul>
<li>mIoU</li>
<li>Accuracy</li>
<li>Kappa</li>
<li>Dice</li>
<li>AUC_ROC</li>
</ul>
</details>
</td>
<td>
<details><summary><b>Supported Datasets</b></summary>
<ul>
<li><a href="./paddleseg/datasets/ade.py">ADE20K</a></li>
<li><a href="./paddleseg/datasets/cityscapes.py">Cityscapes</a></li>
<li><a href="./paddleseg/datasets/cocostuff.py">COCO Stuff</a></li>
<li><a href="./paddleseg/datasets/voc.py">Pascal VOC</a></li>
<li><a href="./paddleseg/datasets/eg1800.py">EG1800</a></li>
<li><a href="./paddleseg/datasets/pascal_context.py">Pascal Context</a></li>
<li><a href="./paddleseg/datasets/supervisely.py">SUPERVISELY</a></li>
<li><a href="./paddleseg/datasets/optic_disc_seg.py">OPTIC DISC SEG</a></li>
<li><a href="./paddleseg/datasets/chase_db1.py">CHASE_DB1</a></li>
<li><a href="./paddleseg/datasets/hrf.py">HRF</a></li>
<li><a href="./paddleseg/datasets/drive.py">DRIVE</a></li>
<li><a href="./paddleseg/datasets/stare.py">STARE</a></li>
<li><a href="./paddleseg/datasets/pp_humanseg14k.py">PP-HumanSeg14K</a></li>
<li><a href="./paddleseg/datasets/pssl.py">PSSL</a></li>
</ul>
</details>
<details><summary><b>Data Augmentation</b></summary>
<ul>
<li>Flipping</li>
<li>Resize</li>
<li>ResizeByLong</li>
<li>ResizeByShort</li>
<li>LimitLong</li>
<li>ResizeRangeScaling</li>
<li>ResizeStepScaling</li>
<li>Normalize</li>
<li>Padding</li>
<li>PaddingByAspectRatio</li>
<li>RandomPaddingCrop</li>
<li>RandomCenterCrop</li>
<li>ScalePadding</li>
<li>RandomNoise</li>
<li>RandomBlur</li>
<li>RandomRotation</li>
<li>RandomScaleAspect</li>
<li>RandomDistort</li>
<li>RandomAffine</li>
</ul>
</details>
</td>
<td>
<details><summary><b>Model Selection Tool</b></summary>
<ul>
<li><a href="./configs/smrt">PaddleSMRT</a></li>
</ul>
</details>
<details><summary><b>Human Segmentation Models</b></summary>
<ul>
<li><a href="./contrib/PP-HumanSeg/README_cn.md">PP-HumanSegV1</a></li>
<li><a href="./contrib/PP-HumanSeg/README_cn.md">PP-HumanSegV2</a></li>
</ul>
</details>
<details><summary><b>3D Medical Segmentation Models</b></summary>
<ul>
<li><a href="./contrib/MedicalSeg/configs/lung_coronavirus">VNet</a></li>
<li><a href="./contrib/MedicalSeg/configs/msd_brain_seg">UNETR</a></li>
<li><a href="./contrib/MedicalSeg/configs/acdc">nnFormer</a></li>
<li><a href="./contrib/MedicalSeg/configs/nnunet/msd_lung">nnUNet-D</a></li>
<li><a href="./contrib/MedicalSeg/configs/synapse">TransUNet</a></li>
<li><a href="./contrib/MedicalSeg/configs/synapse">SwinUNet</a></li>
</ul>
</details>
<details><summary><b>Cityscapes SOTA Model</b></summary>
<ul>
<li><a href="./contrib/CityscapesSOTA">HMSA</a></li>
</ul>
</details>
<details><summary><b>CVPR Champion Models</b></summary>
<ul>
<li><a href="./contrib/AutoNUE">MLA Transformer</a></li>
</ul>
</details>
<details><summary><b>Domain Adaptation</b></summary>
<ul>
<li><a href="./contrib/DomainAdaptation">PixMatch</a></li>
</ul>
</details>
</td>
</tr>
</tbody>
</table>
## <img src="https://user-images.githubusercontent.com/48054808/157801371-9a9a8c65-1690-4123-985a-e0559a7f9494.png" width="20"/> 产业级分割模型库
<details>
<summary><b>High-Accuracy Semantic Segmentation Models</b></summary>

#### High-accuracy models with high segmentation mIoU and heavy inference compute, suitable for server-side GPUs and devices such as Jetson.

| Model | Backbone | Cityscapes mIoU (%) | V100 TRT Inference Speed (FPS) | Config |
|:-------- |:--------:|:---------------------:|:---------------------:|:--------:|
| FCN | HRNet_W18 | 78.97 | 24.43 | [yml](./configs/fcn/) |
| FCN | HRNet_W48 | 80.70 | 10.16 | [yml](./configs/fcn/) |
| DeepLabV3 | ResNet50_OS8 | 79.90 | 4.56 | [yml](./configs/deeplabv3/) |
| DeepLabV3 | ResNet101_OS8 | 80.85 | 3.2 | [yml](./configs/deeplabv3/) |
| DeepLabV3P | ResNet50_OS8 | 80.36 | 6.58 | [yml](./configs/deeplabv3p/) |
| DeepLabV3P | ResNet101_OS8 | 81.10 | *3.94* | [yml](./configs/deeplabv3p/) |
| OCRNet :star2: | HRNet_W18 | 80.67 | 13.26 | [yml](./configs/ocrnet/) |
| OCRNet | HRNet_W48 | 82.15 | 6.17 | [yml](./configs/ocrnet/) |
| CCNet | ResNet101_OS8 | 80.95 | 3.24 | [yml](./configs/ccnet/) |
Test conditions:

* Speed on V100: Nvidia V100 GPU, PaddleInference Python API, TensorRT enabled, FP32, input size 1x3x1024x2048.
</details>
<details>
<summary><b>Lightweight Semantic Segmentation Models</b></summary>

#### Lightweight models with medium mIoU and medium inference compute, deployable on server-side GPUs, server-side x86 CPUs, and mobile ARM CPUs.

| Model | Backbone | Cityscapes mIoU (%) | V100 TRT Inference Speed (FPS) | Snapdragon 855 Inference Speed (FPS) | Config |
|:-------- |:--------:|:---------------------:|:---------------------:|:-----------------:|:--------:|
| PP-LiteSeg :star2: | STDC1 | 77.04 | 69.82 | 17.22 | [yml](./configs/pp_liteseg/) |
| PP-LiteSeg :star2: | STDC2 | 79.04 | 54.53 | 11.75 | [yml](./configs/pp_liteseg/) |
| BiSeNetV1 | - | 75.19 | 14.67 | 1.53 |[yml](./configs/bisenetv1/) |
| BiSeNetV2 | - | 73.19 | 61.83 | 13.67 |[yml](./configs/bisenet/) |
| STDCSeg | STDC1 | 74.74 | 62.24 | 14.51 |[yml](./configs/stdcseg/) |
| STDCSeg | STDC2 | 77.60 | 51.15 | 10.95 |[yml](./configs/stdcseg/) |
| DDRNet_23 | - | 79.85 | 42.64 | 7.68 |[yml](./configs/ddrnet/) |
| HarDNet | - | 79.03 | 30.3 | 5.44 |[yml](./configs/hardnet/) |
| SFNet | ResNet18_OS8 | 78.72 | *10.72* | - | [yml](./configs/sfnet/) |
Test conditions:

* Speed on V100: Nvidia V100 GPU, PaddleInference Python API, TensorRT enabled, FP32, input size 1x3x1024x2048.
* Speed on Snapdragon 855: Xiaomi 9 phone, PaddleLite C++ API, ARMv8 build, single thread, input size 1x3x256x256.
</details>
<details>
<summary><b>Ultra-Lightweight Semantic Segmentation Models</b></summary>

#### Ultra-lightweight models with moderate mIoU and low inference compute, suitable for server-side x86 CPUs and mobile ARM CPUs.

| Model | Backbone | Cityscapes mIoU (%) | V100 TRT Inference Speed (FPS) | Snapdragon 855 Inference Speed (FPS) | Config |
|:-------- |:--------:|:---------------------:|:---------------------:|:-----------------:|:--------:|
| MobileSeg | MobileNetV2 | 73.94 | 67.57 | 27.01 | [yml](./configs/mobileseg/) |
| MobileSeg :star2: | MobileNetV3 | 73.47 | 67.39 | 32.90 | [yml](./configs/mobileseg/) |
| MobileSeg | Lite_HRNet_18 | 70.75 | *10.5* | 13.05 | [yml](./configs/mobileseg/) |
| MobileSeg | ShuffleNetV2_x1_0 | 69.46 | *37.09* | 39.61 | [yml](./configs/mobileseg/) |
| MobileSeg | GhostNet_x1_0 | 71.88 | *35.58* | 38.74 | [yml](./configs/mobileseg/) |
Test conditions:

* Speed on V100: Nvidia V100 GPU, PaddleInference Python API, TensorRT enabled, FP32, input size 1x3x1024x2048.
* Speed on Snapdragon 855: Xiaomi 9 phone, PaddleLite C++ API, ARMv8 build, single thread, input size 1x3x256x256.
</details>
## <img src="./docs/images/teach.png" width="20"/> 使用教程
**入门教程**
* [安装说明](./docs/install_cn.md)
* [快速体验](./docs/quick_start_cn.md)
* [20分钟快速上手PaddleSeg](./docs/whole_process_cn.md)
* [模型库](./docs/model_zoo_overview_cn.md)
**基础教程**
* 准备数据集
* [准备公开数据集](./docs/data/pre_data_cn.md)
* [准备自定义数据集](./docs/data/marker/marker_cn.md)
* [EISeg 数据标注](./EISeg)
* [准备配置文件](./docs/config/pre_config_cn.md)
* [模型训练](./docs/train/train_cn.md)
* [模型评估](./docs/evaluation/evaluate_cn.md)
* [模型预测](./docs/predict/predict_cn.md)
* 模型导出
* [导出预测模型](./docs/model_export_cn.md)
* [导出ONNX模型](./docs/model_export_onnx_cn.md)
* 模型部署
* [Paddle Inference部署(Python)](./docs/deployment/inference/python_inference_cn.md)
* [Paddle Inference部署(C++)](./docs/deployment/inference/cpp_inference_cn.md)
* [Paddle Lite部署](./docs/deployment/lite/lite_cn.md)
* [Paddle Serving部署](./docs/deployment/serving/serving_cn.md)
* [Paddle JS部署](./docs/deployment/web/web_cn.md)
* [推理Benchmark](./docs/deployment/inference/infer_benchmark_cn.md)
**进阶教程**
* [训练技巧](./docs/train/train_tricks_cn.md)
* 模型压缩
* [量化](./docs/deployment/slim/quant/quant_cn.md)
* [蒸馏](./docs/deployment/slim/distill/distill_cn.md)
* [裁剪](./docs/deployment/slim/prune/prune_cn.md)
* [常见问题汇总](./docs/faq/faq/faq_cn.md)
**欢迎贡献**
* [API文档](./docs/apis/README_CN.md)
* 二次开发教程
* [配置文件详解](./docs/design/use/use_cn.md)
* [如何创造自己的模型](./docs/design/create/add_new_model_cn.md)
* 模型贡献
* [提交PR说明](./docs/pr/pr/pr_cn.md)
* [模型PR规范](./docs/pr/pr/style_cn.md)
## <img src="./docs/images/anli.png" width="20"/> 特色能力
- [交互式分割](./EISeg)
- [图像抠图](./Matting)
- [人像分割](./contrib/PP-HumanSeg)
- [3D医疗分割](./contrib/MedicalSeg)
- [Cityscapes打榜模型](./contrib/CityscapesSOTA)
- [全景分割](./contrib/PanopticDeepLab)
- [CVPR冠军模型](./contrib/AutoNUE)
- [领域自适应](./contrib/DomainAdaptation)
## <img src="https://user-images.githubusercontent.com/48054808/157801371-9a9a8c65-1690-4123-985a-e0559a7f9494.png" width="20"/> 产业实践范例
* [使用PP-HumanSegV2进行人像分割](https://aistudio.baidu.com/aistudio/projectdetail/4504982?contributionType=1)
* [使用PP-HumanSegV1进行人像分割](https://aistudio.baidu.com/aistudio/projectdetail/2189481?channelType=0&channel=0)
* [使用PP-LiteSeg进行遥感道路分割](https://aistudio.baidu.com/aistudio/projectdetail/3873145?contributionType=1)
* [PaddleSeg实战之小数据集3D椎骨分割](https://aistudio.baidu.com/aistudio/projectdetail/3878920)
* [PaddleSeg实战之车道线图像分割](https://aistudio.baidu.com/aistudio/projectdetail/1752986?channelType=0&channel=0)
* [PaddleSeg动态图API使用教程](https://aistudio.baidu.com/aistudio/projectdetail/1339458?channelType=0&channel=0)
* [10分钟上手PaddleSeg](https://aistudio.baidu.com/aistudio/projectdetail/1672610?channelType=0&channel=0)
## License

This project is released under the Apache 2.0 license.
## Community Contributions

- Many thanks to [jm12138](https://github.com/jm12138) for contributing the U<sup>2</sup>-Net model.
- Many thanks to [zjhellofss](https://github.com/zjhellofss) (傅莘莘) for contributing the Attention U-Net model and the Dice loss function.
- Many thanks to [liuguoyu666](https://github.com/liguoyu666) for contributing the U-Net++ model.
- Many thanks to [yazheng0307](https://github.com/yazheng0307) (刘正) for contributing the quick-start tutorial documentation.
- Many thanks to [CuberrChen](https://github.com/CuberrChen) for contributing STDC (rethink BiSeNet), PointRend, and the Detail Aggregate loss function.
- Many thanks to [stuartchen1949](https://github.com/stuartchen1949) for contributing SegNet.
- Many thanks to [justld](https://github.com/justld) (郎督) for contributing UPerNet, DDRNet, CCNet, ESPNetV2, DMNet, ENCNet, HRNet_W48_Contrast, BiSeNetV1, FastFCN, SECrossEntropyLoss, and PixelContrastCrossEntropyLoss.
- Many thanks to [Herman-Hu-saber](https://github.com/Herman-Hu-saber) (胡慧明) for helping contribute ESPNetV2.
- Many thanks to [zhangjin12138](https://github.com/zhangjin12138) for contributing the RandomCenterCrop data augmentation.
- Many thanks to [simuler](https://github.com/simuler) for contributing ESPNetV1.
- Many thanks to [ETTR123](https://github.com/ETTR123) (张恺) for contributing ENet and PFPNNet.
## <img src="./docs/images/yinyong.png" width="20"/> 学术引用
如果我们的项目在学术上帮助到你,请考虑以下引用:
```latex
@misc{liu2021paddleseg,
title={PaddleSeg: A High-Efficient Development Toolkit for Image Segmentation},
author={Yi Liu and Lutao Chu and Guowei Chen and Zewu Wu and Zeyu Chen and Baohua Lai and Yuying Hao},
year={2021},
eprint={2101.06175},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{paddleseg2019,
title={PaddleSeg, End-to-end image segmentation kit based on PaddlePaddle},
author={PaddlePaddle Authors},
howpublished = {\url{https://github.com/PaddlePaddle/PaddleSeg}},
year={2019}
}
```
简体中文 | [English](README_EN.md)
<div align="center">
<p align="center">
<img src="./docs/images/paddleseg_logo.png" align="middle" width = "500" />
</p>
**飞桨高性能图像分割开发套件,端到端完成从训练到部署的全流程图像分割应用。**
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
[![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleSeg.svg)](https://github.com/PaddlePaddle/PaddleSeg/releases)
![python version](https://img.shields.io/badge/python-3.6+-orange.svg)
![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
![stars](https://img.shields.io/github/stars/PaddlePaddle/PaddleSeg?color=ccf)
</div>
<div align="center">
<img src="https://github.com/shiyutang/files/blob/9590ea6bfc36139982ce75b00d3b9f26713934dd/teasor.gif" width = "800" />
</div>
## <img src="./docs/images/seg_news_icon.png" width="20"/> 最新动态
* [2022-11-30] :fire: PaddleSeg 2.7版本发布!详细发版信息请参考[Release Note](https://github.com/PaddlePaddle/PaddleSeg/releases)
* 发布实时人像抠图模型[PP-MattingV2](./Matting/):推理速度提升44.6%,平均误差减小17.91%,完美超越此前SOTA模型,支持零成本开箱即用。
* 发布3D医疗影像分割方案[MedicalSegV2](./contrib/MedicalSeg/):涵盖3D医疗影像交互式标注工具EISeg-Med3D、3个高精分割模型,集成并优化前沿分割方案nnUNet-D。
* 官方发布轻量级语义分割模型[RTFormer](./configs/rtformer/):由百度提出并发表于NeurIPS 2022,在公开数据集上实现SOTA性能。
* [2022-07-20] PaddleSeg 2.6版本发布实时人像分割SOTA方案[PP-HumanSegV2](./contrib/PP-HumanSeg)、高性能智能标注工具[EISeg v1.0](./EISeg)正式版、ImageNet分割伪标签数据预训练方法PSSL,开源PP-MattingV1代码和预训练模型。
* [2022-04-20] PaddleSeg 2.5版本发布超轻量级语义分割模型[PP-LiteSeg](./configs/pp_liteseg),高精度抠图模型PP-MattingV1,3D医疗影像开发套件MedicalSegV1,交互式分割工具EISeg v0.5。
* [2022-01-20] PaddleSeg 2.4版本发布交互式分割工具EISeg v0.4,超轻量级人像分割方案PP-HumanSegV1,以及大规模视频会议数据集[PP-HumanSeg14K](./contrib/PP-HumanSeg/paper.md#pp-humanseg14k-a-large-scale-teleconferencing-video-dataset)
## <img src="https://user-images.githubusercontent.com/48054808/157795569-9fc77c85-732f-4870-9be0-99a7fe2cff27.png" width="20"/> 简介
**PaddleSeg**是基于飞桨PaddlePaddle的端到端图像分割套件,内置**45+模型算法****140+预训练模型**,支持**配置化驱动****API调用**开发方式,打通数据标注、模型开发、训练、压缩、部署的**全流程**,提供**语义分割、交互式分割、Matting、全景分割**四大分割能力,助力算法在医疗、工业、遥感、娱乐等场景落地应用。
<div align="center">
<img src="https://github.com/shiyutang/files/raw/main/teasor_new.gif" width = "800" />
</div>
## <img src="./docs/images/feature.png" width="20"/> 特性
* **高精度**:跟踪学术界的前沿分割技术,结合高精度训练的骨干网络,提供40+主流分割网络、140+的高质量预训练模型,效果优于其他开源实现。
* **高性能**:使用多进程异步I/O、多卡并行训练、评估等加速策略,结合飞桨核心框架的显存优化功能,大幅度减少分割模型的训练开销,让开发者更低成本、更高效地完成图像分割训练。
* **模块化**:源于模块化设计思想,解耦数据准备、分割模型、骨干网络、损失函数等不同组件,开发者可以基于实际应用场景出发,组装多样化的配置,满足不同性能和精度的要求。
* **全流程**:打通数据标注、模型开发、模型训练、模型压缩、模型部署全流程,经过业务落地的验证,让开发者完成一站式开发工作。
<div align="center">
<img src="https://user-images.githubusercontent.com/14087480/176379006-7f330e00-b6b0-480e-9df8-8fd1090da4cf.png" width = "800" />
</div>
## <img src="./docs/images/chat.png" width="20"/> 技术交流
* 如果大家有PaddleSeg的使用问题和功能建议, 可以通过[GitHub Issues](https://github.com/PaddlePaddle/PaddleSeg/issues)提issue。
* **欢迎加入PaddleSeg的微信用户群👫**(扫码填写简单问卷即可入群),大家可以**领取30G重磅学习大礼包🎁**,也可以和值班同学、各界大佬直接进行交流。
* 🔥 获取深度学习视频教程、图像分割论文合集
* 🔥 获取PaddleSeg的历次直播视频,最新发版信息和直播动态
* 🔥 获取PaddleSeg自建的人像分割数据集,整理的开源数据集
* 🔥 获取PaddleSeg在垂类场景的预训练模型和应用合集,涵盖人像分割、交互式分割等等
* 🔥 获取PaddleSeg的全流程产业实操范例,包括质检缺陷分割、抠图Matting、道路分割等等
<div align="center">
<img src="https://user-images.githubusercontent.com/48433081/174770518-e6b5319b-336f-45d9-9817-da12b1961fb1.jpg" width = "200" />
</div>
## <img src="./docs/images/model.png" width="20"/> 产品矩阵
<table align="center">
<tbody>
<tr align="center" valign="bottom">
<td>
<b>模型</b>
</td>
<td colspan="2">
<b>组件</b>
</td>
<td>
<b>特色案例</b>
</td>
</tr>
<tr valign="top">
<td>
<ul>
<details><summary><b>语义分割模型</b></summary>
<ul>
<li><a href="./configs/pp_liteseg">PP-LiteSeg</a> </li>
<li><a href="./configs/deeplabv3p">DeepLabV3P</a> </li>
<li><a href="./configs/ocrnet">OCRNet</a> </li>
<li><a href="./configs/mobileseg">MobileSeg</a> </li>
<li><a href="./configs/ann">ANN</a></li>
<li><a href="./configs/attention_unet">Att U-Net</a></li>
<li><a href="./configs/bisenetv1">BiSeNetV1</a></li>
<li><a href="./configs/bisenet">BiSeNetV2</a></li>
<li><a href="./configs/ccnet">CCNet</a></li>
<li><a href="./configs/danet">DANet</a></li>
<li><a href="./configs/ddrnet">DDRNet</a></li>
<li><a href="./configs/decoupled_segnet">DecoupledSeg</a></li>
<li><a href="./configs/deeplabv3">DeepLabV3</a></li>
<li><a href="./configs/dmnet">DMNet</a></li>
<li><a href="./configs/dnlnet">DNLNet</a></li>
<li><a href="./configs/emanet">EMANet</a></li>
<li><a href="./configs/encnet">ENCNet</a></li>
<li><a href="./configs/enet">ENet</a></li>
<li><a href="./configs/espnetv1">ESPNetV1</a></li>
<li><a href="./configs/espnet">ESPNetV2</a></li>
<li><a href="./configs/fastfcn">FastFCN</a></li>
<li><a href="./configs/fastscnn">Fast-SCNN</a></li>
<li><a href="./configs/gcnet">GCNet</a></li>
<li><a href="./configs/ginet">GINet</a></li>
<li><a href="./configs/glore">GloRe</a></li>
<li><a href="./configs/gscnn">GSCNN</a></li>
<li><a href="./configs/hardnet">HarDNet</a></li>
<li><a href="./configs/fcn">HRNet-FCN</a></li>
<li><a href="./configs/hrnet_w48_contrast">HRNet-Contrast</a></li>
<li><a href="./configs/isanet">ISANet</a></li>
<li><a href="./configs/pfpn">PFPNNet</a></li>
<li><a href="./configs/pointrend">PointRend</a></li>
<li><a href="./configs/portraitnet">PotraitNet</a></li>
<li><a href="./configs/pp_humanseg_lite">PP-HumanSeg-Lite</a></li>
<li><a href="./configs/pspnet">PSPNet</a></li>
<li><a href="./configs/pssl">PSSL</a></li>
<li><a href="./configs/segformer">SegFormer</a></li>
<li><a href="./configs/segmenter">SegMenter</a></li>
<li><a href="./configs/segmne">SegNet</a></li>
<li><a href="./configs/setr">SETR</a></li>
<li><a href="./configs/sfnet">SFNet</a></li>
<li><a href="./configs/stdcseg">STDCSeg</a></li>
<li><a href="./configs/u2net">U<sup>2</sup>Net</a></li>
<li><a href="./configs/unet">UNet</a></li>
<li><a href="./configs/unet_plusplus">UNet++</a></li>
<li><a href="./configs/unet_3plus">UNet3+</a></li>
<li><a href="./configs/upernet">UperNet</a></li>
<li><a href="./configs/rtformer">RTFormer</a></li>
<li><a href="./configs/uhrnet">UHRNet</a></li>
<li><a href="./configs/topformer">TopFormer</a></li>
<li><a href="./configs/mscale_ocrnet">MscaleOCRNet-PSA</a></li>
</ul>
</details>
<details><summary><b>交互式分割模型</b></summary>
<ul>
<li><a href="./EISeg">EISeg</a></li>
<li>RITM</li>
<li>EdgeFlow</li>
</ul>
</details>
<details><summary><b>图像抠图模型</b></summary>
<ul>
<li><a href="./Matting/configs/ppmattingv2">PP-MattingV2</a></li>
<li><a href="./Matting/configs/ppmatting">PP-MattingV1</a></li>
<li><a href="./Matting/configs/dim/dim-vgg16.yml">DIM</a></li>
<li><a href="./Matting/configs/modnet/modnet-hrnet_w18.yml">MODNet</a></li>
<li><a href="./Matting/configs/human_matting/human_matting-resnet34_vd.yml">PP-HumanMatting</a></li>
</ul>
</details>
<details><summary><b>全景分割</b></summary>
<ul>
<li><a href="./contrib/PanopticDeepLab/README_CN.md">Panoptic-DeepLab</a></li>
</ul>
</details>
</td>
<td>
<details><summary><b>骨干网络</b></summary>
<ul>
<li><a href="./paddleseg/models/backbones/hrnet.py">HRNet</a></li>
<li><a href="./paddleseg/models/backbones/resnet_cd.py">ResNet</a></li>
<li><a href="./paddleseg/models/backbones/stdcnet.py">STDCNet</a></li>
<li><a href="./paddleseg/models/backbones/mobilenetv2.py">MobileNetV2</a></li>
<li><a href="./paddleseg/models/backbones/mobilenetv3.py">MobileNetV3</a></li>
<li><a href="./paddleseg/models/backbones/shufflenetv2.py">ShuffleNetV2</a></li>
<li><a href="./paddleseg/models/backbones/ghostnet.py">GhostNet</a></li>
<li><a href="./paddleseg/models/backbones/lite_hrnet.py">LiteHRNet</a></li>
<li><a href="./paddleseg/models/backbones/xception_deeplab.py">XCeption</a></li>
<li><a href="./paddleseg/models/backbones/vision_transformer.py">VIT</a></li>
<li><a href="./paddleseg/models/backbones/mix_transformer.py">MixVIT</a></li>
<li><a href="./paddleseg/models/backbones/swin_transformer.py">Swin Transformer</a></li>
</ul>
</details>
<details><summary><b>损失函数</b></summary>
<ul>
<li><a href="./paddleseg/models/losses/binary_cross_entropy_loss.py">Binary CE Loss</a></li>
<li><a href="./paddleseg/models/losses/bootstrapped_cross_entropy_loss.py">Bootstrapped CE Loss</a></li>
<li><a href="./paddleseg/models/losses/cross_entropy_loss.py">Cross Entropy Loss</a></li>
<li><a href="./paddleseg/models/losses/decoupledsegnet_relax_boundary_loss.py">Relax Boundary Loss</a></li>
<li><a href="./paddleseg/models/losses/detail_aggregate_loss.py">Detail Aggregate Loss</a></li>
<li><a href="./paddleseg/models/losses/dice_loss.py">Dice Loss</a></li>
<li><a href="./paddleseg/models/losses/edge_attention_loss.py">Edge Attention Loss</a></li>
<li><a href="./paddleseg/models/losses/focal_loss.py">Focal Loss</a></li>
<li><a href="./paddleseg/models/losses/binary_cross_entropy_loss.py">MultiClassFocal Loss</a></li>
<li><a href="./paddleseg/models/losses/gscnn_dual_task_loss.py">GSCNN Dual Task Loss</a></li>
<li><a href="./paddleseg/models/losses/kl_loss.py">KL Loss</a></li>
<li><a href="./paddleseg/models/losses/l1_loss.py">L1 Loss</a></li>
<li><a href="./paddleseg/models/losses/lovasz_loss.py">Lovasz Loss</a></li>
<li><a href="./paddleseg/models/losses/mean_square_error_loss.py">MSE Loss</a></li>
<li><a href="./paddleseg/models/losses/ohem_cross_entropy_loss.py">OHEM CE Loss</a></li>
<li><a href="./paddleseg/models/losses/pixel_contrast_cross_entropy_loss.py">Pixel Contrast CE Loss</a></li>
<li><a href="./paddleseg/models/losses/point_cross_entropy_loss.py">Point CE Loss</a></li>
<li><a href="./paddleseg/models/losses/rmi_loss.py">RMI Loss</a></li>
<li><a href="./paddleseg/models/losses/semantic_connectivity_loss.py">Connectivity Loss</a></li>
</ul>
</details>
<details><summary><b>评估指标</b></summary>
<ul>
<li>mIoU</li>
<li>Accuracy</li>
<li>Kappa</li>
<li>Dice</li>
<li>AUC_ROC</li>
</ul>
</details>
</td>
<td>
<details><summary><b>支持数据集</b></summary>
<ul>
<li><a href="./paddleseg/datasets/ade.py">ADE20K</a></li>
<li><a href="./paddleseg/datasets/cityscapes.py">Cityscapes</a></li>
<li><a href="./paddleseg/datasets/cocostuff.py">COCO Stuff</a></li>
<li><a href="./paddleseg/datasets/voc.py">Pascal VOC</a></li>
<li><a href="./paddleseg/datasets/eg1800.py">EG1800</a></li>
<li><a href="./paddleseg/datasets/pascal_context.py">Pascal Context</a></li>
<li><a href="./paddleseg/datasets/supervisely.py">SUPERVISELY</a></li>
<li><a href="./paddleseg/datasets/optic_disc_seg.py">OPTIC DISC SEG</a></li>
<li><a href="./paddleseg/datasets/chase_db1.py">CHASE_DB1</a></li>
<li><a href="./paddleseg/datasets/hrf.py">HRF</a></li>
<li><a href="./paddleseg/datasets/drive.py">DRIVE</a></li>
<li><a href="./paddleseg/datasets/stare.py">STARE</a></li>
<li><a href="./paddleseg/datasets/pp_humanseg14k.py">PP-HumanSeg14K</a></li>
<li><a href="./paddleseg/datasets/pssl.py">PSSL</a></li>
</ul>
</details>
<details><summary><b>数据增强</b></summary>
<ul>
<li>Flipping</li>
<li>Resize</li>
<li>ResizeByLong</li>
<li>ResizeByShort</li>
<li>LimitLong</li>
<li>ResizeRangeScaling</li>
<li>ResizeStepScaling</li>
<li>Normalize</li>
<li>Padding</li>
<li>PaddingByAspectRatio</li>
<li>RandomPaddingCrop</li>
<li>RandomCenterCrop</li>
<li>ScalePadding</li>
<li>RandomNoise</li>
<li>RandomBlur</li>
<li>RandomRotation</li>
<li>RandomScaleAspect</li>
<li>RandomDistort</li>
<li>RandomAffine</li>
</ul>
</details>
</td>
<td>
<details><summary><b>模型选型工具</b></summary>
<ul>
<li><a href="./configs/smrt">PaddleSMRT</a></li>
</ul>
</details>
<details><summary><b>人像分割模型</b></summary>
<ul>
<li><a href="./contrib/PP-HumanSeg/README_cn.md">PP-HumanSegV1</a></li>
<li><a href="./contrib/PP-HumanSeg/README_cn.md">PP-HumanSegV2</a></li>
</ul>
</details>
<details><summary><b>3D医疗分割模型</b></summary>
<ul>
<li><a href="./contrib/MedicalSeg/configs/lung_coronavirus">VNet</a></li>
<li><a href="./contrib/MedicalSeg/configs/msd_brain_seg">UNETR</a></li>
<li><a href="./contrib/MedicalSeg/configs/acdc">nnFormer</a></li>
<li><a href="./contrib/MedicalSeg/configs/nnunet/msd_lung">nnUNet-D</a></li>
<li><a href="./contrib/MedicalSeg/configs/synapse">TransUNet</a></li>
<li><a href="./contrib/MedicalSeg/configs/synapse">SwinUNet</a></li>
</ul>
</details>
<details><summary><b>Cityscapes SOTA Models</b></summary>
<ul>
<li><a href="./contrib/CityscapesSOTA">HMSA</a></li>
</ul>
</details>
<details><summary><b>CVPR Champion Models</b></summary>
<ul>
<li><a href="./contrib/AutoNUE">MLA Transformer</a></li>
</ul>
</details>
<details><summary><b>Domain Adaptation</b></summary>
<ul>
<li><a href="./contrib/DomainAdaptation">PixMatch</a></li>
</ul>
</details>
</td>
</tr>
</tbody>
</table>
## <img src="https://user-images.githubusercontent.com/48054808/157801371-9a9a8c65-1690-4123-985a-e0559a7f9494.png" width="20"/> 产业级分割模型库
<details>
<summary><b>高精度语义分割模型</b></summary>
#### 高精度模型,分割mIoU高、推理算量大,适合部署在服务器端GPU和Jetson等设备。
| Model | Backbone | Cityscapes mIoU(%) | V100 TRT Inference Speed(FPS) | Config File |
|:-------- |:--------:|:---------------------:|:---------------------:|:--------:|
| FCN | HRNet_W18 | 78.97 | 24.43 | [yml](./configs/fcn/) |
| FCN | HRNet_W48 | 80.70 | 10.16 | [yml](./configs/fcn/) |
| DeepLabV3 | ResNet50_OS8 | 79.90 | 4.56 | [yml](./configs/deeplabv3/) |
| DeepLabV3 | ResNet101_OS8 | 80.85 | 3.2 | [yml](./configs/deeplabv3/) |
| DeepLabV3P | ResNet50_OS8 | 80.36 | 6.58 | [yml](./configs/deeplabv3p/) |
| DeepLabV3P | ResNet101_OS8 | 81.10 | *3.94* | [yml](./configs/deeplabv3p/) |
| OCRNet :star2: | HRNet_w18 | 80.67 | 13.26 | [yml](./configs/ocrnet/) |
| OCRNet | HRNet_w48 | 82.15 | 6.17 | [yml](./configs/ocrnet/) |
| CCNet | ResNet101_OS8 | 80.95 | 3.24 | [yml](./configs/ccnet/) |
Testing conditions:
* Speed on Nvidia V100 GPU: tested with the PaddleInference Python API, TensorRT enabled, FP32 precision, input dimensions 1x3x1024x2048.
</details>
<details>
<summary><b>Lightweight Semantic Segmentation Models</b></summary>
#### These lightweight models achieve medium segmentation mIoU at a medium inference cost, and can be deployed on server-side GPUs, server-side x86 CPUs and mobile ARM CPUs.
| Model | Backbone | Cityscapes mIoU(%) | V100 TRT Inference Speed(FPS) | Snapdragon 855 Inference Speed(FPS) | Config File |
|:-------- |:--------:|:---------------------:|:---------------------:|:-----------------:|:--------:|
| PP-LiteSeg :star2: | STDC1 | 77.04 | 69.82 | 17.22 | [yml](./configs/pp_liteseg/) |
| PP-LiteSeg :star2: | STDC2 | 79.04 | 54.53 | 11.75 | [yml](./configs/pp_liteseg/) |
| BiSeNetV1 | - | 75.19 | 14.67 | 1.53 |[yml](./configs/bisenetv1/) |
| BiSeNetV2 | - | 73.19 | 61.83 | 13.67 |[yml](./configs/bisenet/) |
| STDCSeg | STDC1 | 74.74 | 62.24 | 14.51 |[yml](./configs/stdcseg/) |
| STDCSeg | STDC2 | 77.60 | 51.15 | 10.95 |[yml](./configs/stdcseg/) |
| DDRNet_23 | - | 79.85 | 42.64 | 7.68 |[yml](./configs/ddrnet/) |
| HarDNet | - | 79.03 | 30.3 | 5.44 |[yml](./configs/hardnet/) |
| SFNet | ResNet18_OS8 | 78.72 | *10.72* | - | [yml](./configs/sfnet/) |
Testing conditions:
* Speed on Nvidia V100 GPU: tested with the PaddleInference Python API, TensorRT enabled, FP32 precision, input dimensions 1x3x1024x2048.
* Speed on Snapdragon 855: tested on a Xiaomi 9 phone with the PaddleLite C++ API, ARMv8 build, single thread, input dimensions 1x3x256x256.
</details>
<details>
<summary><b>Super Lightweight Semantic Segmentation Models</b></summary>
#### These super lightweight models trade some segmentation mIoU for a very low inference cost, and are suitable for deployment on server-side x86 CPUs and mobile ARM CPUs.
| Model | Backbone | Cityscapes mIoU(%) | V100 TRT Inference Speed(FPS) | Snapdragon 855 Inference Speed(FPS) | Config File |
|:-------- |:--------:|:---------------------:|:---------------------:|:-----------------:|:--------:|
| MobileSeg | MobileNetV2 | 73.94 | 67.57 | 27.01 | [yml](./configs/mobileseg/) |
| MobileSeg :star2: | MobileNetV3 | 73.47 | 67.39 | 32.90 | [yml](./configs/mobileseg/) |
| MobileSeg | Lite_HRNet_18 | 70.75 | *10.5* | 13.05 | [yml](./configs/mobileseg/) |
| MobileSeg | ShuffleNetV2_x1_0 | 69.46 | *37.09* | 39.61 | [yml](./configs/mobileseg/) |
| MobileSeg | GhostNet_x1_0 | 71.88 | *35.58* | 38.74 | [yml](./configs/mobileseg/) |
Testing conditions:
* Speed on Nvidia V100 GPU: tested with the PaddleInference Python API, TensorRT enabled, FP32 precision, input dimensions 1x3x1024x2048.
* Speed on Snapdragon 855: tested on a Xiaomi 9 phone with the PaddleLite C++ API, ARMv8 build, single thread, input dimensions 1x3x256x256.
</details>
## <img src="./docs/images/teach.png" width="20"/> 使用教程
**入门教程**
* [安装说明](./docs/install_cn.md)
* [快速体验](./docs/quick_start_cn.md)
* [20分钟快速上手PaddleSeg](./docs/whole_process_cn.md)
* [模型库](./docs/model_zoo_overview_cn.md)
**基础教程**
* 准备数据集
* [准备公开数据集](./docs/data/pre_data_cn.md)
* [准备自定义数据集](./docs/data/marker/marker_cn.md)
* [EISeg 数据标注](./EISeg)
* [准备配置文件](./docs/config/pre_config_cn.md)
* [模型训练](./docs/train/train_cn.md)
* [模型评估](./docs/evaluation/evaluate_cn.md)
* [模型预测](./docs/predict/predict_cn.md)
* 模型导出
* [导出预测模型](./docs/model_export_cn.md)
* [导出ONNX模型](./docs/model_export_onnx_cn.md)
* 模型部署
* [Paddle Inference部署(Python)](./docs/deployment/inference/python_inference_cn.md)
* [Paddle Inference部署(C++)](./docs/deployment/inference/cpp_inference_cn.md)
* [Paddle Lite部署](./docs/deployment/lite/lite_cn.md)
* [Paddle Serving部署](./docs/deployment/serving/serving_cn.md)
* [Paddle JS部署](./docs/deployment/web/web_cn.md)
* [推理Benchmark](./docs/deployment/inference/infer_benchmark_cn.md)
**进阶教程**
* [训练技巧](./docs/train/train_tricks_cn.md)
* 模型压缩
* [量化](./docs/deployment/slim/quant/quant_cn.md)
* [蒸馏](./docs/deployment/slim/distill/distill_cn.md)
* [裁剪](./docs/deployment/slim/prune/prune_cn.md)
* [常见问题汇总](./docs/faq/faq/faq_cn.md)
**欢迎贡献**
* [API文档](./docs/apis/README_CN.md)
* 二次开发教程
* [配置文件详解](./docs/design/use/use_cn.md)
* [如何创造自己的模型](./docs/design/create/add_new_model_cn.md)
* 模型贡献
* [提交PR说明](./docs/pr/pr/pr_cn.md)
* [模型PR规范](./docs/pr/pr/style_cn.md)
## <img src="./docs/images/anli.png" width="20"/> 特色能力
- [交互式分割](./EISeg)
- [图像抠图](./Matting)
- [人像分割](./contrib/PP-HumanSeg)
- [3D医疗分割](./contrib/MedicalSeg)
- [Cityscapes打榜模型](./contrib/CityscapesSOTA)
- [全景分割](./contrib/PanopticDeepLab)
- [CVPR冠军模型](./contrib/AutoNUE)
- [领域自适应](./contrib/DomainAdaptation)
## <img src="https://user-images.githubusercontent.com/48054808/157801371-9a9a8c65-1690-4123-985a-e0559a7f9494.png" width="20"/> 产业实践范例
* [使用PP-HumanSegV2进行人像分割](https://aistudio.baidu.com/aistudio/projectdetail/4504982?contributionType=1)
* [使用PP-HumanSegV1进行人像分割](https://aistudio.baidu.com/aistudio/projectdetail/2189481?channelType=0&channel=0)
* [使用PP-LiteSeg进行遥感道路分割](https://aistudio.baidu.com/aistudio/projectdetail/3873145?contributionType=1)
* [PaddleSeg实战之小数据集3D椎骨分割](https://aistudio.baidu.com/aistudio/projectdetail/3878920)
* [PaddleSeg实战之车道线图像分割](https://aistudio.baidu.com/aistudio/projectdetail/1752986?channelType=0&channel=0)
* [PaddleSeg动态图API使用教程](https://aistudio.baidu.com/aistudio/projectdetail/1339458?channelType=0&channel=0)
* [10分钟上手PaddleSeg](https://aistudio.baidu.com/aistudio/projectdetail/1672610?channelType=0&channel=0)
## License
This project is released under the Apache 2.0 license.
## Community Contributions
- Many thanks to [jm12138](https://github.com/jm12138) for contributing the U<sup>2</sup>-Net model.
- Many thanks to [zjhellofss](https://github.com/zjhellofss) (Fu Shenshen) for contributing the Attention U-Net model and the Dice loss function.
- Many thanks to [liuguoyu666](https://github.com/liguoyu666) for contributing the U-Net++ model.
- Many thanks to [yazheng0307](https://github.com/yazheng0307) (Liu Zheng) for contributing the quick-start tutorial document.
- Many thanks to [CuberrChen](https://github.com/CuberrChen) for contributing STDC (rethink BiSeNet), PointRend, and the Detail Aggregate loss function.
- Many thanks to [stuartchen1949](https://github.com/stuartchen1949) for contributing SegNet.
- Many thanks to [justld](https://github.com/justld) (Lang Du) for contributing UPerNet, DDRNet, CCNet, ESPNetV2, DMNet, ENCNet, HRNet_W48_Contrast, BiSeNetV1, FastFCN, SECrossEntropyLoss and PixelContrastCrossEntropyLoss.
- Many thanks to [Herman-Hu-saber](https://github.com/Herman-Hu-saber) (Hu Huiming) for contributing to ESPNetV2.
- Many thanks to [zhangjin12138](https://github.com/zhangjin12138) for contributing the data augmentation method RandomCenterCrop.
- Many thanks to [simuler](https://github.com/simuler) for contributing ESPNetV1.
- Many thanks to [ETTR123](https://github.com/ETTR123) (Zhang Kai) for contributing ENet and PFPNNet.
## <img src="./docs/images/yinyong.png" width="20"/> 学术引用
如果我们的项目在学术上帮助到你,请考虑以下引用:
```latex
@misc{liu2021paddleseg,
title={PaddleSeg: A High-Efficient Development Toolkit for Image Segmentation},
author={Yi Liu and Lutao Chu and Guowei Chen and Zewu Wu and Zeyu Chen and Baohua Lai and Yuying Hao},
year={2021},
eprint={2101.06175},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{paddleseg2019,
title={PaddleSeg, End-to-end image segmentation kit based on PaddlePaddle},
author={PaddlePaddle Authors},
howpublished = {\url{https://github.com/PaddlePaddle/PaddleSeg}},
year={2019}
}
```
English | [简体中文](README_CN.md)
<div align="center">
<p align="center">
<img src="./docs/images/paddleseg_logo.png" align="middle" width = "500" />
</p>
**A High-Efficient Development Toolkit for Image Segmentation based on [PaddlePaddle](https://github.com/paddlepaddle/paddle).**
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
[![Version](https://img.shields.io/github/release/PaddlePaddle/PaddleSeg.svg)](https://github.com/PaddlePaddle/PaddleSeg/releases)
![python version](https://img.shields.io/badge/python-3.6+-orange.svg)
![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
![stars](https://img.shields.io/github/stars/PaddlePaddle/PaddleSeg?color=ccf)
</div>
<div align="center">
<img src="https://github.com/shiyutang/files/blob/9590ea6bfc36139982ce75b00d3b9f26713934dd/teasor.gif" width = "800" />
</div>
## <img src="./docs/images/seg_news_icon.png" width="20"/> News
<ul class="nobull">
<li>[2022-11-30] :fire: PaddleSeg v2.7 is released! More details in <a href="https://github.com/PaddlePaddle/PaddleSeg/releases">Release Notes</a>.</li>
<ul>
<li>Release <a href="./Matting/">PP-MattingV2</a>, a real-time human matting model with SOTA performance. Compared to previous models, the mean error is reduced by 17.91%, the inference speed is accelerated by 44.6% on GPU. </li>
<li>Release <a href="./contrib/MedicalSeg/">MedicalSegV2</a>, a superior 3D medical image segmentation solution, including an intelligent annotation toolkit called EISeg-Med3D, several state-of-the-art models and an optimized nnUNet-D with high performance.</li>
<li>Release <a href="./configs/rtformer/">RTFormer</a>, a real-time semantic segmentation model accepted by NeurIPS 2022. RTFormer combines the advantages of CNN and Transformer modules, and it achieves SOTA trade-off between performance and efficiency on several datasets.</li>
</ul>
<li>[2022-07-20] PaddleSeg v2.6 released a real-time human segmentation SOTA solution <a href="./contrib/PP-HumanSeg">PP-HumanSegV2</a>, a stable version of the semi-automatic segmentation annotation tool <a href="./EISeg">EISeg v1.0</a>, a pseudo-label pre-training method PSSL, and the source code of PP-MattingV1.</li>
<li>[2022-04-20] PaddleSeg v2.5 released a real-time semantic segmentation model <a href="./configs/pp_liteseg">PP-LiteSeg</a>, a trimap-free image matting model PP-MattingV1, and an easy-to-use solution for 3D medical image segmentation MedicalSegV1.</li>
<li>[2022-01-20] We released PaddleSeg v2.4 with EISeg v0.4 and PP-HumanSegV1, including the open-sourced dataset <a href="./contrib/PP-HumanSeg/paper.md#pp-humanseg14k-a-large-scale-teleconferencing-video-dataset">PP-HumanSeg14K</a>.</li>
</ul>
## <img src="https://user-images.githubusercontent.com/48054808/157795569-9fc77c85-732f-4870-9be0-99a7fe2cff27.png" width="20"/> Introduction
PaddleSeg is an end-to-end, high-efficiency development toolkit for image segmentation based on PaddlePaddle, which helps both developers and researchers in the whole process of designing segmentation models, training models, optimizing performance and inference speed, and deploying models. A lot of well-trained models and various real-world applications in both industry and academia help users conveniently build hands-on experience in image segmentation.
<div align="center">
<img src="https://github.com/shiyutang/files/raw/main/teasor_new.gif" width = "800" />
</div>
## <img src="./docs/images/feature.png" width="20"/> Features
* **High-Performance Models**: Following state-of-the-art segmentation methods and using high-performance backbones, we provide 40+ models and 140+ high-quality pre-trained models, which perform better than other open-source implementations.
* **High Efficiency**: PaddleSeg provides multi-process asynchronous I/O, multi-card parallel training and evaluation, and other acceleration strategies, combined with the memory optimization of PaddlePaddle. These greatly reduce the training overhead of segmentation models and allow developers to train image segmentation models at lower cost and higher efficiency.
* **Modular Design**: PaddleSeg follows a modular design philosophy. Based on actual application scenarios, developers can assemble diversified training configurations from *data augmentation strategies*, *segmentation models*, *backbone networks*, *loss functions* and other components to meet different accuracy and performance requirements.
* **Complete Flow**: PaddleSeg supports image labeling, model design, model training, model compression and model deployment. With the help of PaddleSeg, developers can easily complete all tasks in the pipeline.
<div align="center">
<img src="https://user-images.githubusercontent.com/14087480/176402154-390e5815-1a87-41be-9374-9139c632eb66.png" width = "800" />
</div>
## <img src="./docs/images/chat.png" width="20"/> Community
* If you have any questions, suggestions or feature requests, please create an issue in [GitHub Issues](https://github.com/PaddlePaddle/PaddleSeg/issues).
* You are welcome to scan the following QR code and join the PaddleSeg WeChat group to communicate with us.
<div align="center">
<img src="https://user-images.githubusercontent.com/48433081/174770518-e6b5319b-336f-45d9-9817-da12b1961fb1.jpg" width = "200" />
</div>
## <img src="./docs/images/model.png" width="20"/> Overview
<table align="center">
<tbody>
<tr align="center" valign="bottom">
<td>
<b>Models</b>
</td>
<td colspan="2">
<b>Components</b>
</td>
<td>
<b>Special Cases</b>
</td>
</tr>
<tr valign="top">
<td>
<ul>
<details><summary><b>Semantic Segmentation</b></summary>
<ul>
<li><a href="./configs/pp_liteseg">PP-LiteSeg</a> </li>
<li><a href="./configs/deeplabv3p">DeepLabV3P</a> </li>
<li><a href="./configs/ocrnet">OCRNet</a> </li>
<li><a href="./configs/mobileseg">MobileSeg</a> </li>
<li><a href="./configs/ann">ANN</a></li>
<li><a href="./configs/attention_unet">Att U-Net</a></li>
<li><a href="./configs/bisenetv1">BiSeNetV1</a></li>
<li><a href="./configs/bisenet">BiSeNetV2</a></li>
<li><a href="./configs/ccnet">CCNet</a></li>
<li><a href="./configs/danet">DANet</a></li>
<li><a href="./configs/ddrnet">DDRNet</a></li>
<li><a href="./configs/decoupled_segnet">DecoupledSeg</a></li>
<li><a href="./configs/deeplabv3">DeepLabV3</a></li>
<li><a href="./configs/dmnet">DMNet</a></li>
<li><a href="./configs/dnlnet">DNLNet</a></li>
<li><a href="./configs/emanet">EMANet</a></li>
<li><a href="./configs/encnet">ENCNet</a></li>
<li><a href="./configs/enet">ENet</a></li>
<li><a href="./configs/espnetv1">ESPNetV1</a></li>
<li><a href="./configs/espnet">ESPNetV2</a></li>
<li><a href="./configs/fastfcn">FastFCN</a></li>
<li><a href="./configs/fastscnn">Fast-SCNN</a></li>
<li><a href="./configs/gcnet">GCNet</a></li>
<li><a href="./configs/ginet">GINet</a></li>
<li><a href="./configs/glore">GloRe</a></li>
<li><a href="./configs/gscnn">GSCNN</a></li>
<li><a href="./configs/hardnet">HarDNet</a></li>
<li><a href="./configs/fcn">HRNet-FCN</a></li>
<li><a href="./configs/hrnet_w48_contrast">HRNet-Contrast</a></li>
<li><a href="./configs/isanet">ISANet</a></li>
<li><a href="./configs/pfpn">PFPNNet</a></li>
<li><a href="./configs/pointrend">PointRend</a></li>
<li><a href="./configs/portraitnet">PotraitNet</a></li>
<li><a href="./configs/pp_humanseg_lite">PP-HumanSeg-Lite</a></li>
<li><a href="./configs/pspnet">PSPNet</a></li>
<li><a href="./configs/pssl">PSSL</a></li>
<li><a href="./configs/segformer">SegFormer</a></li>
<li><a href="./configs/segmenter">SegMenter</a></li>
<li><a href="./configs/segmne">SegNet</a></li>
<li><a href="./configs/setr">SETR</a></li>
<li><a href="./configs/sfnet">SFNet</a></li>
<li><a href="./configs/stdcseg">STDCSeg</a></li>
<li><a href="./configs/u2net">U<sup>2</sup>Net</a></li>
<li><a href="./configs/unet">UNet</a></li>
<li><a href="./configs/unet_plusplus">UNet++</a></li>
<li><a href="./configs/unet_3plus">UNet3+</a></li>
<li><a href="./configs/upernet">UperNet</a></li>
<li><a href="./configs/rtformer">RTFormer</a></li>
<li><a href="./configs/uhrnet">UHRNet</a></li>
<li><a href="./configs/topformer">TopFormer</a></li>
<li><a href="./configs/mscale_ocrnet">MscaleOCRNet-PSA</a></li>
</ul>
</details>
<details><summary><b>Interactive Segmentation</b></summary>
<ul>
<li><a href="./EISeg">EISeg</a></li>
<li>RITM</li>
<li>EdgeFlow</li>
</ul>
</details>
<details><summary><b>Image Matting</b></summary>
<ul>
<li><a href="./Matting/configs/ppmattingv2">PP-MattingV2</a></li>
<li><a href="./Matting/configs/ppmatting">PP-MattingV1</a></li>
<li><a href="./Matting/configs/dim/dim-vgg16.yml">DIM</a></li>
<li><a href="./Matting/configs/modnet/modnet-hrnet_w18.yml">MODNet</a></li>
<li><a href="./Matting/configs/human_matting/human_matting-resnet34_vd.yml">PP-HumanMatting</a></li>
</ul>
</details>
<details><summary><b>Panoptic Segmentation</b></summary>
<ul>
<li><a href="./contrib/PanopticDeepLab/README_CN.md">Panoptic-DeepLab</a></li>
</ul>
</details>
</td>
<td>
<details><summary><b>Backbones</b></summary>
<ul>
<li><a href="./paddleseg/models/backbones/hrnet.py">HRNet</a></li>
<li><a href="./paddleseg/models/backbones/resnet_cd.py">ResNet</a></li>
<li><a href="./paddleseg/models/backbones/stdcnet.py">STDCNet</a></li>
<li><a href="./paddleseg/models/backbones/mobilenetv2.py">MobileNetV2</a></li>
<li><a href="./paddleseg/models/backbones/mobilenetv3.py">MobileNetV3</a></li>
<li><a href="./paddleseg/models/backbones/shufflenetv2.py">ShuffleNetV2</a></li>
<li><a href="./paddleseg/models/backbones/ghostnet.py">GhostNet</a></li>
<li><a href="./paddleseg/models/backbones/lite_hrnet.py">LiteHRNet</a></li>
<li><a href="./paddleseg/models/backbones/xception_deeplab.py">XCeption</a></li>
<li><a href="./paddleseg/models/backbones/vision_transformer.py">VIT</a></li>
<li><a href="./paddleseg/models/backbones/mix_transformer.py">MixVIT</a></li>
<li><a href="./paddleseg/models/backbones/swin_transformer.py">Swin Transformer</a></li>
</ul>
</details>
<details><summary><b>Losses</b></summary>
<ul>
<li><a href="./paddleseg/models/losses/binary_cross_entropy_loss.py">Binary CE Loss</a></li>
<li><a href="./paddleseg/models/losses/bootstrapped_cross_entropy_loss.py">Bootstrapped CE Loss</a></li>
<li><a href="./paddleseg/models/losses/cross_entropy_loss.py">Cross Entropy Loss</a></li>
<li><a href="./paddleseg/models/losses/decoupledsegnet_relax_boundary_loss.py">Relax Boundary Loss</a></li>
<li><a href="./paddleseg/models/losses/detail_aggregate_loss.py">Detail Aggregate Loss</a></li>
<li><a href="./paddleseg/models/losses/dice_loss.py">Dice Loss</a></li>
<li><a href="./paddleseg/models/losses/edge_attention_loss.py">Edge Attention Loss</a></li>
<li><a href="./paddleseg/models/losses/focal_loss.py">Focal Loss</a></li>
<li><a href="./paddleseg/models/losses/binary_cross_entropy_loss.py">MultiClassFocal Loss</a></li>
<li><a href="./paddleseg/models/losses/gscnn_dual_task_loss.py">GSCNN Dual Task Loss</a></li>
<li><a href="./paddleseg/models/losses/kl_loss.py">KL Loss</a></li>
<li><a href="./paddleseg/models/losses/l1_loss.py">L1 Loss</a></li>
<li><a href="./paddleseg/models/losses/lovasz_loss.py">Lovasz Loss</a></li>
<li><a href="./paddleseg/models/losses/mean_square_error_loss.py">MSE Loss</a></li>
<li><a href="./paddleseg/models/losses/ohem_cross_entropy_loss.py">OHEM CE Loss</a></li>
<li><a href="./paddleseg/models/losses/pixel_contrast_cross_entropy_loss.py">Pixel Contrast CE Loss</a></li>
<li><a href="./paddleseg/models/losses/point_cross_entropy_loss.py">Point CE Loss</a></li>
<li><a href="./paddleseg/models/losses/rmi_loss.py">RMI Loss</a></li>
<li><a href="./paddleseg/models/losses/semantic_connectivity_loss.py">Connectivity Loss</a></li>
</ul>
</details>
<details><summary><b>Metrics</b></summary>
<ul>
<li>mIoU</li>
<li>Accuracy</li>
<li>Kappa</li>
<li>Dice</li>
<li>AUC_ROC</li>
</ul>
</details>
</td>
<td>
<details><summary><b>Datasets</b></summary>
<ul>
<li><a href="./paddleseg/datasets/ade.py">ADE20K</a></li>
<li><a href="./paddleseg/datasets/cityscapes.py">Cityscapes</a></li>
<li><a href="./paddleseg/datasets/cocostuff.py">COCO Stuff</a></li>
<li><a href="./paddleseg/datasets/voc.py">Pascal VOC</a></li>
<li><a href="./paddleseg/datasets/eg1800.py">EG1800</a></li>
<li><a href="./paddleseg/datasets/pascal_context.py">Pascal Context</a></li>
<li><a href="./paddleseg/datasets/supervisely.py">SUPERVISELY</a></li>
<li><a href="./paddleseg/datasets/optic_disc_seg.py">OPTIC DISC SEG</a></li>
<li><a href="./paddleseg/datasets/chase_db1.py">CHASE_DB1</a></li>
<li><a href="./paddleseg/datasets/hrf.py">HRF</a></li>
<li><a href="./paddleseg/datasets/drive.py">DRIVE</a></li>
<li><a href="./paddleseg/datasets/stare.py">STARE</a></li>
<li><a href="./paddleseg/datasets/pp_humanseg14k.py">PP-HumanSeg14K</a></li>
<li><a href="./paddleseg/datasets/pssl.py">PSSL</a></li>
</ul>
</details>
<details><summary><b>Data Augmentation</b></summary>
<ul>
<li>Flipping</li>
<li>Resize</li>
<li>ResizeByLong</li>
<li>ResizeByShort</li>
<li>LimitLong</li>
<li>ResizeRangeScaling</li>
<li>ResizeStepScaling</li>
<li>Normalize</li>
<li>Padding</li>
<li>PaddingByAspectRatio</li>
<li>RandomPaddingCrop</li>
<li>RandomCenterCrop</li>
<li>ScalePadding</li>
<li>RandomNoise</li>
<li>RandomBlur</li>
<li>RandomRotation</li>
<li>RandomScaleAspect</li>
<li>RandomDistort</li>
<li>RandomAffine</li>
</ul>
</details>
</td>
<td>
<details><summary><b>Model Selection Tool</b></summary>
<ul>
<li><a href="./configs/smrt">PaddleSMRT</a></li>
</ul>
</details>
<details><summary><b>Human Segmentation</b></summary>
<ul>
<li><a href="./contrib/PP-HumanSeg/README_cn.md">PP-HumanSegV1</a></li>
<li><a href="./contrib/PP-HumanSeg/README_cn.md">PP-HumanSegV2</a></li>
</ul>
</details>
<details><summary><b>MedicalSeg</b></summary>
<ul>
<li><a href="./contrib/MedicalSeg/configs/lung_coronavirus">VNet</a></li>
<li><a href="./contrib/MedicalSeg/configs/msd_brain_seg">UNETR</a></li>
<li><a href="./contrib/MedicalSeg/configs/acdc">nnFormer</a></li>
<li><a href="./contrib/MedicalSeg/configs/nnunet/msd_lung">nnUNet-D</a></li>
<li><a href="./contrib/MedicalSeg/configs/synapse">TransUNet</a></li>
<li><a href="./contrib/MedicalSeg/configs/synapse">SwinUNet</a></li>
</ul>
</details>
<details><summary><b>Cityscapes SOTA Model</b></summary>
<ul>
<li><a href="./contrib/CityscapesSOTA">HMSA</a></li>
</ul>
</details>
<details><summary><b>CVPR Champion Model</b></summary>
<ul>
<li><a href="./contrib/AutoNUE">MLA Transformer</a></li>
</ul>
</details>
<details><summary><b>Domain Adaptation</b></summary>
<ul>
<li><a href="./contrib/DomainAdaptation">PixMatch</a></li>
</ul>
</details>
</td>
</tr>
</tbody>
</table>
## <img src="https://user-images.githubusercontent.com/48054808/157801371-9a9a8c65-1690-4123-985a-e0559a7f9494.png" width="20"/> Industrial Segmentation Models
<details>
<summary><b>High Accuracy Semantic Segmentation Models</b></summary>
#### These models achieve high accuracy at a high inference cost, and are intended for deployment on server-side GPUs and devices such as Jetson.
| Model | Backbone | Cityscapes mIoU(%) | V100 TRT Inference Speed(FPS) | Config File |
|:-------- |:--------:|:---------------------:|:-------------------------------:|:------------:|
| FCN | HRNet_W18 | 78.97 | 24.43 | [yml](./configs/fcn/) |
| FCN | HRNet_W48 | 80.70 | 10.16 | [yml](./configs/fcn/) |
| DeepLabV3 | ResNet50_OS8 | 79.90 | 4.56 | [yml](./configs/deeplabv3/) |
| DeepLabV3 | ResNet101_OS8 | 80.85 | 3.2 | [yml](./configs/deeplabv3/) |
| DeepLabV3P | ResNet50_OS8 | 80.36 | 6.58 | [yml](./configs/deeplabv3p/) |
| DeepLabV3P | ResNet101_OS8 | 81.10 | *3.94* | [yml](./configs/deeplabv3p/) |
| OCRNet :star2: | HRNet_w18 | 80.67 | 13.26 | [yml](./configs/ocrnet/) |
| OCRNet | HRNet_w48 | 82.15 | 6.17 | [yml](./configs/ocrnet/) |
| CCNet | ResNet101_OS8 | 80.95 | 3.24 | [yml](./configs/ccnet/) |
Note that:
* Inference speed on Nvidia V100 GPU: tested with the PaddleInference Python API, TensorRT enabled, FP32 precision, input dimensions 1x3x1024x2048.
</details>
<details>
<summary><b>Lightweight Semantic Segmentation Models</b></summary>
#### The segmentation accuracy and inference speed of these models are medium. They can be deployed on GPU, X86 CPU and ARM CPU.
| Model | Backbone | Cityscapes mIoU(%) | V100 TRT Inference Speed(FPS) | Snapdragon 855 Inference Speed(FPS) | Config File |
|:-------- |:--------:|:---------------------:|:-------------------------------:|:-----------------:|:--------:|
| PP-LiteSeg :star2: | STDC1 | 77.04 | 69.82 | 17.22 | [yml](./configs/pp_liteseg/) |
| PP-LiteSeg :star2: | STDC2 | 79.04 | 54.53 | 11.75 | [yml](./configs/pp_liteseg/) |
| BiSeNetV1 | - | 75.19 | 14.67 | 1.53 |[yml](./configs/bisenetv1/) |
| BiSeNetV2 | - | 73.19 | 61.83 | 13.67 |[yml](./configs/bisenet/) |
| STDCSeg | STDC1 | 74.74 | 62.24 | 14.51 |[yml](./configs/stdcseg/) |
| STDCSeg | STDC2 | 77.60 | 51.15 | 10.95 |[yml](./configs/stdcseg/) |
| DDRNet_23 | - | 79.85 | 42.64 | 7.68 |[yml](./configs/ddrnet/) |
| HarDNet | - | 79.03 | 30.3 | 5.44 |[yml](./configs/hardnet/) |
| SFNet | ResNet18_OS8 | 78.72 | *10.72* | - | [yml](./configs/sfnet/) |
Note that:
* Inference speed on Nvidia V100 GPU: tested with the PaddleInference Python API, TensorRT enabled, FP32 precision, input dimensions 1x3x1024x2048.
* Inference speed on Snapdragon 855: tested with the PaddleLite C++ API, ARMv8 build, single thread, input dimensions 1x3x256x256.
</details>
<details>
<summary><b>Super Lightweight Semantic Segmentation Models</b></summary>
#### These super lightweight semantic segmentation models are designed for X86 CPU and ARM CPU.
| Model | Backbone | Cityscapes mIoU(%) | V100 TRT Inference Speed(FPS) | Snapdragon 855 Inference Speed(FPS) | Config File |
|:-------- |:--------:|:---------------------:|:-------------------------------:|:-----------------------------------:|:-----------:|
| MobileSeg | MobileNetV2 | 73.94 | 67.57 | 27.01 | [yml](./configs/mobileseg/) |
| MobileSeg :star2: | MobileNetV3 | 73.47 | 67.39 | 32.90 | [yml](./configs/mobileseg/) |
| MobileSeg | Lite_HRNet_18 | 70.75 | *10.5* | 13.05 | [yml](./configs/mobileseg/) |
| MobileSeg | ShuffleNetV2_x1_0 | 69.46 | *37.09* | 39.61 | [yml](./configs/mobileseg/) |
| MobileSeg | GhostNet_x1_0 | 71.88 | *35.58* | 38.74 | [yml](./configs/mobileseg/) |
Note that:
* Inference speed on Nvidia V100 GPU: tested with the PaddleInference Python API, TensorRT enabled, FP32 precision, input dimensions 1x3x1024x2048.
* Inference speed on Snapdragon 855: tested with the PaddleLite C++ API, ARMv8 build, single thread, input dimensions 1x3x256x256.
</details>
## <img src="./docs/images/teach.png" width="20"/> Tutorials
**Introductory Tutorials**
* [Installation](./docs/install.md)
* [Quick Start](./docs/quick_start.md)
* [A 20 minutes Blitz to Learn PaddleSeg](./docs/whole_process.md)
* [Model Zoo](./docs/model_zoo_overview.md)
**Basic Tutorials**
* Data Preparation
* [Prepare Public Dataset](./docs/data/pre_data.md)
* [Prepare Customized Dataset](./docs/data/marker/marker.md)
* [Label Data with EISeg](./EISeg)
* [Config Preparation](./docs/config/pre_config.md)
* [Model Training](/docs/train/train.md)
* [Model Evaluation](./docs/evaluation/evaluate.md)
* [Model Prediction](./docs/predict/predict.md)
* Model Export
* [Export Inference Model](./docs/model_export.md)
* [Export ONNX Model](./docs/model_export_onnx.md)
* Model Deploy
* [Paddle Inference (Python)](./docs/deployment/inference/python_inference.md)
* [Paddle Inference (C++)](./docs/deployment/inference/cpp_inference.md)
* [Paddle Lite](./docs/deployment/lite/lite.md)
* [Paddle Serving](./docs/deployment/serving/serving.md)
* [Paddle JS](./docs/deployment/web/web.md)
* [Benchmark](./docs/deployment/inference/infer_benchmark.md)
**Advanced Tutorials**
* [Training Tricks](./docs/train/train_tricks.md)
* Model Compression
* [Quantization](./docs/deployment/slim/quant/quant.md)
* [Distillation](./docs/deployment/slim/distill/distill.md)
* [Prune](./docs/deployment/slim/prune/prune.md)
* [FAQ](./docs/faq/faq/faq.md)
**Welcome to Contribute**
* [API Documentation](./docs/apis)
* Advanced Development
* [Detailed Configuration File](./docs/design/use/use.md)
* [Create Your Own Model](./docs/design/create/add_new_model.md)
* Pull Request
* [PR Tutorial](./docs/pr/pr/pr.md)
* [PR Style](./docs/pr/pr/style.md)
## <img src="./docs/images/anli.png" width="20"/> Special Features
* [Interactive Segmentation](./EISeg)
* [Image Matting](./Matting)
* [PP-HumanSeg](./contrib/PP-HumanSeg)
* [3D Medical Segmentation](./contrib/MedicalSeg)
* [Cityscapes SOTA](./contrib/CityscapesSOTA)
* [Panoptic Segmentation](./contrib/PanopticDeepLab)
* [CVPR Champion Solution](./contrib/AutoNUE)
* [Domain Adaptation](./contrib/DomainAdaptation)
## <img src="https://user-images.githubusercontent.com/48054808/157801371-9a9a8c65-1690-4123-985a-e0559a7f9494.png" width="20"/> Industrial Tutorial Examples
* [Using PP-HumanSegV2 for Human Segmentation](https://aistudio.baidu.com/aistudio/projectdetail/4504982?contributionType=1)
* [Using PP-HumanSegV1 for Human Segmentation](https://aistudio.baidu.com/aistudio/projectdetail/2189481?channelType=0&channel=0)
* [Using PP-LiteSeg for Road Segmentation](https://aistudio.baidu.com/aistudio/projectdetail/3873145?contributionType=1)
* [Using PaddleSeg for Mini-dataset Spine Segmentation](https://aistudio.baidu.com/aistudio/projectdetail/3878920)
* [Using PaddleSeg for Lane Segmentation](https://aistudio.baidu.com/aistudio/projectdetail/1752986?channelType=0&channel=0)
* [PaddleSeg Dynamic Graph API Tutorial](https://aistudio.baidu.com/aistudio/projectdetail/1339458?channelType=0&channel=0)
* [Learn PaddleSeg in 10 Minutes](https://aistudio.baidu.com/aistudio/projectdetail/1672610?channelType=0&channel=0)
## License
PaddleSeg is released under the [Apache 2.0 license](LICENSE).
## Acknowledgement
* Thanks [jm12138](https://github.com/jm12138) for contributing U<sup>2</sup>-Net.
* Thanks [zjhellofss](https://github.com/zjhellofss) (Fu Shenshen) for contributing Attention U-Net, and Dice Loss.
* Thanks [liuguoyu666](https://github.com/liguoyu666), [geoyee](https://github.com/geoyee) for contributing U-Net++ and U-Net3+.
* Thanks [yazheng0307](https://github.com/yazheng0307) (LIU Zheng) for contributing quick-start document.
* Thanks [CuberrChen](https://github.com/CuberrChen) for contributing STDC(rethink BiSeNet), PointRend and DetailAggregateLoss.
* Thanks [stuartchen1949](https://github.com/stuartchen1949) for contributing SegNet.
* Thanks [justld](https://github.com/justld) (Lang Du) for contributing UPerNet, DDRNet, CCNet, ESPNetV2, DMNet, ENCNet, HRNet_W48_Contrast, FastFCN, BiSeNetV1, SECrossEntropyLoss and PixelContrastCrossEntropyLoss.
* Thanks [Herman-Hu-saber](https://github.com/Herman-Hu-saber) (Hu Huiming) for contributing ESPNetV2.
* Thanks [zhangjin12138](https://github.com/zhangjin12138) for contributing RandomCenterCrop.
* Thanks [simuler](https://github.com/simuler) for contributing ESPNetV1.
* Thanks [ETTR123](https://github.com/ETTR123) (Zhang Kai) for contributing ENet, PFPNNet.
## Citation
If you find our project useful in your research, please consider citing:
```latex
@misc{liu2021paddleseg,
title={PaddleSeg: A High-Efficient Development Toolkit for Image Segmentation},
author={Yi Liu and Lutao Chu and Guowei Chen and Zewu Wu and Zeyu Chen and Baohua Lai and Yuying Hao},
year={2021},
eprint={2101.06175},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
@misc{paddleseg2019,
title={PaddleSeg, End-to-end image segmentation kit based on PaddlePaddle},
author={PaddlePaddle Contributors},
howpublished = {\url{https://github.com/PaddlePaddle/PaddleSeg}},
year={2019}
}
```
English | [简体中文](README_cn.md)
The config files of different models are saved in `PaddleSeg/configs`.
PaddleSeg uses these config files to train, validate and export models.
# Configuration items
----
### train_dataset
> Training dataset
>
> * parameter
> * type: Dataset type; please refer to the training config files for the supported values
> * **others**: Please refer to the corresponding model training configuration file
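For instance, a minimal `train_dataset` entry might look like the sketch below; the dataset type, root path and transform settings are illustrative (they mirror the ADE20K config reproduced near the end of this document):
```yaml
train_dataset:
  type: ADE20K                              # dataset class registered in PaddleSeg
  dataset_root: data/ADEChallengeData2016/  # where the dataset is stored
  transforms:                               # augmentation pipeline applied to each sample
    - type: RandomPaddingCrop
      crop_size: [512, 512]
    - type: RandomHorizontalFlip
    - type: Normalize
  mode: train
```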
----
### val_dataset
> Evaluation dataset
> * parameter
> * type: Dataset type; please refer to the training config files for the supported values
> * **others**: Please refer to the corresponding model training configuration file
>
----
### batch_size
> The amount of data fed to the model in each training iteration on a single card
----
### iters
> The number of training iterations
----
### optimizer
> Training optimizer
> * parameter
> * type : supports all official optimizers of PaddlePaddle
> * weight_decay : L2 regularization value
> * **others** : Please refer to [Optimizer](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/Overview_cn.html)
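As a sketch, an SGD optimizer with momentum and L2 regularization, using values that match several configs shipped in this repository:
```yaml
optimizer:
  type: sgd            # any optimizer officially supported by PaddlePaddle
  momentum: 0.9
  weight_decay: 4.0e-5 # L2 regularization value
```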
----
### lr_scheduler
> Learning rate
> * parameter
> * type : learning rate type, supports 10 strategies, namely 'PolynomialDecay', 'PiecewiseDecay', 'StepDecay', 'CosineAnnealingDecay', 'ExponentialDecay', 'InverseTimeDecay', 'LinearWarmup', 'MultiStepDecay', 'NaturalExpDecay', 'NoamDecay'.
> * **others** : Please refer to [Paddle official LRScheduler document](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/lr/LRScheduler_cn.html)
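For example, the polynomial decay schedule that many configs in this repository use:
```yaml
lr_scheduler:
  type: PolynomialDecay  # one of the 10 supported strategies
  learning_rate: 0.01    # initial learning rate
  end_lr: 0              # learning rate at the end of training
  power: 0.9             # polynomial power
```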
----
### learning_rate (this configuration is not recommended and will be deprecated in the future; use lr_scheduler instead)
> Learning rate
> * parameter
> * value: initial learning rate value
> * decay: decay configuration
> * type: decay type, currently only poly is supported
> * power: decay rate
> * end_lr: final learning rate
----
### loss
> Loss function
> * parameter
> * types: list of loss functions
> * type: Loss function type; please refer to the loss function library for the supported values
> * ignore_index : the class to ignore during training. The default value is the same as the ignore_index of train_dataset, and it is recommended not to set this item. If it is set, the ignore_index in loss and in train_dataset must be the same.
> * coef : the list of coefficients corresponding to the loss functions in types
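As an illustration: for a model that returns two logits (for example a main head plus an auxiliary head), each loss in `types` and each coefficient in `coef` is matched to one output. The 0.4 weight below is a hypothetical choice, not taken from a specific config in this document:
```yaml
loss:
  types:
    - type: CrossEntropyLoss  # loss applied to the main output
    - type: CrossEntropyLoss  # loss applied to the auxiliary output
  coef: [1, 0.4]              # per-output weights (hypothetical values)
```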
----
### model
> Model to be trained
> * parameter
> * type : model type, please refer to the model library for the more details
> * **others**: Please refer to the corresponding model training configuration file
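A sketch of a `model` entry; FCN with an HRNet_W18 backbone is used here purely as an illustration:
```yaml
model:
  type: FCN          # model class registered in PaddleSeg
  backbone:
    type: HRNet_W18  # backbone class and its parameters
  num_classes: 19    # number of segmentation classes
```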
---
### export
> Model export configuration
> * parameter
> * transforms: Preprocessing operations during prediction. The supported transforms are the same as those of train_dataset, val_dataset, etc. If this item is not set, only normalization is applied to the data by default.
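A sketch of an `export` entry; the resize target below is illustrative:
```yaml
export:
  transforms:          # preprocessing applied to inputs at prediction time
    - type: Resize
      target_size: [512, 512]
    - type: Normalize  # also the default behavior when this item is omitted
```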
For more details, please refer to [detailed configuration file](../docs/design/use/use.md)
The config files of many models are saved under the `PaddleSeg/configs` directory. PaddleSeg uses these config files for model training, testing and export.
# Configuration items
----
### train_dataset
> Training dataset
>
> * Parameters
> * type : dataset type; please refer to the training config files for the supported values
> * **others** : please refer to the corresponding model training config file
----
### val_dataset
> Evaluation dataset
> * Parameters
> * type : dataset type; please refer to the training config files for the supported values
> * **others** : please refer to the corresponding model training config file
>
----
### batch_size
> The amount of data in each training iteration on a single card
----
### iters
> The number of training steps
----
### optimizer
> Training optimizer
> * Parameters
> * type : optimizer type; all current official Paddle optimizers are supported
> * weight_decay : L2 regularization value
> * **others** : please refer to the [official Paddle Optimizer documentation](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/Overview_cn.html)
----
### lr_scheduler
> Learning rate
> * Parameters
> * type : learning rate type; 10 strategies are supported, namely 'PolynomialDecay', 'PiecewiseDecay', 'StepDecay', 'CosineAnnealingDecay', 'ExponentialDecay', 'InverseTimeDecay', 'LinearWarmup', 'MultiStepDecay', 'NaturalExpDecay', 'NoamDecay'.
> * **others** : please refer to the [official Paddle LRScheduler documentation](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/paddle/optimizer/lr/LRScheduler_cn.html)
----
### learning_rate (this configuration is not recommended and will be deprecated in the future; use `lr_scheduler` instead)
> Learning rate
> * Parameters
> * value : initial learning rate
> * decay : decay configuration
> * type : decay type; currently only poly is supported
> * power : decay rate
> * end_lr : final learning rate
----
### loss
> Loss function
> * Parameters
> * types : list of loss functions
> * type : loss function type; please refer to the loss function library for the supported values
> * ignore_index : the class to ignore during training. The default value is the same as the ignore_index of `train_dataset`, and **it is recommended not to set this item**. If it is set, the ignore_index in `loss` and in `train_dataset` must be the same.
> * coef : the list of coefficients corresponding to the loss functions
----
### model
> Model to be trained
> * Parameters
> * type : model type; please refer to the model library for the supported values
> * **others** : please refer to the corresponding model training config file
---
### export
> Model export configuration
> * Parameters
> * transforms : preprocessing operations during prediction; the supported transforms are the same as those of `train_dataset`, `val_dataset`, etc. If this item is not set, only normalization and standardization are applied to the data by default.
For more details, please refer to the [detailed configuration documentation](../docs/design/use/use_cn.md)
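# ADE20K training config: 512x512 random crops, 80k iterations, SGD with polynomial decay, cross-entropy loss.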
batch_size: 4
iters: 80000
train_dataset:
type: ADE20K
dataset_root: data/ADEChallengeData2016/
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [512, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
mode: train
val_dataset:
type: ADE20K
dataset_root: data/ADEChallengeData2016/
transforms:
- type: Normalize
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 4.0e-5
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.01
end_lr: 0
power: 0.9
loss:
types:
- type: CrossEntropyLoss
coef: [1]
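# AutoNUE (IDD_Segmentation) dataset config: resize to 1920x1080, 1024x512 random crops, ImageNet mean/std normalization.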
train_dataset:
type: AutoNUE
dataset_root: data/IDD_Segmentation
transforms:
- type: Resize
target_size: [1920, 1080]
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [1024, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.25
brightness_prob: 1
contrast_range: 0.25
contrast_prob: 1
saturation_range: 0.25
saturation_prob: 1
hue_range: 63
hue_prob: 1
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: train
val_dataset:
type: AutoNUE
dataset_root: data/IDD_Segmentation
transforms:
- type: Resize
target_size: [1920, 1080]
- type: Normalize
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 0.0001
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.005
end_lr: 0
power: 2
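# CHASE_DB1 retinal vessel config: 128x128 random crops, Dice loss, AUC_ROC reported at test time.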
batch_size: 16
iters: 40000
train_dataset:
type: CHASEDB1
dataset_root: data/CHASE_DB1
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [128, 128]
- type: RandomHorizontalFlip
- type: RandomVerticalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
mode: train
val_dataset:
type: CHASEDB1
dataset_root: data/CHASE_DB1
transforms:
- type: Normalize
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 4.0e-5
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.01
end_lr: 0
power: 0.9
loss:
types:
- type: DiceLoss
coef: [1]
test_config:
auc_roc: True
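# Cityscapes training config (presumably the cityscapes.yml referenced by the _base_ variants below): 1024x512 random crops, cross-entropy loss.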
batch_size: 2
iters: 80000
train_dataset:
type: Cityscapes
dataset_root: data/cityscapes
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [1024, 512]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
mode: train
val_dataset:
type: Cityscapes
dataset_root: data/cityscapes
transforms:
- type: Normalize
mode: val
optimizer:
type: sgd
momentum: 0.9
weight_decay: 4.0e-5
lr_scheduler:
type: PolynomialDecay
learning_rate: 0.01
end_lr: 0
power: 0.9
loss:
types:
- type: CrossEntropyLoss
coef: [1]
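# Cityscapes variant (_base_: cityscapes.yml): overrides the crop size to 1024x1024.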
_base_: './cityscapes.yml'
train_dataset:
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [1024, 1024]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
val_dataset:
transforms:
- type: Normalize
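# Cityscapes variant (_base_: cityscapes.yml): 769x769 random crops; validation pads images to 2049x1025.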
_base_: './cityscapes.yml'
train_dataset:
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.5
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [769, 769]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.4
contrast_range: 0.4
saturation_range: 0.4
- type: Normalize
val_dataset:
transforms:
- type: Padding
target_size: [2049, 1025]
- type: Normalize
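# Cityscapes variant (_base_: cityscapes.yml): wider scaling (0.25-2.0), stronger color distortion, 769x769 crops; validation pads to 2048x1024.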
_base_: './cityscapes.yml'
train_dataset:
transforms:
- type: ResizeStepScaling
min_scale_factor: 0.25
max_scale_factor: 2.0
scale_step_size: 0.25
- type: RandomPaddingCrop
crop_size: [769, 769]
- type: RandomHorizontalFlip
- type: RandomDistort
brightness_range: 0.5
contrast_range: 0.5
saturation_range: 0.5
- type: Normalize
val_dataset:
transforms:
- type: Padding
target_size: [2048, 1024]
- type: Normalize