Commit fccfdfa5 authored by dlyrm's avatar dlyrm
Browse files

update code

parent dcc7bf4f
Pipeline #681 canceled with stages
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import datetime
import six
import copy
import json
import paddle
import paddle.distributed as dist
from ppdet.utils.checkpoint import save_model
from ppdet.metrics import get_infer_results
from ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
__all__ = [
'Callback', 'ComposeCallback', 'LogPrinter', 'Checkpointer',
'VisualDLWriter'
]
class Callback(object):
def __init__(self, model):
self.model = model
def on_step_begin(self, status):
pass
def on_step_end(self, status):
pass
def on_epoch_begin(self, status):
pass
def on_epoch_end(self, status):
pass
def on_train_begin(self, status):
pass
def on_train_end(self, status):
pass
class ComposeCallback(object):
def __init__(self, callbacks):
callbacks = [c for c in list(callbacks) if c is not None]
for c in callbacks:
assert isinstance(
c, Callback), "callback should be subclass of Callback"
self._callbacks = callbacks
def on_step_begin(self, status):
for c in self._callbacks:
c.on_step_begin(status)
def on_step_end(self, status):
for c in self._callbacks:
c.on_step_end(status)
def on_epoch_begin(self, status):
for c in self._callbacks:
c.on_epoch_begin(status)
def on_epoch_end(self, status):
for c in self._callbacks:
c.on_epoch_end(status)
def on_train_begin(self, status):
for c in self._callbacks:
c.on_train_begin(status)
def on_train_end(self, status):
for c in self._callbacks:
c.on_train_end(status)
class LogPrinter(Callback):
def __init__(self, model):
super(LogPrinter, self).__init__(model)
def on_step_end(self, status):
if dist.get_world_size() < 2 or dist.get_rank() == 0:
mode = status['mode']
if mode == 'train':
epoch_id = status['epoch_id']
step_id = status['step_id']
steps_per_epoch = status['steps_per_epoch']
training_staus = status['training_staus']
batch_time = status['batch_time']
data_time = status['data_time']
epoches = self.model.cfg.epoch
batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
))]['batch_size']
logs = training_staus.log()
space_fmt = ':' + str(len(str(steps_per_epoch))) + 'd'
if step_id % self.model.cfg.log_iter == 0:
eta_steps = (epoches - epoch_id) * steps_per_epoch - step_id
eta_sec = eta_steps * batch_time.global_avg
eta_str = str(datetime.timedelta(seconds=int(eta_sec)))
ips = float(batch_size) / batch_time.avg
max_mem_reserved_str = ""
max_mem_allocated_str = ""
if paddle.device.is_compiled_with_cuda():
max_mem_reserved_str = f"max_mem_reserved: {paddle.device.cuda.max_memory_reserved() // (1024 ** 2)} MB d"
max_mem_allocated_str = f"max_mem_allocated: {paddle.device.cuda.max_memory_allocated() // (1024 ** 2)} MB"
fmt = ' '.join([
'Epoch: [{}]',
'[{' + space_fmt + '}/{}]',
'eta: {eta}',
'lr: {lr:.6f}',
'{meters}',
'batch_cost: {btime}',
'data_cost: {dtime}',
'ips: {ips:.4f} images/s',
'{max_mem_reserved_str}',
'{max_mem_allocated_str}'
])
fmt = fmt.format(
epoch_id,
step_id,
steps_per_epoch,
eta=eta_str,
lr=status['learning_rate'],
meters=logs,
btime=str(batch_time),
dtime=str(data_time),
ips=ips,
max_mem_reserved_str=max_mem_reserved_str,
max_mem_allocated_str=max_mem_allocated_str)
logger.info(fmt)
if mode == 'eval':
step_id = status['step_id']
if step_id % 100 == 0:
logger.info("Eval iter: {}".format(step_id))
def on_epoch_end(self, status):
if dist.get_world_size() < 2 or dist.get_rank() == 0:
mode = status['mode']
if mode == 'eval':
sample_num = status['sample_num']
cost_time = status['cost_time']
logger.info('Total sample number: {}, average FPS: {}'.format(
sample_num, sample_num / cost_time))
class Checkpointer(Callback):
def __init__(self, model):
super(Checkpointer, self).__init__(model)
self.best_ap = -1000.
self.save_dir = os.path.join(self.model.cfg.save_dir,
self.model.cfg.filename)
if hasattr(self.model.model, 'student_model'):
self.weight = self.model.model.student_model
else:
self.weight = self.model.model
def on_epoch_end(self, status):
# Checkpointer only performed during training
mode = status['mode']
epoch_id = status['epoch_id']
weight = None
save_name = None
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'train':
end_epoch = self.model.cfg.epoch
if (
epoch_id + 1
) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
save_name = str(
epoch_id) if epoch_id != end_epoch - 1 else "model_final"
weight = self.weight.state_dict()
elif mode == 'eval':
if 'save_best_model' in status and status['save_best_model']:
for metric in self.model._metrics:
map_res = metric.get_results()
eval_func = "ap"
if 'bbox' in map_res:
key = 'bbox'
elif 'keypoint' in map_res:
key = 'keypoint'
else:
key = 'mask'
if key not in map_res:
logger.warning("Evaluation results empty, this may be due to " \
"training iterations being too few or not " \
"loading the correct weights.")
return
if map_res[key][0] >= self.best_ap:
self.best_ap = map_res[key][0]
save_name = 'best_model'
weight = self.weight.state_dict()
logger.info("Best test {} {} is {:0.3f}.".format(
key, eval_func, abs(self.best_ap)))
if weight:
if self.model.use_ema:
exchange_save_model = status.get('exchange_save_model',
False)
if not exchange_save_model:
# save model and ema_model
save_model(
status['weight'],
self.model.optimizer,
self.save_dir,
save_name,
epoch_id + 1,
ema_model=weight)
else:
# save model(student model) and ema_model(teacher model)
# in DenseTeacher SSOD, the teacher model will be higher,
# so exchange when saving pdparams
student_model = status['weight'] # model
teacher_model = weight # ema_model
save_model(
teacher_model,
self.model.optimizer,
self.save_dir,
save_name,
epoch_id + 1,
ema_model=student_model)
del teacher_model
del student_model
else:
save_model(weight, self.model.optimizer, self.save_dir,
save_name, epoch_id + 1)
class VisualDLWriter(Callback):
"""
Use VisualDL to log data or image
"""
def __init__(self, model):
super(VisualDLWriter, self).__init__(model)
assert six.PY3, "VisualDL requires Python >= 3.5"
try:
from visualdl import LogWriter
except Exception as e:
logger.error('visualdl not found, plaese install visualdl. '
'for example: `pip install visualdl`.')
raise e
self.vdl_writer = LogWriter(
model.cfg.get('vdl_log_dir', 'vdl_log_dir/scalar'))
self.vdl_loss_step = 0
self.vdl_mAP_step = 0
self.vdl_image_step = 0
self.vdl_image_frame = 0
def on_step_end(self, status):
mode = status['mode']
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'train':
training_staus = status['training_staus']
for loss_name, loss_value in training_staus.get().items():
self.vdl_writer.add_scalar(loss_name, loss_value,
self.vdl_loss_step)
self.vdl_loss_step += 1
elif mode == 'test':
ori_image = status['original_image']
result_image = status['result_image']
self.vdl_writer.add_image(
"original/frame_{}".format(self.vdl_image_frame), ori_image,
self.vdl_image_step)
self.vdl_writer.add_image(
"result/frame_{}".format(self.vdl_image_frame),
result_image, self.vdl_image_step)
self.vdl_image_step += 1
# each frame can display ten pictures at most.
if self.vdl_image_step % 10 == 0:
self.vdl_image_step = 0
self.vdl_image_frame += 1
def on_epoch_end(self, status):
mode = status['mode']
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'eval':
for metric in self.model._metrics:
for key, map_value in metric.get_results().items():
self.vdl_writer.add_scalar("{}-mAP".format(key),
map_value[0],
self.vdl_mAP_step)
self.vdl_mAP_step += 1
class WandbCallback(Callback):
def __init__(self, model):
super(WandbCallback, self).__init__(model)
try:
import wandb
self.wandb = wandb
except Exception as e:
logger.error('wandb not found, please install wandb. '
'Use: `pip install wandb`.')
raise e
self.wandb_params = model.cfg.get('wandb', None)
self.save_dir = os.path.join(self.model.cfg.save_dir,
self.model.cfg.filename)
if self.wandb_params is None:
self.wandb_params = {}
for k, v in model.cfg.items():
if k.startswith("wandb_"):
self.wandb_params.update({k.lstrip("wandb_"): v})
self._run = None
if dist.get_world_size() < 2 or dist.get_rank() == 0:
_ = self.run
self.run.config.update(self.model.cfg)
self.run.define_metric("epoch")
self.run.define_metric("eval/*", step_metric="epoch")
self.best_ap = -1000.
self.fps = []
@property
def run(self):
if self._run is None:
if self.wandb.run is not None:
logger.info(
"There is an ongoing wandb run which will be used"
"for logging. Please use `wandb.finish()` to end that"
"if the behaviour is not intended")
self._run = self.wandb.run
else:
self._run = self.wandb.init(**self.wandb_params)
return self._run
def save_model(self,
optimizer,
save_dir,
save_name,
last_epoch,
ema_model=None,
ap=None,
fps=None,
tags=None):
if dist.get_world_size() < 2 or dist.get_rank() == 0:
model_path = os.path.join(save_dir, save_name)
metadata = {}
metadata["last_epoch"] = last_epoch
if ap:
metadata["ap"] = ap
if fps:
metadata["fps"] = fps
if ema_model is None:
ema_artifact = self.wandb.Artifact(
name="ema_model-{}".format(self.run.id),
type="model",
metadata=metadata)
model_artifact = self.wandb.Artifact(
name="model-{}".format(self.run.id),
type="model",
metadata=metadata)
ema_artifact.add_file(model_path + ".pdema", name="model_ema")
model_artifact.add_file(model_path + ".pdparams", name="model")
self.run.log_artifact(ema_artifact, aliases=tags)
self.run.log_artfact(model_artifact, aliases=tags)
else:
model_artifact = self.wandb.Artifact(
name="model-{}".format(self.run.id),
type="model",
metadata=metadata)
model_artifact.add_file(model_path + ".pdparams", name="model")
self.run.log_artifact(model_artifact, aliases=tags)
def on_step_end(self, status):
mode = status['mode']
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'train':
training_status = status['training_staus'].get()
for k, v in training_status.items():
training_status[k] = float(v)
# calculate ips, data_cost, batch_cost
batch_time = status['batch_time']
data_time = status['data_time']
batch_size = self.model.cfg['{}Reader'.format(mode.capitalize(
))]['batch_size']
ips = float(batch_size) / float(batch_time.avg)
data_cost = float(data_time.avg)
batch_cost = float(batch_time.avg)
metrics = {"train/" + k: v for k, v in training_status.items()}
metrics["train/ips"] = ips
metrics["train/data_cost"] = data_cost
metrics["train/batch_cost"] = batch_cost
self.fps.append(ips)
self.run.log(metrics)
def on_epoch_end(self, status):
mode = status['mode']
epoch_id = status['epoch_id']
save_name = None
if dist.get_world_size() < 2 or dist.get_rank() == 0:
if mode == 'train':
fps = sum(self.fps) / len(self.fps)
self.fps = []
end_epoch = self.model.cfg.epoch
if (
epoch_id + 1
) % self.model.cfg.snapshot_epoch == 0 or epoch_id == end_epoch - 1:
save_name = str(
epoch_id) if epoch_id != end_epoch - 1 else "model_final"
tags = ["latest", "epoch_{}".format(epoch_id)]
self.save_model(
self.model.optimizer,
self.save_dir,
save_name,
epoch_id + 1,
self.model.use_ema,
fps=fps,
tags=tags)
if mode == 'eval':
sample_num = status['sample_num']
cost_time = status['cost_time']
fps = sample_num / cost_time
merged_dict = {}
for metric in self.model._metrics:
for key, map_value in metric.get_results().items():
merged_dict["eval/{}-mAP".format(key)] = map_value[0]
merged_dict["epoch"] = status["epoch_id"]
merged_dict["eval/fps"] = sample_num / cost_time
self.run.log(merged_dict)
if 'save_best_model' in status and status['save_best_model']:
for metric in self.model._metrics:
map_res = metric.get_results()
if 'bbox' in map_res:
key = 'bbox'
elif 'keypoint' in map_res:
key = 'keypoint'
else:
key = 'mask'
if key not in map_res:
logger.warning("Evaluation results empty, this may be due to " \
"training iterations being too few or not " \
"loading the correct weights.")
return
if map_res[key][0] >= self.best_ap:
self.best_ap = map_res[key][0]
save_name = 'best_model'
tags = ["best", "epoch_{}".format(epoch_id)]
self.save_model(
self.model.optimizer,
self.save_dir,
save_name,
last_epoch=epoch_id + 1,
ema_model=self.model.use_ema,
ap=abs(self.best_ap),
fps=fps,
tags=tags)
def on_train_end(self, status):
self.run.finish()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import random
import numpy as np
import paddle
from paddle.distributed import fleet
__all__ = ['init_parallel_env', 'set_random_seed', 'init_fleet_env']
def init_fleet_env(find_unused_parameters=False):
strategy = fleet.DistributedStrategy()
strategy.find_unused_parameters = find_unused_parameters
fleet.init(is_collective=True, strategy=strategy)
def init_parallel_env():
env = os.environ
dist = 'PADDLE_TRAINER_ID' in env and 'PADDLE_TRAINERS_NUM' in env
if dist:
trainer_id = int(env['PADDLE_TRAINER_ID'])
local_seed = (99 + trainer_id)
random.seed(local_seed)
np.random.seed(local_seed)
paddle.distributed.init_parallel_env()
def set_random_seed(seed):
paddle.seed(seed)
random.seed(seed)
np.random.seed(seed)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import yaml
from collections import OrderedDict
import paddle
from ppdet.data.source.category import get_categories
from ppdet.utils.logger import setup_logger
logger = setup_logger('ppdet.engine')
# Global dictionary
TRT_MIN_SUBGRAPH = {
'YOLO': 3,
'PPYOLOE': 10,
'YOLOX': 20,
'YOLOF': 40,
'YOLOv5': 20,
'RTMDet': 20,
'YOLOv6': 10,
'YOLOv7': 10,
'YOLOv8': 10,
'DETR': 3,
}
TO_STATIC_SPEC = {
'yolov5_l_300e_coco': None,
'yolov7_l_300e_coco': None,
'yolov3_darknet53_270e_coco': [{
'im_id': paddle.static.InputSpec(
name='im_id', shape=[-1, 1], dtype='float32'),
'is_crowd': paddle.static.InputSpec(
name='is_crowd', shape=[-1, 50], dtype='float32'),
'gt_bbox': paddle.static.InputSpec(
name='gt_bbox', shape=[-1, 50, 4], dtype='float32'),
'curr_iter': paddle.static.InputSpec(
name='curr_iter', shape=[-1], dtype='float32'),
'image': paddle.static.InputSpec(
name='image', shape=[-1, 3, -1, -1], dtype='float32'),
'im_shape': paddle.static.InputSpec(
name='im_shape', shape=[-1, 2], dtype='float32'),
'scale_factor': paddle.static.InputSpec(
name='scale_factor', shape=[-1, 2], dtype='float32'),
'target0': paddle.static.InputSpec(
name='target0', shape=[-1, 3, 86, -1, -1], dtype='float32'),
'target1': paddle.static.InputSpec(
name='target1', shape=[-1, 3, 86, -1, -1], dtype='float32'),
'target2': paddle.static.InputSpec(
name='target2', shape=[-1, 3, 86, -1, -1], dtype='float32'),
}],
}
def apply_to_static(config, model):
filename = config.get('filename', None)
spec = TO_STATIC_SPEC.get(filename, None)
model = paddle.jit.to_static(model, input_spec=spec)
logger.info("Successfully to apply @to_static with specs: {}".format(spec))
return model
def _prune_input_spec(input_spec, program, targets):
# try to prune static program to figure out pruned input spec
# so we perform following operations in static mode
device = paddle.get_device()
paddle.enable_static()
paddle.set_device(device)
pruned_input_spec = [{}]
program = program.clone()
program = program._prune(targets=targets)
global_block = program.global_block()
for name, spec in input_spec[0].items():
try:
v = global_block.var(name)
pruned_input_spec[0][name] = spec
except Exception:
pass
paddle.disable_static(place=device)
return pruned_input_spec
def _parse_reader(reader_cfg, dataset_cfg, metric, arch, image_shape):
preprocess_list = []
anno_file = dataset_cfg.get_anno()
clsid2catid, catid2name = get_categories(metric, anno_file, arch)
label_list = [str(cat) for cat in catid2name.values()]
fuse_normalize = reader_cfg.get('fuse_normalize', False)
sample_transforms = reader_cfg['sample_transforms']
for st in sample_transforms[1:]:
for key, value in st.items():
p = {'type': key}
if key == 'Resize':
if int(image_shape[1]) != -1:
value['target_size'] = image_shape[1:]
value['interp'] = value.get('interp', 1) # cv2.INTER_LINEAR
if fuse_normalize and key == 'NormalizeImage':
continue
p.update(value)
preprocess_list.append(p)
batch_transforms = reader_cfg.get('batch_transforms', None)
if batch_transforms:
for bt in batch_transforms:
for key, value in bt.items():
# for deploy/infer, use PadStride(stride) instead PadBatch(pad_to_stride)
if key == 'PadBatch':
preprocess_list.append({
'type': 'PadStride',
'stride': value['pad_to_stride']
})
break
return preprocess_list, label_list
def _parse_tracker(tracker_cfg):
tracker_params = {}
for k, v in tracker_cfg.items():
tracker_params.update({k: v})
return tracker_params
def _dump_infer_config(config, path, image_shape, model):
arch_state = False
from ppdet.core.config.yaml_helpers import setup_orderdict
setup_orderdict()
use_dynamic_shape = True if image_shape[2] == -1 else False
infer_cfg = OrderedDict({
'mode': 'paddle',
'draw_threshold': 0.5,
'metric': config['metric'],
'use_dynamic_shape': use_dynamic_shape
})
export_onnx = config.get('export_onnx', False)
export_eb = config.get('export_eb', False)
infer_arch = config['architecture']
if 'RCNN' in infer_arch and export_onnx:
logger.warning(
"Exporting RCNN model to ONNX only support batch_size = 1")
infer_cfg['export_onnx'] = True
infer_cfg['export_eb'] = export_eb
for arch, min_subgraph_size in TRT_MIN_SUBGRAPH.items():
if arch in infer_arch:
infer_cfg['arch'] = arch
infer_cfg['min_subgraph_size'] = min_subgraph_size
arch_state = True
break
if infer_arch == 'PPYOLOEWithAuxHead':
infer_arch = 'PPYOLOE'
if infer_arch in [
'YOLOX', 'YOLOF', 'PPYOLOE', 'YOLOv5', 'YOLOv6', 'YOLOv7', 'YOLOv8'
]:
infer_cfg['arch'] = infer_arch
infer_cfg['min_subgraph_size'] = TRT_MIN_SUBGRAPH[infer_arch]
arch_state = True
if not arch_state:
logger.error(
'Architecture: {} is not supported for exporting model now.\n'.
format(infer_arch) +
'Please set TRT_MIN_SUBGRAPH in ppdet/engine/export_utils.py')
os._exit(0)
if 'mask_head' in config[config['architecture']] and config[config[
'architecture']]['mask_head']:
infer_cfg['mask'] = True
label_arch = 'detection_arch'
reader_cfg = config['TestReader']
dataset_cfg = config['TestDataset']
infer_cfg['Preprocess'], infer_cfg['label_list'] = _parse_reader(
reader_cfg, dataset_cfg, config['metric'], label_arch, image_shape[1:])
yaml.dump(infer_cfg, open(path, 'w'))
logger.info("Export inference config file to {}".format(os.path.join(path)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment