"composable_kernel/include/utility/math.hpp" did not exist on "79d9b1084b8f65fe6c261483276b791aeb918627"
Commit 11f6ff38 authored by LDOUBLEV's avatar LDOUBLEV
Browse files

add supplementary

parent b9c0627d
#!/bin/bash
source test_tipc/common_func.sh
FILENAME=$1
# MODE must be one of ['lite_train_lite_infer', 'whole_train_whole_infer']
MODE=$2
dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
# parse params
IFS=$'\n'
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
epoch_num=$(func_parser_params "${lines[6]}" "${MODE}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
train_batch_value=$(func_parser_params "${lines[8]}" "${MODE}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
checkpoints_key=$(func_parser_key "${lines[10]}")
checkpoints_value=$(func_parser_value "${lines[10]}")
use_custom_key=$(func_parser_key "${lines[11]}")
use_custom_list=$(func_parser_value "${lines[11]}")
model_type_key=$(func_parser_key "${lines[12]}")
model_type_list=$(func_parser_value "${lines[12]}")
use_share_conv_key=$(func_parser_key "${lines[13]}")
use_share_conv_list=$(func_parser_value "${lines[13]}")
run_train_py=$(func_parser_value "${lines[14]}")
LOG_PATH="./test_tipc/extra_output"
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
if [ ${MODE} = "lite_train_lite_infer" ] || [ ${MODE} = "whole_train_whole_infer" ]; then
IFS="|"
export Count=0
USE_GPU_KEY=(${train_use_gpu_value})
# select cpu\gpu\distribute training
for gpu in ${gpu_list[*]}; do
train_use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
ips=""
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
env="export CUDA_VISIBLE_DEVICES=${gpu}"
eval ${env}
elif [ ${#gpu} -le 15 ];then
IFS=","
array=(${gpu})
env="export CUDA_VISIBLE_DEVICES=${array[0]}"
IFS="|"
else
IFS=";"
array=(${gpu})
ips=${array[0]}
gpu=${array[1]}
IFS="|"
env=" "
fi
for autocast in ${autocast_list[*]}; do
# set amp
if [ ${autocast} = "amp" ]; then
set_amp_config="AMP.use_amp=True"
else
set_amp_config=" "
fi
if [ ${run_train_py} = "null" ]; then
continue
fi
set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
set_checkpoints=$(func_set_params "${checkpoints_key}" "${checkpoints_value}")
set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
for custom_op in ${use_custom_list[*]}; do
for model_type in ${model_type_list[*]}; do
for share_conv in ${use_share_conv_list[*]}; do
set_use_custom_op=$(func_set_params "${use_custom_key}" "${custom_op}")
set_model_type=$(func_set_params "${model_type_key}" "${model_type}")
set_use_share_conv=$(func_set_params "${use_share_conv_key}" "${share_conv}")
set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
elif [ ${#ips} -le 26 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train_py} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_checkpoints} ${set_autocast} ${set_batchsize} ${set_use_custom_op} ${set_model_type} ${set_use_share_conv} ${set_amp_config}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"
# echo $cmd
eval $cmd
status_check $? "${cmd}" "${status_log}"
done
done
done
done
done
fi
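
Note: the helpers called above (func_parser_key, func_parser_value, func_parser_params, func_set_params, status_check) are sourced from test_tipc/common_func.sh, which is not part of this commit. The sketch below only illustrates the behavior the script relies on, under the assumption that each config line has the form key:value, that per-mode values are written as mode=value pairs joined by '|', and that status_check appends a pass/fail record to the results log; it is not the actual common_func.sh.

# Minimal sketch of the assumed common_func.sh helpers (illustrative only).
function func_parser_key(){
    # "key:value" -> "key"
    IFS=":" read -r key _ <<< "$1"
    echo "${key}"
}
function func_parser_value(){
    # "key:value" -> "value"
    IFS=":" read -r _ value <<< "$1"
    echo "${value}"
}
function func_parser_params(){
    # "key:modeA=1|modeB=2" + MODE -> the value registered for MODE
    IFS=":" read -r _ value <<< "$1"
    IFS="|"
    for pair in ${value}; do
        IFS="=" read -r mode v <<< "${pair}"
        if [ "${mode}" = "$2" ]; then
            echo "${v}"
            return
        fi
    done
    echo "${value}"
}
function func_set_params(){
    # key + value -> "key=value" override, or a blank when either side is null/empty
    if [ "$1" = "null" ] || [ "$2" = "null" ] || [ -z "$2" ]; then
        echo " "
    else
        echo "$1=$2"
    fi
}
function status_check(){
    # exit code + command + log file -> append a pass/fail record
    if [ "$1" -eq 0 ]; then
        echo "Run successfully with command - $2!" >> "$3"
    else
        echo "Run failed with command - $2!" >> "$3"
    fi
}
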
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=2|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=1280|whole_train_whole_infer=1280
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o
quant_train:False
prune_train:False
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o prune_train=True
quant_train:False
prune_train:False
===========================train_params===========================
model_name:ch_PPOCRv2_det
python:python3.7
gpu_list:0|0,1
use_gpu:True|True
AMP.use_amp:True|False
epoch:lite_train_lite_infer=20|whole_train_whole_infer=1000
save_model_dir:./output/
TRAIN.batch_size:lite_train_lite_infer=2|whole_train_whole_infer=4
pretrained_model:null
checkpoints:null
use_custom_relu:False|True
model_type:cls|cls_distill|cls_distill_multiopt
MODEL.siamese:False|True
norm_train:train.py -c mv3_large_x0_5.yml -o quant_train=True
quant_train:False
prune_train:False
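
Each of the three blocks above is a standalone parameter file that the shell script reads through FILENAME=$1, with MODE=$2 selecting which mode=value entry applies. As a worked example (the script and config file paths below are hypothetical, since the actual file names are not visible in this excerpt), running the first block in lite_train_lite_infer mode resolves epoch to 2 and TRAIN.batch_size to 1280, and for the single-GPU entry of gpu_list the loop would assemble roughly the following command:

# Hypothetical invocation; the real script and config file names are not shown in this commit excerpt.
bash test_tipc/test_train_python.sh ./test_tipc/configs/ch_PPOCRv2_det/train_params.txt lite_train_lite_infer

# Roughly the command assembled for gpu_list entry "0" (null-valued keys are dropped):
python3.7 train.py -c mv3_large_x0_5.yml -o use_gpu=True epoch=2 AMP.use_amp=True \
    TRAIN.batch_size=1280 use_custom_relu=False model_type=cls MODEL.siamese=False
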
import paddle
import numpy as np
import os
import errno
import paddle.nn as nn
import paddle.distributed as dist
dist.get_world_size()
dist.init_parallel_env()
from loss import build_loss, LossDistill, DMLLoss, KLJSLoss
from optimizer import create_optimizer
from data_loader import build_dataloader
from metric import create_metric
from mv3 import MobileNetV3_large_x0_5, distillmv3_large_x0_5, build_model
from config import preprocess
import time
from paddleslim.dygraph.quant import QAT
from slim.slim_quant import PACT, quant_config
from slim.slim_fpgm import prune_model
from utils import load_model
def _mkdir_if_not_exist(path, logger):
    """
    mkdir if not exists, ignore the exception when multiprocess mkdir together
    """
    if not os.path.exists(path):
        try:
            os.makedirs(path)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(path):
                logger.warning(
                    'be happy if some process has already created {}'.format(
                        path))
            else:
                raise OSError('Failed to mkdir {}'.format(path))


def save_model(model,
               optimizer,
               model_path,
               logger,
               is_best=False,
               prefix='ppocr',
               **kwargs):
    """
    save model to the target path
    """
    _mkdir_if_not_exist(model_path, logger)
    model_prefix = os.path.join(model_path, prefix)
    paddle.save(model.state_dict(), model_prefix + '.pdparams')
    if type(optimizer) is list:
        paddle.save(optimizer[0].state_dict(), model_prefix + '.pdopt')
        paddle.save(optimizer[1].state_dict(), model_prefix + "_1" + '.pdopt')
    else:
        paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')

    # # save metric and config
    # with open(model_prefix + '.states', 'wb') as f:
    #     pickle.dump(kwargs, f, protocol=2)
    if is_best:
        logger.info('save best model is to {}'.format(model_prefix))
    else:
        logger.info("save model in {}".format(model_prefix))


def amp_scaler(config):
    if 'AMP' in config and config['AMP']['use_amp'] is True:
        AMP_RELATED_FLAGS_SETTING = {
            'FLAGS_cudnn_batchnorm_spatial_persistent': 1,
            'FLAGS_max_inplace_grad_add': 8,
        }
        paddle.fluid.set_flags(AMP_RELATED_FLAGS_SETTING)
        scale_loss = config["AMP"].get("scale_loss", 1.0)
        use_dynamic_loss_scaling = config["AMP"].get("use_dynamic_loss_scaling",
                                                     False)
        scaler = paddle.amp.GradScaler(
            init_loss_scaling=scale_loss,
            use_dynamic_loss_scaling=use_dynamic_loss_scaling)
        return scaler
    else:
        return None


def set_seed(seed):
    paddle.seed(seed)
    np.random.seed(seed)

def train(config, scaler=None):
    EPOCH = config['epoch']
    topk = config['topk']
    batch_size = config['TRAIN']['batch_size']
    num_workers = config['TRAIN']['num_workers']
    train_loader = build_dataloader(
        'train', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    # build model
    # model = MobileNetV3_large_x0_5(class_dim=100)
    model = build_model(config)

    # build_optimizer
    optimizer, lr_scheduler = create_optimizer(
        config, parameter_list=model.parameters())

    # load model
    pre_best_model_dict = load_model(config, model, optimizer)
    if len(pre_best_model_dict) > 0:
        pre_str = 'The metric of loaded metric as follows {}'.format(', '.join(
            ['{}: {}'.format(k, v) for k, v in pre_best_model_dict.items()]))
        logger.info(pre_str)

    # about slim prune and quant
    if "quant_train" in config and config['quant_train'] is True:
        quanter = QAT(config=quant_config, act_preprocess=PACT)
        quanter.quantize(model)
    elif "prune_train" in config and config['prune_train'] is True:
        model = prune_model(model, [1, 3, 32, 32], 0.1)
    else:
        pass

    # distribution
    model.train()
    model = paddle.DataParallel(model)

    # build loss function
    loss_func = build_loss(config)

    data_num = len(train_loader)
    best_acc = {}
    for epoch in range(EPOCH):
        st = time.time()
        for idx, data in enumerate(train_loader):
            img_batch, label = data
            img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
            label = paddle.unsqueeze(label, -1)

            if scaler is not None:
                with paddle.amp.auto_cast():
                    outs = model(img_batch)
            else:
                outs = model(img_batch)

            # cal metric
            acc = metric_func(outs, label)
            # cal loss
            avg_loss = loss_func(outs, label)

            if scaler is None:
                # backward
                avg_loss.backward()
                optimizer.step()
                optimizer.clear_grad()
            else:
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            if idx % 10 == 0:
                et = time.time()
                strs = f"epoch: [{epoch}/{EPOCH}], iter: [{idx}/{data_num}], "
                strs += f"loss: {avg_loss.numpy()[0]}"
                strs += f", acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
                strs += f", batch_time: {round(et-st, 4)} s"
                logger.info(strs)
                st = time.time()

        if epoch % 10 == 0:
            acc = eval(config, model)
            if len(best_acc) < 1 or acc['top5'].numpy()[0] > best_acc['top5']:
                best_acc = acc
                best_acc['epoch'] = epoch
                is_best = True
            else:
                is_best = False
            logger.info(
                f"The best acc: acc_topk1: {best_acc['top1'].numpy()[0]}, acc_top5: {best_acc['top5'].numpy()[0]}, best_epoch: {best_acc['epoch']}"
            )
            save_model(
                model,
                optimizer,
                config['save_model_dir'],
                logger,
                is_best,
                prefix="cls")

def train_distill(config, scaler=None):
    EPOCH = config['epoch']
    topk = config['topk']
    batch_size = config['TRAIN']['batch_size']
    num_workers = config['TRAIN']['num_workers']
    train_loader = build_dataloader(
        'train', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    # model = distillmv3_large_x0_5(class_dim=100)
    model = build_model(config)

    # pact quant train
    if "quant_train" in config and config['quant_train'] is True:
        quanter = QAT(config=quant_config, act_preprocess=PACT)
        quanter.quantize(model)
    elif "prune_train" in config and config['prune_train'] is True:
        model = prune_model(model, [1, 3, 32, 32], 0.1)
    else:
        pass

    # build_optimizer
    optimizer, lr_scheduler = create_optimizer(
        config, parameter_list=model.parameters())

    # load model
    pre_best_model_dict = load_model(config, model, optimizer)
    if len(pre_best_model_dict) > 0:
        pre_str = 'The metric of loaded metric as follows {}'.format(', '.join(
            ['{}: {}'.format(k, v) for k, v in pre_best_model_dict.items()]))
        logger.info(pre_str)

    model.train()
    model = paddle.DataParallel(model)

    # build loss function
    loss_func_distill = LossDistill(model_name_list=['student', 'student1'])
    loss_func_dml = DMLLoss(model_name_pairs=['student', 'student1'])
    loss_func_js = KLJSLoss(mode='js')

    data_num = len(train_loader)
    best_acc = {}
    for epoch in range(EPOCH):
        st = time.time()
        for idx, data in enumerate(train_loader):
            img_batch, label = data
            img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
            label = paddle.unsqueeze(label, -1)

            if scaler is not None:
                with paddle.amp.auto_cast():
                    outs = model(img_batch)
            else:
                outs = model(img_batch)

            # cal metric
            acc = metric_func(outs['student'], label)
            # cal loss
            avg_loss = loss_func_distill(outs, label)['student'] + \
                loss_func_distill(outs, label)['student1'] + \
                loss_func_dml(outs, label)['student_student1']

            # backward
            if scaler is None:
                avg_loss.backward()
                optimizer.step()
                optimizer.clear_grad()
            else:
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()

            if idx % 10 == 0:
                et = time.time()
                strs = f"epoch: [{epoch}/{EPOCH}], iter: [{idx}/{data_num}], "
                strs += f"loss: {avg_loss.numpy()[0]}"
                strs += f", acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
                strs += f", batch_time: {round(et-st, 4)} s"
                logger.info(strs)
                st = time.time()

        if epoch % 10 == 0:
            acc = eval(config, model._layers.student)
            if len(best_acc) < 1 or acc['top5'].numpy()[0] > best_acc['top5']:
                best_acc = acc
                best_acc['epoch'] = epoch
                is_best = True
            else:
                is_best = False
            logger.info(
                f"The best acc: acc_topk1: {best_acc['top1'].numpy()[0]}, acc_top5: {best_acc['top5'].numpy()[0]}, best_epoch: {best_acc['epoch']}"
            )
            save_model(
                model,
                optimizer,
                config['save_model_dir'],
                logger,
                is_best,
                prefix="cls_distill")

def train_distill_multiopt(config, scaler=None):
    EPOCH = config['epoch']
    topk = config['topk']
    batch_size = config['TRAIN']['batch_size']
    num_workers = config['TRAIN']['num_workers']
    train_loader = build_dataloader(
        'train', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    # model = distillmv3_large_x0_5(class_dim=100)
    model = build_model(config)

    # build_optimizer
    optimizer, lr_scheduler = create_optimizer(
        config, parameter_list=model.student.parameters())
    optimizer1, lr_scheduler1 = create_optimizer(
        config, parameter_list=model.student1.parameters())

    # load model
    pre_best_model_dict = load_model(config, model, optimizer)
    if len(pre_best_model_dict) > 0:
        pre_str = 'The metric of loaded metric as follows {}'.format(', '.join(
            ['{}: {}'.format(k, v) for k, v in pre_best_model_dict.items()]))
        logger.info(pre_str)

    # quant train
    if "quant_train" in config and config['quant_train'] is True:
        quanter = QAT(config=quant_config, act_preprocess=PACT)
        quanter.quantize(model)
    elif "prune_train" in config and config['prune_train'] is True:
        model = prune_model(model, [1, 3, 32, 32], 0.1)
    else:
        pass

    model.train()
    model = paddle.DataParallel(model)

    # build loss function
    loss_func_distill = LossDistill(model_name_list=['student', 'student1'])
    loss_func_dml = DMLLoss(model_name_pairs=['student', 'student1'])
    loss_func_js = KLJSLoss(mode='js')

    data_num = len(train_loader)
    best_acc = {}
    for epoch in range(EPOCH):
        st = time.time()
        for idx, data in enumerate(train_loader):
            img_batch, label = data
            img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
            label = paddle.unsqueeze(label, -1)

            if scaler is not None:
                with paddle.amp.auto_cast():
                    outs = model(img_batch)
            else:
                outs = model(img_batch)

            # cal metric
            acc = metric_func(outs['student'], label)
            # cal loss
            avg_loss = loss_func_distill(outs, label)['student'] + \
                loss_func_dml(outs, label)['student_student1']
            avg_loss1 = loss_func_distill(outs, label)['student1'] + \
                loss_func_dml(outs, label)['student_student1']

            if scaler is None:
                # backward
                avg_loss.backward(retain_graph=True)
                optimizer.step()
                optimizer.clear_grad()

                avg_loss1.backward()
                optimizer1.step()
                optimizer1.clear_grad()
            else:
                scaled_avg_loss = scaler.scale(avg_loss)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer, scaled_avg_loss)

                scaled_avg_loss = scaler.scale(avg_loss1)
                scaled_avg_loss.backward()
                scaler.minimize(optimizer1, scaled_avg_loss)

            if not isinstance(lr_scheduler, float):
                lr_scheduler.step()
            if not isinstance(lr_scheduler1, float):
                lr_scheduler1.step()

            if idx % 10 == 0:
                et = time.time()
                strs = f"epoch: [{epoch}/{EPOCH}], iter: [{idx}/{data_num}], "
                strs += f"loss: {avg_loss.numpy()[0]}, loss1: {avg_loss1.numpy()[0]}"
                strs += f", acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
                strs += f", batch_time: {round(et-st, 4)} s"
                logger.info(strs)
                st = time.time()

        if epoch % 10 == 0:
            acc = eval(config, model._layers.student)
            if len(best_acc) < 1 or acc['top5'].numpy()[0] > best_acc['top5']:
                best_acc = acc
                best_acc['epoch'] = epoch
                is_best = True
            else:
                is_best = False
            logger.info(
                f"The best acc: acc_topk1: {best_acc['top1'].numpy()[0]}, acc_top5: {best_acc['top5'].numpy()[0]}, best_epoch: {best_acc['epoch']}"
            )
            save_model(
                model, [optimizer, optimizer1],
                config['save_model_dir'],
                logger,
                is_best,
                prefix="cls_distill_multiopt")

def eval(config, model):
    batch_size = config['VALID']['batch_size']
    num_workers = config['VALID']['num_workers']
    valid_loader = build_dataloader(
        'test', batch_size=batch_size, num_workers=num_workers)

    # build metric
    metric_func = create_metric

    outs = []
    labels = []
    for idx, data in enumerate(valid_loader):
        img_batch, label = data
        img_batch = paddle.transpose(img_batch, [0, 3, 1, 2])
        label = paddle.unsqueeze(label, -1)
        out = model(img_batch)

        outs.append(out)
        labels.append(label)

    outs = paddle.concat(outs, axis=0)
    labels = paddle.concat(labels, axis=0)
    acc = metric_func(outs, labels)

    strs = f"The metric are as follows: acc_topk1: {acc['top1'].numpy()[0]}, acc_top5: {acc['top5'].numpy()[0]}"
    logger.info(strs)
    return acc

if __name__ == "__main__":
config, logger = preprocess(is_train=False)
# AMP scaler
scaler = amp_scaler(config)
model_type = config['model_type']
if model_type == "cls":
train(config)
elif model_type == "cls_distill":
train_distill(config)
elif model_type == "cls_distill_multiopt":
train_distill_multiopt(config)
else:
raise ValueError("model_type should be one of ['']")
# single GPU
python3.7 train.py -c mv3_large_x0_5.yml
# distributed training
python3.7 -m paddle.distributed.launch --log_dir=./debug/ --gpus '0,1' train.py -c mv3_large_x0_5.yml
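
The same entry point covers the distillation variants configured above (model_type cls_distill and cls_distill_multiopt). Assuming model_type is a plain key in mv3_large_x0_5.yml that can be overridden with the "-o key=value" pattern the TIPC script uses (an assumption; the yml file is not part of this commit), they could be run as:

# Assumed override syntax, mirroring the "-o key=value" pattern used by the TIPC script above.
python3.7 train.py -c mv3_large_x0_5.yml -o model_type=cls_distill
python3.7 -m paddle.distributed.launch --gpus '0,1' train.py -c mv3_large_x0_5.yml -o model_type=cls_distill_multiopt
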
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import logging
import functools
import pickle

import six
import paddle
import paddle.distributed as dist

logger_initialized = {}

def print_dict(d, logger, delimiter=0):
    """
    Recursively visualize a dict, indenting according to
    the nesting of its keys.
    """
    for k, v in sorted(d.items()):
        if isinstance(v, dict):
            logger.info("{}{} : ".format(delimiter * " ", str(k)))
            print_dict(v, logger, delimiter + 4)
        elif isinstance(v, list) and len(v) >= 1 and isinstance(v[0], dict):
            logger.info("{}{} : ".format(delimiter * " ", str(k)))
            for value in v:
                print_dict(value, logger, delimiter + 4)
        else:
            logger.info("{}{} : {}".format(delimiter * " ", k, v))

@functools.lru_cache()
def get_logger(name='root', log_file=None, log_level=logging.DEBUG):
    """Initialize and get a logger by name.
    If the logger has not been initialized, this method will initialize the
    logger by adding one or two handlers, otherwise the initialized logger will
    be directly returned. During initialization, a StreamHandler will always be
    added. If `log_file` is specified a FileHandler will also be added.
    Args:
        name (str): Logger name.
        log_file (str | None): The log filename. If specified, a FileHandler
            will be added to the logger.
        log_level (int): The logger level. Note that only the process of
            rank 0 is affected, and other processes will set the level to
            "Error" thus be silent most of the time.
    Returns:
        logging.Logger: The expected logger.
    """
    logger = logging.getLogger(name)
    if name in logger_initialized:
        return logger
    for logger_name in logger_initialized:
        if name.startswith(logger_name):
            return logger

    formatter = logging.Formatter(
        '[%(asctime)s] %(name)s %(levelname)s: %(message)s',
        datefmt="%Y/%m/%d %H:%M:%S")

    stream_handler = logging.StreamHandler(stream=sys.stdout)
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)
    if log_file is not None and dist.get_rank() == 0:
        log_file_folder = os.path.split(log_file)[0]
        os.makedirs(log_file_folder, exist_ok=True)
        file_handler = logging.FileHandler(log_file, 'a')
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)
    if dist.get_rank() == 0:
        logger.setLevel(log_level)
    else:
        logger.setLevel(logging.ERROR)
    logger_initialized[name] = True
    return logger

def load_model(config, model, optimizer=None):
    """
    load model from checkpoint or pretrained_model
    """
    logger = get_logger()
    checkpoints = config.get('checkpoints')
    pretrained_model = config.get('pretrained_model')
    best_model_dict = {}
    if checkpoints:
        if checkpoints.endswith('.pdparams'):
            checkpoints = checkpoints.replace('.pdparams', '')
        assert os.path.exists(checkpoints + ".pdparams"), \
            "The {}.pdparams does not exists!".format(checkpoints)

        # load params from trained model
        params = paddle.load(checkpoints + '.pdparams')
        state_dict = model.state_dict()
        new_state_dict = {}
        for key, value in state_dict.items():
            if key not in params:
                logger.warning("{} not in loaded params {} !".format(
                    key, params.keys()))
                continue
            pre_value = params[key]
            if list(value.shape) == list(pre_value.shape):
                new_state_dict[key] = pre_value
            else:
                logger.warning(
                    "The shape of model params {} {} not matched with loaded params shape {} !".
                    format(key, value.shape, pre_value.shape))
        model.set_state_dict(new_state_dict)

        if optimizer is not None:
            if os.path.exists(checkpoints + '.pdopt'):
                optim_dict = paddle.load(checkpoints + '.pdopt')
                optimizer.set_state_dict(optim_dict)
            else:
                logger.warning(
                    "{}.pdopt is not exists, params of optimizer is not loaded".
                    format(checkpoints))

        if os.path.exists(checkpoints + '.states'):
            with open(checkpoints + '.states', 'rb') as f:
                states_dict = pickle.load(f) if six.PY2 else pickle.load(
                    f, encoding='latin1')
            best_model_dict = states_dict.get('best_model_dict', {})
            if 'epoch' in states_dict:
                best_model_dict['start_epoch'] = states_dict['epoch'] + 1
        logger.info("resume from {}".format(checkpoints))
    elif pretrained_model:
        load_pretrained_params(model, pretrained_model)
    else:
        logger.info('train from scratch')
    return best_model_dict

def load_pretrained_params(model, path):
    logger = get_logger()
    if path.endswith('.pdparams'):
        path = path.replace('.pdparams', '')
    assert os.path.exists(path + ".pdparams"), \
        "The {}.pdparams does not exists!".format(path)
    params = paddle.load(path + '.pdparams')
    state_dict = model.state_dict()
    new_state_dict = {}
    for k1 in params.keys():
        if k1 not in state_dict.keys():
            logger.warning("The pretrained params {} not in model".format(k1))
        else:
            if list(state_dict[k1].shape) == list(params[k1].shape):
                new_state_dict[k1] = params[k1]
            else:
                logger.warning(
                    "The shape of model params {} {} not matched with loaded params {} {} !".
                    format(k1, state_dict[k1].shape, k1, params[k1].shape))
    model.set_state_dict(new_state_dict)
    logger.info("load pretrain successful from {}".format(path))
    return model