Commit 3b6a2f17 authored by LDOUBLEV's avatar LDOUBLEV
Browse files

Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into tipc_test_allclose

parents 32fdd08b 13961868
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import os
import random
import copy
import logging
import argparse
import paddle
import numpy as np
from seqeval.metrics import classification_report, f1_score, precision_score, recall_score
from paddlenlp.transformers import LayoutXLMModel, LayoutXLMTokenizer, LayoutXLMForTokenClassification
from xfun import XFUNDataset
from utils import parse_args
from utils import get_bio_label_maps
logger = logging.getLogger(__name__)
def set_seed(args):
random.seed(args.seed)
np.random.seed(args.seed)
paddle.seed(args.seed)
def train(args):
os.makedirs(args.output_dir, exist_ok=True)
logging.basicConfig(
filename=os.path.join(args.output_dir, "train.log")
if paddle.distributed.get_rank() == 0 else None,
format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
datefmt="%m/%d/%Y %H:%M:%S",
level=logging.INFO
if paddle.distributed.get_rank() == 0 else logging.WARN, )
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)
label2id_map, id2label_map = get_bio_label_maps(args.label_map_path)
pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
# dist mode
if paddle.distributed.get_world_size() > 1:
paddle.distributed.init_parallel_env()
tokenizer = LayoutXLMTokenizer.from_pretrained(args.model_name_or_path)
base_model = LayoutXLMModel.from_pretrained(args.model_name_or_path)
model = LayoutXLMForTokenClassification(
base_model, num_classes=len(label2id_map), dropout=None)
# dist mode
if paddle.distributed.get_world_size() > 1:
model = paddle.DataParallel(model)
train_dataset = XFUNDataset(
tokenizer,
data_dir=args.train_data_dir,
label_path=args.train_label_path,
label2id_map=label2id_map,
img_size=(224, 224),
pad_token_label_id=pad_token_label_id,
contains_re=False,
add_special_ids=False,
return_attention_mask=True,
load_mode='all')
train_sampler = paddle.io.DistributedBatchSampler(
train_dataset, batch_size=args.per_gpu_train_batch_size, shuffle=True)
args.train_batch_size = args.per_gpu_train_batch_size * max(
1, paddle.distributed.get_world_size())
train_dataloader = paddle.io.DataLoader(
train_dataset,
batch_sampler=train_sampler,
num_workers=0,
use_shared_memory=True,
collate_fn=None, )
t_total = len(train_dataloader) * args.num_train_epochs
# build linear decay with warmup lr sch
lr_scheduler = paddle.optimizer.lr.PolynomialDecay(
learning_rate=args.learning_rate,
decay_steps=t_total,
end_lr=0.0,
power=1.0)
if args.warmup_steps > 0:
lr_scheduler = paddle.optimizer.lr.LinearWarmup(
lr_scheduler,
args.warmup_steps,
start_lr=0,
end_lr=args.learning_rate, )
optimizer = paddle.optimizer.AdamW(
learning_rate=lr_scheduler,
parameters=model.parameters(),
epsilon=args.adam_epsilon,
weight_decay=args.weight_decay)
# Train!
logger.info("***** Running training *****")
logger.info(" Num examples = %d", len(train_dataset))
logger.info(" Num Epochs = %d", args.num_train_epochs)
logger.info(" Instantaneous batch size per GPU = %d",
args.per_gpu_train_batch_size)
logger.info(
" Total train batch size (w. parallel, distributed) = %d",
args.train_batch_size * paddle.distributed.get_world_size(), )
logger.info(" Total optimization steps = %d", t_total)
global_step = 0
tr_loss = 0.0
set_seed(args)
best_metrics = None
for epoch_id in range(args.num_train_epochs):
for step, batch in enumerate(train_dataloader):
model.train()
outputs = model(**batch)
# model outputs are always tuple in ppnlp (see doc)
loss = outputs[0]
loss = loss.mean()
logger.info(
"[epoch {}/{}][iter: {}/{}] lr: {:.5f}, train loss: {:.5f}, ".
format(epoch_id, args.num_train_epochs, step,
len(train_dataloader),
lr_scheduler.get_lr(), loss.numpy()[0]))
loss.backward()
tr_loss += loss.item()
optimizer.step()
lr_scheduler.step() # Update learning rate schedule
optimizer.clear_grad()
global_step += 1
if (paddle.distributed.get_rank() == 0 and args.eval_steps > 0 and
global_step % args.eval_steps == 0):
# Log metrics
# Only evaluate when single GPU otherwise metrics may not average well
if paddle.distributed.get_rank(
) == 0 and args.evaluate_during_training:
results, _ = evaluate(
args,
model,
tokenizer,
label2id_map,
id2label_map,
pad_token_label_id, )
if best_metrics is None or results["f1"] >= best_metrics[
"f1"]:
best_metrics = copy.deepcopy(results)
output_dir = os.path.join(args.output_dir, "best_model")
os.makedirs(output_dir, exist_ok=True)
if paddle.distributed.get_rank() == 0:
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
paddle.save(
args,
os.path.join(output_dir, "training_args.bin"))
logger.info("Saving model checkpoint to %s",
output_dir)
logger.info("[epoch {}/{}][iter: {}/{}] results: {}".format(
epoch_id, args.num_train_epochs, step,
len(train_dataloader), results))
if best_metrics is not None:
logger.info("best metrics: {}".format(best_metrics))
if paddle.distributed.get_rank(
) == 0 and args.save_steps > 0 and global_step % args.save_steps == 0:
# Save model checkpoint
output_dir = os.path.join(args.output_dir,
"checkpoint-{}".format(global_step))
os.makedirs(output_dir, exist_ok=True)
if paddle.distributed.get_rank() == 0:
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
paddle.save(args,
os.path.join(output_dir, "training_args.bin"))
logger.info("Saving model checkpoint to %s", output_dir)
return global_step, tr_loss / global_step
def evaluate(args,
model,
tokenizer,
label2id_map,
id2label_map,
pad_token_label_id,
prefix=""):
eval_dataset = XFUNDataset(
tokenizer,
data_dir=args.eval_data_dir,
label_path=args.eval_label_path,
label2id_map=label2id_map,
img_size=(224, 224),
pad_token_label_id=pad_token_label_id,
contains_re=False,
add_special_ids=False,
return_attention_mask=True,
load_mode='all')
args.eval_batch_size = args.per_gpu_eval_batch_size * max(
1, paddle.distributed.get_world_size())
eval_dataloader = paddle.io.DataLoader(
eval_dataset,
batch_size=args.eval_batch_size,
num_workers=0,
use_shared_memory=True,
collate_fn=None, )
# Eval!
logger.info("***** Running evaluation %s *****", prefix)
logger.info(" Num examples = %d", len(eval_dataset))
logger.info(" Batch size = %d", args.eval_batch_size)
eval_loss = 0.0
nb_eval_steps = 0
preds = None
out_label_ids = None
model.eval()
for idx, batch in enumerate(eval_dataloader):
with paddle.no_grad():
outputs = model(**batch)
tmp_eval_loss, logits = outputs[:2]
tmp_eval_loss = tmp_eval_loss.mean()
if paddle.distributed.get_rank() == 0:
logger.info("[Eval]process: {}/{}, loss: {:.5f}".format(
idx, len(eval_dataloader), tmp_eval_loss.numpy()[0]))
eval_loss += tmp_eval_loss.item()
nb_eval_steps += 1
if preds is None:
preds = logits.numpy()
out_label_ids = batch["labels"].numpy()
else:
preds = np.append(preds, logits.numpy(), axis=0)
out_label_ids = np.append(
out_label_ids, batch["labels"].numpy(), axis=0)
eval_loss = eval_loss / nb_eval_steps
preds = np.argmax(preds, axis=2)
# label_map = {i: label.upper() for i, label in enumerate(labels)}
out_label_list = [[] for _ in range(out_label_ids.shape[0])]
preds_list = [[] for _ in range(out_label_ids.shape[0])]
for i in range(out_label_ids.shape[0]):
for j in range(out_label_ids.shape[1]):
if out_label_ids[i, j] != pad_token_label_id:
out_label_list[i].append(id2label_map[out_label_ids[i][j]])
preds_list[i].append(id2label_map[preds[i][j]])
results = {
"loss": eval_loss,
"precision": precision_score(out_label_list, preds_list),
"recall": recall_score(out_label_list, preds_list),
"f1": f1_score(out_label_list, preds_list),
}
with open(os.path.join(args.output_dir, "test_gt.txt"), "w") as fout:
for lbl in out_label_list:
for l in lbl:
fout.write(l + "\t")
fout.write("\n")
with open(os.path.join(args.output_dir, "test_pred.txt"), "w") as fout:
for lbl in preds_list:
for l in lbl:
fout.write(l + "\t")
fout.write("\n")
report = classification_report(out_label_list, preds_list)
logger.info("\n" + report)
logger.info("***** Eval results %s *****", prefix)
for key in sorted(results.keys()):
logger.info(" %s = %s", key, str(results[key]))
return results, preds_list
def print_arguments(args):
"""print arguments"""
print('----------- Configuration Arguments -----------')
for arg, value in sorted(vars(args).items()):
print('%s: %s' % (arg, value))
print('------------------------------------------------')
if __name__ == "__main__":
args = parse_args()
print_arguments(args)
train(args)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
import cv2
import random
import numpy as np
import imghdr
from copy import deepcopy
import paddle
from PIL import Image, ImageDraw, ImageFont
from paddleocr import PaddleOCR
def get_bio_label_maps(label_map_path):
with open(label_map_path, "r") as fin:
lines = fin.readlines()
lines = [line.strip() for line in lines]
if "O" not in lines:
lines.insert(0, "O")
labels = []
for line in lines:
if line == "O":
labels.append("O")
else:
labels.append("B-" + line)
labels.append("I-" + line)
label2id_map = {label: idx for idx, label in enumerate(labels)}
id2label_map = {idx: label for idx, label in enumerate(labels)}
return label2id_map, id2label_map
def get_image_file_list(img_file):
imgs_lists = []
if img_file is None or not os.path.exists(img_file):
raise Exception("not found any img file in {}".format(img_file))
img_end = {'jpg', 'bmp', 'png', 'jpeg', 'rgb', 'tif', 'tiff', 'gif', 'GIF'}
if os.path.isfile(img_file) and imghdr.what(img_file) in img_end:
imgs_lists.append(img_file)
elif os.path.isdir(img_file):
for single_file in os.listdir(img_file):
file_path = os.path.join(img_file, single_file)
if os.path.isfile(file_path) and imghdr.what(file_path) in img_end:
imgs_lists.append(file_path)
if len(imgs_lists) == 0:
raise Exception("not found any img file in {}".format(img_file))
imgs_lists = sorted(imgs_lists)
return imgs_lists
def draw_ser_results(image,
ocr_results,
font_path="../doc/fonts/simfang.ttf",
font_size=18):
np.random.seed(0)
color = (np.random.permutation(range(255)),
np.random.permutation(range(255)),
np.random.permutation(range(255)))
color_map = {
idx: (color[0][idx], color[1][idx], color[2][idx])
for idx in range(1, 255)
}
if isinstance(image, np.ndarray):
image = Image.fromarray(image)
img_new = image.copy()
draw = ImageDraw.Draw(img_new)
font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
for ocr_info in ocr_results:
if ocr_info["pred_id"] not in color_map:
continue
color = color_map[ocr_info["pred_id"]]
# draw ocr results outline
bbox = ocr_info["bbox"]
bbox = ((bbox[0], bbox[1]), (bbox[2], bbox[3]))
draw.rectangle(bbox, fill=color)
# draw ocr results
text = "{}: {}".format(ocr_info["pred"], ocr_info["text"])
start_y = max(0, bbox[0][1] - font_size)
tw = font.getsize(text)[0]
draw.rectangle(
[(bbox[0][0] + 1, start_y), (bbox[0][0] + tw + 1,
start_y + font_size)],
fill=(0, 0, 255))
draw.text(
(bbox[0][0] + 1, start_y), text, fill=(255, 255, 255), font=font)
img_new = Image.blend(image, img_new, 0.5)
return np.array(img_new)
def build_ocr_engine(rec_model_dir, det_model_dir):
ocr_engine = PaddleOCR(
rec_model_dir=rec_model_dir,
det_model_dir=det_model_dir,
use_angle_cls=False)
return ocr_engine
# pad sentences
def pad_sentences(tokenizer,
encoded_inputs,
max_seq_len=512,
pad_to_max_seq_len=True,
return_attention_mask=True,
return_token_type_ids=True,
return_overflowing_tokens=False,
return_special_tokens_mask=False):
# Padding with larger size, reshape is carried out
max_seq_len = (
len(encoded_inputs["input_ids"]) // max_seq_len + 1) * max_seq_len
needs_to_be_padded = pad_to_max_seq_len and \
max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len
if needs_to_be_padded:
difference = max_seq_len - len(encoded_inputs["input_ids"])
if tokenizer.padding_side == 'right':
if return_attention_mask:
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
"input_ids"]) + [0] * difference
if return_token_type_ids:
encoded_inputs["token_type_ids"] = (
encoded_inputs["token_type_ids"] +
[tokenizer.pad_token_type_id] * difference)
if return_special_tokens_mask:
encoded_inputs["special_tokens_mask"] = encoded_inputs[
"special_tokens_mask"] + [1] * difference
encoded_inputs["input_ids"] = encoded_inputs[
"input_ids"] + [tokenizer.pad_token_id] * difference
encoded_inputs["bbox"] = encoded_inputs["bbox"] + [[0, 0, 0, 0]
] * difference
else:
if return_attention_mask:
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
"input_ids"])
return encoded_inputs
def split_page(encoded_inputs, max_seq_len=512):
"""
truncate is often used in training process
"""
for key in encoded_inputs:
encoded_inputs[key] = paddle.to_tensor(encoded_inputs[key])
if encoded_inputs[key].ndim <= 1: # for input_ids, att_mask and so on
encoded_inputs[key] = encoded_inputs[key].reshape([-1, max_seq_len])
else: # for bbox
encoded_inputs[key] = encoded_inputs[key].reshape(
[-1, max_seq_len, 4])
return encoded_inputs
def preprocess(
tokenizer,
ori_img,
ocr_info,
img_size=(224, 224),
pad_token_label_id=-100,
max_seq_len=512,
add_special_ids=False,
return_attention_mask=True, ):
ocr_info = deepcopy(ocr_info)
height = ori_img.shape[0]
width = ori_img.shape[1]
img = cv2.resize(ori_img,
(224, 224)).transpose([2, 0, 1]).astype(np.float32)
segment_offset_id = []
words_list = []
bbox_list = []
input_ids_list = []
token_type_ids_list = []
for info in ocr_info:
# x1, y1, x2, y2
bbox = info["bbox"]
bbox[0] = int(bbox[0] * 1000.0 / width)
bbox[2] = int(bbox[2] * 1000.0 / width)
bbox[1] = int(bbox[1] * 1000.0 / height)
bbox[3] = int(bbox[3] * 1000.0 / height)
text = info["text"]
encode_res = tokenizer.encode(
text, pad_to_max_seq_len=False, return_attention_mask=True)
if not add_special_ids:
# TODO: use tok.all_special_ids to remove
encode_res["input_ids"] = encode_res["input_ids"][1:-1]
encode_res["token_type_ids"] = encode_res["token_type_ids"][1:-1]
encode_res["attention_mask"] = encode_res["attention_mask"][1:-1]
input_ids_list.extend(encode_res["input_ids"])
token_type_ids_list.extend(encode_res["token_type_ids"])
bbox_list.extend([bbox] * len(encode_res["input_ids"]))
words_list.append(text)
segment_offset_id.append(len(input_ids_list))
encoded_inputs = {
"input_ids": input_ids_list,
"token_type_ids": token_type_ids_list,
"bbox": bbox_list,
"attention_mask": [1] * len(input_ids_list),
}
encoded_inputs = pad_sentences(
tokenizer,
encoded_inputs,
max_seq_len=max_seq_len,
return_attention_mask=return_attention_mask)
encoded_inputs = split_page(encoded_inputs)
fake_bs = encoded_inputs["input_ids"].shape[0]
encoded_inputs["image"] = paddle.to_tensor(img).unsqueeze(0).expand(
[fake_bs] + list(img.shape))
encoded_inputs["segment_offset_id"] = segment_offset_id
return encoded_inputs
def postprocess(attention_mask, preds, id2label_map):
if isinstance(preds, paddle.Tensor):
preds = preds.numpy()
preds = np.argmax(preds, axis=2)
preds_list = [[] for _ in range(preds.shape[0])]
# keep batch info
for i in range(preds.shape[0]):
for j in range(preds.shape[1]):
if attention_mask[i][j] == 1:
preds_list[i].append(id2label_map[preds[i][j]])
return preds_list
def merge_preds_list_with_ocr_info(ocr_info, segment_offset_id, preds_list,
label2id_map_for_draw):
# must ensure the preds_list is generated from the same image
preds = [p for pred in preds_list for p in pred]
id2label_map = dict()
for key in label2id_map_for_draw:
val = label2id_map_for_draw[key]
if key == "O":
id2label_map[val] = key
if key.startswith("B-") or key.startswith("I-"):
id2label_map[val] = key[2:]
else:
id2label_map[val] = key
for idx in range(len(segment_offset_id)):
if idx == 0:
start_id = 0
else:
start_id = segment_offset_id[idx - 1]
end_id = segment_offset_id[idx]
curr_pred = preds[start_id:end_id]
curr_pred = [label2id_map_for_draw[p] for p in curr_pred]
if len(curr_pred) <= 0:
pred_id = 0
else:
counts = np.bincount(curr_pred)
pred_id = np.argmax(counts)
ocr_info[idx]["pred_id"] = int(pred_id)
ocr_info[idx]["pred"] = id2label_map[int(pred_id)]
return ocr_info
def parse_args():
parser = argparse.ArgumentParser()
# Required parameters
# yapf: disable
parser.add_argument("--model_name_or_path", default=None, type=str, required=True,)
parser.add_argument("--train_data_dir", default=None, type=str, required=False,)
parser.add_argument("--train_label_path", default=None, type=str, required=False,)
parser.add_argument("--eval_data_dir", default=None, type=str, required=False,)
parser.add_argument("--eval_label_path", default=None, type=str, required=False,)
parser.add_argument("--output_dir", default=None, type=str, required=True,)
parser.add_argument("--max_seq_length", default=512, type=int,)
parser.add_argument("--evaluate_during_training", action="store_true",)
parser.add_argument("--per_gpu_train_batch_size", default=8, type=int, help="Batch size per GPU/CPU for training.",)
parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int, help="Batch size per GPU/CPU for eval.",)
parser.add_argument("--learning_rate", default=5e-5, type=float, help="The initial learning rate for Adam.",)
parser.add_argument("--weight_decay", default=0.0, type=float, help="Weight decay if we apply some.",)
parser.add_argument("--adam_epsilon", default=1e-8, type=float, help="Epsilon for Adam optimizer.",)
parser.add_argument("--max_grad_norm", default=1.0, type=float, help="Max gradient norm.",)
parser.add_argument("--num_train_epochs", default=3, type=int, help="Total number of training epochs to perform.",)
parser.add_argument("--warmup_steps", default=0, type=int, help="Linear warmup over warmup_steps.",)
parser.add_argument("--eval_steps", type=int, default=10, help="eval every X updates steps.",)
parser.add_argument("--save_steps", type=int, default=50, help="Save checkpoint every X updates steps.",)
parser.add_argument("--seed", type=int, default=2048, help="random seed for initialization",)
parser.add_argument("--ocr_rec_model_dir", default=None, type=str, )
parser.add_argument("--ocr_det_model_dir", default=None, type=str, )
parser.add_argument("--label_map_path", default="./labels/labels_ser.txt", type=str, required=False, )
parser.add_argument("--infer_imgs", default=None, type=str, required=False)
parser.add_argument("--ocr_json_path", default=None, type=str, required=False, help="ocr prediction results")
# yapf: enable
args = parser.parse_args()
return args
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import json
import os
import cv2
import numpy as np
import paddle
import copy
from paddle.io import Dataset
__all__ = ["XFUNDataset"]
class XFUNDataset(Dataset):
"""
Example:
print("=====begin to build dataset=====")
from paddlenlp.transformers import LayoutXLMTokenizer
tokenizer = LayoutXLMTokenizer.from_pretrained("/paddle/models/transformers/layoutxlm-base-paddle/")
tok_res = tokenizer.tokenize("Maribyrnong")
# res = tokenizer.convert_ids_to_tokens(val_data["input_ids"][0])
dataset = XfunDatasetForSer(
tokenizer,
data_dir="./zh.val/",
label_path="zh.val/xfun_normalize_val.json",
img_size=(224,224))
print(len(dataset))
data = dataset[0]
print(data.keys())
print("input_ids: ", data["input_ids"])
print("labels: ", data["labels"])
print("token_type_ids: ", data["token_type_ids"])
print("words_list: ", data["words_list"])
print("image shape: ", data["image"].shape)
"""
def __init__(self,
tokenizer,
data_dir,
label_path,
contains_re=False,
label2id_map=None,
img_size=(224, 224),
pad_token_label_id=None,
add_special_ids=False,
return_attention_mask=True,
load_mode='all',
max_seq_len=512):
super().__init__()
self.tokenizer = tokenizer
self.data_dir = data_dir
self.label_path = label_path
self.contains_re = contains_re
self.label2id_map = label2id_map
self.img_size = img_size
self.pad_token_label_id = pad_token_label_id
self.add_special_ids = add_special_ids
self.return_attention_mask = return_attention_mask
self.load_mode = load_mode
self.max_seq_len = max_seq_len
if self.pad_token_label_id is None:
self.pad_token_label_id = paddle.nn.CrossEntropyLoss().ignore_index
self.all_lines = self.read_all_lines()
self.entities_labels = {'HEADER': 0, 'QUESTION': 1, 'ANSWER': 2}
self.return_keys = {
'bbox': 'np',
'input_ids': 'np',
'labels': 'np',
'attention_mask': 'np',
'image': 'np',
'token_type_ids': 'np',
'entities': 'dict',
'relations': 'dict',
}
if load_mode == "all":
self.encoded_inputs_all = self._parse_label_file_all()
def pad_sentences(self,
encoded_inputs,
max_seq_len=512,
pad_to_max_seq_len=True,
return_attention_mask=True,
return_token_type_ids=True,
truncation_strategy="longest_first",
return_overflowing_tokens=False,
return_special_tokens_mask=False):
# Padding
needs_to_be_padded = pad_to_max_seq_len and \
max_seq_len and len(encoded_inputs["input_ids"]) < max_seq_len
if needs_to_be_padded:
difference = max_seq_len - len(encoded_inputs["input_ids"])
if self.tokenizer.padding_side == 'right':
if return_attention_mask:
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
"input_ids"]) + [0] * difference
if return_token_type_ids:
encoded_inputs["token_type_ids"] = (
encoded_inputs["token_type_ids"] +
[self.tokenizer.pad_token_type_id] * difference)
if return_special_tokens_mask:
encoded_inputs["special_tokens_mask"] = encoded_inputs[
"special_tokens_mask"] + [1] * difference
encoded_inputs["input_ids"] = encoded_inputs[
"input_ids"] + [self.tokenizer.pad_token_id] * difference
encoded_inputs["labels"] = encoded_inputs[
"labels"] + [self.pad_token_label_id] * difference
encoded_inputs["bbox"] = encoded_inputs[
"bbox"] + [[0, 0, 0, 0]] * difference
elif self.tokenizer.padding_side == 'left':
if return_attention_mask:
encoded_inputs["attention_mask"] = [0] * difference + [
1
] * len(encoded_inputs["input_ids"])
if return_token_type_ids:
encoded_inputs["token_type_ids"] = (
[self.tokenizer.pad_token_type_id] * difference +
encoded_inputs["token_type_ids"])
if return_special_tokens_mask:
encoded_inputs["special_tokens_mask"] = [
1
] * difference + encoded_inputs["special_tokens_mask"]
encoded_inputs["input_ids"] = [
self.tokenizer.pad_token_id
] * difference + encoded_inputs["input_ids"]
encoded_inputs["labels"] = [
self.pad_token_label_id
] * difference + encoded_inputs["labels"]
encoded_inputs["bbox"] = [
[0, 0, 0, 0]
] * difference + encoded_inputs["bbox"]
else:
if return_attention_mask:
encoded_inputs["attention_mask"] = [1] * len(encoded_inputs[
"input_ids"])
return encoded_inputs
def truncate_inputs(self, encoded_inputs, max_seq_len=512):
for key in encoded_inputs:
if key == "sample_id":
continue
length = min(len(encoded_inputs[key]), max_seq_len)
encoded_inputs[key] = encoded_inputs[key][:length]
return encoded_inputs
def read_all_lines(self, ):
with open(self.label_path, "r") as fin:
lines = fin.readlines()
return lines
def _parse_label_file_all(self):
"""
parse all samples
"""
encoded_inputs_all = []
for line in self.all_lines:
encoded_inputs_all.extend(self._parse_label_file(line))
return encoded_inputs_all
def _parse_label_file(self, line):
"""
parse single sample
"""
image_name, info_str = line.split("\t")
image_path = os.path.join(self.data_dir, image_name)
def add_imgge_path(x):
x['image_path'] = image_path
return x
encoded_inputs = self._read_encoded_inputs_sample(info_str)
if self.contains_re:
encoded_inputs = self._chunk_re(encoded_inputs)
else:
encoded_inputs = self._chunk_ser(encoded_inputs)
encoded_inputs = list(map(add_imgge_path, encoded_inputs))
return encoded_inputs
def _read_encoded_inputs_sample(self, info_str):
"""
parse label info
"""
# read text info
info_dict = json.loads(info_str)
height = info_dict["height"]
width = info_dict["width"]
words_list = []
bbox_list = []
input_ids_list = []
token_type_ids_list = []
gt_label_list = []
if self.contains_re:
# for re
entities = []
relations = []
id2label = {}
entity_id_to_index_map = {}
empty_entity = set()
for info in info_dict["ocr_info"]:
if self.contains_re:
# for re
if len(info["text"]) == 0:
empty_entity.add(info["id"])
continue
id2label[info["id"]] = info["label"]
relations.extend([tuple(sorted(l)) for l in info["linking"]])
# x1, y1, x2, y2
bbox = info["bbox"]
label = info["label"]
bbox[0] = int(bbox[0] * 1000.0 / width)
bbox[2] = int(bbox[2] * 1000.0 / width)
bbox[1] = int(bbox[1] * 1000.0 / height)
bbox[3] = int(bbox[3] * 1000.0 / height)
text = info["text"]
encode_res = self.tokenizer.encode(
text, pad_to_max_seq_len=False, return_attention_mask=True)
gt_label = []
if not self.add_special_ids:
# TODO: use tok.all_special_ids to remove
encode_res["input_ids"] = encode_res["input_ids"][1:-1]
encode_res["token_type_ids"] = encode_res["token_type_ids"][1:
-1]
encode_res["attention_mask"] = encode_res["attention_mask"][1:
-1]
if label.lower() == "other":
gt_label.extend([0] * len(encode_res["input_ids"]))
else:
gt_label.append(self.label2id_map[("b-" + label).upper()])
gt_label.extend([self.label2id_map[("i-" + label).upper()]] *
(len(encode_res["input_ids"]) - 1))
if self.contains_re:
if gt_label[0] != self.label2id_map["O"]:
entity_id_to_index_map[info["id"]] = len(entities)
entities.append({
"start": len(input_ids_list),
"end":
len(input_ids_list) + len(encode_res["input_ids"]),
"label": label.upper(),
})
input_ids_list.extend(encode_res["input_ids"])
token_type_ids_list.extend(encode_res["token_type_ids"])
bbox_list.extend([bbox] * len(encode_res["input_ids"]))
gt_label_list.extend(gt_label)
words_list.append(text)
encoded_inputs = {
"input_ids": input_ids_list,
"labels": gt_label_list,
"token_type_ids": token_type_ids_list,
"bbox": bbox_list,
"attention_mask": [1] * len(input_ids_list),
# "words_list": words_list,
}
encoded_inputs = self.pad_sentences(
encoded_inputs,
max_seq_len=self.max_seq_len,
return_attention_mask=self.return_attention_mask)
encoded_inputs = self.truncate_inputs(encoded_inputs)
if self.contains_re:
relations = self._relations(entities, relations, id2label,
empty_entity, entity_id_to_index_map)
encoded_inputs['relations'] = relations
encoded_inputs['entities'] = entities
return encoded_inputs
def _chunk_ser(self, encoded_inputs):
encoded_inputs_all = []
seq_len = len(encoded_inputs['input_ids'])
chunk_size = 512
for chunk_id, index in enumerate(range(0, seq_len, chunk_size)):
chunk_beg = index
chunk_end = min(index + chunk_size, seq_len)
encoded_inputs_example = {}
for key in encoded_inputs:
encoded_inputs_example[key] = encoded_inputs[key][chunk_beg:
chunk_end]
encoded_inputs_all.append(encoded_inputs_example)
return encoded_inputs_all
def _chunk_re(self, encoded_inputs):
# prepare data
entities = encoded_inputs.pop('entities')
relations = encoded_inputs.pop('relations')
encoded_inputs_all = []
chunk_size = 512
for chunk_id, index in enumerate(
range(0, len(encoded_inputs["input_ids"]), chunk_size)):
item = {}
for k in encoded_inputs:
item[k] = encoded_inputs[k][index:index + chunk_size]
# select entity in current chunk
entities_in_this_span = []
global_to_local_map = {} #
for entity_id, entity in enumerate(entities):
if (index <= entity["start"] < index + chunk_size and
index <= entity["end"] < index + chunk_size):
entity["start"] = entity["start"] - index
entity["end"] = entity["end"] - index
global_to_local_map[entity_id] = len(entities_in_this_span)
entities_in_this_span.append(entity)
# select relations in current chunk
relations_in_this_span = []
for relation in relations:
if (index <= relation["start_index"] < index + chunk_size and
index <= relation["end_index"] < index + chunk_size):
relations_in_this_span.append({
"head": global_to_local_map[relation["head"]],
"tail": global_to_local_map[relation["tail"]],
"start_index": relation["start_index"] - index,
"end_index": relation["end_index"] - index,
})
item.update({
"entities": reformat(entities_in_this_span),
"relations": reformat(relations_in_this_span),
})
item['entities']['label'] = [
self.entities_labels[x] for x in item['entities']['label']
]
encoded_inputs_all.append(item)
return encoded_inputs_all
def _relations(self, entities, relations, id2label, empty_entity,
entity_id_to_index_map):
"""
build relations
"""
relations = list(set(relations))
relations = [
rel for rel in relations
if rel[0] not in empty_entity and rel[1] not in empty_entity
]
kv_relations = []
for rel in relations:
pair = [id2label[rel[0]], id2label[rel[1]]]
if pair == ["question", "answer"]:
kv_relations.append({
"head": entity_id_to_index_map[rel[0]],
"tail": entity_id_to_index_map[rel[1]]
})
elif pair == ["answer", "question"]:
kv_relations.append({
"head": entity_id_to_index_map[rel[1]],
"tail": entity_id_to_index_map[rel[0]]
})
else:
continue
relations = sorted(
[{
"head": rel["head"],
"tail": rel["tail"],
"start_index": get_relation_span(rel, entities)[0],
"end_index": get_relation_span(rel, entities)[1],
} for rel in kv_relations],
key=lambda x: x["head"], )
return relations
def load_img(self, image_path):
# read img
img = cv2.imread(image_path)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
resize_h, resize_w = self.img_size
im_shape = img.shape[0:2]
im_scale_y = resize_h / im_shape[0]
im_scale_x = resize_w / im_shape[1]
img_new = cv2.resize(
img, None, None, fx=im_scale_x, fy=im_scale_y, interpolation=2)
mean = np.array([0.485, 0.456, 0.406])[np.newaxis, np.newaxis, :]
std = np.array([0.229, 0.224, 0.225])[np.newaxis, np.newaxis, :]
img_new = img_new / 255.0
img_new -= mean
img_new /= std
img = img_new.transpose((2, 0, 1))
return img
def __getitem__(self, idx):
if self.load_mode == "all":
data = copy.deepcopy(self.encoded_inputs_all[idx])
else:
data = self._parse_label_file(self.all_lines[idx])[0]
image_path = data.pop('image_path')
data["image"] = self.load_img(image_path)
return_data = {}
for k, v in data.items():
if k in self.return_keys:
if self.return_keys[k] == 'np':
v = np.array(v)
return_data[k] = v
return return_data
def __len__(self, ):
if self.load_mode == "all":
return len(self.encoded_inputs_all)
else:
return len(self.all_lines)
def get_relation_span(rel, entities):
bound = []
for entity_index in [rel["head"], rel["tail"]]:
bound.append(entities[entity_index]["start"])
bound.append(entities[entity_index]["end"])
return min(bound), max(bound)
def reformat(data):
new_data = {}
for item in data:
for k, v in item.items():
if k not in new_data:
new_data[k] = []
new_data[k].append(v)
return new_data
...@@ -201,8 +201,11 @@ fi ...@@ -201,8 +201,11 @@ fi
if [ ${MODE} = "serving_infer" ];then if [ ${MODE} = "serving_infer" ];then
# prepare serving env # prepare serving env
python_name=$(func_parser_value "${lines[2]}") python_name_list=$(func_parser_value "${lines[2]}")
wget https://paddle-serving.bj.bcebos.com/chain/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl IFS='|'
array=(${python_name_list})
python_name=${array[0]}
wget -nc https://paddle-serving.bj.bcebos.com/chain/paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
${python_name} -m pip install install paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl ${python_name} -m pip install install paddle_serving_server_gpu-0.0.0.post101-py3-none-any.whl
${python_name} -m pip install paddle_serving_client==0.6.1 ${python_name} -m pip install paddle_serving_client==0.6.1
${python_name} -m pip install paddle-serving-app==0.6.3 ${python_name} -m pip install paddle-serving-app==0.6.3
......
# 飞桨训推一体认证(TIPC) # 飞桨训推一体全流程(TIPC)
## 1. 简介 ## 1. 简介
飞桨除了基本的模型训练和预测,还提供了支持多端多平台的高性能推理部署工具。本文档提供了PaddleOCR中所有模型的飞桨训推一体认证 (Training and Inference Pipeline Certification(TIPC)) 信息和测试工具,方便用户查阅每种模型的训练推理部署打通情况,并可以进行一键测试。 飞桨除了基本的模型训练和预测,还提供了支持多端多平台的高性能推理部署工具。本文档提供了PaddleOCR中所有模型的飞桨训推一体全流程(Training and Inference Pipeline Criterion(TIPC)信息和测试工具,方便用户查阅每种模型的训练推理部署打通情况,并可以进行一键测试。
<div align="center"> <div align="center">
<img src="docs/guide.png" width="1000"> <img src="docs/guide.png" width="1000">
......
...@@ -10,7 +10,7 @@ lines=(${dataline}) ...@@ -10,7 +10,7 @@ lines=(${dataline})
# parser serving # parser serving
model_name=$(func_parser_value "${lines[1]}") model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}") python_list=$(func_parser_value "${lines[2]}")
trans_model_py=$(func_parser_value "${lines[3]}") trans_model_py=$(func_parser_value "${lines[3]}")
infer_model_dir_key=$(func_parser_key "${lines[4]}") infer_model_dir_key=$(func_parser_key "${lines[4]}")
infer_model_dir_value=$(func_parser_value "${lines[4]}") infer_model_dir_value=$(func_parser_value "${lines[4]}")
...@@ -54,14 +54,15 @@ function func_serving(){ ...@@ -54,14 +54,15 @@ function func_serving(){
set_serving_server=$(func_set_params "${serving_server_key}" "${serving_server_value}") set_serving_server=$(func_set_params "${serving_server_key}" "${serving_server_value}")
set_serving_client=$(func_set_params "${serving_client_key}" "${serving_client_value}") set_serving_client=$(func_set_params "${serving_client_key}" "${serving_client_value}")
set_image_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}") set_image_dir=$(func_set_params "${image_dir_key}" "${image_dir_value}")
trans_model_cmd="${python} ${trans_model_py} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_serving_server} ${set_serving_client}" python_list=(${python_list})
trans_model_cmd="${python_list[0]} ${trans_model_py} ${set_dirname} ${set_model_filename} ${set_params_filename} ${set_serving_server} ${set_serving_client}"
eval $trans_model_cmd eval $trans_model_cmd
cd ${serving_dir_value} cd ${serving_dir_value}
echo $PWD echo $PWD
unset https_proxy unset https_proxy
unset http_proxy unset http_proxy
for python in ${python[*]}; do for python in ${python_list[*]}; do
if [ ${python} = "cpp"]; then if [ ${python} = "cpp" ]; then
for use_gpu in ${web_use_gpu_list[*]}; do for use_gpu in ${web_use_gpu_list[*]}; do
if [ ${use_gpu} = "null" ]; then if [ ${use_gpu} = "null" ]; then
web_service_cpp_cmd="${python} -m paddle_serving_server.serve --model ppocr_det_mobile_2.0_serving/ ppocr_rec_mobile_2.0_serving/ --port 9293" web_service_cpp_cmd="${python} -m paddle_serving_server.serve --model ppocr_det_mobile_2.0_serving/ ppocr_rec_mobile_2.0_serving/ --port 9293"
...@@ -91,9 +92,6 @@ function func_serving(){ ...@@ -91,9 +92,6 @@ function func_serving(){
echo ${ues_gpu} echo ${ues_gpu}
if [ ${use_gpu} = "null" ]; then if [ ${use_gpu} = "null" ]; then
for use_mkldnn in ${web_use_mkldnn_list[*]}; do for use_mkldnn in ${web_use_mkldnn_list[*]}; do
if [ ${use_mkldnn} = "False" ]; then
continue
fi
for threads in ${web_cpu_threads_list[*]}; do for threads in ${web_cpu_threads_list[*]}; do
set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}") set_cpu_threads=$(func_set_params "${web_cpu_threads_key}" "${threads}")
web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &" web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${web_use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} &"
...@@ -124,6 +122,9 @@ function func_serving(){ ...@@ -124,6 +122,9 @@ function func_serving(){
continue continue
fi fi
set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}") set_tensorrt=$(func_set_params "${web_use_trt_key}" "${use_trt}")
if [ ${use_trt} = True ]; then
device_type=2
fi
set_precision=$(func_set_params "${web_precision_key}" "${precision}") set_precision=$(func_set_params "${web_precision_key}" "${precision}")
web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} & " web_service_cmd="${python} ${web_service_py} ${web_use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} & "
eval $web_service_cmd eval $web_service_cmd
......
...@@ -271,8 +271,13 @@ def create_predictor(args, mode, logger): ...@@ -271,8 +271,13 @@ def create_predictor(args, mode, logger):
min_input_shape = {"x": [1, 3, 10, 10]} min_input_shape = {"x": [1, 3, 10, 10]}
max_input_shape = {"x": [1, 3, 512, 512]} max_input_shape = {"x": [1, 3, 512, 512]}
opt_input_shape = {"x": [1, 3, 256, 256]} opt_input_shape = {"x": [1, 3, 256, 256]}
config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, if mode == "rec":
opt_input_shape) if args.rec_algorithm == "CRNN":
config.set_trt_dynamic_shape_info(
min_input_shape, max_input_shape, opt_input_shape)
else:
config.set_trt_dynamic_shape_info(
min_input_shape, max_input_shape, opt_input_shape)
else: else:
config.disable_gpu() config.disable_gpu()
...@@ -311,7 +316,10 @@ def create_predictor(args, mode, logger): ...@@ -311,7 +316,10 @@ def create_predictor(args, mode, logger):
def get_infer_gpuid(): def get_infer_gpuid():
if not paddle.fluid.core.is_compiled_with_rocm():
cmd = "env | grep CUDA_VISIBLE_DEVICES" cmd = "env | grep CUDA_VISIBLE_DEVICES"
else:
cmd = "env | grep HIP_VISIBLE_DEVICES"
env_cuda = os.popen(cmd).readlines() env_cuda = os.popen(cmd).readlines()
if len(env_cuda) == 0: if len(env_cuda) == 0:
return 0 return 0
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment