# Client code for calling the service
import requests
import json

def send_msg(requestData):
    url = 'http://localhost:8082/recommendinfo'
    headers = {'content-type': 'application/json'}
    ret = requests.post(url, json=requestData, headers=headers, stream=True)
    if ret.status_code == 200:
        text = json.loads(ret.text)
        return text

send_msg({'input': ['我的心情很好', '我很生气']})
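# Expected response shape, based on the server code below (values illustrative):
# {"code": 0, "requestid": "...", "errmsg": "", "total": 2,
#  "recResults": [{"我的心情很好": "positive"}, {"我很生气": "negative"}]}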
model:
  bert_model: E:/Github/bert4torch/examples/serving/sanic_server/files/bert_cls.onnx
  vocab: F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/vocab.txt
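# These keys are consumed by src/model/model.py via constants.CONFIG_MODEL_KEY,
# CONFIG_MODEL_PATH and CONFIG_MODEL_VOCAB; point them at your local files.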
version: 1
log_path: E:/Github/bert4torch/examples/serving/sanic_server/logs
formatters:
  simple:
    format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  consolefmt:
    format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
  tracefmt:
    format: '%(asctime)s - %(filename)s - [line:%(lineno)d] %(levelname)s %(message)s'
    datefmt: '%Y-%m-%d %H:%M:%S'
handlers:
  console:
    class: logging.StreamHandler
    formatter: consolefmt
    level: WARN
    stream: ext://sys.stdout
  outloggerfile:
    class: logging.handlers.RotatingFileHandler
    formatter: simple
    level: WARN
    filename: E:/Github/bert4torch/examples/serving/sanic_server/logs/out.log
    maxBytes: 3145728
    encoding: utf8
    backupCount: 10
  traceloggerfile:
    class: logging.handlers.TimedRotatingFileHandler
    formatter: tracefmt
    level: INFO
    filename: E:/Github/bert4torch/examples/serving/sanic_server/logs/trace.log
    backupCount: 10
    encoding: utf8
    interval: 1
    when: MIDNIGHT
  errorloggerfile:
    class: logging.handlers.TimedRotatingFileHandler
    formatter: simple
    level: INFO
    filename: E:/Github/bert4torch/examples/serving/sanic_server/logs/error.log
    backupCount: 10
    encoding: utf8
    interval: 1
    when: MIDNIGHT
loggers:
  tracelogger:
    level: INFO
    handlers: [traceloggerfile]
    propagate: no
  outlogger:
    level: INFO
    propagate: no
  errorlogger:
    level: WARNING
    handlers: [console, errorloggerfile]
    propagate: no
root:
  level: DEBUG
  handlers: [console, outloggerfile]
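# Note: 'outlogger' declares no handlers and does not propagate, so records sent to
# it are discarded by dictConfig; adding e.g. `handlers: [outloggerfile]` would persist them.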
from sanic import Sanic
from typing import Optional, Text
import src.config.constants as constants
import src.utils.loggers as loggers
import json

def create_app(confs: Optional[Text] = None):
    from src.utils.configs import Configuration
    Configuration.configurations = Configuration.read_config_file(confs + '/configurations.yml')
    loggers.get_out_log().info("configurations: {}.".format(json.dumps(Configuration.configurations)))
    from src.utils.loggers import configure_file_logging
    configure_file_logging(confs)
    app = Sanic(__name__)
    register_view(app)
    return app

def register_view(app):
    from src.view.view import setup_model, health_check, process_rec_info
    from src.model.model import BertModel
    app.modelSortLightGBM = BertModel()
    app.register_listener(setup_model, "before_server_start")
    # app.add_task()  # background tasks could be registered here
    app.add_route(handler=health_check, uri="/", methods={"GET"})
    # The GET route demonstrates the error case and how it is logged; the POST route demonstrates the normal flow.
    app.add_route(handler=process_rec_info, uri="/recommendinfo", methods={"POST"})

def start_server(confs: Optional[Text] = None, port: int = constants.DEFAULT_SERVER_PORT):
    server = create_app(confs)
    protocol = "http"
    loggers.get_out_log().info(
        "Starting server on "
        "{}".format(constants.DEFAULT_SERVER_FORMAT.format(protocol, port))
    )
    server.run(host='0.0.0.0', port=port, debug=False, workers=1)

if __name__ == "__main__":
    start_server(confs='E:/Github/bert4torch/examples/serving/sanic_server/conf')
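# A quick smoke test once the server is up (hypothetical session; the entry file
# name is assumed):
#   $ python app.py
#   $ curl -X POST http://localhost:8082/recommendinfo \
#          -H 'Content-Type: application/json' \
#          -d '{"input": ["我的心情很好", "我很生气"]}'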
DEFAULT_SERVER_FORMAT = "{}://localhost:{}"
DEFAULT_SERVER_PORT = 8082
CONFIG_MODEL_KEY = 'model'
CONFIG_MODEL_PATH = "bert_model"
CONFIG_MODEL_VOCAB = "vocab"
from src.utils import loggers
import src.config.constants as constants
from src.utils.configs import Configuration
import traceback
from bert4torch.tokenizers import Tokenizer
from bert4torch.snippets import sequence_padding
import numpy as np

class BertModel():
    def __init__(self):
        rec_info_config = Configuration.configurations.get(constants.CONFIG_MODEL_KEY)
        self.model_path = rec_info_config.get(constants.CONFIG_MODEL_PATH)
        self.vocab_path = rec_info_config.get(constants.CONFIG_MODEL_VOCAB)
        self.mapping = {0: 'negative', 1: 'positive'}

    def load_model(self):
        try:
            import onnxruntime
            self.model = onnxruntime.InferenceSession(self.model_path)
            self.tokenizer = Tokenizer(self.vocab_path, do_lower_case=True)
        except Exception:
            loggers.get_error_log().error("An exception occurred while loading the model: {}".format(traceback.format_exc()))

    async def process(self, user_inputs):
        user_inputs = [user_inputs] if isinstance(user_inputs, str) else user_inputs
        input_ids, segment_ids = self.tokenizer.encode(user_inputs)
        input_ids = sequence_padding(input_ids).astype('int64')
        segment_ids = sequence_padding(segment_ids).astype('int64')
        # run inference and map the argmax class ids to labels
        ort_inputs = {self.model.get_inputs()[0].name: input_ids,
                      self.model.get_inputs()[1].name: segment_ids}
        ort_outs = self.model.run(None, ort_inputs)
        ort_outs = list(np.argmax(ort_outs[0], axis=1))
        return [{k: v} for k, v in zip(user_inputs, [self.mapping[i] for i in ort_outs])]
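# Minimal standalone usage sketch (assumes configurations.yml has already been
# loaded, as create_app() does; the asyncio import is ours):
#   import asyncio
#   Configuration.configurations = Configuration.read_config_file('conf/configurations.yml')
#   m = BertModel(); m.load_model()
#   print(asyncio.run(m.process(['我的心情很好'])))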
# -*- coding: utf-8 -*-
# @Author : LUYADONG977
from typing import Text, Dict, Any, Union, List
from ruamel import yaml
# import ruamel_yaml as yaml

class Configuration(object):
    configurations = {}

    @staticmethod
    def fix_yaml_loader() -> None:
        """Ensure that any string read by yaml is represented as unicode."""
        def construct_yaml_str(self, node):
            # Override the default string handling function
            # to always return unicode objects
            return self.construct_scalar(node)

        yaml.Loader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str)
        yaml.SafeLoader.add_constructor("tag:yaml.org,2002:str", construct_yaml_str)

    @staticmethod
    def replace_environment_variables():
        """Enable the yaml loader to process environment variables in the yaml."""
        import re
        import os
        # e.g. ${USER_NAME}, ${PASSWORD}
        env_var_pattern = re.compile(r"^(.*)\$\{(.*)\}(.*)$")
        yaml.add_implicit_resolver("!env_var", env_var_pattern)

        def env_var_constructor(loader, node):
            """Process environment variables found in the YAML."""
            value = loader.construct_scalar(node)
            expanded_vars = os.path.expandvars(value)
            if "$" in expanded_vars:
                not_expanded = [w for w in expanded_vars.split() if "$" in w]
                raise ValueError(
                    "Error when trying to expand the environment variables"
                    " in '{}'. Please make sure to also set these environment"
                    " variables: '{}'.".format(value, not_expanded)
                )
            return expanded_vars

        yaml.SafeConstructor.add_constructor("!env_var", env_var_constructor)

    @staticmethod
    def read_yaml(content: Text) -> Union[List[Any], Dict[Text, Any]]:
        """Parses yaml from a text.

        Args:
            content: A text containing yaml content.
        """
        Configuration.fix_yaml_loader()
        Configuration.replace_environment_variables()
        yaml_parser = yaml.YAML(typ="safe")
        yaml_parser.version = "1.2"
        yaml_parser.unicode_supplementary = True
        # noinspection PyUnresolvedReferences
        try:
            return yaml_parser.load(content) or {}
        except yaml.scanner.ScannerError:
            # A `ruamel.yaml.scanner.ScannerError` might happen due to escaped
            # unicode sequences that form surrogate pairs. Try converting the input
            # to a parsable format based on
            # https://stackoverflow.com/a/52187065/3429596.
            content = (
                content.encode("utf-8")
                .decode("raw_unicode_escape")
                .encode("utf-16", "surrogatepass")
                .decode("utf-16")
            )
            return yaml_parser.load(content) or {}

    @staticmethod
    def read_file(filename: Text, encoding: Text = "utf-8") -> Any:
        """Read text from a file."""
        try:
            with open(filename, encoding=encoding) as f:
                return f.read()
        except FileNotFoundError:
            raise ValueError("File '{}' does not exist.".format(filename))

    @staticmethod
    def read_config_file(filename: Text) -> Dict[Text, Any]:
        """Parses a yaml configuration file. Content needs to be a dictionary.

        Args:
            filename: The path to the file which should be read.
        """
        content = Configuration.read_yaml(Configuration.read_file(filename, "utf-8"))
        if content is None:
            return {}
        elif isinstance(content, dict):
            return content
        else:
            raise ValueError(
                "Tried to load invalid config file '{}'. "
                "Expected a key value mapping but found {}"
                ".".format(filename, type(content))
            )

    @classmethod
    def get(cls, configname, def_val):
        if configname in cls.configurations:
            return cls.configurations[configname]
        else:
            return def_val

class ConfigurationUtils:
    configurations = {}

    def __init__(self):
        pass

    @classmethod
    def get_port(cls):
        return cls.configurations["port"]

    @classmethod
    def get_zk_path_midintent(cls):
        return cls.configurations["zk_path_mid"]

    @classmethod
    def get_zk_path_entity(cls):
        return cls.configurations["zk_path_entity"]

    @classmethod
    def get_config(cls, config_name, def_val):
        if config_name in cls.configurations:
            return cls.configurations[config_name]
        else:
            return def_val

    @classmethod
    def get_nas_path(cls):
        return cls.configurations["model_path"]

if __name__ == '__main__':
    import src.config.constants as constants
    conf = Configuration.read_config_file('/Users/lvqi034/PycharmProjects/rec_news/conf/configurations.yml')
    bi_conf = conf.get(constants.CONFIG_BI_MODEL_KEY)
    model_path = bi_conf.get(constants.CONFIG_BI_MODEL_PATH_KEY)
    print(model_path)
    print(bi_conf.get("poolid")[0:5])
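# Sketch of the ${VAR} substitution enabled by replace_environment_variables()
# (hypothetical variable name):
#   os.environ['MODEL_DIR'] = '/models'
#   Configuration.read_yaml('model_path: ${MODEL_DIR}/bert_cls.onnx')
#   # -> {'model_path': '/models/bert_cls.onnx'}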
# -*- coding: utf-8 -*-
import logging
import logging.config
from typing import Optional, Text
from src.utils.configs import Configuration

TRACE_LOG = "tracelogger"
ERROR_LOG = "errorlogger"
OUT_LOG = "outlogger"
logger = logging.getLogger(__name__)
LOG_PATH_KEY = "log_path"

def configure_file_logging(config_path: Optional[Text]):
    if config_path is None:
        return
    log_config = Configuration.read_config_file(config_path + "/logger.yml")
    import os
    if LOG_PATH_KEY in log_config:
        log_path = log_config[LOG_PATH_KEY]
        if not os.path.exists(log_path):
            os.makedirs(log_path)  # create the log directory
        log_config.pop(LOG_PATH_KEY)
    # logging.config.dictConfig(codecs.open(config_path + "\logger.yml", 'r', 'utf-8').read())
    logging.config.dictConfig(log_config)

def get_trace_log():
    return logging.getLogger(TRACE_LOG)

def get_error_log():
    return logging.getLogger(ERROR_LOG)

def get_out_log():
    return logging.getLogger(OUT_LOG)

if __name__ == '__main__':
    pass
# -*- coding: utf-8 -*-
import time
import copy
from typing import Optional, Text
from src.utils.configs import Configuration
from src.utils import loggers

class TraceLog(object):
    def __init__(self):
        self.st = 0
        self.ed = 0
        self.costT = 0
        self.startT = 0
        self.rid = None
        self.userid = None
        self.input_sent = None
        self.req = None
        self.modelresult = None
        self.modelresultlen = None
        self.cost_detail = None
        self.ans = None
        self.e = None
        self._begin()
        self.version = None

    def _begin(self):
        self.st = time.time()

    def _end(self):
        self.ed = time.time()

    def modelResults(self, result: Optional[Text]):
        trace_model = copy.deepcopy(result)
        self.modelresult = trace_model

    def costDetail(self, cost_detail: Optional[Text]):
        '''Detailed per-step timing.
        '''
        self.cost_detail = cost_detail

    def modelResultsLen(self, len: Optional[Text]):
        self.modelresultlen = len

    def requestId(self, rid: Optional[Text]):
        self.rid = rid

    def inputSent(self, input_sent: Optional[Text]):
        self.input_sent = input_sent

    def requestEntity(self, req: Optional[Text]):
        self.req = req

    def responseEntity(self, ans: Optional[Text]):
        self.ans = ans

    def apiVersion(self, v: Optional[Text]):
        self.version = v

    def exception(self, e: Exception):
        self.e = e

    def _log(self):
        self.costT = "%.2fms" % ((self.ed - self.st) * 1000)
        loggers.get_trace_log().info(self._obj2json())

    def start_log(self):
        data_head = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(self.st))
        data_secs = (self.st - int(self.st)) * 1000
        self.startT = "%s.%03d" % (data_head, data_secs)
        loggers.get_trace_log().info(self._obj2json())

    def end_log(self):
        self._end()
        self._log()

    def _obj2json(self):
        trace_log_bean = {"cost": self.costT} if self.costT else {"startT": self.startT}
        if self.version:
            trace_log_bean['api_version'] = self.version
        if self.rid:
            trace_log_bean['rid'] = self.rid
        if self.userid:
            trace_log_bean['userid'] = self.userid
        if self.input_sent:
            trace_log_bean['input_sent'] = self.input_sent
        if self.req:
            trace_log_bean['req'] = self.req
        if self.cost_detail:
            trace_log_bean['cost_detail'] = self.cost_detail
        if self.modelresult:
            trace_log_bean['modelresult'] = self.modelresult
        if self.modelresultlen:
            trace_log_bean['modelresultlen'] = self.modelresultlen
        if self.ans:
            trace_log_bean['ans'] = self.ans
        if self.e:
            trace_log_bean['e'] = self.e
        return trace_log_bean

if __name__ == "__main__":
    Configuration.configurations = Configuration.read_config_file("/Users/lvqi034/PycharmProjects/base/configs/logger.yml")
    tracelog = TraceLog()
    tracelog.requestId("12345")
    tracelog.end_log()
from sanic import response
from src.utils.trace_log import TraceLog
from src.utils import loggers
import traceback
import uuid
import time

async def setup_model(app, loop):
    loggers.get_out_log().info("----------setup model-------------")
    global model
    model = app.modelSortLightGBM
    model.load_model()
    loggers.get_out_log().info("----------done setup model-------------")

async def health_check(request):
    return response.json({"status": "ok"})

async def process_rec_info(request):
    tracelog = TraceLog()
    rid = None  # keep rid defined even if parsing the request fails
    try:
        data = request.json
        rid = data.get("requestid", uuid.uuid4().hex)
        input_sent = data.get("input")
        # todo: log params
        tracelog.apiVersion(1)
        tracelog.requestId(rid)
        tracelog.inputSent(input_sent)
        tracelog.start_log()
        # model inference
        all_start = time.time()
        cost_detail = {}
        finalresult = await model.process(input_sent)
        cost_detail['all_process'] = (time.time() - all_start) * 1000
        tracelog.costDetail(cost_detail)
        tracelog.modelResults(finalresult)
        tracelog.modelResultsLen(len(finalresult))
        ret = {
            "code": 0,
            "requestid": rid,
            "errmsg": "",
            "total": len(finalresult),
            "recResults": finalresult
        }
    except Exception as e:
        loggers.get_error_log().error("error occurred in recommend infos {}".format(traceback.format_exc()))
        t = "{}".format(e)
        ret = {
            "code": -1,
            "requestid": rid,
            "errmsg": f"{t}",
            "total": 0,
            "recResults": [{}]
        }
        tracelog.exception(t)
    tracelog.responseEntity(ret)
    tracelog.end_log()
    return response.json(ret)
# Convert the model to ONNX and run inference to verify the result
# By default torch.onnx.export only traces forward(), so rename predict to forward before exporting
import numpy as np
import torch.onnx
import os
import torch
import torch.nn as nn
from bert4torch.snippets import get_pool_emb
from bert4torch.tokenizers import Tokenizer
from bert4torch.models import build_transformer_model, BaseModel
import time
from tqdm import tqdm

config_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/bert_config.json'
checkpoint_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/pytorch_model.bin'
dict_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/vocab.txt'
tokenizer = Tokenizer(dict_path, do_lower_case=True)

class Model(BaseModel):
    def __init__(self, pool_method='cls') -> None:
        super().__init__()
        self.pool_method = pool_method
        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
        self.dropout = nn.Dropout(0.1)
        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)

    def forward(self, token_ids, segment_ids):
        self.eval()
        with torch.no_grad():
            hidden_states, pooling = self.bert([token_ids, segment_ids])
            pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
            output = self.dropout(pooled_output)
            output = nn.Softmax(dim=-1)(self.dense(output))
        return output

torch_model = Model()
torch_model.load_weights('E:/Github/bert4torch/examples/sentence_classfication/best_cls_model.pt')

# model input
sentence = '你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门去转转呢。你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门去转转呢。你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门去转转呢。你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门。'
input_ids, segment_ids = tokenizer.encode(sentence)
input_ids = torch.tensor([input_ids])
segment_ids = torch.tensor([segment_ids])
torch_out = torch_model(input_ids, segment_ids)

# export to ONNX
if not os.path.exists("bert_cls.onnx"):
    torch.onnx.export(torch_model,               # model being run
                      (input_ids, segment_ids),  # model input (or a tuple for multiple inputs)
                      "bert_cls.onnx",           # where to save the model (can be a file or file-like object)
                      export_params=True,        # store the trained parameter weights inside the model file
                      opset_version=11,          # the ONNX version to export the model to
                      do_constant_folding=True,  # whether to execute constant folding for optimization
                      input_names=['input_ids', 'segment_ids'],  # the model's input names
                      output_names=['output'],   # the model's output names
                      dynamic_axes={'input_ids': {0: 'batch_size', 1: 'seq_len'},    # variable length axes
                                    'segment_ids': {0: 'batch_size', 1: 'seq_len'},  # variable length axes
                                    'output': {0: 'batch_size', 1: 'seq_len'}})

# validate the exported model
import onnx
onnx_model = onnx.load("bert_cls.onnx")
onnx.checker.check_model(onnx_model)

# run inference with onnxruntime
import onnxruntime
ort_session = onnxruntime.InferenceSession("bert_cls.onnx")

def to_numpy(tensor):
    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()

# compute the ONNX Runtime output
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(input_ids),
              ort_session.get_inputs()[1].name: to_numpy(segment_ids)}
ort_outs = ort_session.run(None, ort_inputs)

# compare the ONNX Runtime and PyTorch results
np.testing.assert_allclose(to_numpy(torch_out), ort_outs[0], rtol=1e-03, atol=1e-05)
print("Exported model has been tested with ONNXRuntime, and the result looks good!")
print('torch_out: ', torch_out[0])
print('ort_outs: ', ort_outs[0][0])

# ===================================== benchmark the two
# torch cpu
steps = 100
start = time.time()
for i in tqdm(range(steps)):
    torch_model(input_ids, segment_ids)
print('pytorch cpu: ', (time.time()-start)*1000/steps, ' ms')

# torch gpu
torch_model = torch_model.to('cuda')
input_ids = input_ids.to('cuda')
segment_ids = segment_ids.to('cuda')
start = time.time()
for i in tqdm(range(steps)):
    torch_model(input_ids, segment_ids)
print('pytorch gpu: ', (time.time()-start)*1000/steps, ' ms')

# onnx cpu
start = time.time()
for i in tqdm(range(steps)):
    ort_session.run(None, ort_inputs)
print('onnx_runtime cpu: ', (time.time()-start)*1000/steps, ' ms')
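# Note: the InferenceSession above runs on CPU by default; with the onnxruntime-gpu
# package one can request GPU inference instead, e.g. (a sketch):
#   ort_session = onnxruntime.InferenceSession("bert_cls.onnx", providers=['CUDAExecutionProvider'])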
# ONNX + TensorRT
This document uses binary sentiment classification as the example and deploys the model with ONNX + TensorRT.
## 1. Converting pytorch weights to onnx
1. First run the [sentiment classification task](https://github.com/Tongjilibo/bert4torch/blob/master/examples/sentence_classfication/task_sentiment_classification.py) and save the pytorch weights.
2. The conversion uses pytorch's built-in `torch.onnx.export()`; the conversion script is [ONNX转换bert权重](https://github.com/Tongjilibo/bert4torch/blob/master/examples/serving/task_bert_cls_onnx.py).
## 2. Installing the tensorrt environment
This follows the semi-automatic installation flow in [TensorRT 8.2.1.8 安装笔记(超全超详细)|Docker 快速搭建 TensorRT 环境](https://zhuanlan.zhihu.com/p/446477459); the source document can also be read directly.
1. Pull the matching image from the official registry (choose it according to your cuda version)
```shell
docker pull nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04
```
2. Run the image / create a container
```shell
docker run -it --name trt_test --gpus all -v /home/tensorrt:/tensorrt nvidia/cuda:11.3.0-cudnn8-devel-ubuntu20.04 /bin/bash
```
3. [Download the TensorRT package](https://developer.nvidia.com/zh-cn/tensorrt); this step requires registering an account. I downloaded `TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz`
4. Back in the container, install TensorRT (cd into the container's tensorrt directory and unpack the tar package downloaded above)
```shell
tar -zxvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz
```
5. Add the environment variables
```shell
# install vim
apt-get update
apt-get install vim
vim ~/.bashrc
export LD_LIBRARY_PATH=/tensorrt/TensorRT-8.4.1.5/lib:$LD_LIBRARY_PATH
source ~/.bashrc
```
6. Install python (after installing, run `python` to check the installed version; it is needed in the next step)
```shell
apt-get install -y --no-install-recommends \
python3 \
python3-pip \
python3-dev \
python3-wheel &&\
cd /usr/local/bin &&\
ln -s /usr/bin/python3 python &&\
ln -s /usr/bin/pip3 pip;
```
7. pip-install the matching TensorRT package
Note: be sure to locally pip-install, from the unpacked tar, the whl that matches your python version
```shell
cd TensorRT-8.4.1.5/python/
pip3 install tensorrt-8.2.1.8-cp36-none-linux_x86_64.whl
```
8. Test the TensorRT python interface
```python
import tensorrt
print(tensorrt.__version__)
```
## 3. Converting the onnx model to a trt engine
- Conversion command
```shell
./trtexec --onnx=/tensorrt/bert_cls.onnx --saveEngine=/tensorrt/bert_cls.trt --minShapes=input_ids:1x512,segment_ids:1x512 --optShapes=input_ids:1x512,segment_ids:1x512 --maxShapes=input_ids:20x512,segment_ids:20x512 --device=0
```
- Notes: 1) in testing, making both the batch_size and seq_len dimensions dynamic was very slow (100ms+), so only the batch_size dimension is kept dynamic and seq_len is always padded to 512; 2) [reference](https://github.com/NVIDIA/TENSORRT/issues/976)
## 4. Loading the engine and running inference with tensorrt
- Reference: [基于 TensorRT 实现 Bert 预训练模型推理加速(超详细-附核心代码-避坑指南)](https://zhuanlan.zhihu.com/p/446477075)
- Inference code
```python
import numpy as np
from bert4torch.tokenizers import Tokenizer
import tensorrt as trt
import common
import time
from tqdm import tqdm
"""
a. Load the engine and create an execution context
"""
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
def get_engine(engine_file_path):
print("Reading engine from file {}".format(engine_file_path))
with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
engine = runtime.deserialize_cuda_engine(f.read())
return engine
engine_model_path = "bert_cls.trt"
# Deserialize the TensorRT engine from file.
engine = get_engine(engine_model_path)
# Contexts are used to perform inference.
context = engine.create_execution_context()
"""
b、从engine中获取inputs, outputs, bindings, stream 的格式以及分配缓存
"""
def to_numpy(tensor):
for i, item in enumerate(tensor):
tensor[i] = item + [0] * (512-len(item))
return np.array(tensor, np.int32)
dict_path = '/tensorrt/vocab.txt'
tokenizer = Tokenizer(dict_path, do_lower_case=True)
sentences = ['你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门去转转呢。你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门去转转呢。你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门去转转呢。你在干嘛呢?这几天外面的天气真不错啊,万里无云,阳光明媚的,我的心情也特别的好,我特别想出门。']
input_ids, segment_ids = tokenizer.encode(sentences)
tokens_id = to_numpy(input_ids)
segment_ids = to_numpy(segment_ids)
context.active_optimization_profile = 0
origin_inputshape = context.get_binding_shape(0) # (1,-1)
origin_inputshape[0],origin_inputshape[1] = tokens_id.shape # (batch_size, max_sequence_length)
context.set_binding_shape(0, (origin_inputshape))
context.set_binding_shape(1, (origin_inputshape))
"""
c、输入数据填充
"""
inputs, outputs, bindings, stream = common.allocate_buffers_v2(engine, context)
inputs[0].host = tokens_id
inputs[1].host = segment_ids
"""
d、tensorrt推理
"""
trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
preds = np.argmax(trt_outputs, axis=1)
print("====preds====:",preds)
"""
e、测试耗时
"""
steps = 100
start = time.time()
for i in tqdm(range(steps)):
common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
preds = np.argmax(trt_outputs, axis=1)
print('onnx+tensorrt: ', (time.time()-start)*1000/steps, ' ms')
```
- Requires [common.py](https://github.com/NVIDIA/TensorRT/blob/96e23978cd6e4a8fe869696d3d8ec2b47120629b/samples/python/common.py)
- Output
```shell
Reading engine from file bert_cls.trt
onnx_tensorrt.py:44: DeprecationWarning: Use set_optimization_profile_async instead.
context.active_optimization_profile = 0
====preds====: [1]
100%|██████████████████████████████████████████████████████████████████████████| 100/100 [00:01<00:00, 79.81it/s]
onnx+tensorrt: 12.542836666107178 ms
```
## 5. Speed comparison
- Test setup: btz=1, seq_len=202 (for tensorrt both seq_len=202 and 512 were tested), iterations=100

| scheme | cpu | gpu |
|----|----|----|
|pytorch|144ms|29ms|
|onnx|66ms|——|
|onnx+tensorrt|——|7ms (len=202), 12ms (len=512)|
## 6. Experiment files
- [File tree](https://pan.baidu.com/s/1vX3yK7BWQScnK_5Zb-pAkQ?pwd=rhq9)
```shell
tensorrt
├─common.py
├─onnx_tensorrt.py
├─bert_cls.onnx
├─bert_cls.trt
├─TensorRT-8.4.1.5
```
- docker image: 1) build it yourself following the steps above, or 2) pull the image uploaded by the author
```shell
docker pull tongjilibo/tensorrt:11.3.0-cudnn8-devel-ubuntu20.04-tensorrt8.4.1.5
docker run -it --name trt_torch --gpus all -v /home/libo/tensorrt:/tensorrt tongjilibo/tensorrt:11.3.0-cudnn8-devel-ubuntu20.04-tensorrt8.4.1.5 /bin/bash
```
#! -*- coding:utf-8 -*-
# Mixed-precision training example; in testing, GPU memory usage dropped by about 15%
from bert4torch.tokenizers import Tokenizer
from bert4torch.models import build_transformer_model, BaseModel
from bert4torch.snippets import sequence_padding, Callback, text_segmentate, ListDataset, seed_everything
import torch.nn as nn
import torch
import torch.optim as optim
import random, os, numpy as np
from torch.utils.data import DataLoader

maxlen = 256
batch_size = 16
config_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/bert_config.json'
checkpoint_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/pytorch_model.bin'
dict_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/vocab.txt'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# fix the random seed
seed_everything(42)

# build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# dataset
class MyDataset(ListDataset):
    @staticmethod
    def load_data(filenames):
        """Load the data, splitting texts into pieces no longer than maxlen.
        """
        D = []
        seps, strips = u'\n。!?!?;;,, ', u';;,, '
        for filename in filenames:
            with open(filename, encoding='utf-8') as f:
                for l in f:
                    text, label = l.strip().split('\t')
                    for t in text_segmentate(text, maxlen - 2, seps, strips):
                        D.append((t, int(label)))
        return D

def collate_fn(batch):
    batch_token_ids, batch_segment_ids, batch_labels = [], [], []
    for text, label in batch:
        token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
        batch_token_ids.append(token_ids)
        batch_segment_ids.append(segment_ids)
        batch_labels.append([label])
    batch_token_ids = torch.tensor(sequence_padding(batch_token_ids), dtype=torch.long, device=device)
    batch_segment_ids = torch.tensor(sequence_padding(batch_segment_ids), dtype=torch.long, device=device)
    batch_labels = torch.tensor(batch_labels, dtype=torch.long, device=device)
    return [batch_token_ids, batch_segment_ids], batch_labels.flatten()

# build the dataloaders
train_dataloader = DataLoader(MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.train.data']), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
valid_dataloader = DataLoader(MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.valid.data']), batch_size=batch_size, collate_fn=collate_fn)
test_dataloader = DataLoader(MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.test.data']), batch_size=batch_size, collate_fn=collate_fn)

# define the model structure on top of bert
class Model(BaseModel):
    def __init__(self) -> None:
        super().__init__()
        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
        self.dropout = nn.Dropout(0.1)
        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)

    def forward(self, token_ids, segment_ids):
        _, pooled_output = self.bert([token_ids, segment_ids])
        output = self.dropout(pooled_output)
        output = self.dense(output)
        return output

model = Model().to(device)

# define the loss and optimizer; custom ones are also supported
model.compile(
    loss=nn.CrossEntropyLoss(),
    optimizer=optim.Adam(model.parameters(), lr=2e-5),
    use_amp=True,  # True enables automatic mixed-precision training
    metrics=['accuracy'],
)

if __name__ == '__main__':
    model.fit(train_dataloader, epochs=20, steps_per_epoch=None)
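# For reference, a raw-PyTorch sketch of what use_amp does under the hood (standard
# torch.cuda.amp API; bert4torch's internal implementation may differ, and
# criterion/optimizer stand for the objects passed to compile):
#   scaler = torch.cuda.amp.GradScaler()
#   for (token_ids, segment_ids), labels in train_dataloader:
#       optimizer.zero_grad()
#       with torch.cuda.amp.autocast():
#           loss = criterion(model(token_ids, segment_ids), labels)
#       scaler.scale(loss).backward()
#       scaler.step(optimizer)
#       scaler.update()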
#! -*- coding:utf-8 -*-
# DP example; computing the loss inside the model, as done here, partially mitigates the load-imbalance problem
import os
# can also be passed in on the command line
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
from bert4torch.tokenizers import Tokenizer
from bert4torch.models import build_transformer_model, BaseModel, BaseModelDP
from bert4torch.snippets import sequence_padding, text_segmentate, ListDataset, seed_everything
import torch.nn as nn
import torch
import torch.optim as optim
import random, os, numpy as np
from torch.utils.data import DataLoader

maxlen = 256
batch_size = 16
config_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/bert_config.json'
checkpoint_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/pytorch_model.bin'
dict_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/vocab.txt'
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# fix the random seed
seed_everything(42)

# build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# dataset
class MyDataset(ListDataset):
    @staticmethod
    def load_data(filenames):
        """Load the data, splitting texts into pieces no longer than maxlen.
        """
        D = []
        seps, strips = u'\n。!?!?;;,, ', u';;,, '
        for filename in filenames:
            with open(filename, encoding='utf-8') as f:
                for l in f:
                    text, label = l.strip().split('\t')
                    for t in text_segmentate(text, maxlen - 2, seps, strips):
                        D.append((t, int(label)))
        return D

def collate_fn(batch):
    batch_token_ids, batch_segment_ids, batch_labels = [], [], []
    for text, label in batch:
        token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
        batch_token_ids.append(token_ids)
        batch_segment_ids.append(segment_ids)
        batch_labels.append([label])
    batch_token_ids = torch.tensor(sequence_padding(batch_token_ids), dtype=torch.long, device=device)
    batch_segment_ids = torch.tensor(sequence_padding(batch_segment_ids), dtype=torch.long, device=device)
    batch_labels = torch.tensor(batch_labels, dtype=torch.long, device=device)
    return [batch_token_ids, batch_segment_ids, batch_labels.flatten()], None

# build the dataloader
train_dataloader = DataLoader(MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.train.data']), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)

# define the model structure on top of bert; here the loss is computed inside the model
class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
        self.dropout = nn.Dropout(0.1)
        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, token_ids, segment_ids, labels):
        _, pooled_output = self.bert([token_ids, segment_ids])
        output = self.dropout(pooled_output)
        output = self.dense(output)
        loss = self.loss_fn(output, labels)
        return loss

model = Model().to(device)
model = BaseModelDP(model)  # wrap the model with DP to use multiple gpus

# define the loss and optimizer; custom ones are also supported
model.compile(
    loss=lambda x, _: x.mean(),  # mean of the losses computed on the individual gpus
    optimizer=optim.Adam(model.parameters(), lr=2e-5),
)

if __name__ == '__main__':
    model.fit(train_dataloader, epochs=20, steps_per_epoch=10)
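# BaseModelDP presumably wraps torch.nn.DataParallel; the plain-PyTorch analogue of
# the wrapping above would be:
#   model = nn.DataParallel(Model().to(device))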
#! -*- coding:utf-8 -*-
# DDP example
# Launch command: python -m torch.distributed.launch --nproc_per_node=2 --nnodes=1 task_distributed_data_parallel.py
import os
# can also be passed in on the command line
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
from bert4torch.tokenizers import Tokenizer
from bert4torch.models import build_transformer_model, BaseModelDDP
from bert4torch.snippets import sequence_padding, text_segmentate, ListDataset, seed_everything
import torch.nn as nn
import torch
import torch.optim as optim
import random, os, numpy as np
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--local_rank", type=int, default=-1)
args = parser.parse_args()
torch.cuda.set_device(args.local_rank)
device = torch.device('cuda', args.local_rank)
torch.distributed.init_process_group(backend='nccl')

# model settings
maxlen = 256
batch_size = 16
config_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/bert_config.json'
checkpoint_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/pytorch_model.bin'
dict_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/vocab.txt'

# fix the random seed
seed_everything(42)

# build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# dataset
class MyDataset(ListDataset):
    @staticmethod
    def load_data(filenames):
        """Load the data, splitting texts into pieces no longer than maxlen.
        """
        D = []
        seps, strips = u'\n。!?!?;;,, ', u';;,, '
        for filename in filenames:
            with open(filename, encoding='utf-8') as f:
                for l in f:
                    text, label = l.strip().split('\t')
                    for t in text_segmentate(text, maxlen - 2, seps, strips):
                        D.append((t, int(label)))
        return D

def collate_fn(batch):
    batch_token_ids, batch_segment_ids, batch_labels = [], [], []
    for text, label in batch:
        token_ids, segment_ids = tokenizer.encode(text, maxlen=maxlen)
        batch_token_ids.append(token_ids)
        batch_segment_ids.append(segment_ids)
        batch_labels.append([label])
    batch_token_ids = torch.tensor(sequence_padding(batch_token_ids), dtype=torch.long, device=device)
    batch_segment_ids = torch.tensor(sequence_padding(batch_segment_ids), dtype=torch.long, device=device)
    batch_labels = torch.tensor(batch_labels, dtype=torch.long, device=device)
    return [batch_token_ids, batch_segment_ids, batch_labels.flatten()], None

# build the dataloader with a distributed sampler
train_dataset = MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.train.data'])
train_sampler = DistributedSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size, collate_fn=collate_fn)

# define the model structure on top of bert; here the loss is computed inside the model
class Model(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, with_pool=True)
        self.dropout = nn.Dropout(0.1)
        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, token_ids, segment_ids, labels):
        _, pooled_output = self.bert([token_ids, segment_ids])
        output = self.dropout(pooled_output)
        output = self.dense(output)
        loss = self.loss_fn(output, labels)
        return loss

model = Model().to(device)
# wrap the model with DDP for multi-gpu training; master_rank is the local_rank used to print training progress
model = BaseModelDDP(model, master_rank=0, device_ids=[args.local_rank], output_device=args.local_rank, find_unused_parameters=False)

# define the loss and optimizer; custom ones are also supported
model.compile(
    loss=lambda x, _: x,  # pass through the loss computed in forward
    optimizer=optim.Adam(model.parameters(), lr=2e-5),
)

if __name__ == '__main__':
    model.fit(train_dataloader, epochs=20, steps_per_epoch=None)
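# Note: on newer PyTorch the same script is usually launched with torchrun, which
# passes the rank through the LOCAL_RANK environment variable rather than the
# --local_rank argument:
#   torchrun --nproc_per_node=2 --nnodes=1 task_distributed_data_parallel.py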
#! -*- coding:utf-8 -*-
# Improve model generalization with R-Drop
# Official repo: https://github.com/dropreg/R-Drop
# Dataset: sentiment classification dataset
from bert4torch.models import build_transformer_model, BaseModel
import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
from bert4torch.snippets import sequence_padding, Callback, ListDataset, seed_everything, text_segmentate, get_pool_emb
from bert4torch.tokenizers import Tokenizer
from bert4torch.losses import RDropLoss
from tqdm import tqdm
import torch.nn.functional as F

maxlen = 256
batch_size = 16
# BERT base
config_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/bert_config.json'
checkpoint_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/pytorch_model.bin'
dict_path = 'F:/Projects/pretrain_ckpt/bert/[google_tf_base]--chinese_L-12_H-768_A-12/vocab.txt'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
seed_everything(42)

# build the tokenizer
tokenizer = Tokenizer(dict_path, do_lower_case=True)

# dataset
class MyDataset(ListDataset):
    @staticmethod
    def load_data(filenames):
        """Load the data, splitting texts into pieces no longer than maxlen.
        """
        D = []
        seps, strips = u'\n。!?!?;;,, ', u';;,, '
        for filename in filenames:
            with open(filename, encoding='utf-8') as f:
                for l in f:
                    text, label = l.strip().split('\t')
                    for t in text_segmentate(text, maxlen - 2, seps, strips):
                        D.append((t, int(label)))
        return D

def collate_fn(batch):
    batch_token_ids, batch_labels = [], []
    for text, label in batch:
        token_ids, _ = tokenizer.encode(text, maxlen=maxlen)
        for _ in range(2):
            batch_token_ids.append(token_ids)
            batch_labels.append([label])
    batch_token_ids = torch.tensor(sequence_padding(batch_token_ids), dtype=torch.long, device=device)
    batch_labels = torch.tensor(batch_labels, dtype=torch.long, device=device)
    return batch_token_ids, batch_labels.flatten()

# build the dataloaders
train_dataloader = DataLoader(MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.train.data']), batch_size=batch_size, shuffle=True, collate_fn=collate_fn)
valid_dataloader = DataLoader(MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.valid.data']), batch_size=batch_size, collate_fn=collate_fn)
test_dataloader = DataLoader(MyDataset(['F:/Projects/data/corpus/sentence_classification/sentiment/sentiment.test.data']), batch_size=batch_size, collate_fn=collate_fn)

# define the model structure on top of bert
class Model(BaseModel):
    def __init__(self, pool_method='cls') -> None:
        super().__init__()
        self.pool_method = pool_method
        self.bert = build_transformer_model(config_path=config_path, checkpoint_path=checkpoint_path, dropout_rate=0.3, segment_vocab_size=0, with_pool=True)
        self.dropout = nn.Dropout(0.1)
        self.dense = nn.Linear(self.bert.configs['hidden_size'], 2)

    def forward(self, token_ids):
        hidden_states, pooling = self.bert([token_ids])
        pooled_output = get_pool_emb(hidden_states, pooling, token_ids.gt(0).long(), self.pool_method)
        output = self.dropout(pooled_output)
        output = self.dense(output)
        return output

model = Model().to(device)
model.compile(loss=RDropLoss(), optimizer=optim.Adam(model.parameters(), lr=2e-5), metrics=['accuracy'])

class Evaluator(Callback):
    """Evaluate and save the best model.
    """
    def __init__(self):
        self.best_val_acc = 0.

    def on_epoch_end(self, global_step, epoch, logs=None):
        val_acc = self.evaluate(valid_dataloader)
        test_acc = self.evaluate(test_dataloader)
        if val_acc > self.best_val_acc:
            self.best_val_acc = val_acc
            # model.save_weights('best_model.pt')
        print(f'val_acc: {val_acc:.5f}, test_acc: {test_acc:.5f}, best_val_acc: {self.best_val_acc:.5f}\n')

    # evaluation metric
    def evaluate(self, data):
        total, right = 0., 0.
        for x_true, y_true in data:
            y_pred = model.predict(x_true).argmax(axis=1)
            total += len(y_true)
            right += (y_true == y_pred).sum().item()
        return right / total

if __name__ == '__main__':
    evaluator = Evaluator()
    model.fit(train_dataloader, epochs=10, steps_per_epoch=None, callbacks=[evaluator])
else:
    model.load_weights('best_model.pt')
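# R-Drop in a nutshell (a sketch of what RDropLoss is expected to compute; see the
# official repo linked above): collate_fn puts each sample into the batch twice, so
# the two copies get different dropout masks in a single forward pass; with their
# logits y1, y2:
#   loss = CE(y1, labels) + CE(y2, labels) + alpha * (KL(y1 || y2) + KL(y2 || y1)) / 2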