Commit 24b257f1 authored by sunzhq2

init

parent 920b3c0f
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from fpdf import FPDF
import json
import math
import os
class PDF(FPDF):
def titles(self, title, backend):
self.set_xy(0.0, 0.0)
self.set_font('Times', 'B', 16)
# self.set_text_color(220, 50, 50)
self.cell(w=210.0,
h=40.0,
align='C',
txt=title + ' REPORT (' + backend + ')',
border=0)
def lines(self):
self.rect(5.0, 5.0, 200.0, 287.0)
def icon(self, icon_path):
self.set_xy(10.0, 10.0)
self.image(icon_path, link='', type='', w=37.6, h=5.2)
self.set_xy(157.0, 0.0)
self.set_font('Times', 'B', 10)
# self.set_text_color(220, 50, 50)
self.cell(w=60.0, h=25.0, align='C', txt='BYTE MLPERF', border=0)
def charts(self, chart_path):
self.y += 5
self.x += 6
self.image(chart_path, link='', type='', w=700 / 4, h=450 / 4.9)
def diff_tables(self, data, dataset):
col_width = 45
# self.set_xy(10.00125,40)
x = self.x
i = 0
self.set_font("Times", 'B', size=10)
line_height = self.font_size * 2.5
self.x = x + 5
self.multi_cell(90 * math.ceil(((len(data)) / 3)),
line_height,
'Accuracy Results' + ' (' + dataset + ')',
border=1,
align='C')
y = self.y
reset_y = self.y
self.ln(line_height)
self.set_font("Times", size=10)
final_y = None
for i, (key, val) in enumerate(data.items()):
if i < 4:
if (i % 3 == 0):
final_y = y
y = reset_y
self.x = x + 90 * (i // 3) + 5
self.y = y
self.multi_cell(col_width,
line_height,
key,
border=1,
align='C')
self.x += (45 + 90 * (i // 3)) + 5
self.y = y
self.multi_cell(col_width,
line_height,
str(val),
border=1,
align='C')
y = self.y
if final_y:
self.y = final_y
def graph_tables(self, data):
real_data = []
row_name = []
row_data = []
for key, val in data.items():
row_name.append(key)
row_data.append(str(val))
real_data.append(row_name)
real_data.append(row_data)
col_width = 45
self.set_xy(10.00125, 30)
x = self.x
self.x += 27
self.set_font("Times", 'B', size=10)
line_height = self.font_size * 2.5
self.multi_cell(135,
line_height,
'Graph Compilation Results',
border=1,
align='C')
y = self.y
self.ln(line_height)
self.set_font("Times", size=10)
for row in real_data:
self.x = x
for i, datum in enumerate(row):
self.y = y
self.x += (i + 1) * 45 - 18
self.multi_cell(col_width,
line_height,
str(datum),
border=1,
align='C')
y = self.y
self.y += 5
def performance_tables(self, data):
real_data = []
row_name = []
for i in range(len(data)):
row_data = []
for key, val in data[i].items():
if i == 0:
row_name.append(key)
row_data.append(val)
real_data.append(row_data)
real_data.insert(0, row_name)
col_width = 33.75
self.set_xy(10.00125, 65)
x = self.x
self.x += 27
self.set_font("Times", 'B', size=10)
line_height = self.font_size * 2.5
self.multi_cell(135,
line_height,
'Performance Results',
border=1,
align='C')
y = self.y
self.ln(line_height)
self.set_font("Times", size=10)
for row in real_data:
self.x = x
for i, datum in enumerate(row):
self.y = y
self.x += (i + 1) * 33.75 - 6.75
self.multi_cell(col_width,
line_height,
str(datum),
border=1,
align='C')
y = self.y
self.ln(line_height)
def footer(self):
# Go to 1.5 cm from bottom
self.set_y(-15)
# Select Arial italic 8
self.set_font('Arial', 'I', 8)
# Print centered page number
self.cell(0, 10, '%s' % self.page_no(), 0, 0, 'C')
def generate_report(self, path):
with open(path, 'r') as f:
report = json.load(f)
output_dir = os.path.dirname(path) + '/'
index = output_dir.index('ByteMLPerf') + len('ByteMLPerf')
base_path = output_dir[:index]
icon_path = os.path.join(base_path, 'docs/images/icon.png')
self.add_page()
self.lines()
self.icon(icon_path)
self.graph_tables(report['Graph Compile'])
if 'Performance' in report:
self.performance_tables(report['Performance'])
if 'Accuracy' in report:
self.diff_tables(report['Accuracy'], report['Dataset'])
if 'Diff Dist' in report['Accuracy']:
self.charts(output_dir + report['Accuracy']['Diff Dist'])
self.titles(report['Model'], report['Backend'])
self.set_author('Bytedance')
precision = path.split('/')[-1].split('-')[1]
self.output(output_dir + report['Model'] + '-TO-' + precision.upper() + '.pdf', 'F')
return True
def build_pdf(path):
pdf = PDF(orientation='P', unit='mm', format='A4')
return pdf.generate_report(path)
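# Usage sketch (hypothetical path): generate_report expects the report JSON to
# live somewhere under a "ByteMLPerf" directory, and the file name to encode
# the precision as the second dash-separated field.
# build_pdf('ByteMLPerf/byte_mlperf/reports/resnet50-fp16-report.json')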
#!/bin/bash
if [ ! -d "tools/venv" ]; then
python3 -m virtualenv tools/venv
source tools/venv/bin/activate
tools/venv/bin/python3 -m pip install --upgrade pip -q
tools/venv/bin/python3 -m pip install -r tools/requirements.txt -q
else
source tools/venv/bin/activate
fi
if [ "$3" == "pt2onnx" ];then
python3 tools/torch_to_onnx.py --model_path $1 --output_path $2
elif [ "$3" == "saved2onnx" ];then
python3 tools/saved_to_onnx.py --model_path $1 --output_path $2
elif [ "$3" == "saved2frozen" ];then
python3 tools/saved_to_frozen.py --model_path $1 --output_path $2
fi
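# Usage sketch (assumed invocation; the actual wrapper file name may differ):
#   bash this_script.sh <model_path> <output_path> pt2onnx|saved2onnx|saved2frozen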
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import signature_constants
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
def convert_pb_to_server_model(pb_model_path, export_dir, input_names,
output_names):
if not input_names:
raise ValueError("Converter needs inputs")
if not output_names:
raise ValueError("Converter needs outputs")
input_names = input_names.split(",")
output_names = output_names.split(",")
graph_def = read_pb_model(pb_model_path)
convert_pb_saved_model(graph_def, export_dir, input_names, output_names)
def read_pb_model(pb_model_path):
with tf.io.gfile.GFile(pb_model_path, "rb") as f:
graph_def = tf.compat.v1.GraphDef()
graph_def.ParseFromString(f.read())
return graph_def
def convert_pb_saved_model(graph_def, export_dir, input_names, output_names):
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
sigs = {}
with tf.Session(graph=tf.Graph()) as sess:
tf.import_graph_def(graph_def, name="")
g = tf.get_default_graph()
input_infos = {}
output_infos = {}
        for input_name in input_names:
            # get_tensor_by_name expects a tensor name such as "input_1:0"
            tensor_name = input_name if ":" in input_name else input_name + ":0"
            input_infos[input_name] = g.get_tensor_by_name(tensor_name)
        for output_name in output_names:
            tensor_name = output_name if ":" in output_name else output_name + ":0"
            output_infos[output_name] = g.get_tensor_by_name(tensor_name)
sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = \
tf.saved_model.signature_def_utils.predict_signature_def(
input_infos, output_infos)
builder.add_meta_graph_and_variables(sess, [tag_constants.SERVING],
signature_def_map=sigs)
builder.save()
path = "densenet121.pb"
convert_pb_to_server_model(path,
os.path.abspath('.') + "/densenet_saved_model",
"input_1", "fc1000")
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.python.tools import freeze_graph
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
import logging
import argparse
def frozen_graph(h5_file_path, workdir, pb_name):
model = tf.keras.models.load_model(h5_file_path,
custom_objects={
"backend": backend,
})
model.summary()
full_model = tf.function(lambda input_1: model(input_1))
full_model = full_model.get_concrete_function(
tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype))
# Get frozen ConcreteFunction
frozen_func = convert_variables_to_constants_v2(full_model)
frozen_func.graph.as_graph_def()
layers = [op.name for op in frozen_func.graph.get_operations()]
print(frozen_func.outputs)
# Save frozen graph from frozen ConcreteFunction to hard drive
tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
logdir=workdir,
name=pb_name,
as_text=False)
print('model has been saved')
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='VC model h5->freezedpb script')
parser.add_argument("--h5_model_path", type=str, required=True)
parser.add_argument("--freezed_pb_name", type=str, required=True)
parser.add_argument("--workdir", type=str, required=True)
args = parser.parse_args()
frozen_graph(args.h5_model_path, args.workdir, args.freezed_pb_name)
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import numpy as np
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
# Currently, memory growth needs to be the same across GPUs
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
# Memory growth must be set before GPUs have been initialized
print(e)
def my_calibration_input_fn():
for _ in range(10):
yield np.random.normal(size=(1, 224, 224, 3)).astype(np.uint8),
# yield tf.random.normal((1, 224, 224, 3)).astype(np.uint8),
saved_model_path = 'byte_mlperf/model_zoo/resnet50_saved_model'
model_params = tf.experimental.tensorrt.ConversionParams(
precision_mode="int8".upper(), max_batch_size=64, use_calibration=True)
model_trt = tf.experimental.tensorrt.Converter(
input_saved_model_dir=saved_model_path, conversion_params=model_params)
model_trt.convert(calibration_input_fn=my_calibration_input_fn)
output_saved_model_dir = 'test'
model_trt.save(output_saved_model_dir)
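# Optional sanity check, a minimal sketch assuming the conversion above
# succeeded: reload the TF-TRT SavedModel and run its default signature.
# loaded = tf.saved_model.load(output_saved_model_dir)
# infer = loaded.signatures['serving_default']
# print(infer(tf.constant(np.zeros((1, 224, 224, 3), dtype=np.uint8))))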
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mxnet as mx
import numpy as np
import onnx
def get_mod(prefix, epoch, ctx, data_shape):
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
mod.bind(for_training=False,
data_shapes=[("data", data_shape)],
label_shapes=mod._label_shapes)
mod.set_params(arg_params, aux_params, allow_missing=True)
return mod
def load_mxnet():
prefix = "image_level_space"
epoch = 0
ctx = mx.cpu()
data_shape = (1, 3, 736, 416)
mod = get_mod(prefix, epoch, ctx, data_shape)
return mod
'''
requires mxnet >= 1.9.0
'''
def do_mxnet2onnx(sym, params, onnx_file, in_shapes, in_types,
dynamic_input_shapes):
'''
example:
sym = 'byte_mlperf/byte_mlperf/download/manysplit/image_level_space-symbol.json'
params = 'byte_mlperf/byte_mlperf/download/manysplit/image_level_space-0000.params'
onnx_file = 'manysplit.onnx'
in_shapes = [(1,3,736,416)]
in_types = [np.float32]
dynamic_input_shapes = [(None,3,736,416)]
'''
converted_model_path = mx.onnx.export_model(
sym,
params,
in_shapes,
in_types,
onnx_file,
dynamic=True,
dynamic_input_shapes=dynamic_input_shapes,
verbose=True)
# Load the ONNX model
model_proto = onnx.load_model(converted_model_path)
# Check if the converted ONNX protobuf is valid
onnx.checker.check_graph(model_proto.graph)
if __name__ == "__main__":
# load_mxnet()
do_mxnet2onnx()
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import onnx
import onnx.helper as helper
import onnxruntime as rt
from onnx import numpy_helper
from onnx.tools import update_model_dims
from onnx import shape_inference, TensorProto
import copy
'''
DType Info
'''
ONNX_DTYPE = {
    0: TensorProto.FLOAT,  # UNDEFINED, default to float32
1: TensorProto.FLOAT,
2: TensorProto.UINT8,
3: TensorProto.INT8,
4: TensorProto.UINT16,
5: TensorProto.INT16,
6: TensorProto.INT32,
7: TensorProto.INT64,
8: TensorProto.STRING,
9: TensorProto.BOOL,
10: TensorProto.FLOAT16,
11: TensorProto.DOUBLE,
12: TensorProto.UINT32,
13: TensorProto.UINT64,
}
'''
Nodes
'''
def get_node_by_name(graph, name):
for node in graph.node:
if node.name == name:
return node
return None
def get_nodes_by_optype(graph, typename):
nodes = []
for node in graph.node:
if node.op_type == typename:
nodes.append(node)
return nodes
def get_node_by_output_name(graph, name):
for node in graph.node:
if node.output[0] == name:
return node
return None
def get_node_successor(graph, target_node):
successor = []
for node in graph.node:
if len(list(set(node.input).intersection(set(
target_node.output)))) > 0:
successor.append(node)
return successor
def get_value_info_by_name(graph, name):
for val_info in graph.value_info:
if val_info.name == name:
return val_info
return None
def get_shape_from_value_info(val_info):
shape = [d.dim_value for d in val_info.type.tensor_type.shape.dim]
return shape
def remove_weights(graph, name_list):
rm_list = []
for weight in graph.initializer:
if weight.name in name_list:
rm_list.append(weight)
for weight in rm_list:
graph.initializer.remove(weight)
def remove_inputs(graph, name_list):
rm_list = []
for input_t in graph.input:
if input_t.name in name_list:
rm_list.append(input_t)
for input_t in rm_list:
graph.input.remove(input_t)
def remove_value_infos(graph, name_list):
rm_list = []
for value_info in graph.value_info:
if value_info.name in name_list:
rm_list.append(value_info)
for value_info in rm_list:
graph.value_info.remove(value_info)
def remove_node_by_name(graph, name):
target_node = get_node_by_name(graph, name)
remove_node(graph, target_node)
def remove_node(graph, target_node):
'''
remove the node with only one input and only one output
'''
node_input = target_node.input[0]
node_output = target_node.output[0]
# set input of successor node to predecessor node of target node
for node in graph.node:
for i, n in enumerate(node.input):
if n == node_output:
node.input[i] = node_input
target_names = set(target_node.input) & set(
[weight.name for weight in graph.initializer])
remove_weights(graph, target_names)
target_names.add(node_output)
remove_inputs(graph, target_names)
remove_value_infos(graph, target_names)
graph.node.remove(target_node)
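# Usage sketch (hypothetical node name): drop a single-input/single-output
# node such as an Identity, then re-save the model.
# model = onnx.load('model.onnx')
# remove_node_by_name(model.graph, 'Identity_0')
# onnx.save(model, 'model_pruned.onnx')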
'''
Constant & Initializer
'''
def is_initializer(graph, name):
for tensor in graph.initializer:
if tensor.name == name:
return True
return False
def get_initializer_by_name(graph, name):
for tensor in graph.initializer:
if tensor.name == name:
return tensor
return None
def get_init_value(tensor):
return numpy_helper.to_array(tensor)
def set_init_value(graph, weight, data_numpy):
    # NOTE: weight can be stored in human-readable fields (float_data, int32_data, string_data, ...)
    # as well as in raw_data; if we set the weight via raw_data, we must clear the fields above for it to take effect
    # NOTE: the numpy dtype must match the TensorProto data_type
raw_shape = tuple([i for i in weight.dims])
new_shape = np.shape(data_numpy)
if weight.data_type == 8:
# string data type is special, it requires to store data in string_data field
# NOT the raw_data field
weight.string_data = bytes(data_numpy, encoding="utf8")
weight.ClearField("raw_data")
return
if new_shape != raw_shape:
print(
"Warning: the new weight shape is not consistent with original shape!"
)
weight.dims[:] = list(new_shape)
    # in case the weight is also a graph input, keep its declared shape in sync
for model_input in graph.input:
if model_input.name == weight.name:
# copy from onnx.helper...
tensor_shape_proto = model_input.type.tensor_type.shape
tensor_shape_proto.ClearField("dim")
tensor_shape_proto.dim.extend([])
for d in new_shape:
dim = tensor_shape_proto.dim.add()
dim.dim_value = d
weight.ClearField("float_data")
weight.ClearField("int32_data")
weight.ClearField("int64_data")
weight.raw_data = data_numpy.tobytes()
return
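# Usage sketch (hypothetical initializer name): overwrite a weight in place;
# the new array's dtype should match the initializer's data_type.
# w = get_initializer_by_name(model.graph, 'fc.weight')
# set_init_value(model.graph, w, np.zeros((1000, 2048), dtype=np.float32))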
def is_constant(node):
if node.op_type == "Constant":
return True
else:
return False
def get_constant_value(node):
    # map TensorProto data_type to the matching numpy dtype; the original
    # struct.unpack branches used invalid format chars for several types
    dtype_map = {
        1: np.float32,
        2: np.uint8,
        3: np.int8,
        4: np.uint16,
        5: np.int16,
        6: np.int32,
        7: np.int64,
        9: np.bool_,
        10: np.float16,
        11: np.float64,
        12: np.uint32,
        13: np.uint64,
    }
    for attr in node.attribute:
        if attr.name == 'value':
            if attr.t.data_type in dtype_map:
                return np.frombuffer(attr.t.raw_data,
                                     dtype=dtype_map[attr.t.data_type])
            print("unsupported attribute data type with attribute name")
    return None
def set_constant_value(target_node, value):
# NOTE : dtype value should match with target_node
for attr in target_node.attribute:
if (attr.name == "value"):
attr.t.raw_data = value.tobytes()
'''
Attributes
'''
def get_attribute_by_name(node, name):
    for attr in node.attribute:
        if attr.name == name:
            return attr
    return None
def set_node_attribute(target_node, attr_name, attr_value):
flag = False
for attr in target_node.attribute:
if (attr.name == attr_name):
if attr.type == 1: # float value
attr.f = attr_value
elif attr.type == 2: # int value
attr.i = attr_value
elif attr.type == 3: # string value
attr.s = attr_value
elif attr.type == 4: # tensor value
attr.t = attr_value
elif attr.type == 5: # graph value
attr.g = attr_value
# NOTE: For repeated composite types, we should use something like
# del attr.xxx[:]
# attr.xxx.extend([n1, n2, n3])
elif attr.type == 6: # float[]
attr.floats[:] = attr_value
elif attr.type == 7: # int[]
attr.ints[:] = attr_value
elif attr.type == 8: # strings[]
attr.strings[:] = attr_value
else:
print("unsupported attribute data type with attribute name")
return False
flag = True
if not flag:
# attribute not in original node
print("Warning: you are appending a new attribute to the node!")
target_node.attribute.append(
helper.make_attribute(attr_name, attr_value))
flag = True
return flag
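# Usage sketch (hypothetical node name): rewrite a Transpose node's "perm"
# attribute (an int[] attribute, so the ints branch above applies).
# node = get_node_by_name(model.graph, 'Transpose_0')
# set_node_attribute(node, 'perm', [0, 2, 3, 1])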
'''
Graph Input/Output
'''
def add_extra_output(graph, target_output, target_shape):
extra_elem_type = 1
for vi in graph.value_info:
if vi.name == target_output:
extra_elem_type = vi.type.tensor_type.elem_type
extra_output = helper.make_tensor_value_info(target_output,
extra_elem_type, target_shape)
'''
# NOTE
    # if we know the value type and shape, we can also use this
def make_tensor_value_info(
name, # type: Text
elem_type, # type: int
shape, # type: Optional[Sequence[Union[Text, int]]]
doc_string="", # type: Text
shape_denotation=None, # type: Optional[List[Text]]
):
'''
graph.output.append(extra_output)
return
def get_graph_input_by_name(graph, name):
for input in graph.input:
if input.name == name:
return input
return None
def get_graph_output_by_name(graph, name):
for out in graph.output:
if out.name == name:
return out
return None
def resort_nodes(model):
new_model = copy.deepcopy(model)
for n in new_model.graph.node:
model.graph.node.remove(n)
ready_tensors = [n.name for n in model.graph.input]
ready_tensors.extend([n.name for n in model.graph.initializer])
ready_tensors = set(ready_tensors)
all_nodes = [n for n in new_model.graph.node]
while True:
activate_nodes = []
for node in all_nodes:
inputs = set(node.input)
if len(inputs - ready_tensors) == 0:
activate_nodes.append(node)
assert len(activate_nodes) != 0, 'invalid graph'
for node in activate_nodes:
model.graph.node.append(node)
ready_tensors = ready_tensors | set(node.output)
all_nodes.remove(node)
if len(all_nodes) == 0:
break
return model
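# Usage sketch: after inserting nodes out of order (as several passes below
# do), re-sort graph.node topologically before saving.
# model = resort_nodes(model)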
'''
Pass
'''
def fix_model_shape(model,
in_dim_dict=None,
out_dim_dict=None,
fully_si=False):
    if in_dim_dict is not None and out_dim_dict is not None:
update_model_dims.update_inputs_outputs_dims(model, in_dim_dict,
out_dim_dict)
if fully_si:
input_num = len(model.graph.input)
tensors = model.graph.initializer
for i, tensor in enumerate(tensors):
value_info = helper.make_tensor_value_info(
tensor.name, ONNX_DTYPE[tensor.data_type], tensor.dims)
model.graph.input.insert(i + input_num, value_info)
onnx.checker.check_model(model)
model = shape_inference.infer_shapes(model)
return model
def remove_redundant_cast(graph):
    cast_nodes = get_nodes_by_optype(graph, "Cast")
    for node in cast_nodes:
        in_node = get_node_by_output_name(graph, node.input[0])
        if in_node is not None and in_node.op_type == "Cast":
            print("Removing redundant cast: ", in_node)
            node.input[0] = in_node.input[0]
            graph.node.remove(in_node)
def onnx_sess_opt(model, opt_model):
sess_options = rt.SessionOptions()
sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_BASIC
sess_options.optimized_model_filepath = opt_model
rt.InferenceSession(model,
sess_options,
providers=['CPUExecutionProvider'])
# ------------- Model-specific passes --------------------
def convert_fp16_to_fp32(model):
# handle model.graph.initializer
to_convert = []
for init in model.graph.initializer:
# print(init.name)
if init.data_type != 10:
continue
to_convert.append(init)
for init in to_convert:
val = get_init_value(init)
new_val = val.astype(np.float32)
new_init = numpy_helper.from_array(new_val, init.name)
model.graph.initializer.remove(init)
model.graph.initializer.append(new_init)
    # handle model.graph.node
cons_ops = get_nodes_by_optype(model.graph, "Constant")
for op in cons_ops:
val_attr = get_attribute_by_name(op, "value")
if val_attr.t.data_type != 10:
continue
# import pdb;pdb.set_trace()
val = get_constant_value(op)
new_val = val.astype(np.float32)
set_constant_value(op, new_val)
val_attr.t.data_type = 1
for val_info in model.graph.value_info:
if val_info.type.tensor_type.elem_type != 10:
continue
val_info.type.tensor_type.elem_type = 1
# handle cast op
cast_ops = get_nodes_by_optype(model.graph, "Cast")
to_remove = []
for cast in cast_ops:
to = get_attribute_by_name(cast, "to")
if to.i != 10 and to.i != 1:
continue
if to.i == 10:
up_node = get_node_by_output_name(model.graph, cast.input[0])
set_node_attribute(cast, "to", 1)
if up_node.op_type != "Cast":
continue
up_to = get_attribute_by_name(up_node, "to")
if up_to.i != 1:
continue
if to.i == 1:
down_node = get_node_successor(model.graph, cast)
if len(down_node) == 0:
continue
if down_node[0].op_type != "Cast":
continue
down_to = get_attribute_by_name(down_node[0], "to")
if down_to.i != 10:
continue
# print(cast.name)
succs = get_node_successor(model.graph, cast)
for succ in succs:
for idx, in_name in enumerate(succ.input):
if in_name == cast.output[0]:
succ.input[idx] = cast.input[0]
to_remove.append(cast)
for cast in to_remove:
out_info = get_graph_output_by_name(model.graph, cast.output[0])
        if out_info is None:
model.graph.node.remove(cast)
else:
node = get_node_by_output_name(model.graph, cast.input[0])
            if node is not None:
for idx, out in enumerate(node.output):
if out == cast.input[0]:
node.output[idx] = cast.output[0]
model.graph.node.remove(cast)
return model
def replace_mask_where(model):
# pattern: sub -> cast ----|
# |-----------> where
where_ops = get_nodes_by_optype(model.graph, "Where")
to_replace = []
for where_node in where_ops:
        cond = where_node.input[0]
        node = get_node_by_output_name(model.graph, cond)
        if node is None or node.op_type != "Cast":
            continue
        y_in = where_node.input[2]
        node = get_node_by_output_name(model.graph, y_in)
        if node is None or node.op_type != "Sub":
            continue
to_replace.append(where_node)
to_remove = []
for where in to_replace:
x_in = where.input[1]
y_in = where.input[2]
mul_op = onnx.helper.make_node('Mul', [x_in, y_in],
where.output,
name="{}_mask_mul_replaced".format(
where.name))
model.graph.node.append(mul_op)
cast_op = get_node_by_output_name(model.graph, where.input[0])
to_remove.append(cast_op)
to_remove.append(where)
for node in to_remove:
model.graph.node.remove(node)
return model
def convert_expand_to_tile(model):
expand_ops = get_nodes_by_optype(model.graph, "Expand")
for expand_node in expand_ops:
ifm = expand_node.input[0]
ofm = expand_node.output[0]
ifm_vi = get_value_info_by_name(model.graph, expand_node.input[0])
        if ifm_vi is None:
            continue
init_shape = get_initializer_by_name(model.graph, expand_node.input[1])
        if init_shape is None:
            continue
shape_val = get_init_value(init_shape)
ofm_shape = shape_val.tolist()
ifm_shape = [
dim.dim_value for dim in ifm_vi.type.tensor_type.shape.dim
]
repeats = [
1 if i == j else int(j / i) for i, j in zip(ifm_shape, ofm_shape)
]
repeats = np.array(repeats)
repeats = numpy_helper.from_array(
repeats, 'Tile_{}_repeats'.format(expand_node.name))
tile_node = onnx.helper.make_node('Tile', [ifm, repeats.name], [ofm],
name=expand_node.name)
model.graph.node.append(tile_node)
model.graph.initializer.append(repeats)
model.graph.node.remove(expand_node)
return model
def concat_to_tile(model):
def is_tile_type(node):
tile_flag = True
for idx in range(len(node.input) - 1):
if node.input[idx] == node.input[idx + 1]:
continue
else:
tile_flag = False
break
return tile_flag
concat_ops = get_nodes_by_optype(model.graph, "Concat")
for concat in concat_ops:
if not is_tile_type(concat):
continue
print("Converting concat to tile")
in_val = get_value_info_by_name(model.graph, concat.input[0])
out_val = get_value_info_by_name(model.graph, concat.output[0])
ifm_shape = get_shape_from_value_info(in_val)
ofm_shape = get_shape_from_value_info(out_val)
repeats = [
1 if i == j else int(j / i) for i, j in zip(ifm_shape, ofm_shape)
]
repeats = np.array(repeats)
repeats = numpy_helper.from_array(
repeats, 'Tile_{}_repeats'.format(concat.name))
tile_node = onnx.helper.make_node('Tile',
[concat.input[0], repeats.name],
[concat.output[0]],
name=concat.name)
model.graph.node.append(tile_node)
model.graph.initializer.append(repeats)
model.graph.node.remove(concat)
def remove_qdq(model):
q_ops = get_nodes_by_optype(model.graph, "QuantizeLinear")
for q_op in q_ops:
dq = get_node_successor(model.graph, q_op)
        # skip unless the Q node has exactly one consumer and it is a DQ node
        if len(dq) != 1 or dq[0].op_type != "DequantizeLinear":
            continue
        qdq_succ = get_node_successor(model.graph, dq[0])
        if len(qdq_succ) == 0:
            continue
        for i, n in enumerate(qdq_succ[0].input):
if n == dq[0].output[0]:
qdq_succ[0].input[i] = q_op.input[0]
model.graph.node.remove(q_op)
model.graph.node.remove(dq[0])
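# Usage sketch (hypothetical paths): strip QuantizeLinear/DequantizeLinear
# pairs from a quantized model, then save the float version.
# model = onnx.load('quantized.onnx')
# remove_qdq(model)
# onnx.save(model, 'no_qdq.onnx')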
import torch
from onnx2torch import convert
import onnxruntime as ort
if __name__ == "__main__":
# Path to ONNX model
onnx_model_path = 'converted_models/no_qdq_2.onnx'
onnx_model = onnx.load(onnx_model_path)
in_shape_dict = {
"data": [2, 10, 3, 256, 256],
}
out_shape_dict = {'logits': [2, 2], '1383': [1, 20]}
onnx_model = fix_model_shape(onnx_model, in_shape_dict, out_shape_dict,
True)
onnx.save(onnx_model, 'converted_models/no_qdq_3.onnx')
    onnx_sess_opt('converted_models/no_qdq_3.onnx',
                  'converted_models/no_qdq_3.onnx')
onnx_model = onnx.load('converted_models/no_qdq_3.onnx')
torch_model_2 = convert(onnx_model)
# You can pass the path to the onnx model to convert it or...
# torch_model_1 = convert(onnx_model_path)
# Create example data
x = torch.ones((2, 10, 3, 256, 256))
out_torch = torch_model_2(x)
trace_model = torch.jit.trace(torch_model_2, x)
ort_sess = ort.InferenceSession(onnx_model_path)
outputs_ort = ort_sess.run(None, {'data': x.numpy()})
print(outputs_ort[0] - out_torch[0].detach().numpy())
print(outputs_ort[1] - out_torch[1].detach().numpy())
# Check the Onnx output against PyTorch
# print(torch.max(torch.abs(outputs_ort[0] - out_torch[0].detach().numpy())))
# print(torch.max(torch.abs(outputs_ort[1] - out_torch[1].detach().numpy())))
# print(np.allclose(outputs_ort[0], out_torch[0].detach().numpy(), atol=1.e-7))
# print(np.allclose(outputs_ort[1], out_torch[1].detach().numpy(), atol=1.e-7))
tensorflow>=2.6.0
tf2onnx
numpy
torch==1.9.1
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
An interface to export saved_models to frozen models.
Note that this API makes three assumptions:
1. the saved_model directory is laid out as below:
|--saved_model.pb
|--variables
|-- |--variables.data-00000-of-00001
|-- |--variables.index
2. saved_tags defaults to tag_constants.SERVING if not specified
3. signature defaults to signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY if not specified
Copyright Reserve: Habana Labs
'''
import sys
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import saved_model_cli
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import signature_constants
import argparse
from six import StringIO
import contextlib
def freeze_saved_model(saved_model_dir,
output_nodes,
pb_name,
saved_tags=tag_constants.SERVING):
input_saved_model_dir = saved_model_dir
output_node_names = output_nodes
input_binary = False
input_saver_def_path = False
restore_op_name = None
filename_tensor_name = None
clear_devices = True
input_meta_graph = False
checkpoint_path = None
input_graph_filename = None
saved_model_tags = saved_tags
output_graph_filename = pb_name
freeze_graph.freeze_graph(input_graph_filename, input_saver_def_path,
input_binary, checkpoint_path, output_node_names,
restore_op_name, filename_tensor_name,
output_graph_filename, clear_devices, "", "", "",
input_meta_graph, input_saved_model_dir,
saved_model_tags)
@contextlib.contextmanager
def captured_output():
new_out, new_err = StringIO(), StringIO()
old_out, old_err = sys.stdout, sys.stderr
try:
sys.stdout, sys.stderr = new_out, new_err
yield sys.stdout, sys.stderr
finally:
sys.stdout, sys.stderr = old_out, old_err
def get_output_node(saved_model_dir, saved_tags, sign):
parser = saved_model_cli.create_parser()
args = parser.parse_args([
'show', '--dir', saved_model_dir, '--tag_set', saved_tags,
'--signature_def', sign
])
with captured_output() as (out, err):
saved_model_cli.show(args)
result = out.getvalue().strip()
print(result)
output_num = 0
output_nodes = None
lines = result.split('\n')
    for idx, line in enumerate(lines):
if "outputs[" in line:
line = lines[idx + 3]
output = line.split(":")[1]
if output_num > 0:
output_nodes = output_nodes + "," + output
else:
output_nodes = output
output_num = output_num + 1
    if output_nodes is None:
raise RuntimeError("No Output Nodes found in saved_model.")
return output_nodes, output_num
def saved_to_frozen(
saved_model_dir,
frozen_path,
saved_tags=tag_constants.SERVING,
sign=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY):
output_nodes, output_num = get_output_node(saved_model_dir, saved_tags,
sign)
print("[INFO]: Save Model has [", output_num, "] outputs.")
print("[INFO]: Outputs Nodes: [", output_nodes, "].")
# cwd = os.getcwd()
# frozen_path = os.path.join(cwd, "converted_frozen.pb")
freeze_saved_model(saved_model_dir, output_nodes, frozen_path, saved_tags)
print("[INFO]: Saved Model convert to Frozen Model done.")
print("[INFO]: Frozen Model saved here: ", frozen_path)
return frozen_path
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", default="")
parser.add_argument("--output_path", default="")
args = parser.parse_args()
return args
if __name__ == "__main__":
args = get_args()
saved_to_frozen(args.model_path, args.output_path)
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tf2onnx
from tf2onnx import tf_loader
import argparse
ONNX_OPSET = 11
def _convert_graphdef_to_onnx(graph_def,
inputs=None,
outputs=None,
output_path='',
**kwargs):
inputs_as_nchw = kwargs.get('inputs_as_nchw', None)
custom_ops = kwargs.get('custom_ops', None)
custom_op_handlers = kwargs.get('custom_op_handlers', None)
custom_rewriter = kwargs.get('custom_rewriter', None)
extra_opset = kwargs.get('extra_opset', None)
large_model = kwargs.get('large_model', False)
name = kwargs.get('name', 'habana_convert')
target = kwargs.get('target', None)
shape_override = kwargs.get('shape_override', {})
tf2onnx.convert.from_graph_def(graph_def,
name=name,
input_names=inputs,
output_names=outputs,
opset=ONNX_OPSET,
custom_ops=custom_ops,
custom_op_handlers=custom_op_handlers,
custom_rewriter=custom_rewriter,
inputs_as_nchw=inputs_as_nchw,
extra_opset=extra_opset,
shape_override=shape_override,
target=target,
large_model=large_model,
output_path=output_path)
return output_path
def savedmodel_to_onnx(model_path, output_path='', **kwargs):
inputs = kwargs.get('inputs', None)
outputs = kwargs.get('outputs', None)
graph_def, inputs, outputs = tf_loader.from_saved_model(
model_path, inputs, outputs)
return _convert_graphdef_to_onnx(graph_def, inputs, outputs, output_path,
**kwargs)
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", default="")
parser.add_argument("--output_path", default="")
args = parser.parse_args()
return args
if __name__ == "__main__":
args = get_args()
savedmodel_to_onnx(args.model_path, args.output_path)
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
# tf.contrib.resampler
from tensorflow.core.framework import types_pb2, graph_pb2, attr_value_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
import numpy as np
from textops import tf_load_op_library
# Const should be float32 in object detection api during nms (see here: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/non-max-suppression-v4.html)
keep_fp32_node_name = []
keep_fp16_node_name = []
def load_graph(model_path):
graph = tf.Graph()
with graph.as_default():
graph_def = tf.GraphDef()
if model_path.endswith("pb"):
with open(model_path, "rb") as f:
graph_def.ParseFromString(f.read())
else:
with open(model_path, "r") as pf:
text_format.Parse(pf.read(), graph_def)
tf.import_graph_def(graph_def, name="")
sess = tf.Session(graph=graph)
return sess
def rewrite_batch_norm_node_v2(node, graph_def, target_type='fp16'):
"""
Rewrite FusedBatchNorm with FusedBatchNormV2 for reserve_space_1 and reserve_space_2 in FusedBatchNorm require float32 for
gradient calculation (See here: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/fused-batch-norm)
"""
if target_type == 'fp16':
dtype = types_pb2.DT_HALF
elif target_type == 'fp64':
dtype = types_pb2.DT_DOUBLE
else:
dtype = types_pb2.DT_FLOAT
new_node = graph_def.node.add()
new_node.op = "FusedBatchNormV2"
new_node.name = node.name
new_node.input.extend(node.input)
new_node.attr["U"].CopyFrom(
attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT))
for attr in list(node.attr.keys()):
if attr == "T":
node.attr[attr].type = dtype
new_node.attr[attr].CopyFrom(node.attr[attr])
print("rewrite fused_batch_norm done!")
def convert_graph_to_fp16(model_path,
save_path,
name,
as_text=False,
target_type='fp16',
input_name=None,
output_names=None):
if target_type == 'fp16':
dtype = types_pb2.DT_HALF
elif target_type == 'fp64':
dtype = types_pb2.DT_DOUBLE
else:
dtype = types_pb2.DT_FLOAT
source_sess = load_graph(model_path)
source_graph_def = source_sess.graph.as_graph_def()
target_graph_def = graph_pb2.GraphDef()
target_graph_def.versions.CopyFrom(source_graph_def.versions)
for node in source_graph_def.node:
# fused batch norm node
if node.op == "FusedBatchNorm":
rewrite_batch_norm_node_v2(node,
target_graph_def,
target_type=target_type)
continue
# replicate node
new_node = target_graph_def.node.add()
new_node.op = node.op
new_node.name = node.name
new_node.input.extend(node.input)
attrs = list(node.attr.keys())
# keep batch norm params node
if ("BatchNorm" in node.name) or ('batch_normalization' in node.name):
for attr in attrs:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
# replace dtype in node attr with target dtype
for attr in attrs:
# keep special node in fp32
if node.name in keep_fp32_node_name:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
if node.attr[attr].type == types_pb2.DT_FLOAT:
# modify node dtype
node.attr[attr].type = dtype
if attr == "value":
tensor = node.attr[attr].tensor
if tensor.dtype == types_pb2.DT_FLOAT:
# if float_val exists
if tensor.float_val:
float_val = tf.make_ndarray(node.attr[attr].tensor)
new_node.attr[attr].tensor.CopyFrom(
tf.make_tensor_proto(float_val, dtype=dtype))
continue
# if tensor content exists
if tensor.tensor_content:
tensor_shape = [
x.size for x in tensor.tensor_shape.dim
]
tensor_weights = tf.make_ndarray(tensor)
# reshape tensor
tensor_weights = np.reshape(tensor_weights,
tensor_shape)
tensor_proto = tf.make_tensor_proto(tensor_weights,
dtype=dtype)
new_node.attr[attr].tensor.CopyFrom(tensor_proto)
continue
new_node.attr[attr].CopyFrom(node.attr[attr])
# transform graph
if output_names:
if not input_name:
input_name = []
transforms = ["strip_unused_nodes"]
target_graph_def = TransformGraph(target_graph_def, input_name,
output_names, transforms)
# write graph_def to model
tf.io.write_graph(target_graph_def,
logdir=save_path,
name=name,
as_text=as_text)
print("Converting done ...")
def main():
# input_name = ["input_ids", "segment_ids", "input_mask"]
# output_names = ["output_scores"]
input_name = [
"block_ids", "font_size", "height", "strclass", "tag_titles", "tags",
"text", "urls", "width", "x_axis", "y_axis"
]
output_names = ["loss/Softmax", "init_all_tables"]
model_path = "frozen_init_all_table.pb"
save_path = "./"
name = "fp32_frozen_init_all_table.pb"
as_text = False
target_type = 'fp32'
convert_graph_to_fp16(model_path,
save_path,
name,
as_text=as_text,
target_type=target_type,
input_name=input_name,
output_names=output_names)
# test loading
# ISSUE: loading detection model is extremely slow while loading classification model is normal
sess = load_graph(save_path + "/" + name)
print("DONE!")
if __name__ == "__main__":
tf_load_op_library()
main()
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import tensorflow as tf
from tensorflow.core import framework
from tensorflow.core.framework import types_pb2, graph_pb2, attr_value_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
import numpy as np
def isTextProtobuf(filename):
""" Returns whether a filename is a text protobuf based on the file extension.
Args:
filename: string - file name to process.
Returns:
true if `filename`'s extension is .pbtxt, false otherwise.
"""
retval = False
_, filename_ext = os.path.splitext(filename)
if filename_ext and filename_ext.lower() == ".pbtxt":
retval = True
return retval
def saveGraphProtobufToFile(file_name, graph_d):
""" Saves a `GraphDef` protocol buffer graph to a file.
Args:
file_name: string - name of the file where to write the graph.
graph_d: The `GraphDef` protocol buffer to save.
"""
output_file_name_no_dir = os.path.basename(file_name)
output_file_dir = os.path.dirname(file_name)
tf.io.write_graph(graph_d,
output_file_dir,
output_file_name_no_dir,
as_text=isTextProtobuf(file_name))
def loadGraphProtobufFromFile(file_name):
""" Loads a `GraphDef` protocol buffer graph from a file.
Args:
file_name: string - name of the file to load.
Returns:
A `GraphDef` protocol buffer loaded from the file.
"""
graph_d = framework.graph_pb2.GraphDef()
with open(file_name, "rb") as f:
if isTextProtobuf(file_name):
# for text file:
text_format.Merge(f.read(), graph_d)
else:
# for binary file:
graph_d.ParseFromString(f.read())
return graph_d
def duplicateGraph(graph_d):
""" Creates a deep copy of a tf GraphDef.
Args:
graph_d: A `GraphDef` protocol buffer to duplicate.
Returns:
A deep copy of the specified tf GraphDef.
"""
with tf.Graph().as_default() as tmp_graph:
_ = tf.import_graph_def(graph_d, name="")
return tmp_graph.as_graph_def()
def getNodeNames(nodes_d):
""" Compiles a list of strings representing all the name of
the nodes in the specified list of nodes.
Args:
nodes_d: List of `NodeDef` objects to process.
Returns:
A list of strings representing all the name of the nodes in `nodes_d`.
"""
return [node_d.name for node_d in nodes_d]
def getNodeIndexByName(nodes_d, node_name):
""" Finds the NodeDef node in list of NodeDef corresponding to
the specified name.
Args:
nodes_d: List of `NodeDef` objects to process.
node_name: node to find.
Returns:
        An integer index representing the index of the node in the list
passed or -1 if not found.
"""
retval = -1
for i, node_d in enumerate(nodes_d):
if node_d.name == node_name:
retval = i
break
return retval
def getNodeInputNamesClean(node_input_names):
retval = []
for input_name in node_input_names:
tensor_idx = input_name.rfind(":")
if tensor_idx < 0:
retval.append(input_name)
else:
retval.append(input_name[:tensor_idx])
return retval
def getNodeByName(nodes_d, node_name):
""" Finds the NodeDef node in list of NodeDef corresponding to
the specified name.
Args:
nodes_d: List of `NodeDef` objects to process.
node_name: node to find.
Returns:
The `NodeDef` node in `nodes_d` corresponding to the specified name,
or None if name is not found in `nodes_d`.
"""
retval = getNodeIndexByName(nodes_d, node_name)
if (retval < 0):
retval = None
else:
retval = nodes_d[retval]
return retval
def getInputNodeNames(graph_d):
""" Finds the placeholder nodes (or inputs) in the graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
Returns:
A list of node names corresponding to all nodes that are
inputs to the graph.
"""
retval = []
for node_d in graph_d.node:
if node_d.op == "Placeholder":
retval.append(node_d.name)
return retval
def getOutputNodeNames(graph_d):
""" Finds the nodes that are leaf nodes (or outputs) in the graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
Returns:
A list of node names corresponding to all nodes that are
leaf nodes (or outputs) in the graph.
"""
non_output_node_names = set()
for node_d in graph_d.node:
non_output_node_names = non_output_node_names | set(
getNodeInputNamesClean(node_d.input))
graph_node_names = set(getNodeNames(graph_d.node))
return list(graph_node_names - non_output_node_names)
def getNodesInOutput(graph_d, node_name):
""" Finds all nodes that use the output of specified node as
their input in the specified graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
node_name: String name of node to check.
Returns:
A list of node names corresponding to all nodes that use the
output of specified node as their input.
"""
retval = []
for node_d in graph_d.node:
node_input_names = getNodeInputNamesClean(node_d.input)
for id, input_name in enumerate(node_input_names):
if input_name == node_name:
retval.append([id, node_d.name])
break
return retval
def getNodesInSubGraph(graph_d, start_nodes, end_nodes):
    # breadth-first walk from start_nodes, collecting the names of all
    # reachable successor nodes (getNodesInOutput returns [input_idx, name])
    subgraph = list(start_nodes)
    frontier = list(start_nodes)
    while len(frontier) != 0:
        next_frontier = []
        for node_name in frontier:
            for _, succ_name in getNodesInOutput(graph_d, node_name):
                if succ_name not in subgraph:
                    subgraph.append(succ_name)
                    next_frontier.append(succ_name)
        frontier = next_frontier
    return subgraph
def convertTensorflow2NumpyShape(shape_tf):
""" Converts a tensorflow `TensorShape` to a numpy shape.
All unknown values for partial shapes will be converted to -1.
Args:
shape_tf: A `TensorShape` object to convert.
Returns:
A list of values representing a valid numpy style shape.
"""
retval = [
shape_val if shape_val is not None else -1
for shape_val in shape_tf.as_list()
]
return retval
def convertNumpy2TensorflowShape(shape_np):
""" Converts a numpy shape to a tensorflow shape.
All unknown (-1) values for partial shapes will be converted to None.
Args:
shape_np: A list of values representing a valid numpy shape.
Returns:
A list of values representing a valid tensorflow style shape.
"""
retval = [shape_val if shape_val >= 0 else None for shape_val in shape_np]
return retval
def getInputShape(graph_d, numpy_format=False):
""" Retrieves the shape of all inputs to specified `GraphDef` object.
Args:
graph_d: A `GraphDef` protocol buffer to process.
numpy_format: boolean - if False (default), shape is given in tensorflow format,
otherwise, numpy format.
Returns:
A mapping string => list: from input tensor name to shape.
"""
retval = {}
input_node_names = getInputNodeNames(graph_d)
tf.import_graph_def(graph_d, name="")
for input_node_name in input_node_names:
# find all output tensors for this placeholder, i.e. input:0, input:1, etc.
try:
i = 0
while True:
input_tensor_name = input_node_name + ":" + str(i)
next_input_tensor = tf.get_default_graph().get_tensor_by_name(
input_tensor_name)
tensor_shape = next_input_tensor.shape
if numpy_format:
tensor_shape = convertTensorflow2NumpyShape(tensor_shape)
retval[input_tensor_name] = tensor_shape
i += 1
        except KeyError:
            pass  # reached the end of the placeholder outputs
return retval
def getInputOutputNodes(frozen_graph):
""" Finds all input and output nodes in the specified graph.
Args:
frozen_graph: TensorFlow frozen graph
Returns:
A list of input and output node names.
"""
predefined_inputs = ['segment', 'mask', 'input_ids']
graph_d = loadGraphProtobufFromFile(frozen_graph)
inputs = getInputNodeNames(graph_d)
outputs = getOutputNodeNames(graph_d)
    nodes = [
        name for name in inputs if any(sub in name for sub in predefined_inputs)
    ]
if len(nodes) == len(predefined_inputs):
return [inputs, outputs]
else:
status, inputs = findNodeByName(graph_d, predefined_inputs)
if status:
return [inputs, outputs]
else:
raise RuntimeError(
"Cannot find suitable inputs for this tool, please indicate the names of inputs after preprocessing"
)
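# Usage sketch (hypothetical path):
# inputs, outputs = getInputOutputNodes('frozen_model.pb')
# print('inputs:', inputs, 'outputs:', outputs)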
def findNodeByName(graph_d, node_name):
""" Finds nodes specified by name in the specified graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
node_name: String name of node to check.
Returns:
status - True if all nodes are found, False otherwise
A list of node names.
"""
status = False
all_nodes = list(getNodeNames(graph_d.node))
    retval = [name for name in all_nodes if any(sub in name for sub in node_name)]
if len(node_name) == len(retval):
status = True
return status, retval
def load_graph(model_path):
graph = tf.Graph()
with graph.as_default():
graph_def = tf.GraphDef()
if model_path.endswith("pb"):
with open(model_path, "rb") as f:
graph_def.ParseFromString(f.read())
else:
with open(model_path, "r") as pf:
text_format.Parse(pf.read(), graph_def)
return graph_def
from opt_tf import *
import os
import tensorflow as tf
import sys
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import saved_model_cli
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import signature_constants
from tensorflow.tools.graph_transforms import TransformGraph
from six import StringIO, iteritems
import contextlib
from tensorflow.core.framework import types_pb2, tensor_shape_pb2, graph_pb2, attr_value_pb2
import numpy as np
from load_runstep import load_runstep
def load_graph(model):
graph_def = tf.GraphDef()
print("load model: ", model)
with open(model, 'rb') as f:
graph_def.ParseFromString(f.read())
return graph_def
def find_node(graph_def, name):
node = None
for n in graph_def.node:
if n.name == name:
node = n
break
# if node == None:
# print('Node {} not found'.format(name))
return node
def find_node_by_type(graph_def, type):
node = []
for n in graph_def.node:
if n.op == type:
node.append(n)
return node
def get_node_successor(graph_def, node_name):
outputs = []
for n in graph_def.node:
for input in n.input:
if node_name == input.split(':')[0]:
outputs.append(n)
# if len(outputs) == 0:
# print("[INFO] {} has no successor".format(node_name))
return outputs
def get_node_output(graph_def, node_name):
outputs = []
for n in graph_def.node:
for input in n.input:
if node_name == input.split(':')[0]:
if len(input.split(':')) == 1:
if not input + ":0" in outputs:
outputs.append(input + ":0")
else:
if not input in outputs:
outputs.append(input)
# if len(outputs) == 0:
# print("[INFO] {} has no output".format(node_name))
return outputs
# single in & single out
def remove_nodes(graph_d, nodes):
for node in nodes:
# assert len(node.input) == 1
pre_node = node.input[0]
succ_nodes = get_node_successor(graph_d, node.name)
for succ in succ_nodes:
for idx, name in enumerate(succ.input):
if name == node.name:
succ.input[idx] = pre_node
graph_d.node.remove(node)
return graph_d
def create_shape_proto(shape):
shape_proto = tensor_shape_pb2.TensorShapeProto()
for dim in shape:
shape_proto.dim.add().size = dim
return attr_value_pb2.AttrValue(shape=shape_proto)
def set_shape(node, shape):
node.attr["shape"].CopyFrom(create_shape_proto(shape))
def remove_control_dep(graph_def):
# reset & import
tf.reset_default_graph()
tf.import_graph_def(graph_def, name="")
for node in graph_def.node:
op = tf.get_default_graph().get_operation_by_name(node.name)
if len(op.control_inputs) != 0:
tf.contrib.graph_editor.remove_control_inputs(
op, op.control_inputs)
graph_def = tf.get_default_graph().as_graph_def()
return graph_def
def is_leaf_node(graph_d, name):
for n in graph_d.node:
for in_n in n.input:
if name == in_n or name == in_n.split(":0")[0]:
return False
return True
def get_node_shape(node):
return [d.size for d in node.attr["shape"].shape.dim]
def get_graph_input(graph_d):
in_node = []
for n in graph_d.node:
if n.op == "Placeholder":
in_node.append(n.name)
to_remove = []
for in_n in in_node:
if is_leaf_node(graph_d, in_n):
to_remove.append(in_n)
for name in to_remove:
node = find_node(graph_d, name)
graph_d.node.remove(node)
real_in = set(in_node) - set(to_remove)
return list(real_in)
def get_graph_output(graph_d):
out_node = []
for n in graph_d.node:
if len(get_node_successor(graph_d, n.name)) == 0:
out_node.append(n.name)
# if len(out_node) == 0:
# print("[INFO] Graph No Outputs??")
return out_node
def get_constant_val(node):
val = tf.make_ndarray(node.attr["value"].tensor)
return val
def get_dtype_from_np(val):
if val.dtype == np.int32:
return types_pb2.DT_INT32
if val.dtype == np.float32:
return types_pb2.DT_FLOAT
if val.dtype == np.int64:
return types_pb2.DT_INT64
if val.dtype == np.float16:
return types_pb2.DT_HALF
raise ValueError("DTYPE {} NOT SUPPORTEED!".format(val.dtype))
def set_constant_val(node, val):
tf_dtype = get_dtype_from_np(val)
node.attr["value"].tensor.CopyFrom(
tf.make_tensor_proto(val, dtype=tf_dtype))
@contextlib.contextmanager
def captured_output():
new_out, new_err = StringIO(), StringIO()
old_out, old_err = sys.stdout, sys.stderr
try:
sys.stdout, sys.stderr = new_out, new_err
yield sys.stdout, sys.stderr
finally:
sys.stdout, sys.stderr = old_out, old_err
def get_saved_input_node(saved_model_dir, saved_tags, sign):
parser = saved_model_cli.create_parser()
args = parser.parse_args([
'show', '--dir', saved_model_dir, '--tag_set', saved_tags,
'--signature_def', sign
])
with captured_output() as (out, err):
saved_model_cli.show(args)
result = out.getvalue().strip()
input_tensors = []
lines = result.split('\n')
    for idx, line in enumerate(lines):
if "inputs[" in line:
line = lines[idx + 3]
input = line.split(":")[1]
input_tensors.append(input.strip() + ":0")
return input_tensors
def get_saved_output_node(saved_model_dir, saved_tags, sign):
parser = saved_model_cli.create_parser()
args = parser.parse_args([
'show', '--dir', saved_model_dir, '--tag_set', saved_tags,
'--signature_def', sign
])
with captured_output() as (out, err):
saved_model_cli.show(args)
result = out.getvalue().strip()
# print(result)
output_nodes = []
lines = result.split('\n')
    for idx, line in enumerate(lines):
if "outputs[" in line:
line = lines[idx + 3]
output = line.split(":")[1]
output_nodes.append(output.strip() + ":0")
return output_nodes
def duplicate_const(graph_d):
all_consts = find_node_by_type(graph_d, "Const")
need_duplicate = []
for node in all_consts:
if len(get_node_successor(graph_d, node.name)) > 1:
need_duplicate.append(node.name)
for node in need_duplicate:
succ_nodes = get_node_successor(graph_d, node)
for idx, succ in enumerate(succ_nodes):
ori_node = find_node(graph_d, node)
new_node = graph_d.node.add()
new_node.op = ori_node.op
new_node.name = ori_node.name + "new_{}".format(idx)
new_node.input.extend(ori_node.input)
attrs = list(ori_node.attr.keys())
for attr in attrs:
new_node.attr[attr].CopyFrom(ori_node.attr[attr])
for i, input in enumerate(succ.input):
if input == ori_node.name:
succ.input[i] = new_node.name
return graph_d
def rewrite_batch_norm_node_v2(node, graph_def, target_type):
"""
Rewrite FusedBatchNorm with FusedBatchNormV2 for reserve_space_1 and reserve_space_2 in FusedBatchNorm require float32 for
gradient calculation (See here: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/fused-batch-norm)
"""
if target_type == 'fp16':
dtype = types_pb2.DT_HALF
elif target_type == 'fp32':
dtype = types_pb2.DT_FLOAT
new_node = graph_def.node.add()
new_node.op = "FusedBatchNormV2"
new_node.name = node.name
new_node.input.extend(node.input)
new_node.attr["U"].CopyFrom(
attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT))
for attr in list(node.attr.keys()):
if attr == "T":
node.attr[attr].type = dtype
new_node.attr[attr].CopyFrom(node.attr[attr])
print("rewrite fused_batch_norm done!")
def convert_graph_to_fp16(model_path,
save_path,
name,
as_text=False,
target_type='fp16',
input_name=None,
output_names=None,
keep_fp32_node_name=[]):
    if target_type == 'fp16':
        dtype = types_pb2.DT_HALF
    elif target_type == 'fp32':
        dtype = types_pb2.DT_FLOAT
    else:
        raise ValueError(
            "target_type must be 'fp16' or 'fp32', got {!r}".format(target_type))
source_sess = load_graph(model_path)
source_graph_def = source_sess.graph.as_graph_def()
target_graph_def = graph_pb2.GraphDef()
target_graph_def.versions.CopyFrom(source_graph_def.versions)
for node in source_graph_def.node:
# fused batch norm node
if node.op == "FusedBatchNorm":
rewrite_batch_norm_node_v2(node,
target_graph_def,
target_type=target_type)
continue
# replicate node
new_node = target_graph_def.node.add()
new_node.op = node.op
new_node.name = node.name
new_node.input.extend(node.input)
attrs = list(node.attr.keys())
# keep batch norm params node
if ("BatchNorm" in node.name) or ('batch_normalization' in node.name):
for attr in attrs:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
# replace dtype in node attr with target dtype
for attr in attrs:
# keep special node in fp32
if node.name in keep_fp32_node_name:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
if node.attr[attr].type == types_pb2.DT_FLOAT:
# modify node dtype
node.attr[attr].type = dtype
if attr == "value":
tensor = node.attr[attr].tensor
if tensor.dtype == types_pb2.DT_FLOAT:
# if float_val exists
if tensor.float_val:
float_val = tf.make_ndarray(node.attr[attr].tensor)
new_node.attr[attr].tensor.CopyFrom(
tf.make_tensor_proto(float_val, dtype=dtype))
continue
# if tensor content exists
if tensor.tensor_content:
tensor_shape = [
x.size for x in tensor.tensor_shape.dim
]
tensor_weights = tf.make_ndarray(tensor)
# reshape tensor
tensor_weights = np.reshape(tensor_weights,
tensor_shape)
tensor_proto = tf.make_tensor_proto(tensor_weights,
dtype=dtype)
new_node.attr[attr].tensor.CopyFrom(tensor_proto)
continue
new_node.attr[attr].CopyFrom(node.attr[attr])
# transform graph
if output_names:
if not input_name:
input_name = []
transforms = ["strip_unused_nodes"]
target_graph_def = TransformGraph(target_graph_def, input_name,
output_names, transforms)
# write graph_def to model
tf.io.write_graph(target_graph_def,
logdir=save_path,
name=name,
as_text=as_text)
print("Converting done ...")
def convert_graph_to_fp32(model_path,
save_path,
name,
as_text=False,
target_type='fp32',
input_name=None,
output_names=None,
keep_fp16_node_name=[]):
    if target_type == 'fp16':
        dtype = types_pb2.DT_HALF
    elif target_type == 'fp32':
        dtype = types_pb2.DT_FLOAT
    else:
        raise ValueError(
            "target_type must be 'fp16' or 'fp32', got {!r}".format(target_type))
source_sess = load_graph(model_path)
source_graph_def = source_sess.graph.as_graph_def()
target_graph_def = graph_pb2.GraphDef()
target_graph_def.versions.CopyFrom(source_graph_def.versions)
for node in source_graph_def.node:
# fused batch norm node
if node.op == "FusedBatchNorm":
rewrite_batch_norm_node_v2(node,
target_graph_def,
target_type=target_type)
continue
# replicate node
new_node = target_graph_def.node.add()
new_node.op = node.op
new_node.name = node.name
new_node.input.extend(node.input)
attrs = list(node.attr.keys())
# keep batch norm params node
if ("BatchNorm" in node.name) or ('batch_normalization' in node.name):
for attr in attrs:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
# replace dtype in node attr with target dtype
for attr in attrs:
# keep special node in fp16
if node.name in keep_fp16_node_name:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
if node.attr[attr].type == types_pb2.DT_HALF:
# modify node dtype
node.attr[attr].type = dtype
if attr == "value":
tensor = node.attr[attr].tensor
if tensor.dtype == types_pb2.DT_HALF:
# if half_val exists
if tensor.half_val:
half_val = tf.make_ndarray(node.attr[attr].tensor)
new_node.attr[attr].tensor.CopyFrom(
tf.make_tensor_proto(half_val, dtype=dtype))
continue
# if tensor content exists
if tensor.tensor_content:
tensor_shape = [
x.size for x in tensor.tensor_shape.dim
]
tensor_weights = tf.make_ndarray(tensor)
# reshape tensor
tensor_weights = np.reshape(tensor_weights,
tensor_shape)
tensor_proto = tf.make_tensor_proto(tensor_weights,
dtype=dtype)
new_node.attr[attr].tensor.CopyFrom(tensor_proto)
continue
new_node.attr[attr].CopyFrom(node.attr[attr])
# transform graph
if output_names:
if not input_name:
input_name = []
transforms = ["strip_unused_nodes"]
target_graph_def = TransformGraph(target_graph_def, input_name,
output_names, transforms)
# write graph_def to model
tf.io.write_graph(target_graph_def,
logdir=save_path,
name=name,
as_text=as_text)
print("Converting done ...")
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import numpy as np
import torch
def torch_to_onnx(model_path, output_path):
    # Stripping the trailing "onnx" (four characters) keeps the dot, so
    # appending "json" yields the matching "<model_name>.json" metadata file.
    model_name = output_path.split("/")[-1][:-4]
    with open("general_perf/model_zoo/" + model_name + "json", "r") as f:
        model_info = json.load(f)
model_inputs = model_info["inputs"].split(",")
input_shapes = model_info["input_shape"]
input_type = model_info["input_type"].split(",")
example_inputs = _get_fake_samples(input_shapes, input_type)
model = torch.jit.load(model_path, map_location=torch.device("cpu"))
model.eval()
names = model_inputs
dynamic_inputs = {}
for i in range(len(names)):
dynamic_inputs[names[i]] = {0: "batch_size"}
outputs = model_info["outputs"].split(",")
for output in outputs:
dynamic_inputs[output] = {0: "batch_size"}
torch.onnx.export(
model,
example_inputs,
output_path,
opset_version=11,
input_names=names,
output_names=outputs,
dynamic_axes=dynamic_inputs,
)
def _get_fake_samples(shapes, dtypes):
    # Build one random tensor per declared input, matching its shape and dtype.
    data = []
    for idx, (_, val) in enumerate(shapes.items()):
        data.append(
            torch.from_numpy(np.random.random(val).astype(dtypes[idx].lower())))
    return data
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", default="")
parser.add_argument("--output_path", default="")
args = parser.parse_args()
return args
if __name__ == "__main__":
args = get_args()
torch_to_onnx(args.model_path, args.output_path)
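# Hedged usage sketch (assuming this file is saved as torch_to_onnx.py; the
# model name is a placeholder): the script expects a TorchScript checkpoint
# plus a matching metadata JSON under general_perf/model_zoo/.
#
#     python3 torch_to_onnx.py \
#         --model_path general_perf/model_zoo/albert-torch-fp32.pt \
#         --output_path general_perf/model_zoo/albert-torch-fp32.onnx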
{
"model": "albert-torch-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": true,
"clients": 3,
"iterations": 100,
"batch_sizes":[1,4,8,16,24],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-onnxruntime-fp16",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[4,8,16,32,64,128],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-onnxruntime-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[4,8,16,32,64,128],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-tf-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": true,
"clients": 3,
"iterations": 100,
"batch_sizes":[4,8,16,24],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-torch-fp16",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[1,2,4,8,16,32,64,128,256,512,1024],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-torch-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[1,2,4,8,16,32,64,128,256,512,1024],
"data_percent": 100,
"compile_only": false
}