Commit 24b257f1 authored by sunzhq2

init

parent 920b3c0f
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from fpdf import FPDF
import json
import math
import os
class PDF(FPDF):
def titles(self, title, backend):
self.set_xy(0.0, 0.0)
self.set_font('Times', 'B', 16)
# self.set_text_color(220, 50, 50)
self.cell(w=210.0,
h=40.0,
align='C',
txt=title + ' REPORT (' + backend + ')',
border=0)
def lines(self):
self.rect(5.0, 5.0, 200.0, 287.0)
def icon(self, icon_path):
self.set_xy(10.0, 10.0)
self.image(icon_path, link='', type='', w=37.6, h=5.2)
self.set_xy(157.0, 0.0)
self.set_font('Times', 'B', 10)
# self.set_text_color(220, 50, 50)
self.cell(w=60.0, h=25.0, align='C', txt='BYTE MLPERF', border=0)
def charts(self, chart_path):
self.y += 5
self.x += 6
self.image(chart_path, link='', type='', w=700 / 4, h=450 / 4.9)
def diff_tables(self, data, dataset):
col_width = 45
# self.set_xy(10.00125,40)
x = self.x
i = 0
self.set_font("Times", 'B', size=10)
line_height = self.font_size * 2.5
self.x = x + 5
self.multi_cell(90 * math.ceil(((len(data)) / 3)),
line_height,
'Accuracy Results' + ' (' + dataset + ')',
border=1,
align='C')
y = self.y
reset_y = self.y
self.ln(line_height)
self.set_font("Times", size=10)
final_y = None
for i, (key, val) in enumerate(data.items()):
if i < 4:
if (i % 3 == 0):
final_y = y
y = reset_y
self.x = x + 90 * (i // 3) + 5
self.y = y
self.multi_cell(col_width,
line_height,
key,
border=1,
align='C')
self.x += (45 + 90 * (i // 3)) + 5
self.y = y
self.multi_cell(col_width,
line_height,
str(val),
border=1,
align='C')
y = self.y
if final_y:
self.y = final_y
def graph_tables(self, data):
real_data = []
row_name = []
row_data = []
for key, val in data.items():
row_name.append(key)
row_data.append(str(val))
real_data.append(row_name)
real_data.append(row_data)
col_width = 45
self.set_xy(10.00125, 30)
x = self.x
self.x += 27
self.set_font("Times", 'B', size=10)
line_height = self.font_size * 2.5
self.multi_cell(135,
line_height,
'Graph Compilation Results',
border=1,
align='C')
y = self.y
self.ln(line_height)
self.set_font("Times", size=10)
for row in real_data:
self.x = x
for i, datum in enumerate(row):
self.y = y
self.x += (i + 1) * 45 - 18
self.multi_cell(col_width,
line_height,
str(datum),
border=1,
align='C')
y = self.y
self.y += 5
def performance_tables(self, data):
real_data = []
row_name = []
for i in range(len(data)):
row_data = []
for key, val in data[i].items():
if i == 0:
row_name.append(key)
row_data.append(val)
real_data.append(row_data)
real_data.insert(0, row_name)
col_width = 33.75
self.set_xy(10.00125, 65)
x = self.x
self.x += 27
self.set_font("Times", 'B', size=10)
line_height = self.font_size * 2.5
self.multi_cell(135,
line_height,
'Performance Results',
border=1,
align='C')
y = self.y
self.ln(line_height)
self.set_font("Times", size=10)
for row in real_data:
self.x = x
for i, datum in enumerate(row):
self.y = y
self.x += (i + 1) * 33.75 - 6.75
self.multi_cell(col_width,
line_height,
str(datum),
border=1,
align='C')
y = self.y
self.ln(line_height)
def footer(self):
# Go to 1.5 cm from bottom
self.set_y(-15)
# Select Arial italic 8
self.set_font('Arial', 'I', 8)
# Print centered page number
self.cell(0, 10, '%s' % self.page_no(), 0, 0, 'C')
def generate_report(self, path):
with open(path, 'r') as f:
report = json.load(f)
output_dir = os.path.dirname(path) + '/'
index = output_dir.index('ByteMLPerf') + len('ByteMLPerf')
base_path = output_dir[:index]
icon_path = os.path.join(base_path, 'docs/images/icon.png')
self.add_page()
self.lines()
self.icon(icon_path)
self.graph_tables(report['Graph Compile'])
if 'Performance' in report:
self.performance_tables(report['Performance'])
if 'Accuracy' in report:
self.diff_tables(report['Accuracy'], report['Dataset'])
if 'Diff Dist' in report['Accuracy']:
self.charts(output_dir + report['Accuracy']['Diff Dist'])
self.titles(report['Model'], report['Backend'])
self.set_author('Bytedance')
precision = path.split('/')[-1].split('-')[1]
self.output(output_dir + report['Model'] + '-TO-' + precision.upper() + '.pdf', 'F')
return True
def build_pdf(path):
pdf = PDF(orientation='P', unit='mm', format='A4')
return pdf.generate_report(path)
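# Usage sketch (hypothetical path): generate_report expects the report JSON to
# live somewhere under a "ByteMLPerf" directory, and the file name to encode
# the precision as the second dash-separated field.
# build_pdf('ByteMLPerf/byte_mlperf/reports/resnet50-fp16-report.json')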
#!/bin/bash
if [ ! -d "tools/venv" ]; then
python3 -m virtualenv tools/venv
source tools/venv/bin/activate
tools/venv/bin/python3 -m pip install --upgrade pip -q
tools/venv/bin/python3 -m pip install -r tools/requirements.txt -q
else
source tools/venv/bin/activate
fi
if [ "$3" == "pt2onnx" ];then
python3 tools/torch_to_onnx.py --model_path $1 --output_path $2
elif [ "$3" == "saved2onnx" ];then
python3 tools/saved_to_onnx.py --model_path $1 --output_path $2
elif [ "$3" == "saved2frozen" ];then
python3 tools/saved_to_frozen.py --model_path $1 --output_path $2
fi
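# Usage sketch (assumed invocation; the actual wrapper file name may differ):
#   bash this_script.sh <model_path> <output_path> pt2onnx|saved2onnx|saved2frozen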
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import signature_constants
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
def convert_pb_to_server_model(pb_model_path, export_dir, input_names,
output_names):
if not input_names:
raise ValueError("Converter needs inputs")
if not output_names:
raise ValueError("Converter needs outputs")
input_names = input_names.split(",")
output_names = output_names.split(",")
graph_def = read_pb_model(pb_model_path)
convert_pb_saved_model(graph_def, export_dir, input_names, output_names)
def read_pb_model(pb_model_path):
with tf.io.gfile.GFile(pb_model_path, "rb") as f:
graph_def = tf.compat.v1.GraphDef()
graph_def.ParseFromString(f.read())
return graph_def
def convert_pb_saved_model(graph_def, export_dir, input_names, output_names):
builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
sigs = {}
with tf.Session(graph=tf.Graph()) as sess:
tf.import_graph_def(graph_def, name="")
g = tf.get_default_graph()
input_infos = {}
output_infos = {}
        for input_name in input_names:
            # get_tensor_by_name expects a tensor name such as "input_1:0"
            tensor_name = input_name if ":" in input_name else input_name + ":0"
            input_infos[input_name] = g.get_tensor_by_name(tensor_name)
        for output_name in output_names:
            tensor_name = output_name if ":" in output_name else output_name + ":0"
            output_infos[output_name] = g.get_tensor_by_name(tensor_name)
sigs[signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY] = \
tf.saved_model.signature_def_utils.predict_signature_def(
input_infos, output_infos)
builder.add_meta_graph_and_variables(sess, [tag_constants.SERVING],
signature_def_map=sigs)
builder.save()
path = "densenet121.pb"
convert_pb_to_server_model(path,
os.path.abspath('.') + "/densenet_saved_model",
"input_1", "fc1000")
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.python.tools import freeze_graph
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
import logging
import argparse
def frozen_graph(h5_file_path, workdir, pb_name):
model = tf.keras.models.load_model(h5_file_path,
custom_objects={
"backend": backend,
})
model.summary()
full_model = tf.function(lambda input_1: model(input_1))
full_model = full_model.get_concrete_function(
tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype))
# Get frozen ConcreteFunction
frozen_func = convert_variables_to_constants_v2(full_model)
frozen_func.graph.as_graph_def()
layers = [op.name for op in frozen_func.graph.get_operations()]
print(frozen_func.outputs)
# Save frozen graph from frozen ConcreteFunction to hard drive
tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
logdir=workdir,
name=pb_name,
as_text=False)
print('model has been saved')
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description='VC model h5->freezedpb script')
parser.add_argument("--h5_model_path", type=str, required=True)
parser.add_argument("--freezed_pb_name", type=str, required=True)
parser.add_argument("--workdir", type=str, required=True)
args = parser.parse_args()
frozen_graph(args.h5_model_path, args.workdir, args.freezed_pb_name)
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
import numpy as np
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
try:
# Currently, memory growth needs to be the same across GPUs
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
# Memory growth must be set before GPUs have been initialized
print(e)
def my_calibration_input_fn():
for _ in range(10):
yield np.random.normal(size=(1, 224, 224, 3)).astype(np.uint8),
# yield tf.random.normal((1, 224, 224, 3)).astype(np.uint8),
saved_model_path = 'byte_mlperf/model_zoo/resnet50_saved_model'
model_params = tf.experimental.tensorrt.ConversionParams(
precision_mode="int8".upper(), max_batch_size=64, use_calibration=True)
model_trt = tf.experimental.tensorrt.Converter(
input_saved_model_dir=saved_model_path, conversion_params=model_params)
model_trt.convert(calibration_input_fn=my_calibration_input_fn)
output_saved_model_dir = 'test'
model_trt.save(output_saved_model_dir)
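# Optional sanity check, a minimal sketch assuming the conversion above
# succeeded: reload the TF-TRT SavedModel and run its default signature.
# loaded = tf.saved_model.load(output_saved_model_dir)
# infer = loaded.signatures['serving_default']
# print(infer(tf.constant(np.zeros((1, 224, 224, 3), dtype=np.uint8))))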
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import mxnet as mx
import numpy as np
import onnx
def get_mod(prefix, epoch, ctx, data_shape):
sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
mod = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
mod.bind(for_training=False,
data_shapes=[("data", data_shape)],
label_shapes=mod._label_shapes)
mod.set_params(arg_params, aux_params, allow_missing=True)
return mod
def load_mxnet():
prefix = "image_level_space"
epoch = 0
ctx = mx.cpu()
data_shape = (1, 3, 736, 416)
mod = get_mod(prefix, epoch, ctx, data_shape)
return mod
'''
requires mxnet >= 1.9.0
'''
def do_mxnet2onnx(sym, params, onnx_file, in_shapes, in_types,
dynamic_input_shapes):
'''
example:
sym = 'byte_mlperf/byte_mlperf/download/manysplit/image_level_space-symbol.json'
params = 'byte_mlperf/byte_mlperf/download/manysplit/image_level_space-0000.params'
onnx_file = 'manysplit.onnx'
in_shapes = [(1,3,736,416)]
in_types = [np.float32]
dynamic_input_shapes = [(None,3,736,416)]
'''
converted_model_path = mx.onnx.export_model(
sym,
params,
in_shapes,
in_types,
onnx_file,
dynamic=True,
dynamic_input_shapes=dynamic_input_shapes,
verbose=True)
# Load the ONNX model
model_proto = onnx.load_model(converted_model_path)
# Check if the converted ONNX protobuf is valid
onnx.checker.check_graph(model_proto.graph)
if __name__ == "__main__":
# load_mxnet()
do_mxnet2onnx()
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import onnx
import onnx.helper as helper
import onnxruntime as rt
from onnx import numpy_helper
from onnx.tools import update_model_dims
from onnx import shape_inference, TensorProto
import copy
'''
DType Info
'''
ONNX_DTYPE = {
    0: TensorProto.FLOAT,  # UNDEFINED, default to float32
1: TensorProto.FLOAT,
2: TensorProto.UINT8,
3: TensorProto.INT8,
4: TensorProto.UINT16,
5: TensorProto.INT16,
6: TensorProto.INT32,
7: TensorProto.INT64,
8: TensorProto.STRING,
9: TensorProto.BOOL,
10: TensorProto.FLOAT16,
11: TensorProto.DOUBLE,
12: TensorProto.UINT32,
13: TensorProto.UINT64,
}
'''
Nodes
'''
def get_node_by_name(graph, name):
for node in graph.node:
if node.name == name:
return node
return None
def get_nodes_by_optype(graph, typename):
nodes = []
for node in graph.node:
if node.op_type == typename:
nodes.append(node)
return nodes
def get_node_by_output_name(graph, name):
for node in graph.node:
if node.output[0] == name:
return node
return None
def get_node_successor(graph, target_node):
successor = []
for node in graph.node:
if len(list(set(node.input).intersection(set(
target_node.output)))) > 0:
successor.append(node)
return successor
def get_value_info_by_name(graph, name):
for val_info in graph.value_info:
if val_info.name == name:
return val_info
return None
def get_shape_from_value_info(val_info):
shape = [d.dim_value for d in val_info.type.tensor_type.shape.dim]
return shape
def remove_weights(graph, name_list):
rm_list = []
for weight in graph.initializer:
if weight.name in name_list:
rm_list.append(weight)
for weight in rm_list:
graph.initializer.remove(weight)
def remove_inputs(graph, name_list):
rm_list = []
for input_t in graph.input:
if input_t.name in name_list:
rm_list.append(input_t)
for input_t in rm_list:
graph.input.remove(input_t)
def remove_value_infos(graph, name_list):
rm_list = []
for value_info in graph.value_info:
if value_info.name in name_list:
rm_list.append(value_info)
for value_info in rm_list:
graph.value_info.remove(value_info)
def remove_node_by_name(graph, name):
target_node = get_node_by_name(graph, name)
remove_node(graph, target_node)
def remove_node(graph, target_node):
'''
remove the node with only one input and only one output
'''
node_input = target_node.input[0]
node_output = target_node.output[0]
# set input of successor node to predecessor node of target node
for node in graph.node:
for i, n in enumerate(node.input):
if n == node_output:
node.input[i] = node_input
target_names = set(target_node.input) & set(
[weight.name for weight in graph.initializer])
remove_weights(graph, target_names)
target_names.add(node_output)
remove_inputs(graph, target_names)
remove_value_infos(graph, target_names)
graph.node.remove(target_node)
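# Usage sketch (hypothetical node name): drop a single-input/single-output
# node such as an Identity, then re-save the model.
# model = onnx.load('model.onnx')
# remove_node_by_name(model.graph, 'Identity_0')
# onnx.save(model, 'model_pruned.onnx')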
'''
Constant & Initializer
'''
def is_initializer(graph, name):
for tensor in graph.initializer:
if tensor.name == name:
return True
return False
def get_initializer_by_name(graph, name):
for tensor in graph.initializer:
if tensor.name == name:
return tensor
return None
def get_init_value(tensor):
return numpy_helper.to_array(tensor)
def set_init_value(graph, weight, data_numpy):
    # NOTE: weight can be stored in human-readable fields (float_data, int32_data, string_data, ...)
    # as well as in raw_data; if we set the weight via raw_data, we must clear the fields above for it to take effect
    # NOTE: the numpy dtype must match the TensorProto data_type
raw_shape = tuple([i for i in weight.dims])
new_shape = np.shape(data_numpy)
if weight.data_type == 8:
# string data type is special, it requires to store data in string_data field
# NOT the raw_data field
weight.string_data = bytes(data_numpy, encoding="utf8")
weight.ClearField("raw_data")
return
if new_shape != raw_shape:
print(
"Warning: the new weight shape is not consistent with original shape!"
)
weight.dims[:] = list(new_shape)
    # in case the weight is also a graph input, keep its declared shape in sync
for model_input in graph.input:
if model_input.name == weight.name:
# copy from onnx.helper...
tensor_shape_proto = model_input.type.tensor_type.shape
tensor_shape_proto.ClearField("dim")
tensor_shape_proto.dim.extend([])
for d in new_shape:
dim = tensor_shape_proto.dim.add()
dim.dim_value = d
weight.ClearField("float_data")
weight.ClearField("int32_data")
weight.ClearField("int64_data")
weight.raw_data = data_numpy.tobytes()
return
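# Usage sketch (hypothetical initializer name): overwrite a weight in place;
# the new array's dtype should match the initializer's data_type.
# w = get_initializer_by_name(model.graph, 'fc.weight')
# set_init_value(model.graph, w, np.zeros((1000, 2048), dtype=np.float32))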
def is_constant(node):
if node.op_type == "Constant":
return True
else:
return False
def get_constant_value(node):
    # map TensorProto data_type to the matching numpy dtype; the original
    # struct.unpack branches used invalid format chars for several types
    dtype_map = {
        1: np.float32,
        2: np.uint8,
        3: np.int8,
        4: np.uint16,
        5: np.int16,
        6: np.int32,
        7: np.int64,
        9: np.bool_,
        10: np.float16,
        11: np.float64,
        12: np.uint32,
        13: np.uint64,
    }
    for attr in node.attribute:
        if attr.name == 'value':
            if attr.t.data_type in dtype_map:
                return np.frombuffer(attr.t.raw_data,
                                     dtype=dtype_map[attr.t.data_type])
            print("unsupported attribute data type with attribute name")
    return None
def set_constant_value(target_node, value):
# NOTE : dtype value should match with target_node
for attr in target_node.attribute:
if (attr.name == "value"):
attr.t.raw_data = value.tobytes()
'''
Attributes
'''
def get_attribute_by_name(node, name):
    for attr in node.attribute:
        if attr.name == name:
            return attr
    return None
def set_node_attribute(target_node, attr_name, attr_value):
flag = False
for attr in target_node.attribute:
if (attr.name == attr_name):
if attr.type == 1: # float value
attr.f = attr_value
elif attr.type == 2: # int value
attr.i = attr_value
elif attr.type == 3: # string value
attr.s = attr_value
elif attr.type == 4: # tensor value
attr.t = attr_value
elif attr.type == 5: # graph value
attr.g = attr_value
# NOTE: For repeated composite types, we should use something like
# del attr.xxx[:]
# attr.xxx.extend([n1, n2, n3])
elif attr.type == 6: # float[]
attr.floats[:] = attr_value
elif attr.type == 7: # int[]
attr.ints[:] = attr_value
elif attr.type == 8: # strings[]
attr.strings[:] = attr_value
else:
print("unsupported attribute data type with attribute name")
return False
flag = True
if not flag:
# attribute not in original node
print("Warning: you are appending a new attribute to the node!")
target_node.attribute.append(
helper.make_attribute(attr_name, attr_value))
flag = True
return flag
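# Usage sketch (hypothetical node name): rewrite a Transpose node's "perm"
# attribute (an int[] attribute, so the ints branch above applies).
# node = get_node_by_name(model.graph, 'Transpose_0')
# set_node_attribute(node, 'perm', [0, 2, 3, 1])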
'''
Graph Input/Output
'''
def add_extra_output(graph, target_output, target_shape):
extra_elem_type = 1
for vi in graph.value_info:
if vi.name == target_output:
extra_elem_type = vi.type.tensor_type.elem_type
extra_output = helper.make_tensor_value_info(target_output,
extra_elem_type, target_shape)
'''
# NOTE
    # if we know the value type and shape, we can also use this
def make_tensor_value_info(
name, # type: Text
elem_type, # type: int
shape, # type: Optional[Sequence[Union[Text, int]]]
doc_string="", # type: Text
shape_denotation=None, # type: Optional[List[Text]]
):
'''
graph.output.append(extra_output)
return
def get_graph_input_by_name(graph, name):
for input in graph.input:
if input.name == name:
return input
return None
def get_graph_output_by_name(graph, name):
for out in graph.output:
if out.name == name:
return out
return None
def resort_nodes(model):
new_model = copy.deepcopy(model)
for n in new_model.graph.node:
model.graph.node.remove(n)
ready_tensors = [n.name for n in model.graph.input]
ready_tensors.extend([n.name for n in model.graph.initializer])
ready_tensors = set(ready_tensors)
all_nodes = [n for n in new_model.graph.node]
while True:
activate_nodes = []
for node in all_nodes:
inputs = set(node.input)
if len(inputs - ready_tensors) == 0:
activate_nodes.append(node)
assert len(activate_nodes) != 0, 'invalid graph'
for node in activate_nodes:
model.graph.node.append(node)
ready_tensors = ready_tensors | set(node.output)
all_nodes.remove(node)
if len(all_nodes) == 0:
break
return model
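# Usage sketch: after inserting nodes out of order (as several passes below
# do), re-sort graph.node topologically before saving.
# model = resort_nodes(model)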
'''
Pass
'''
def fix_model_shape(model,
in_dim_dict=None,
out_dim_dict=None,
fully_si=False):
    if in_dim_dict is not None and out_dim_dict is not None:
update_model_dims.update_inputs_outputs_dims(model, in_dim_dict,
out_dim_dict)
if fully_si:
input_num = len(model.graph.input)
tensors = model.graph.initializer
for i, tensor in enumerate(tensors):
value_info = helper.make_tensor_value_info(
tensor.name, ONNX_DTYPE[tensor.data_type], tensor.dims)
model.graph.input.insert(i + input_num, value_info)
onnx.checker.check_model(model)
model = shape_inference.infer_shapes(model)
return model
def remove_redundant_cast(graph):
    cast_nodes = get_nodes_by_optype(graph, "Cast")
    for node in cast_nodes:
        in_node = get_node_by_output_name(graph, node.input[0])
        if in_node is not None and in_node.op_type == "Cast":
            print("Removing redundant cast: ", in_node)
            node.input[0] = in_node.input[0]
            graph.node.remove(in_node)
def onnx_sess_opt(model, opt_model):
sess_options = rt.SessionOptions()
sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_BASIC
sess_options.optimized_model_filepath = opt_model
rt.InferenceSession(model,
sess_options,
providers=['CPUExecutionProvider'])
# ------------- Model-specific passes --------------------
def convert_fp16_to_fp32(model):
# handle model.graph.initializer
to_convert = []
for init in model.graph.initializer:
# print(init.name)
if init.data_type != 10:
continue
to_convert.append(init)
for init in to_convert:
val = get_init_value(init)
new_val = val.astype(np.float32)
new_init = numpy_helper.from_array(new_val, init.name)
model.graph.initializer.remove(init)
model.graph.initializer.append(new_init)
    # handle model.graph.node
cons_ops = get_nodes_by_optype(model.graph, "Constant")
for op in cons_ops:
val_attr = get_attribute_by_name(op, "value")
if val_attr.t.data_type != 10:
continue
# import pdb;pdb.set_trace()
val = get_constant_value(op)
new_val = val.astype(np.float32)
set_constant_value(op, new_val)
val_attr.t.data_type = 1
for val_info in model.graph.value_info:
if val_info.type.tensor_type.elem_type != 10:
continue
val_info.type.tensor_type.elem_type = 1
# handle cast op
cast_ops = get_nodes_by_optype(model.graph, "Cast")
to_remove = []
for cast in cast_ops:
to = get_attribute_by_name(cast, "to")
if to.i != 10 and to.i != 1:
continue
if to.i == 10:
up_node = get_node_by_output_name(model.graph, cast.input[0])
set_node_attribute(cast, "to", 1)
if up_node.op_type != "Cast":
continue
up_to = get_attribute_by_name(up_node, "to")
if up_to.i != 1:
continue
if to.i == 1:
down_node = get_node_successor(model.graph, cast)
if len(down_node) == 0:
continue
if down_node[0].op_type != "Cast":
continue
down_to = get_attribute_by_name(down_node[0], "to")
if down_to.i != 10:
continue
# print(cast.name)
succs = get_node_successor(model.graph, cast)
for succ in succs:
for idx, in_name in enumerate(succ.input):
if in_name == cast.output[0]:
succ.input[idx] = cast.input[0]
to_remove.append(cast)
for cast in to_remove:
out_info = get_graph_output_by_name(model.graph, cast.output[0])
        if out_info is None:
model.graph.node.remove(cast)
else:
node = get_node_by_output_name(model.graph, cast.input[0])
            if node is not None:
for idx, out in enumerate(node.output):
if out == cast.input[0]:
node.output[idx] = cast.output[0]
model.graph.node.remove(cast)
return model
def replace_mask_where(model):
# pattern: sub -> cast ----|
# |-----------> where
where_ops = get_nodes_by_optype(model.graph, "Where")
to_replace = []
for where_node in where_ops:
        cond = where_node.input[0]
        node = get_node_by_output_name(model.graph, cond)
        if node is None or node.op_type != "Cast":
            continue
        y_in = where_node.input[2]
        node = get_node_by_output_name(model.graph, y_in)
        if node is None or node.op_type != "Sub":
            continue
to_replace.append(where_node)
to_remove = []
for where in to_replace:
x_in = where.input[1]
y_in = where.input[2]
mul_op = onnx.helper.make_node('Mul', [x_in, y_in],
where.output,
name="{}_mask_mul_replaced".format(
where.name))
model.graph.node.append(mul_op)
cast_op = get_node_by_output_name(model.graph, where.input[0])
to_remove.append(cast_op)
to_remove.append(where)
for node in to_remove:
model.graph.node.remove(node)
return model
def convert_expand_to_tile(model):
expand_ops = get_nodes_by_optype(model.graph, "Expand")
for expand_node in expand_ops:
ifm = expand_node.input[0]
ofm = expand_node.output[0]
ifm_vi = get_value_info_by_name(model.graph, expand_node.input[0])
        if ifm_vi is None:
            continue
init_shape = get_initializer_by_name(model.graph, expand_node.input[1])
        if init_shape is None:
            continue
shape_val = get_init_value(init_shape)
ofm_shape = shape_val.tolist()
ifm_shape = [
dim.dim_value for dim in ifm_vi.type.tensor_type.shape.dim
]
repeats = [
1 if i == j else int(j / i) for i, j in zip(ifm_shape, ofm_shape)
]
repeats = np.array(repeats)
repeats = numpy_helper.from_array(
repeats, 'Tile_{}_repeats'.format(expand_node.name))
tile_node = onnx.helper.make_node('Tile', [ifm, repeats.name], [ofm],
name=expand_node.name)
model.graph.node.append(tile_node)
model.graph.initializer.append(repeats)
model.graph.node.remove(expand_node)
return model
def concat_to_tile(model):
def is_tile_type(node):
tile_flag = True
for idx in range(len(node.input) - 1):
if node.input[idx] == node.input[idx + 1]:
continue
else:
tile_flag = False
break
return tile_flag
concat_ops = get_nodes_by_optype(model.graph, "Concat")
for concat in concat_ops:
if not is_tile_type(concat):
continue
print("Converting concat to tile")
in_val = get_value_info_by_name(model.graph, concat.input[0])
out_val = get_value_info_by_name(model.graph, concat.output[0])
ifm_shape = get_shape_from_value_info(in_val)
ofm_shape = get_shape_from_value_info(out_val)
repeats = [
1 if i == j else int(j / i) for i, j in zip(ifm_shape, ofm_shape)
]
repeats = np.array(repeats)
repeats = numpy_helper.from_array(
repeats, 'Tile_{}_repeats'.format(concat.name))
tile_node = onnx.helper.make_node('Tile',
[concat.input[0], repeats.name],
[concat.output[0]],
name=concat.name)
model.graph.node.append(tile_node)
model.graph.initializer.append(repeats)
model.graph.node.remove(concat)
def remove_qdq(model):
q_ops = get_nodes_by_optype(model.graph, "QuantizeLinear")
for q_op in q_ops:
dq = get_node_successor(model.graph, q_op)
        # skip unless the Q node has exactly one consumer and it is a DQ node
        if len(dq) != 1 or dq[0].op_type != "DequantizeLinear":
            continue
        qdq_succ = get_node_successor(model.graph, dq[0])
        if len(qdq_succ) == 0:
            continue
        for i, n in enumerate(qdq_succ[0].input):
if n == dq[0].output[0]:
qdq_succ[0].input[i] = q_op.input[0]
model.graph.node.remove(q_op)
model.graph.node.remove(dq[0])
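# Usage sketch (hypothetical paths): strip QuantizeLinear/DequantizeLinear
# pairs from a quantized model, then save the float version.
# model = onnx.load('quantized.onnx')
# remove_qdq(model)
# onnx.save(model, 'no_qdq.onnx')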
import torch
from onnx2torch import convert
import onnxruntime as ort
if __name__ == "__main__":
# Path to ONNX model
onnx_model_path = 'converted_models/no_qdq_2.onnx'
onnx_model = onnx.load(onnx_model_path)
in_shape_dict = {
"data": [2, 10, 3, 256, 256],
}
out_shape_dict = {'logits': [2, 2], '1383': [1, 20]}
onnx_model = fix_model_shape(onnx_model, in_shape_dict, out_shape_dict,
True)
onnx.save(onnx_model, 'converted_models/no_qdq_3.onnx')
    onnx_sess_opt('converted_models/no_qdq_3.onnx',
                  'converted_models/no_qdq_3.onnx')
onnx_model = onnx.load('converted_models/no_qdq_3.onnx')
torch_model_2 = convert(onnx_model)
# You can pass the path to the onnx model to convert it or...
# torch_model_1 = convert(onnx_model_path)
# Create example data
x = torch.ones((2, 10, 3, 256, 256))
out_torch = torch_model_2(x)
trace_model = torch.jit.trace(torch_model_2, x)
ort_sess = ort.InferenceSession(onnx_model_path)
outputs_ort = ort_sess.run(None, {'data': x.numpy()})
print(outputs_ort[0] - out_torch[0].detach().numpy())
print(outputs_ort[1] - out_torch[1].detach().numpy())
# Check the Onnx output against PyTorch
# print(torch.max(torch.abs(outputs_ort[0] - out_torch[0].detach().numpy())))
# print(torch.max(torch.abs(outputs_ort[1] - out_torch[1].detach().numpy())))
# print(np.allclose(outputs_ort[0], out_torch[0].detach().numpy(), atol=1.e-7))
# print(np.allclose(outputs_ort[1], out_torch[1].detach().numpy(), atol=1.e-7))
tensorflow>=2.6.0
tf2onnx
numpy
torch==1.9.1
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
An interface to export saved_models to frozen models.
Note that this API makes three assumptions:
1. the saved_model directory is laid out as below:
|--saved_model.pb
|--variables
|-- |--variables.data-00000-of-00001
|-- |--variables.index
2. saved_tags defaults to tag_constants.SERVING if not specified
3. signature defaults to signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY if not specified
Copyright Reserve: Habana Labs
'''
import sys
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import saved_model_cli
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import signature_constants
import argparse
from six import StringIO
import contextlib
def freeze_saved_model(saved_model_dir,
output_nodes,
pb_name,
saved_tags=tag_constants.SERVING):
input_saved_model_dir = saved_model_dir
output_node_names = output_nodes
input_binary = False
input_saver_def_path = False
restore_op_name = None
filename_tensor_name = None
clear_devices = True
input_meta_graph = False
checkpoint_path = None
input_graph_filename = None
saved_model_tags = saved_tags
output_graph_filename = pb_name
freeze_graph.freeze_graph(input_graph_filename, input_saver_def_path,
input_binary, checkpoint_path, output_node_names,
restore_op_name, filename_tensor_name,
output_graph_filename, clear_devices, "", "", "",
input_meta_graph, input_saved_model_dir,
saved_model_tags)
@contextlib.contextmanager
def captured_output():
new_out, new_err = StringIO(), StringIO()
old_out, old_err = sys.stdout, sys.stderr
try:
sys.stdout, sys.stderr = new_out, new_err
yield sys.stdout, sys.stderr
finally:
sys.stdout, sys.stderr = old_out, old_err
def get_output_node(saved_model_dir, saved_tags, sign):
parser = saved_model_cli.create_parser()
args = parser.parse_args([
'show', '--dir', saved_model_dir, '--tag_set', saved_tags,
'--signature_def', sign
])
with captured_output() as (out, err):
saved_model_cli.show(args)
result = out.getvalue().strip()
print(result)
output_num = 0
output_nodes = None
lines = result.split('\n')
    for idx, line in enumerate(lines):
if "outputs[" in line:
line = lines[idx + 3]
output = line.split(":")[1]
if output_num > 0:
output_nodes = output_nodes + "," + output
else:
output_nodes = output
output_num = output_num + 1
    if output_nodes is None:
raise RuntimeError("No Output Nodes found in saved_model.")
return output_nodes, output_num
def saved_to_frozen(
saved_model_dir,
frozen_path,
saved_tags=tag_constants.SERVING,
sign=signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY):
output_nodes, output_num = get_output_node(saved_model_dir, saved_tags,
sign)
print("[INFO]: Save Model has [", output_num, "] outputs.")
print("[INFO]: Outputs Nodes: [", output_nodes, "].")
# cwd = os.getcwd()
# frozen_path = os.path.join(cwd, "converted_frozen.pb")
freeze_saved_model(saved_model_dir, output_nodes, frozen_path, saved_tags)
print("[INFO]: Saved Model convert to Frozen Model done.")
print("[INFO]: Frozen Model saved here: ", frozen_path)
return frozen_path
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", default="")
parser.add_argument("--output_path", default="")
args = parser.parse_args()
return args
if __name__ == "__main__":
args = get_args()
saved_to_frozen(args.model_path, args.output_path)
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tf2onnx
from tf2onnx import tf_loader
import argparse
ONNX_OPSET = 11
def _convert_graphdef_to_onnx(graph_def,
inputs=None,
outputs=None,
output_path='',
**kwargs):
inputs_as_nchw = kwargs.get('inputs_as_nchw', None)
custom_ops = kwargs.get('custom_ops', None)
custom_op_handlers = kwargs.get('custom_op_handlers', None)
custom_rewriter = kwargs.get('custom_rewriter', None)
extra_opset = kwargs.get('extra_opset', None)
large_model = kwargs.get('large_model', False)
name = kwargs.get('name', 'habana_convert')
target = kwargs.get('target', None)
shape_override = kwargs.get('shape_override', {})
tf2onnx.convert.from_graph_def(graph_def,
name=name,
input_names=inputs,
output_names=outputs,
opset=ONNX_OPSET,
custom_ops=custom_ops,
custom_op_handlers=custom_op_handlers,
custom_rewriter=custom_rewriter,
inputs_as_nchw=inputs_as_nchw,
extra_opset=extra_opset,
shape_override=shape_override,
target=target,
large_model=large_model,
output_path=output_path)
return output_path
def savedmodel_to_onnx(model_path, output_path='', **kwargs):
inputs = kwargs.get('inputs', None)
outputs = kwargs.get('outputs', None)
graph_def, inputs, outputs = tf_loader.from_saved_model(
model_path, inputs, outputs)
return _convert_graphdef_to_onnx(graph_def, inputs, outputs, output_path,
**kwargs)
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", default="")
parser.add_argument("--output_path", default="")
args = parser.parse_args()
return args
if __name__ == "__main__":
args = get_args()
savedmodel_to_onnx(args.model_path, args.output_path)
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import tensorflow as tf
# tf.contrib.resampler
from tensorflow.core.framework import types_pb2, graph_pb2, attr_value_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
import numpy as np
from textops import tf_load_op_library
# Const should be float32 in object detection api during nms (see here: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/non-max-suppression-v4.html)
keep_fp32_node_name = []
keep_fp16_node_name = []
def load_graph(model_path):
graph = tf.Graph()
with graph.as_default():
graph_def = tf.GraphDef()
if model_path.endswith("pb"):
with open(model_path, "rb") as f:
graph_def.ParseFromString(f.read())
else:
with open(model_path, "r") as pf:
text_format.Parse(pf.read(), graph_def)
tf.import_graph_def(graph_def, name="")
sess = tf.Session(graph=graph)
return sess
def rewrite_batch_norm_node_v2(node, graph_def, target_type='fp16'):
"""
Rewrite FusedBatchNorm with FusedBatchNormV2 for reserve_space_1 and reserve_space_2 in FusedBatchNorm require float32 for
gradient calculation (See here: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/fused-batch-norm)
"""
if target_type == 'fp16':
dtype = types_pb2.DT_HALF
elif target_type == 'fp64':
dtype = types_pb2.DT_DOUBLE
else:
dtype = types_pb2.DT_FLOAT
new_node = graph_def.node.add()
new_node.op = "FusedBatchNormV2"
new_node.name = node.name
new_node.input.extend(node.input)
new_node.attr["U"].CopyFrom(
attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT))
for attr in list(node.attr.keys()):
if attr == "T":
node.attr[attr].type = dtype
new_node.attr[attr].CopyFrom(node.attr[attr])
print("rewrite fused_batch_norm done!")
def convert_graph_to_fp16(model_path,
save_path,
name,
as_text=False,
target_type='fp16',
input_name=None,
output_names=None):
if target_type == 'fp16':
dtype = types_pb2.DT_HALF
elif target_type == 'fp64':
dtype = types_pb2.DT_DOUBLE
else:
dtype = types_pb2.DT_FLOAT
source_sess = load_graph(model_path)
source_graph_def = source_sess.graph.as_graph_def()
target_graph_def = graph_pb2.GraphDef()
target_graph_def.versions.CopyFrom(source_graph_def.versions)
for node in source_graph_def.node:
# fused batch norm node
if node.op == "FusedBatchNorm":
rewrite_batch_norm_node_v2(node,
target_graph_def,
target_type=target_type)
continue
# replicate node
new_node = target_graph_def.node.add()
new_node.op = node.op
new_node.name = node.name
new_node.input.extend(node.input)
attrs = list(node.attr.keys())
# keep batch norm params node
if ("BatchNorm" in node.name) or ('batch_normalization' in node.name):
for attr in attrs:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
# replace dtype in node attr with target dtype
for attr in attrs:
# keep special node in fp32
if node.name in keep_fp32_node_name:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
if node.attr[attr].type == types_pb2.DT_FLOAT:
# modify node dtype
node.attr[attr].type = dtype
if attr == "value":
tensor = node.attr[attr].tensor
if tensor.dtype == types_pb2.DT_FLOAT:
# if float_val exists
if tensor.float_val:
float_val = tf.make_ndarray(node.attr[attr].tensor)
new_node.attr[attr].tensor.CopyFrom(
tf.make_tensor_proto(float_val, dtype=dtype))
continue
# if tensor content exists
if tensor.tensor_content:
tensor_shape = [
x.size for x in tensor.tensor_shape.dim
]
tensor_weights = tf.make_ndarray(tensor)
# reshape tensor
tensor_weights = np.reshape(tensor_weights,
tensor_shape)
tensor_proto = tf.make_tensor_proto(tensor_weights,
dtype=dtype)
new_node.attr[attr].tensor.CopyFrom(tensor_proto)
continue
new_node.attr[attr].CopyFrom(node.attr[attr])
# transform graph
if output_names:
if not input_name:
input_name = []
transforms = ["strip_unused_nodes"]
target_graph_def = TransformGraph(target_graph_def, input_name,
output_names, transforms)
# write graph_def to model
tf.io.write_graph(target_graph_def,
logdir=save_path,
name=name,
as_text=as_text)
print("Converting done ...")
def main():
# input_name = ["input_ids", "segment_ids", "input_mask"]
# output_names = ["output_scores"]
input_name = [
"block_ids", "font_size", "height", "strclass", "tag_titles", "tags",
"text", "urls", "width", "x_axis", "y_axis"
]
output_names = ["loss/Softmax", "init_all_tables"]
model_path = "frozen_init_all_table.pb"
save_path = "./"
name = "fp32_frozen_init_all_table.pb"
as_text = False
target_type = 'fp32'
convert_graph_to_fp16(model_path,
save_path,
name,
as_text=as_text,
target_type=target_type,
input_name=input_name,
output_names=output_names)
# test loading
# ISSUE: loading detection model is extremely slow while loading classification model is normal
sess = load_graph(save_path + "/" + name)
print("DONE!")
if __name__ == "__main__":
tf_load_op_library()
main()
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import tensorflow as tf
from tensorflow.core import framework
from tensorflow.core.framework import types_pb2, graph_pb2, attr_value_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
import numpy as np
def isTextProtobuf(filename):
""" Returns whether a filename is a text protobuf based on the file extension.
Args:
filename: string - file name to process.
Returns:
true if `filename`'s extension is .pbtxt, false otherwise.
"""
retval = False
_, filename_ext = os.path.splitext(filename)
if filename_ext and filename_ext.lower() == ".pbtxt":
retval = True
return retval
def saveGraphProtobufToFile(file_name, graph_d):
""" Saves a `GraphDef` protocol buffer graph to a file.
Args:
file_name: string - name of the file where to write the graph.
graph_d: The `GraphDef` protocol buffer to save.
"""
output_file_name_no_dir = os.path.basename(file_name)
output_file_dir = os.path.dirname(file_name)
tf.io.write_graph(graph_d,
output_file_dir,
output_file_name_no_dir,
as_text=isTextProtobuf(file_name))
def loadGraphProtobufFromFile(file_name):
""" Loads a `GraphDef` protocol buffer graph from a file.
Args:
file_name: string - name of the file to load.
Returns:
A `GraphDef` protocol buffer loaded from the file.
"""
graph_d = framework.graph_pb2.GraphDef()
with open(file_name, "rb") as f:
if isTextProtobuf(file_name):
# for text file:
text_format.Merge(f.read(), graph_d)
else:
# for binary file:
graph_d.ParseFromString(f.read())
return graph_d
def duplicateGraph(graph_d):
""" Creates a deep copy of a tf GraphDef.
Args:
graph_d: A `GraphDef` protocol buffer to duplicate.
Returns:
A deep copy of the specified tf GraphDef.
"""
with tf.Graph().as_default() as tmp_graph:
_ = tf.import_graph_def(graph_d, name="")
return tmp_graph.as_graph_def()
def getNodeNames(nodes_d):
""" Compiles a list of strings representing all the name of
the nodes in the specified list of nodes.
Args:
nodes_d: List of `NodeDef` objects to process.
Returns:
A list of strings representing all the name of the nodes in `nodes_d`.
"""
return [node_d.name for node_d in nodes_d]
def getNodeIndexByName(nodes_d, node_name):
""" Finds the NodeDef node in list of NodeDef corresponding to
the specified name.
Args:
nodes_d: List of `NodeDef` objects to process.
node_name: node to find.
Returns:
        An integer index representing the index of the node in the list
passed or -1 if not found.
"""
retval = -1
for i, node_d in enumerate(nodes_d):
if node_d.name == node_name:
retval = i
break
return retval
def getNodeInputNamesClean(node_input_names):
retval = []
for input_name in node_input_names:
tensor_idx = input_name.rfind(":")
if tensor_idx < 0:
retval.append(input_name)
else:
retval.append(input_name[:tensor_idx])
return retval
def getNodeByName(nodes_d, node_name):
""" Finds the NodeDef node in list of NodeDef corresponding to
the specified name.
Args:
nodes_d: List of `NodeDef` objects to process.
node_name: node to find.
Returns:
The `NodeDef` node in `nodes_d` corresponding to the specified name,
or None if name is not found in `nodes_d`.
"""
retval = getNodeIndexByName(nodes_d, node_name)
if (retval < 0):
retval = None
else:
retval = nodes_d[retval]
return retval
def getInputNodeNames(graph_d):
""" Finds the placeholder nodes (or inputs) in the graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
Returns:
A list of node names corresponding to all nodes that are
inputs to the graph.
"""
retval = []
for node_d in graph_d.node:
if node_d.op == "Placeholder":
retval.append(node_d.name)
return retval
def getOutputNodeNames(graph_d):
""" Finds the nodes that are leaf nodes (or outputs) in the graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
Returns:
A list of node names corresponding to all nodes that are
leaf nodes (or outputs) in the graph.
"""
non_output_node_names = set()
for node_d in graph_d.node:
non_output_node_names = non_output_node_names | set(
getNodeInputNamesClean(node_d.input))
graph_node_names = set(getNodeNames(graph_d.node))
return list(graph_node_names - non_output_node_names)
def getNodesInOutput(graph_d, node_name):
""" Finds all nodes that use the output of specified node as
their input in the specified graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
node_name: String name of node to check.
Returns:
A list of node names corresponding to all nodes that use the
output of specified node as their input.
"""
retval = []
for node_d in graph_d.node:
node_input_names = getNodeInputNamesClean(node_d.input)
for id, input_name in enumerate(node_input_names):
if input_name == node_name:
retval.append([id, node_d.name])
break
return retval
def getNodesInSubGraph(graph_d, start_nodes, end_nodes):
    # breadth-first walk from start_nodes, collecting the names of all
    # reachable successor nodes (getNodesInOutput returns [input_idx, name])
    subgraph = list(start_nodes)
    frontier = list(start_nodes)
    while len(frontier) != 0:
        next_frontier = []
        for node_name in frontier:
            for _, succ_name in getNodesInOutput(graph_d, node_name):
                if succ_name not in subgraph:
                    subgraph.append(succ_name)
                    next_frontier.append(succ_name)
        frontier = next_frontier
    return subgraph
def convertTensorflow2NumpyShape(shape_tf):
""" Converts a tensorflow `TensorShape` to a numpy shape.
All unknown values for partial shapes will be converted to -1.
Args:
shape_tf: A `TensorShape` object to convert.
Returns:
A list of values representing a valid numpy style shape.
"""
retval = [
shape_val if shape_val is not None else -1
for shape_val in shape_tf.as_list()
]
return retval
def convertNumpy2TensorflowShape(shape_np):
""" Converts a numpy shape to a tensorflow shape.
All unknown (-1) values for partial shapes will be converted to None.
Args:
shape_np: A list of values representing a valid numpy shape.
Returns:
A list of values representing a valid tensorflow style shape.
"""
retval = [shape_val if shape_val >= 0 else None for shape_val in shape_np]
return retval
def getInputShape(graph_d, numpy_format=False):
""" Retrieves the shape of all inputs to specified `GraphDef` object.
Args:
graph_d: A `GraphDef` protocol buffer to process.
numpy_format: boolean - if False (default), shape is given in tensorflow format,
otherwise, numpy format.
Returns:
A mapping string => list: from input tensor name to shape.
"""
retval = {}
input_node_names = getInputNodeNames(graph_d)
tf.import_graph_def(graph_d, name="")
for input_node_name in input_node_names:
# find all output tensors for this placeholder, i.e. input:0, input:1, etc.
try:
i = 0
while True:
input_tensor_name = input_node_name + ":" + str(i)
next_input_tensor = tf.get_default_graph().get_tensor_by_name(
input_tensor_name)
tensor_shape = next_input_tensor.shape
if numpy_format:
tensor_shape = convertTensorflow2NumpyShape(tensor_shape)
retval[input_tensor_name] = tensor_shape
i += 1
        except KeyError:
            pass  # reached the end of the placeholder outputs
return retval
def getInputOutputNodes(frozen_graph):
""" Finds all input and output nodes in the specified graph.
Args:
frozen_graph: TensorFlow frozen graph
Returns:
A list of input and output node names.
"""
predefined_inputs = ['segment', 'mask', 'input_ids']
graph_d = loadGraphProtobufFromFile(frozen_graph)
inputs = getInputNodeNames(graph_d)
outputs = getOutputNodeNames(graph_d)
    nodes = [
        name for name in inputs if any(sub in name for sub in predefined_inputs)
    ]
if len(nodes) == len(predefined_inputs):
return [inputs, outputs]
else:
status, inputs = findNodeByName(graph_d, predefined_inputs)
if status:
return [inputs, outputs]
else:
raise RuntimeError(
"Cannot find suitable inputs for this tool, please indicate the names of inputs after preprocessing"
)
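# Usage sketch (hypothetical path):
# inputs, outputs = getInputOutputNodes('frozen_model.pb')
# print('inputs:', inputs, 'outputs:', outputs)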
def findNodeByName(graph_d, node_name):
""" Finds nodes specified by name in the specified graph.
Args:
graph_d: A `GraphDef` protocol buffer to process.
node_name: String name of node to check.
Returns:
status - True if all nodes are found, False otherwise
A list of node names.
"""
status = False
all_nodes = list(getNodeNames(graph_d.node))
    retval = [name for name in all_nodes if any(sub in name for sub in node_name)]
if len(node_name) == len(retval):
status = True
return status, retval
def load_graph(model_path):
graph = tf.Graph()
with graph.as_default():
graph_def = tf.GraphDef()
if model_path.endswith("pb"):
with open(model_path, "rb") as f:
graph_def.ParseFromString(f.read())
else:
with open(model_path, "r") as pf:
text_format.Parse(pf.read(), graph_def)
return graph_def
from opt_tf import *
import os
import tensorflow as tf
import sys
from tensorflow.python.tools import freeze_graph
from tensorflow.python.tools import saved_model_cli
from tensorflow.python.saved_model import tag_constants
from tensorflow.python.saved_model import signature_constants
from tensorflow.tools.graph_transforms import TransformGraph
from six import StringIO, iteritems
import contextlib
from tensorflow.core.framework import types_pb2, tensor_shape_pb2, graph_pb2, attr_value_pb2
import numpy as np
from load_runstep import load_runstep
def load_graph(model):
graph_def = tf.GraphDef()
print("load model: ", model)
with open(model, 'rb') as f:
graph_def.ParseFromString(f.read())
return graph_def
def find_node(graph_def, name):
node = None
for n in graph_def.node:
if n.name == name:
node = n
break
# if node == None:
# print('Node {} not found'.format(name))
return node
def find_node_by_type(graph_def, type):
node = []
for n in graph_def.node:
if n.op == type:
node.append(n)
return node
def get_node_successor(graph_def, node_name):
outputs = []
for n in graph_def.node:
for input in n.input:
if node_name == input.split(':')[0]:
outputs.append(n)
# if len(outputs) == 0:
# print("[INFO] {} has no successor".format(node_name))
return outputs
def get_node_output(graph_def, node_name):
outputs = []
for n in graph_def.node:
for input in n.input:
if node_name == input.split(':')[0]:
if len(input.split(':')) == 1:
if not input + ":0" in outputs:
outputs.append(input + ":0")
else:
if not input in outputs:
outputs.append(input)
# if len(outputs) == 0:
# print("[INFO] {} has no output".format(node_name))
return outputs
# single in & single out
def remove_nodes(graph_d, nodes):
for node in nodes:
# assert len(node.input) == 1
pre_node = node.input[0]
succ_nodes = get_node_successor(graph_d, node.name)
for succ in succ_nodes:
for idx, name in enumerate(succ.input):
if name == node.name:
succ.input[idx] = pre_node
graph_d.node.remove(node)
return graph_d
def create_shape_proto(shape):
shape_proto = tensor_shape_pb2.TensorShapeProto()
for dim in shape:
shape_proto.dim.add().size = dim
return attr_value_pb2.AttrValue(shape=shape_proto)
def set_shape(node, shape):
node.attr["shape"].CopyFrom(create_shape_proto(shape))
def remove_control_dep(graph_def):
# reset & import
tf.reset_default_graph()
tf.import_graph_def(graph_def, name="")
for node in graph_def.node:
op = tf.get_default_graph().get_operation_by_name(node.name)
if len(op.control_inputs) != 0:
tf.contrib.graph_editor.remove_control_inputs(
op, op.control_inputs)
graph_def = tf.get_default_graph().as_graph_def()
return graph_def
def is_leaf_node(graph_d, name):
for n in graph_d.node:
for in_n in n.input:
if name == in_n or name == in_n.split(":0")[0]:
return False
return True
def get_node_shape(node):
return [d.size for d in node.attr["shape"].shape.dim]
def get_graph_input(graph_d):
in_node = []
for n in graph_d.node:
if n.op == "Placeholder":
in_node.append(n.name)
to_remove = []
for in_n in in_node:
if is_leaf_node(graph_d, in_n):
to_remove.append(in_n)
for name in to_remove:
node = find_node(graph_d, name)
graph_d.node.remove(node)
real_in = set(in_node) - set(to_remove)
return list(real_in)
def get_graph_output(graph_d):
out_node = []
for n in graph_d.node:
if len(get_node_successor(graph_d, n.name)) == 0:
out_node.append(n.name)
# if len(out_node) == 0:
# print("[INFO] Graph No Outputs??")
return out_node
def get_constant_val(node):
val = tf.make_ndarray(node.attr["value"].tensor)
return val
def get_dtype_from_np(val):
if val.dtype == np.int32:
return types_pb2.DT_INT32
if val.dtype == np.float32:
return types_pb2.DT_FLOAT
if val.dtype == np.int64:
return types_pb2.DT_INT64
if val.dtype == np.float16:
return types_pb2.DT_HALF
raise ValueError("DTYPE {} NOT SUPPORTEED!".format(val.dtype))
def set_constant_val(node, val):
tf_dtype = get_dtype_from_np(val)
node.attr["value"].tensor.CopyFrom(
tf.make_tensor_proto(val, dtype=tf_dtype))
@contextlib.contextmanager
def captured_output():
new_out, new_err = StringIO(), StringIO()
old_out, old_err = sys.stdout, sys.stderr
try:
sys.stdout, sys.stderr = new_out, new_err
yield sys.stdout, sys.stderr
finally:
sys.stdout, sys.stderr = old_out, old_err
def get_saved_input_node(saved_model_dir, saved_tags, sign):
parser = saved_model_cli.create_parser()
args = parser.parse_args([
'show', '--dir', saved_model_dir, '--tag_set', saved_tags,
'--signature_def', sign
])
with captured_output() as (out, err):
saved_model_cli.show(args)
result = out.getvalue().strip()
input_tensors = []
lines = result.split('\n')
    for idx, line in enumerate(lines):
if "inputs[" in line:
line = lines[idx + 3]
input = line.split(":")[1]
input_tensors.append(input.strip() + ":0")
return input_tensors
def get_saved_output_node(saved_model_dir, saved_tags, sign):
parser = saved_model_cli.create_parser()
args = parser.parse_args([
'show', '--dir', saved_model_dir, '--tag_set', saved_tags,
'--signature_def', sign
])
with captured_output() as (out, err):
saved_model_cli.show(args)
result = out.getvalue().strip()
# print(result)
output_nodes = []
lines = result.split('\n')
    for idx, line in enumerate(lines):
if "outputs[" in line:
line = lines[idx + 3]
output = line.split(":")[1]
output_nodes.append(output.strip() + ":0")
return output_nodes
def duplicate_const(graph_d):
all_consts = find_node_by_type(graph_d, "Const")
need_duplicate = []
for node in all_consts:
if len(get_node_successor(graph_d, node.name)) > 1:
need_duplicate.append(node.name)
for node in need_duplicate:
succ_nodes = get_node_successor(graph_d, node)
for idx, succ in enumerate(succ_nodes):
ori_node = find_node(graph_d, node)
new_node = graph_d.node.add()
new_node.op = ori_node.op
new_node.name = ori_node.name + "new_{}".format(idx)
new_node.input.extend(ori_node.input)
attrs = list(ori_node.attr.keys())
for attr in attrs:
new_node.attr[attr].CopyFrom(ori_node.attr[attr])
for i, input in enumerate(succ.input):
if input == ori_node.name:
succ.input[i] = new_node.name
return graph_d
def rewrite_batch_norm_node_v2(node, graph_def, target_type):
"""
Rewrite FusedBatchNorm with FusedBatchNormV2 for reserve_space_1 and reserve_space_2 in FusedBatchNorm require float32 for
gradient calculation (See here: https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/fused-batch-norm)
"""
if target_type == 'fp16':
dtype = types_pb2.DT_HALF
elif target_type == 'fp32':
dtype = types_pb2.DT_FLOAT
new_node = graph_def.node.add()
new_node.op = "FusedBatchNormV2"
new_node.name = node.name
new_node.input.extend(node.input)
new_node.attr["U"].CopyFrom(
attr_value_pb2.AttrValue(type=types_pb2.DT_FLOAT))
for attr in list(node.attr.keys()):
if attr == "T":
node.attr[attr].type = dtype
new_node.attr[attr].CopyFrom(node.attr[attr])
print("rewrite fused_batch_norm done!")
def convert_graph_to_fp16(model_path,
save_path,
name,
as_text=False,
target_type='fp16',
input_name=None,
output_names=None,
keep_fp32_node_name=[]):
    if target_type == 'fp16':
        dtype = types_pb2.DT_HALF
    elif target_type == 'fp32':
        dtype = types_pb2.DT_FLOAT
    else:
        raise ValueError(
            "target_type must be 'fp16' or 'fp32', got {!r}".format(target_type))
source_sess = load_graph(model_path)
source_graph_def = source_sess.graph.as_graph_def()
target_graph_def = graph_pb2.GraphDef()
target_graph_def.versions.CopyFrom(source_graph_def.versions)
for node in source_graph_def.node:
# fused batch norm node
if node.op == "FusedBatchNorm":
rewrite_batch_norm_node_v2(node,
target_graph_def,
target_type=target_type)
continue
# replicate node
new_node = target_graph_def.node.add()
new_node.op = node.op
new_node.name = node.name
new_node.input.extend(node.input)
attrs = list(node.attr.keys())
# keep batch norm params node
if ("BatchNorm" in node.name) or ('batch_normalization' in node.name):
for attr in attrs:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
# replace dtype in node attr with target dtype
for attr in attrs:
# keep special node in fp32
if node.name in keep_fp32_node_name:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
if node.attr[attr].type == types_pb2.DT_FLOAT:
# modify node dtype
node.attr[attr].type = dtype
if attr == "value":
tensor = node.attr[attr].tensor
if tensor.dtype == types_pb2.DT_FLOAT:
# if float_val exists
if tensor.float_val:
float_val = tf.make_ndarray(node.attr[attr].tensor)
new_node.attr[attr].tensor.CopyFrom(
tf.make_tensor_proto(float_val, dtype=dtype))
continue
# if tensor content exists
if tensor.tensor_content:
tensor_shape = [
x.size for x in tensor.tensor_shape.dim
]
tensor_weights = tf.make_ndarray(tensor)
# reshape tensor
tensor_weights = np.reshape(tensor_weights,
tensor_shape)
tensor_proto = tf.make_tensor_proto(tensor_weights,
dtype=dtype)
new_node.attr[attr].tensor.CopyFrom(tensor_proto)
continue
new_node.attr[attr].CopyFrom(node.attr[attr])
# transform graph
if output_names:
if not input_name:
input_name = []
transforms = ["strip_unused_nodes"]
target_graph_def = TransformGraph(target_graph_def, input_name,
output_names, transforms)
# write graph_def to model
tf.io.write_graph(target_graph_def,
logdir=save_path,
name=name,
as_text=as_text)
print("Converting done ...")
def convert_graph_to_fp32(model_path,
save_path,
name,
as_text=False,
target_type='fp32',
input_name=None,
output_names=None,
keep_fp16_node_name=[]):
    if target_type == 'fp16':
        dtype = types_pb2.DT_HALF
    elif target_type == 'fp32':
        dtype = types_pb2.DT_FLOAT
    else:
        raise ValueError(
            "target_type must be 'fp16' or 'fp32', got {!r}".format(target_type))
source_sess = load_graph(model_path)
source_graph_def = source_sess.graph.as_graph_def()
target_graph_def = graph_pb2.GraphDef()
target_graph_def.versions.CopyFrom(source_graph_def.versions)
for node in source_graph_def.node:
# fused batch norm node
if node.op == "FusedBatchNorm":
rewrite_batch_norm_node_v2(node,
target_graph_def,
target_type=target_type)
continue
# replicate node
new_node = target_graph_def.node.add()
new_node.op = node.op
new_node.name = node.name
new_node.input.extend(node.input)
attrs = list(node.attr.keys())
# keep batch norm params node
if ("BatchNorm" in node.name) or ('batch_normalization' in node.name):
for attr in attrs:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
# replace dtype in node attr with target dtype
for attr in attrs:
# keep special node in fp16
if node.name in keep_fp16_node_name:
new_node.attr[attr].CopyFrom(node.attr[attr])
continue
if node.attr[attr].type == types_pb2.DT_HALF:
# modify node dtype
node.attr[attr].type = dtype
if attr == "value":
tensor = node.attr[attr].tensor
if tensor.dtype == types_pb2.DT_HALF:
# if half_val exists
if tensor.half_val:
half_val = tf.make_ndarray(node.attr[attr].tensor)
new_node.attr[attr].tensor.CopyFrom(
tf.make_tensor_proto(half_val, dtype=dtype))
continue
# if tensor content exists
if tensor.tensor_content:
tensor_shape = [
x.size for x in tensor.tensor_shape.dim
]
tensor_weights = tf.make_ndarray(tensor)
# reshape tensor
tensor_weights = np.reshape(tensor_weights,
tensor_shape)
tensor_proto = tf.make_tensor_proto(tensor_weights,
dtype=dtype)
new_node.attr[attr].tensor.CopyFrom(tensor_proto)
continue
new_node.attr[attr].CopyFrom(node.attr[attr])
# transform graph
if output_names:
if not input_name:
input_name = []
transforms = ["strip_unused_nodes"]
target_graph_def = TransformGraph(target_graph_def, input_name,
output_names, transforms)
# write graph_def to model
tf.io.write_graph(target_graph_def,
logdir=save_path,
name=name,
as_text=as_text)
print("Converting done ...")
# Copyright 2023 ByteDance and/or its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import json
import numpy as np
import torch
def torch_to_onnx(model_path, output_path):
    # Stripping the trailing "onnx" (four characters) keeps the dot, so
    # appending "json" yields the matching "<model_name>.json" metadata file.
    model_name = output_path.split("/")[-1][:-4]
    with open("general_perf/model_zoo/" + model_name + "json", "r") as f:
        model_info = json.load(f)
model_inputs = model_info["inputs"].split(",")
input_shapes = model_info["input_shape"]
input_type = model_info["input_type"].split(",")
example_inputs = _get_fake_samples(input_shapes, input_type)
model = torch.jit.load(model_path, map_location=torch.device("cpu"))
model.eval()
names = model_inputs
dynamic_inputs = {}
for i in range(len(names)):
dynamic_inputs[names[i]] = {0: "batch_size"}
outputs = model_info["outputs"].split(",")
for output in outputs:
dynamic_inputs[output] = {0: "batch_size"}
torch.onnx.export(
model,
example_inputs,
output_path,
opset_version=11,
input_names=names,
output_names=outputs,
dynamic_axes=dynamic_inputs,
)
def _get_fake_samples(shapes, dtypes):
    # Build one random tensor per declared input, matching its shape and dtype.
    data = []
    for idx, (_, val) in enumerate(shapes.items()):
        data.append(
            torch.from_numpy(np.random.random(val).astype(dtypes[idx].lower())))
    return data
def get_args():
"""Parse commandline."""
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", default="")
parser.add_argument("--output_path", default="")
args = parser.parse_args()
return args
if __name__ == "__main__":
args = get_args()
torch_to_onnx(args.model_path, args.output_path)
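# Hedged usage sketch (assuming this file is saved as torch_to_onnx.py; the
# model name is a placeholder): the script expects a TorchScript checkpoint
# plus a matching metadata JSON under general_perf/model_zoo/.
#
#     python3 torch_to_onnx.py \
#         --model_path general_perf/model_zoo/albert-torch-fp32.pt \
#         --output_path general_perf/model_zoo/albert-torch-fp32.onnx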
{
"model": "albert-torch-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": true,
"clients": 3,
"iterations": 100,
"batch_sizes":[1,4,8,16,24],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-onnxruntime-fp16",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[4,8,16,32,64,128],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-onnxruntime-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[4,8,16,32,64,128],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-tf-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": true,
"clients": 3,
"iterations": 100,
"batch_sizes":[4,8,16,24],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-torch-fp16",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[1,2,4,8,16,32,64,128,256,512,1024],
"data_percent": 100,
"compile_only": false
}
{
"model": "bert-torch-fp32",
"test_perf": true,
"test_accuracy": true,
"test_numeric": false,
"clients": 3,
"iterations": 100,
"batch_sizes":[1,2,4,8,16,32,64,128,256,512,1024],
"data_percent": 100,
"compile_only": false
}