#!/usr/bin/env python3
# Copyright 2023 Nvidia (authors: Yuekai Zhang)
#
# See LICENSE for clarification regarding multiple authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Summarize a Triton inference statistics JSON file into a readable report.

Usage:
    python3 stats_summary.py --stats_file ./stats.json --summary_file ./stats_summary.txt
"""
import argparse
import json


def get_args():
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--stats_file",
        type=str,
        required=False,
        default="./stats.json",
        help="input Triton statistics JSON file",
    )
    parser.add_argument(
        "--summary_file",
        type=str,
        required=False,
        default="./stats_summary.txt",
        help="output file for the statistics summary",
    )
    return parser.parse_args()


if __name__ == "__main__":
    args = get_args()
    with open(args.stats_file) as stats_f, open(
        args.summary_file, "w"
    ) as summary_f:
        stats = json.load(stats_f)
        model_stats = stats["model_stats"]
        for model_state in model_stats:
            summary_f.write(f"model name is {model_state['name']} \n")
            model_inference_stats = model_state["inference_stats"]
            # Cumulative times are reported in nanoseconds; convert to seconds.
            total_queue_time_s = (
                int(model_inference_stats["queue"]["ns"]) / 1e9
            )
            total_infer_time_s = (
                int(model_inference_stats["compute_infer"]["ns"]) / 1e9
            )
            total_input_time_s = (
                int(model_inference_stats["compute_input"]["ns"]) / 1e9
            )
            total_output_time_s = (
                int(model_inference_stats["compute_output"]["ns"]) / 1e9
            )
            summary_f.write(
                f"queue {total_queue_time_s:<5.2f} s, infer {total_infer_time_s:<5.2f} s, input {total_input_time_s:<5.2f} s, output {total_output_time_s:<5.2f} s \n"  # noqa
            )
            model_batch_stats = model_state["batch_stats"]
            for batch in model_batch_stats:
                batch_size = int(batch["batch_size"])
                compute_input = batch["compute_input"]
                compute_output = batch["compute_output"]
                compute_infer = batch["compute_infer"]
                batch_count = int(compute_infer["count"])
                assert (
                    compute_infer["count"]
                    == compute_output["count"]
                    == compute_input["count"]
                )
                # Per-batch-size times are in nanoseconds; convert to milliseconds.
                compute_infer_time_ms = int(compute_infer["ns"]) / 1e6
                compute_input_time_ms = int(compute_input["ns"]) / 1e6
                compute_output_time_ms = int(compute_output["ns"]) / 1e6
                summary_f.write(
                    f"Batch_size {batch_size:<2}, {batch_count:<5} times, infer {compute_infer_time_ms:<9.2f} ms, avg {compute_infer_time_ms/batch_count:.2f} ms, {compute_infer_time_ms/batch_count/batch_size:.2f} ms "  # noqa
                )
                summary_f.write(
                    f"input {compute_input_time_ms:<9.2f} ms, avg {compute_input_time_ms/batch_count:.2f} ms, "  # noqa
                )
                summary_f.write(
                    f"output {compute_output_time_ms:<9.2f} ms, avg {compute_output_time_ms/batch_count:.2f} ms \n"  # noqa
                )
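# ---------------------------------------------------------------------------
# Illustrative usage (assumption, not part of the original script): the
# stats.json consumed above is typically dumped from a running Triton server
# via its statistics HTTP endpoint, e.g.:
#
#   curl localhost:8000/v2/models/stats > stats.json
#   python3 stats_summary.py --stats_file ./stats.json
#
# The port (8000) and the /v2/models/stats path correspond to Triton's default
# HTTP service and statistics extension; adjust both to match your deployment.
# ---------------------------------------------------------------------------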