#  Copyright (c) Meta Platforms, Inc. and affiliates.
#
#  Licensed under the Apache License, Version 2.0 (the "License");
#  you may not use this file except in compliance with the License.
#  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
#  limitations under the License.
#
"""Benchmark a timm ResNet-50 in PyTorch with fp16 inputs on a CUDA/HIP device."""
import os

import click
import timm
import torch
from aitemplate.testing.benchmark_pt import benchmark_torch_function

# Batch sizes swept when --batch-size is left at its default of 0.
DEFAULT_BATCH_SIZES = (1, 2, 4, 8, 16, 32, 64, 128, 256)


def benchmark(model, batch_size):
    """Time ``model`` on one random fp16 batch and record the result.

    The measurement is printed to stdout and appended to
    ``resnet50_pt_benchmark_dev_<devices>.txt`` in the current working
    directory, where ``<devices>`` is ``HIP_VISIBLE_DEVICES`` with commas
    replaced by underscores (``-1`` when the variable is unset).

    Args:
        model: Callable handed to ``benchmark_torch_function`` together with
            the generated input tensor.
        batch_size: Leading dimension of the ``(batch_size, 3, 224, 224)``
            input tensor.
    """
    with torch.inference_mode():
        input_shape = (batch_size, 3, 224, 224)
        input_data = torch.randn(input_shape).cuda().half()
        # Warm up: same call as the measured run, result discarded.
        benchmark_torch_function(100, model, input_data)
        # Measured run.
        # NOTE(review): the meaning of the leading 100 (presumably an
        # iteration count) and the unit of the returned value are defined by
        # benchmark_torch_function, which is not visible here -- confirm.
        t = benchmark_torch_function(100, model, input_data)
        print(f"batch_size: {batch_size}, time: {t}")

        dev_flag = os.environ.get("HIP_VISIBLE_DEVICES", "-1")
        # Commas are replaced so the device list forms a single filename token.
        dev_flag = dev_flag.replace(",", "_")
        results_path = f"resnet50_pt_benchmark_dev_{dev_flag}.txt"
        # Append mode: results from successive runs accumulate in one file.
        with open(results_path, "a", encoding="utf-8") as f:
            f.write(f"batch_size: {batch_size}, latency: {t}\n")


@click.command()
@click.option("--batch-size", default=0, type=int)
def main(batch_size):
    """Benchmark ResNet-50 for one batch size, or sweep the default sizes.

    With ``--batch-size 0`` (the default) every size in
    ``DEFAULT_BATCH_SIZES`` is benchmarked in order; any positive value
    benchmarks just that size. Negative values are rejected before the model
    is built.
    """
    if batch_size < 0:
        # Fail fast with a CLI usage error instead of a tensor-shape error
        # raised deep inside torch after the model has already been created.
        raise click.BadParameter(
            "must be 0 (sweep default sizes) or a positive integer",
            param_hint="--batch-size",
        )

    # NOTE(review): pretrained_cfg_overlay names a checkpoint path relative to
    # the current working directory; presumably timm loads the weights from
    # that file -- confirm the file is present where the script is launched.
    model = (
        timm.create_model(
            "resnet50",
            pretrained=True,
            num_classes=1000,
            pretrained_cfg_overlay=dict(file="./resnet50_a1_0-14fe96d1.pth"),
        )
        .cuda()
        .half()
    )
    model.eval()

    # A distinct loop name avoids rebinding the ``batch_size`` parameter.
    sizes = DEFAULT_BATCH_SIZES if batch_size == 0 else (batch_size,)
    for size in sizes:
        benchmark(model, size)


if __name__ == "__main__":
    main()