# ***************************************************************************** # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the NVIDIA CORPORATION nor the # names of its contributors may be used to endorse or promote products # derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#
# *****************************************************************************

import models
import torch
import argparse
import numpy as np
import json
import time
import os
import sys

from inference import checkpoint_from_distributed, unwrap_distributed, load_and_setup_model, MeasureTime, prepare_input_sequence

import dllogger as DLLogger
from dllogger import StdOutBackend, JSONStreamBackend, Verbosity


def parse_args(parser):
    """
    Register the benchmark's command-line arguments on ``parser``.

    Nothing is parsed here; the same parser object is returned so the caller
    can run ``parse_known_args`` on it.
    """
    parser.add_argument('-m', '--model-name', type=str, default='', required=True,
                        help='Model to train')
    parser.add_argument('--model', type=str, default='',
                        help='Full path to the model checkpoint file')
    parser.add_argument('-sr', '--sampling-rate', default=22050, type=int,
                        help='Sampling rate')
    parser.add_argument('--fp16', action='store_true',
                        help='inference with AMP')
    # NOTE(review): --batch-size is accepted and logged, but nothing in this
    # file reads it; gen_text/gen_mel use a fixed batch size of 1.
    parser.add_argument('-bs', '--batch-size', type=int, default=1)
    parser.add_argument('-o', '--output', type=str, required=True,
                        help='Directory to save results')
    parser.add_argument('--log-file', type=str, default='nvlog.json',
                        help='Filename for logging')
    parser.add_argument('--synth-data', action='store_true',
                        help='Test with synthetic data')

    return parser


def gen_text(use_synthetic_data):
    """
    Build the text input for one Tacotron2 benchmark iteration.

    With ``use_synthetic_data`` a random batch of token ids of length
    ``text_len`` is created on the GPU; otherwise a fixed sentence, cut to at
    most ``text_len`` characters, is encoded with ``prepare_input_sequence``.

    Returns a ``(text_padded, input_lengths)`` tuple.
    """
    batch_size = 1
    text_len = 140

    if use_synthetic_data:
        text_padded = torch.randint(low=0, high=148,
                                    size=(batch_size, text_len),
                                    dtype=torch.long).cuda()
        input_lengths = torch.IntTensor([text_padded.size(1)] * batch_size).cuda().long()
    else:
        texts = ['The forms of printed letters should be beautiful, and that their arrangement on the page should be reasonable and a help to the shapeliness of the letters themselves.']
        # Truncate every sentence to text_len characters. The previous
        # `texts[:][:text_len]` sliced the *list*, so the string itself was
        # never shortened.
        texts = [text[:text_len] for text in texts]
        text_padded, input_lengths = prepare_input_sequence(texts)

    return (text_padded, input_lengths)


def gen_mel(use_synthetic_data, n_mel_channels, fp16):
    """
    Build the mel-spectrogram input for one WaveGlow benchmark iteration.

    With ``use_synthetic_data`` a normally distributed tensor of shape
    ``(1, n_mel_channels, 895)`` is created on the GPU; otherwise the tensor
    stored in ``data/mel.pt`` is loaded. The result is cast to half precision
    when ``fp16`` is set.
    """
    if use_synthetic_data:
        batch_size = 1
        num_mels = 895
        mel_padded = torch.zeros(batch_size, n_mel_channels, num_mels).normal_(-5.62, 1.98).cuda()
    else:
        # NOTE(review): the loaded tensor is used as-is and is not moved to the
        # GPU here; presumably it was saved from a CUDA device - confirm.
        mel_padded = torch.load("data/mel.pt")

    if fp16:
        mel_padded = mel_padded.half()

    return mel_padded


def main():
    """
    Launches inference benchmark.
    Inference is executed on a single GPU.

    Runs a few warm-up iterations followed by one measured iteration and
    reports latency and throughput through DLLogger.
    """
    parser = argparse.ArgumentParser(
        description='PyTorch Tacotron 2 Inference')
    parser = parse_args(parser)
    args, _ = parser.parse_known_args()

    # Make sure the results directory exists before the JSON log is opened in it.
    if args.output:
        os.makedirs(args.output, exist_ok=True)

    log_file = os.path.join(args.output, args.log_file)
    DLLogger.init(backends=[JSONStreamBackend(Verbosity.DEFAULT, log_file),
                            StdOutBackend(Verbosity.VERBOSE)])
    for k, v in vars(args).items():
        DLLogger.log(step="PARAMETER", data={k: v})
    DLLogger.log(step="PARAMETER", data={'model_name': 'Tacotron2_PyT'})

    # Synthetic runs need no checkpoint; real-data runs require an existing file.
    if args.synth_data:
        checkpoint = None
    else:
        if not os.path.isfile(args.model):
            print(f"File {args.model} does not exist!")
            sys.exit(1)
        checkpoint = args.model

    model = load_and_setup_model(args.model_name, parser, checkpoint,
                                 args.fp16, cpu_run=False,
                                 forward_is_infer=True)

    if args.model_name == "Tacotron2":
        model = torch.jit.script(model)

    warmup_iters = 3
    num_iters = 1 + warmup_iters

    for i in range(num_iters):

        # Fresh dict each iteration; MeasureTime stores the timing under the given key.
        measurements = {}

        if args.model_name == 'Tacotron2':
            text_padded, input_lengths = gen_text(args.synth_data)

            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                mels, _, _ = model(text_padded, input_lengths)
            num_items = mels.size(0) * mels.size(2)

        elif args.model_name == 'WaveGlow':

            n_mel_channels = model.upsample.in_channels
            mel_padded = gen_mel(args.synth_data, n_mel_channels, args.fp16)

            with torch.no_grad(), MeasureTime(measurements, "inference_time"):
                audios = model(mel_padded)
                audios = audios.float()
            num_items = audios.size(0) * audios.size(1)

        # Only iterations after the warm-up phase are reported per step.
        if i >= warmup_iters:
            DLLogger.log(step=(i - warmup_iters,), data={"latency": measurements['inference_time']})
            DLLogger.log(step=(i - warmup_iters,), data={"items_per_sec": num_items / measurements['inference_time']})

    # Summary entries use the values from the last (measured) iteration.
    DLLogger.log(step=tuple(),
                 data={'infer_latency': measurements['inference_time']})
    DLLogger.log(step=tuple(),
                 data={'infer_items_per_sec': num_items / measurements['inference_time']})

    DLLogger.flush()


if __name__ == '__main__':
    main()