# Copyright (c) Facebook, Inc. and its affiliates.
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

from collections import defaultdict
from functools import partial

import numpy as np
import torch
from tqdm import tqdm

from data_utils import dump_speaker_f0_stat, F0Stat, load_audio_path, load_f0

# Resolution of the histogram used to approximate the cumulative distribution
# of the pooled f0 values when picking quantization boundaries.
_NUM_HIST_BINS = 100000


def load_speaker(path):
    """Read the speaker label of every sample listed in a manifest file.

    Each line of ``path`` is evaluated as a Python expression and must yield
    a container that has a ``"speaker"`` entry.

    Args:
        path: Path of the manifest file, one sample per line.

    Returns:
        list: The ``"speaker"`` value of every line, in file order.

    Raises:
        ValueError: If a line has no ``"speaker"`` entry.
    """
    speakers = []
    with open(path) as f:
        # Iterate the file lazily instead of materializing every line first.
        for line_no, line in enumerate(f, start=1):
            # SECURITY: eval() executes arbitrary code found in the manifest.
            # Only run this script on manifests you trust. If every line is a
            # plain literal, ast.literal_eval would be a safer parser.
            sample = eval(line.strip())
            # Explicit check rather than `assert`, which is removed under -O.
            if "speaker" not in sample:
                raise ValueError(
                    f"{path}:{line_no}: sample has no 'speaker' entry"
                )
            speakers.append(sample["speaker"])
    return speakers


def quantize_f0(speaker_to_f0, f0_stats, nbins, normalize, log):
    """Compute f0 quantization boundaries at evenly spaced percentiles.

    The f0 values of all speakers are optionally log-transformed and
    normalized per speaker, then pooled. For each requested bin count ``k``
    the function picks ``k - 1`` boundaries located (approximately, via a
    fine histogram) at the ``100 / k``, ``2 * 100 / k``, ... percentiles of
    the pooled values.

    Args:
        speaker_to_f0: Mapping from speaker to an object exposing
            ``raw_data``; ``raw_data`` must support ``.log()``, arithmetic
            and ``.tolist()`` (presumably a torch tensor -- confirm against
            ``F0Stat``).
        f0_stats: Mapping from speaker to a mapping holding ``"f0_mean"`` /
            ``"f0_std"`` (used when ``log`` is false) or ``"logf0_mean"`` /
            ``"logf0_std"`` (used when ``log`` is true).
        nbins: Iterable of bin counts to compute boundaries for.
        normalize: ``"mean"`` subtracts the speaker mean, ``"meanstd"`` also
            divides by the speaker std; any other value leaves the data
            unnormalized.
        log: Whether to take the logarithm of f0 before normalizing.

    Returns:
        dict: Maps each bin count to a numpy array of its ``k - 1``
        boundaries.

    Raises:
        ValueError: If there are no f0 values at all to quantize.
    """
    stat_prefix = "logf0" if log else "f0"

    f0_all = []
    for speaker, speaker_f0 in speaker_to_f0.items():
        f0 = speaker_f0.raw_data
        if log:
            f0 = f0.log()
        # Statistics are only looked up when the normalization needs them.
        if normalize == "mean":
            f0 = f0 - f0_stats[speaker][f"{stat_prefix}_mean"]
        elif normalize == "meanstd":
            stats = f0_stats[speaker]
            f0 = (f0 - stats[f"{stat_prefix}_mean"]) / stats[f"{stat_prefix}_std"]
        f0_all.extend(f0.tolist())

    if not f0_all:
        # Without this guard the cumulative histogram below is 0 / 0 = NaN
        # and every boundary silently degenerates to a meaningless value.
        raise ValueError("no f0 values to quantize")

    hist, bin_x = np.histogram(f0_all, _NUM_HIST_BINS)
    # Percentage of pooled values falling in or below each histogram bin.
    cum_hist = np.cumsum(hist) / len(f0_all) * 100

    f0_bin = {}
    for num_bin in nbins:
        bin_size = 100 / num_bin
        bin_offset = []
        threshold = bin_size
        for _ in range(num_bin - 1):
            # Histogram bin whose cumulative percentage is closest to the
            # target percentile; its left edge is used as the boundary.
            index = np.abs(cum_hist - threshold).argmin()
            bin_offset.append(bin_x[index])
            threshold += bin_size
        f0_bin[num_bin] = np.array(bin_offset)

    return f0_bin


def main(file_path, f0_dir, out_dir, out_prefix, nbins, nshards, normalize, log):
    """Accumulate per-speaker f0 statistics and save f0 quantization bins.

    Args:
        file_path: Manifest file; read both for audio paths and for speakers.
        f0_dir: Directory passed to ``load_f0`` together with ``nshards``.
        out_dir: Output directory.
        out_prefix: File-name prefix used for every output under ``out_dir``.
        nbins: Bin counts forwarded to ``quantize_f0``.
        nshards: Number of f0 shards, forwarded to ``load_f0``.
        normalize: Normalization mode forwarded to ``quantize_f0``; also part
            of the output file name.
        log: Whether to quantize log-f0; adds a ``_log`` suffix to the output
            file name.

    The boundaries are written with ``torch.save`` to
    ``{out_dir}/{out_prefix}_{normalize}_norm[_log]_f0_bin.th``.
    """
    audio_paths = load_audio_path(file_path)
    path_to_f0 = load_f0(f0_dir, nshards)
    speakers = load_speaker(file_path)
    # One F0Stat accumulator per speaker, created on first access.
    speaker_to_f0 = defaultdict(partial(F0Stat, True))

    # speaker f0 stats
    for audio_path, speaker in tqdm(zip(audio_paths, speakers)):
        speaker_to_f0[speaker].update(path_to_f0[audio_path])
    f0_stats = dump_speaker_f0_stat(speaker_to_f0, f"{out_dir}/{out_prefix}")

    # quantize
    f0_bin = quantize_f0(speaker_to_f0, f0_stats, nbins, normalize, log)
    log_suffix = "_log" if log else ""
    f0_bin_out_file = f"{out_dir}/{out_prefix}_{normalize}_norm{log_suffix}_f0_bin.th"
    torch.save(f0_bin, f0_bin_out_file)


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("file_path")
    parser.add_argument("f0_dir", help="out_dir from preprocess_f0")
    parser.add_argument("out_dir")
    parser.add_argument("out_prefix")
    parser.add_argument("--nbins", nargs="+", type=int, default=[32])
    parser.add_argument("--nshards", type=int, default=20, help="number of f0 shards")
    parser.add_argument(
        "--normalize", type=str, choices=["meanstd", "mean", "none"], default="mean"
    )
    parser.add_argument("--log", action="store_true")
    args = parser.parse_args()
    print(args)

    main(**vars(args))