Unverified commit ddb8577d, authored by Bhargav Kathivarapu, committed by GitHub
Browse files

Migrate kaldi spectrogram (#687)



* Migrate spectrogram

* Update spectrogram in kaldi.py to support device and dtype

* Remove failing tests
Signed-off-by: Bhargav Kathivarapu <bhargavkathivarapu31@gmail.com>
parent b56a27b5
{"blackman_coeff": 0.0016, "dither": 0, "energy_floor": 4.668, "frame_length": 0.625, "frame_shift": 0.25, "preemphasis_coefficient": 0.82, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 0.0121, "dither": 0, "energy_floor": 4.9643, "frame_length": 0.875, "frame_shift": 0.1875, "preemphasis_coefficient": 0.98, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 0.0378, "dither": 0, "energy_floor": 3.777, "frame_length": 0.5, "frame_shift": 0.625, "preemphasis_coefficient": 0.76, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 0.0545, "dither": 0, "energy_floor": 0.0732, "frame_length": 1.0, "frame_shift": 0.75, "preemphasis_coefficient": 0.81, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 0.1005, "dither": 0, "energy_floor": 0.3739, "frame_length": 0.5625, "frame_shift": 0.625, "preemphasis_coefficient": 0.19, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 0.1088, "dither": 0, "energy_floor": 0.6933, "frame_length": 0.5, "frame_shift": 0.75, "preemphasis_coefficient": 0.51, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 0.1777, "dither": 0, "energy_floor": 3.8992, "frame_length": 1.0, "frame_shift": 0.3125, "preemphasis_coefficient": 0.96, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 0.2384, "dither": 0, "energy_floor": 0.308, "frame_length": 0.375, "frame_shift": 0.25, "preemphasis_coefficient": 0.98, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": true, "window_type": "povey"}
{"blackman_coeff": 0.2669, "dither": 0, "energy_floor": 2.4329, "frame_length": 0.625, "frame_shift": 1.1875, "preemphasis_coefficient": 0.18, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 0.334, "dither": 0, "energy_floor": 0.5962, "frame_length": 0.25, "frame_shift": 0.5625, "preemphasis_coefficient": 0.38, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 0.4268, "dither": 0, "energy_floor": 2.4431, "frame_length": 0.5625, "frame_shift": 0.0625, "preemphasis_coefficient": 0.95, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hamming"}
{"blackman_coeff": 0.4774, "dither": 0, "energy_floor": 0.6982, "frame_length": 1.125, "frame_shift": 1.125, "preemphasis_coefficient": 0.27, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": true, "window_type": "povey"}
{"blackman_coeff": 0.4992, "dither": 0, "energy_floor": 3.7665, "frame_length": 0.4375, "frame_shift": 1.125, "preemphasis_coefficient": 0.42, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 0.544, "dither": 0, "energy_floor": 1.6641, "frame_length": 0.9375, "frame_shift": 0.875, "preemphasis_coefficient": 0.13, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 0.5785, "dither": 0, "energy_floor": 2.8162, "frame_length": 1.125, "frame_shift": 1.0625, "preemphasis_coefficient": 0.17, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 0.8072, "dither": 0, "energy_floor": 4.0404, "frame_length": 0.5, "frame_shift": 1.1875, "preemphasis_coefficient": 0.74, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 0.8418, "dither": 0, "energy_floor": 4.1771, "frame_length": 0.3125, "frame_shift": 0.25, "preemphasis_coefficient": 0.48, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 0.8431, "dither": 0, "energy_floor": 0.0728, "frame_length": 0.75, "frame_shift": 0.8125, "preemphasis_coefficient": 0.1, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 0.885, "dither": 0, "energy_floor": 3.9292, "frame_length": 0.375, "frame_shift": 0.75, "preemphasis_coefficient": 0.27, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 0.9625, "dither": 0, "energy_floor": 2.5481, "frame_length": 0.6875, "frame_shift": 1.0, "preemphasis_coefficient": 0.06, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 0.9826, "dither": 0, "energy_floor": 0.7377, "frame_length": 0.375, "frame_shift": 0.6875, "preemphasis_coefficient": 0.7, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 0.9854, "dither": 0, "energy_floor": 3.8819, "frame_length": 0.25, "frame_shift": 1.0, "preemphasis_coefficient": 0.54, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": true, "window_type": "povey"}
{"blackman_coeff": 1.0303, "dither": 0, "energy_floor": 4.4583, "frame_length": 0.375, "frame_shift": 0.875, "preemphasis_coefficient": 0.39, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 1.0743, "dither": 0, "energy_floor": 0.4642, "frame_length": 1.125, "frame_shift": 0.625, "preemphasis_coefficient": 0.39, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 1.0788, "dither": 0, "energy_floor": 1.442, "frame_length": 0.1875, "frame_shift": 0.3125, "preemphasis_coefficient": 0.53, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 1.0816, "dither": 0, "energy_floor": 0.205, "frame_length": 0.1875, "frame_shift": 0.6875, "preemphasis_coefficient": 0.02, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hamming"}
{"blackman_coeff": 1.1385, "dither": 0, "energy_floor": 4.738, "frame_length": 0.625, "frame_shift": 0.3125, "preemphasis_coefficient": 0.23, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 1.3142, "dither": 0, "energy_floor": 4.8914, "frame_length": 0.875, "frame_shift": 0.1875, "preemphasis_coefficient": 0.34, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 1.3189, "dither": 0, "energy_floor": 3.683, "frame_length": 1.125, "frame_shift": 1.125, "preemphasis_coefficient": 0.88, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 1.3235, "dither": 0, "energy_floor": 3.8538, "frame_length": 0.25, "frame_shift": 1.0625, "preemphasis_coefficient": 0.07, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "blackman"}
{"blackman_coeff": 1.3389, "dither": 0, "energy_floor": 1.6152, "frame_length": 0.375, "frame_shift": 0.5, "preemphasis_coefficient": 0.21, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 1.3887, "dither": 0, "energy_floor": 3.3198, "frame_length": 0.375, "frame_shift": 0.125, "preemphasis_coefficient": 0.14, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 1.4127, "dither": 0, "energy_floor": 2.6264, "frame_length": 0.875, "frame_shift": 0.375, "preemphasis_coefficient": 0.69, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 1.5178, "dither": 0, "energy_floor": 2.8631, "frame_length": 1.0, "frame_shift": 0.8125, "preemphasis_coefficient": 0.95, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 1.5403, "dither": 0, "energy_floor": 0.0133, "frame_length": 1.1875, "frame_shift": 0.25, "preemphasis_coefficient": 0.59, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 1.5754, "dither": 0, "energy_floor": 0.954, "frame_length": 1.0, "frame_shift": 0.9375, "preemphasis_coefficient": 0.2, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 1.5959, "dither": 0, "energy_floor": 0.9033, "frame_length": 0.75, "frame_shift": 1.0, "preemphasis_coefficient": 0.14, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 1.6923, "dither": 0, "energy_floor": 3.5626, "frame_length": 0.6875, "frame_shift": 1.0625, "preemphasis_coefficient": 0.27, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 1.6972, "dither": 0, "energy_floor": 1.0863, "frame_length": 1.1875, "frame_shift": 0.875, "preemphasis_coefficient": 0.86, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 1.744, "dither": 0, "energy_floor": 0.5308, "frame_length": 0.5, "frame_shift": 0.125, "preemphasis_coefficient": 0.33, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "hamming"}
{"blackman_coeff": 1.7642, "dither": 0, "energy_floor": 0.4833, "frame_length": 0.25, "frame_shift": 0.8125, "preemphasis_coefficient": 0.94, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 1.8072, "dither": 0, "energy_floor": 0.8085, "frame_length": 0.5, "frame_shift": 0.25, "preemphasis_coefficient": 0.96, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 1.8836, "dither": 0, "energy_floor": 4.5145, "frame_length": 0.875, "frame_shift": 1.0625, "preemphasis_coefficient": 0.4, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 1.8946, "dither": 0, "energy_floor": 4.1442, "frame_length": 0.3125, "frame_shift": 0.875, "preemphasis_coefficient": 0.73, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 1.8988, "dither": 0, "energy_floor": 3.0931, "frame_length": 1.0625, "frame_shift": 0.3125, "preemphasis_coefficient": 0.35, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 1.9501, "dither": 0, "energy_floor": 4.3519, "frame_length": 0.4375, "frame_shift": 0.25, "preemphasis_coefficient": 0.61, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 2.0137, "dither": 0, "energy_floor": 3.1007, "frame_length": 0.625, "frame_shift": 1.0625, "preemphasis_coefficient": 0.67, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 2.0175, "dither": 0, "energy_floor": 2.9099, "frame_length": 1.0, "frame_shift": 0.5625, "preemphasis_coefficient": 0.28, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 2.1114, "dither": 0, "energy_floor": 4.5618, "frame_length": 0.25, "frame_shift": 0.875, "preemphasis_coefficient": 0.61, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 2.1472, "dither": 0, "energy_floor": 0.2, "frame_length": 1.125, "frame_shift": 0.875, "preemphasis_coefficient": 0.58, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 2.1947, "dither": 0, "energy_floor": 1.8065, "frame_length": 0.875, "frame_shift": 0.75, "preemphasis_coefficient": 0.45, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 2.2457, "dither": 0, "energy_floor": 1.704, "frame_length": 0.75, "frame_shift": 0.5625, "preemphasis_coefficient": 0.98, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 2.2893, "dither": 0, "energy_floor": 1.0286, "frame_length": 0.25, "frame_shift": 0.5, "preemphasis_coefficient": 0.8, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 2.3371, "dither": 0, "energy_floor": 4.4192, "frame_length": 0.8125, "frame_shift": 0.625, "preemphasis_coefficient": 0.3, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 2.3831, "dither": 0, "energy_floor": 4.8325, "frame_length": 0.25, "frame_shift": 1.125, "preemphasis_coefficient": 0.34, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "povey"}
{"blackman_coeff": 2.423, "dither": 0, "energy_floor": 0.6363, "frame_length": 0.875, "frame_shift": 0.3125, "preemphasis_coefficient": 0.77, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 2.4378, "dither": 0, "energy_floor": 1.4617, "frame_length": 0.9375, "frame_shift": 0.375, "preemphasis_coefficient": 0.53, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 2.4454, "dither": 0, "energy_floor": 1.936, "frame_length": 1.0, "frame_shift": 0.9375, "preemphasis_coefficient": 0.66, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 2.448, "dither": 0, "energy_floor": 3.8782, "frame_length": 0.5625, "frame_shift": 1.125, "preemphasis_coefficient": 0.1, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 2.5164, "dither": 0, "energy_floor": 2.7455, "frame_length": 0.875, "frame_shift": 0.9375, "preemphasis_coefficient": 0.55, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 2.5316, "dither": 0, "energy_floor": 2.3286, "frame_length": 0.75, "frame_shift": 0.75, "preemphasis_coefficient": 0.61, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 2.5487, "dither": 0, "energy_floor": 3.8457, "frame_length": 1.1875, "frame_shift": 0.9375, "preemphasis_coefficient": 0.63, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 2.6121, "dither": 0, "energy_floor": 4.3165, "frame_length": 0.6875, "frame_shift": 1.1875, "preemphasis_coefficient": 0.19, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 2.6988, "dither": 0, "energy_floor": 2.3417, "frame_length": 1.0, "frame_shift": 0.6875, "preemphasis_coefficient": 0.38, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 2.7457, "dither": 0, "energy_floor": 1.3662, "frame_length": 0.25, "frame_shift": 0.875, "preemphasis_coefficient": 0.74, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": true, "window_type": "blackman"}
{"blackman_coeff": 2.8577, "dither": 0, "energy_floor": 4.1431, "frame_length": 0.375, "frame_shift": 1.0, "preemphasis_coefficient": 1.0, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hamming"}
{"blackman_coeff": 2.8693, "dither": 0, "energy_floor": 4.3801, "frame_length": 0.75, "frame_shift": 1.0, "preemphasis_coefficient": 0.95, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 2.8888, "dither": 0, "energy_floor": 0.4078, "frame_length": 0.3125, "frame_shift": 0.625, "preemphasis_coefficient": 0.25, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 2.9074, "dither": 0, "energy_floor": 1.6849, "frame_length": 1.125, "frame_shift": 0.625, "preemphasis_coefficient": 0.79, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 2.9303, "dither": 0, "energy_floor": 3.5172, "frame_length": 0.5, "frame_shift": 0.5, "preemphasis_coefficient": 0.04, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 3.07, "dither": 0, "energy_floor": 3.5254, "frame_length": 0.75, "frame_shift": 0.875, "preemphasis_coefficient": 0.96, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 3.1297, "dither": 0, "energy_floor": 0.3513, "frame_length": 0.4375, "frame_shift": 0.3125, "preemphasis_coefficient": 0.2, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 3.2523, "dither": 0, "energy_floor": 3.5376, "frame_length": 0.3125, "frame_shift": 0.25, "preemphasis_coefficient": 0.46, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 3.3896, "dither": 0, "energy_floor": 0.4666, "frame_length": 1.125, "frame_shift": 0.25, "preemphasis_coefficient": 0.05, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 3.537, "dither": 0, "energy_floor": 1.7032, "frame_length": 0.375, "frame_shift": 0.875, "preemphasis_coefficient": 0.17, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 3.5378, "dither": 0, "energy_floor": 3.6594, "frame_length": 0.25, "frame_shift": 0.625, "preemphasis_coefficient": 0.54, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 3.5847, "dither": 0, "energy_floor": 3.6357, "frame_length": 1.0, "frame_shift": 0.3125, "preemphasis_coefficient": 0.79, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 3.6057, "dither": 0, "energy_floor": 1.6902, "frame_length": 1.0625, "frame_shift": 0.6875, "preemphasis_coefficient": 0.65, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 3.6498, "dither": 0, "energy_floor": 0.2005, "frame_length": 0.9375, "frame_shift": 1.125, "preemphasis_coefficient": 0.37, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 3.6648, "dither": 0, "energy_floor": 4.6742, "frame_length": 0.625, "frame_shift": 1.1875, "preemphasis_coefficient": 0.88, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": true, "window_type": "blackman"}
{"blackman_coeff": 3.6701, "dither": 0, "energy_floor": 3.7451, "frame_length": 0.8125, "frame_shift": 0.25, "preemphasis_coefficient": 0.19, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "blackman"}
{"blackman_coeff": 3.7232, "dither": 0, "energy_floor": 0.4912, "frame_length": 0.375, "frame_shift": 0.875, "preemphasis_coefficient": 0.34, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 3.7605, "dither": 0, "energy_floor": 1.6813, "frame_length": 0.25, "frame_shift": 0.5625, "preemphasis_coefficient": 0.27, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 3.7759, "dither": 0, "energy_floor": 1.7002, "frame_length": 1.0625, "frame_shift": 0.6875, "preemphasis_coefficient": 0.42, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 3.7921, "dither": 0, "energy_floor": 3.4087, "frame_length": 0.25, "frame_shift": 1.0, "preemphasis_coefficient": 0.54, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "blackman"}
{"blackman_coeff": 3.7954, "dither": 0, "energy_floor": 3.5651, "frame_length": 0.5, "frame_shift": 0.8125, "preemphasis_coefficient": 0.06, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "blackman"}
{"blackman_coeff": 3.799, "dither": 0, "energy_floor": 3.0026, "frame_length": 0.625, "frame_shift": 1.0, "preemphasis_coefficient": 0.82, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 3.8659, "dither": 0, "energy_floor": 1.7487, "frame_length": 1.1875, "frame_shift": 0.375, "preemphasis_coefficient": 1.0, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 3.951, "dither": 0, "energy_floor": 0.3903, "frame_length": 1.125, "frame_shift": 1.0, "preemphasis_coefficient": 0.41, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 4.0045, "dither": 0, "energy_floor": 3.061, "frame_length": 0.625, "frame_shift": 1.0625, "preemphasis_coefficient": 0.74, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 4.0187, "dither": 0, "energy_floor": 4.8148, "frame_length": 0.375, "frame_shift": 0.6875, "preemphasis_coefficient": 0.68, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 4.032, "dither": 0, "energy_floor": 2.2019, "frame_length": 1.125, "frame_shift": 0.25, "preemphasis_coefficient": 0.78, "raw_energy": true, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "rectangular"}
{"blackman_coeff": 4.0627, "dither": 0, "energy_floor": 4.1729, "frame_length": 0.625, "frame_shift": 1.125, "preemphasis_coefficient": 0.89, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 4.0736, "dither": 0, "energy_floor": 0.9155, "frame_length": 1.0625, "frame_shift": 0.5625, "preemphasis_coefficient": 0.82, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 4.1131, "dither": 0, "energy_floor": 3.9204, "frame_length": 0.5, "frame_shift": 0.125, "preemphasis_coefficient": 0.39, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 4.1816, "dither": 0, "energy_floor": 1.665, "frame_length": 0.8125, "frame_shift": 0.375, "preemphasis_coefficient": 0.37, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 4.1897, "dither": 0, "energy_floor": 1.2668, "frame_length": 0.1875, "frame_shift": 0.625, "preemphasis_coefficient": 0.74, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hamming"}
{"blackman_coeff": 4.2217, "dither": 0, "energy_floor": 3.6775, "frame_length": 0.3125, "frame_shift": 0.125, "preemphasis_coefficient": 0.01, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "hamming"}
{"blackman_coeff": 4.2785, "dither": 0, "energy_floor": 0.7201, "frame_length": 0.8125, "frame_shift": 0.8125, "preemphasis_coefficient": 0.3, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 4.3304, "dither": 0, "energy_floor": 1.0538, "frame_length": 0.875, "frame_shift": 1.125, "preemphasis_coefficient": 0.92, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 4.3942, "dither": 0, "energy_floor": 3.9813, "frame_length": 0.75, "frame_shift": 0.6875, "preemphasis_coefficient": 0.27, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "blackman"}
{"blackman_coeff": 4.4432, "dither": 0, "energy_floor": 2.0441, "frame_length": 0.5, "frame_shift": 0.6875, "preemphasis_coefficient": 0.77, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "hanning"}
{"blackman_coeff": 4.4459, "dither": 0, "energy_floor": 0.5135, "frame_length": 0.25, "frame_shift": 0.1875, "preemphasis_coefficient": 0.29, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 4.5486, "dither": 0, "energy_floor": 1.3248, "frame_length": 0.1875, "frame_shift": 1.125, "preemphasis_coefficient": 0.91, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": false, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 4.5535, "dither": 0, "energy_floor": 2.1772, "frame_length": 0.4375, "frame_shift": 0.875, "preemphasis_coefficient": 0.21, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hanning"}
{"blackman_coeff": 4.5835, "dither": 0, "energy_floor": 0.3781, "frame_length": 0.875, "frame_shift": 0.875, "preemphasis_coefficient": 0.04, "raw_energy": true, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": true, "window_type": "hamming"}
{"blackman_coeff": 4.6297, "dither": 0, "energy_floor": 2.49, "frame_length": 0.5, "frame_shift": 0.25, "preemphasis_coefficient": 0.03, "raw_energy": false, "remove_dc_offset": false, "round_to_power_of_two": true, "snip_edges": true, "subtract_mean": false, "window_type": "rectangular"}
{"blackman_coeff": 4.6749, "dither": 0, "energy_floor": 4.8853, "frame_length": 0.25, "frame_shift": 0.25, "preemphasis_coefficient": 0.48, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": false, "subtract_mean": false, "window_type": "povey"}
{"blackman_coeff": 4.6971, "dither": 0, "energy_floor": 1.3632, "frame_length": 0.875, "frame_shift": 0.9375, "preemphasis_coefficient": 0.44, "raw_energy": false, "remove_dc_offset": true, "round_to_power_of_two": false, "snip_edges": true, "subtract_mean": false, "window_type": "blackman"}
......@@ -86,6 +86,17 @@ class Kaldi(common_utils.TestBaseMixin):
kaldi_result = _run_kaldi(command, 'scp', wave_file)
self.assert_equal(result, expected=kaldi_result, rtol=1e-4, atol=1e-8)
@parameterized.expand(_load_params(common_utils.get_asset_path('kaldi_test_spectrogram_args.json')))
@unittest.skipIf(_not_available('compute-spectrogram-feats'), '`compute-spectrogram-feats` not available')
def test_spectrogram(self, kwargs):
    """Compare ``kaldi.spectrogram`` against the ``compute-spectrogram-feats`` binary.

    Args:
        kwargs: One set of spectrogram options from the JSON asset file. The
            same options are passed to the torchaudio call and, after
            conversion by ``_convert_args``, to the Kaldi command line.
    """
    audio_path = common_utils.get_asset_path('kaldi_file.wav')

    # torchaudio side: cast the loaded audio to the dtype/device under test
    # before computing the features.
    loaded = torchaudio.load_wav(audio_path)
    signal = loaded[0].to(dtype=self.dtype, device=self.device)
    actual = torchaudio.compliance.kaldi.spectrogram(signal, **kwargs)

    # Kaldi side: run the reference binary with the equivalent options.
    cli_options = _convert_args(**kwargs)
    kaldi_cmd = ['compute-spectrogram-feats'] + cli_options + ['scp:-', 'ark:-']
    reference = _run_kaldi(kaldi_cmd, 'scp', audio_path)

    self.assert_equal(actual, expected=reference, rtol=1e-4, atol=1e-8)
@parameterized.expand(_load_params(common_utils.get_asset_path('kaldi_test_mfcc_args.json')))
@unittest.skipIf(_not_available('compute-mfcc-feats'), '`compute-mfcc-feats` not available')
def test_mfcc(self, kwargs):
......
......@@ -272,6 +272,9 @@ def spectrogram(waveform: Tensor,
Tensor: A spectrogram identical to what Kaldi would output. The shape is
(m, ``padded_window_size // 2 + 1``) where m is calculated in _get_strided
"""
device, dtype = waveform.device, waveform.dtype
epsilon = _get_epsilon(device, dtype)
waveform, window_shift, window_size, padded_window_size = _get_waveform_and_window_properties(
waveform, channel, sample_frequency, frame_shift, frame_length, round_to_power_of_two, preemphasis_coefficient)
......@@ -287,7 +290,7 @@ def spectrogram(waveform: Tensor,
fft = torch.rfft(strided_input, 1, normalized=False, onesided=True)
# Convert the FFT into a power spectrum
power_spectrum = torch.max(fft.pow(2).sum(2), EPSILON).log() # size (m, padded_window_size // 2 + 1)
power_spectrum = torch.max(fft.pow(2).sum(2), epsilon).log() # size (m, padded_window_size // 2 + 1)
power_spectrum[:, 0] = signal_log_energy
power_spectrum = _subtract_column_mean(power_spectrum, subtract_mean)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment