kaldi_compatibility_impl.py 4.31 KB
Newer Older
1
2
3
4
5
6
"""Test suites for checking numerical compatibility against Kaldi"""
import subprocess

import kaldi_io
import torch
import torchaudio.functional as F
7
import torchaudio.compliance.kaldi
moto's avatar
moto committed
8
from parameterized import parameterized
9

10
11
12
13
14
15
16
17
from .common_utils import (
    TestBaseMixin,
    load_params,
    skipIfNoExec,
    get_asset_path,
    load_wav
)

18

19
20
21
22
23
24
25
26
27
def _convert_args(**kwargs):
    args = []
    for key, value in kwargs.items():
        key = '--' + key.replace('_', '-')
        value = str(value).lower() if value in [True, False] else str(value)
        args.append('%s=%s' % (key, value))
    return args


28
def _run_kaldi(command, input_type, input_value):
29
30
    """Run provided Kaldi command, pass a tensor and get the resulting tensor

31
32
33
34
35
36
    Arguments:
        input_type: str
            'ark' or 'scp'
        input_value:
            Tensor for 'ark'
            string for 'scp' (path to an audio file)
37
    """
38
    key = 'foo'
39
    process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
40
    if input_type == 'ark':
moto's avatar
moto committed
41
        kaldi_io.write_mat(process.stdin, input_value.cpu().numpy(), key=key)
42
43
44
45
    elif input_type == 'scp':
        process.stdin.write(f'{key} {input_value}'.encode('utf8'))
    else:
        raise NotImplementedError('Unexpected type')
46
47
48
49
50
    process.stdin.close()
    result = dict(kaldi_io.read_mat_ark(process.stdout))['foo']
    return torch.from_numpy(result.copy())  # copy supresses some torch warning


51
class Kaldi(TestBaseMixin):
    """Tests comparing torchaudio outputs against the matching Kaldi executables."""

    def assert_equal(self, output, *, expected, rtol=None, atol=None):
        """Assert ``output`` equals ``expected`` after moving ``expected`` to the test dtype and device."""
        reference = expected.to(dtype=self.dtype, device=self.device)
        self.assertEqual(output, reference, rtol=rtol, atol=atol)

    @skipIfNoExec('apply-cmvn-sliding')
    def test_sliding_window_cmn(self):
        """sliding_window_cmn output should match what apply-cmvn-sliding produces"""
        options = dict(cmn_window=600, min_cmn_window=100, center=False, norm_vars=False)

        features = torch.randn(40, 10, dtype=self.dtype, device=self.device)
        actual = F.sliding_window_cmn(features, **options)
        # 'ark:-' twice: the command reads the matrix from stdin and writes to stdout.
        kaldi_cmd = ['apply-cmvn-sliding', *_convert_args(**options), 'ark:-', 'ark:-']
        reference = _run_kaldi(kaldi_cmd, 'ark', features)
        self.assert_equal(actual, expected=reference)

    @parameterized.expand(load_params('kaldi_test_fbank_args.json'))
    @skipIfNoExec('compute-fbank-feats')
    def test_fbank(self, kwargs):
        """fbank output should match what compute-fbank-feats produces"""
        wav_path = get_asset_path('kaldi_file.wav')
        loaded = load_wav(wav_path, normalize=False)
        samples = loaded[0].to(dtype=self.dtype, device=self.device)
        actual = torchaudio.compliance.kaldi.fbank(samples, **kwargs)
        # The Kaldi side is given the file path ('scp'), not the loaded samples.
        kaldi_cmd = ['compute-fbank-feats', *_convert_args(**kwargs), 'scp:-', 'ark:-']
        reference = _run_kaldi(kaldi_cmd, 'scp', wav_path)
        self.assert_equal(actual, expected=reference, rtol=1e-4, atol=1e-8)

    @parameterized.expand(load_params('kaldi_test_spectrogram_args.json'))
    @skipIfNoExec('compute-spectrogram-feats')
    def test_spectrogram(self, kwargs):
        """spectrogram output should match what compute-spectrogram-feats produces"""
        wav_path = get_asset_path('kaldi_file.wav')
        loaded = load_wav(wav_path, normalize=False)
        samples = loaded[0].to(dtype=self.dtype, device=self.device)
        actual = torchaudio.compliance.kaldi.spectrogram(samples, **kwargs)
        # The Kaldi side is given the file path ('scp'), not the loaded samples.
        kaldi_cmd = ['compute-spectrogram-feats', *_convert_args(**kwargs), 'scp:-', 'ark:-']
        reference = _run_kaldi(kaldi_cmd, 'scp', wav_path)
        self.assert_equal(actual, expected=reference, rtol=1e-4, atol=1e-8)

    @parameterized.expand(load_params('kaldi_test_mfcc_args.json'))
    @skipIfNoExec('compute-mfcc-feats')
    def test_mfcc(self, kwargs):
        """mfcc output should match what compute-mfcc-feats produces"""
        wav_path = get_asset_path('kaldi_file.wav')
        loaded = load_wav(wav_path, normalize=False)
        samples = loaded[0].to(dtype=self.dtype, device=self.device)
        actual = torchaudio.compliance.kaldi.mfcc(samples, **kwargs)
        # The Kaldi side is given the file path ('scp'), not the loaded samples.
        kaldi_cmd = ['compute-mfcc-feats', *_convert_args(**kwargs), 'scp:-', 'ark:-']
        reference = _run_kaldi(kaldi_cmd, 'scp', wav_path)
        self.assert_equal(actual, expected=reference, rtol=1e-4, atol=1e-8)