Fix return dtype in MVDR module (#2376)

Summary: Addresses https://github.com/pytorch/audio/issues/2375. The MVDR module internally casts complex tensors to `torch.complex128` for computation and is meant to cast the result back to the original dtype before returning the Tensor. However, the cast back never took effect: `Tensor.to` returns a new tensor rather than modifying its receiver, so the bare statement `specgram_enhanced.to(dtype)` discarded its result, where `specgram_enhanced = specgram_enhanced.to(dtype)` was needed. This change returns `specgram_enhanced.to(dtype)` directly, making the output dtype consistent with the original input, and adds a test covering `torch.complex64` and `torch.complex128` inputs. Pull Request resolved: https://github.com/pytorch/audio/pull/2376 Reviewed By: hwangjeff Differential Revision: D36280851 Pulled By: nateanl fbshipit-source-id: 553d1b98f899547209a4e3ebc59920c7ef1f3112

Fix return dtype in MVDR module (#2376)
Summary: Addresses https://github.com/pytorch/audio/issues/2375. The MVDR module internally casts complex tensors to `torch.complex128` for computation and is meant to cast the result back to the original dtype before returning the Tensor. However, the cast back never took effect: `Tensor.to` returns a new tensor rather than modifying its receiver, so the bare statement `specgram_enhanced.to(dtype)` discarded its result, where `specgram_enhanced = specgram_enhanced.to(dtype)` was needed. This change returns `specgram_enhanced.to(dtype)` directly, making the output dtype consistent with the original input, and adds a test covering `torch.complex64` and `torch.complex128` inputs. Pull Request resolved: https://github.com/pytorch/audio/pull/2376 Reviewed By: hwangjeff Differential Revision: D36280851 Pulled By: nateanl fbshipit-source-id: 553d1b98f899547209a4e3ebc59920c7ef1f3112
2f4eb4ac · Zhaoheng Ni · Facebook GitHub Bot · eab2f39d · 2f4eb4ac · 2f4eb4ac
Commit 2f4eb4ac authored May 10, 2022 by Zhaoheng Ni Committed by Facebook GitHub Bot May 10, 2022
Showing with 18 additions and 2 deletions

test/torchaudio_unittest/transforms/transforms_test_impl.py test/torchaudio_unittest/transforms/transforms_test_impl.py +17 -0

torchaudio/transforms/_transforms.py torchaudio/transforms/_transforms.py +1 -2

No files found.
--- a/test/torchaudio_unittest/transforms/transforms_test_impl.py
+++ b/test/torchaudio_unittest/transforms/transforms_test_impl.py
@@ -131,3 +131,20 @@ class TransformsTestBase(TestBaseMixin):
            psd_np = psd_numpy(spectrogram.detach().numpy(), mask, multi_mask)
        psd = transform(spectrogram, mask)
        self.assertEqual(psd, psd_np, atol=1e-5, rtol=1e-5)
+
+    @parameterized.expand(
+        [
+            param(torch.complex64),
+            param(torch.complex128),
+        ]
+    )
+    def test_mvdr(self, dtype):
+        """Make sure the output dtype is the same as the input dtype"""
+        transform = T.MVDR()
+        waveform = get_whitenoise(sample_rate=8000, duration=0.5, n_channels=3)
+        specgram = get_spectrogram(waveform, n_fft=400)  # (channel, freq, time)
+        specgram = specgram.to(dtype)
+        mask_s = torch.rand(specgram.shape[-2:])
+        mask_n = torch.rand(specgram.shape[-2:])
+        specgram_enhanced = transform(specgram, mask_s, mask_n)
+        assert specgram_enhanced.dtype == dtype
--- a/torchaudio/transforms/_transforms.py
+++ b/torchaudio/transforms/_transforms.py
@@ -2087,8 +2087,7 @@ class MVDR(torch.nn.Module):
        # unpack batch
        specgram_enhanced = specgram_enhanced.reshape(shape[:-3] + shape[-2:])

-        specgram_enhanced.to(dtype)
-        return specgram_enhanced
+        return specgram_enhanced.to(dtype)


 class RTFMVDR(torch.nn.Module):