Fix mel spectrogram visualization in TTS tutorial (#2989)

Summary: The mel spectrograms in the TTS tutorial are rendered upside down. This PR fixes the plots by passing `origin="lower"` (together with `aspect="auto"`) to every `imshow` call that displays a spectrogram.
Pull Request resolved: https://github.com/pytorch/audio/pull/2989
Reviewed By: mthrok
Differential Revision: D42538349
Pulled By: nateanl
fbshipit-source-id: 4388103a49bdfabf1705c1f979d44ecedd5c910a

Fix mel spectrogram visualization in TTS tutorial (#2989)
Summary: The mel spectrograms in the TTS tutorial are rendered upside down. This PR fixes the plots by passing `origin="lower"` (together with `aspect="auto"`) to every `imshow` call that displays a spectrogram.
Pull Request resolved: https://github.com/pytorch/audio/pull/2989
Reviewed By: mthrok
Differential Revision: D42538349
Pulled By: nateanl
fbshipit-source-id: 4388103a49bdfabf1705c1f979d44ecedd5c910a
b983c665 · Zhaoheng Ni · Facebook GitHub Bot · e259f156 · b983c665
Commit b983c665 authored Jan 16, 2023 by Zhaoheng Ni Committed by Facebook GitHub Bot Jan 16, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 5 deletions

examples/tutorials/tacotron2_pipeline_tutorial.py examples/tutorials/tacotron2_pipeline_tutorial.py +5 -5

No files found.
--- a/examples/tutorials/tacotron2_pipeline_tutorial.py
+++ b/examples/tutorials/tacotron2_pipeline_tutorial.py
@@ -218,7 +218,7 @@ with torch.inference_mode():
    spec, _, _ = tacotron2.infer(processed, lengths)


-_ = plt.imshow(spec[0].cpu().detach())
+_ = plt.imshow(spec[0].cpu().detach(), origin="lower", aspect="auto")


 ######################################################################
@@ -231,7 +231,7 @@ for i in range(3):
    with torch.inference_mode():
        spec, spec_lengths, _ = tacotron2.infer(processed, lengths)
    print(spec[0].shape)
-    ax[i].imshow(spec[0].cpu().detach())
+    ax[i].imshow(spec[0].cpu().detach(), origin="lower", aspect="auto")
 plt.show()


@@ -271,7 +271,7 @@ with torch.inference_mode():
    waveforms, lengths = vocoder(spec, spec_lengths)

 fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(16, 9))
-ax1.imshow(spec[0].cpu().detach())
+ax1.imshow(spec[0].cpu().detach(), origin="lower", aspect="auto")
 ax2.plot(waveforms[0].cpu().detach())

 IPython.display.Audio(waveforms[0:1].cpu(), rate=vocoder.sample_rate)
@@ -301,7 +301,7 @@ with torch.inference_mode():
 waveforms, lengths = vocoder(spec, spec_lengths)

 fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(16, 9))
-ax1.imshow(spec[0].cpu().detach())
+ax1.imshow(spec[0].cpu().detach(), origin="lower", aspect="auto")
 ax2.plot(waveforms[0].cpu().detach())

 IPython.display.Audio(waveforms[0:1].cpu(), rate=vocoder.sample_rate)
@@ -340,7 +340,7 @@ with torch.no_grad():
    waveforms = waveglow.infer(spec)

 fig, [ax1, ax2] = plt.subplots(2, 1, figsize=(16, 9))
-ax1.imshow(spec[0].cpu().detach())
+ax1.imshow(spec[0].cpu().detach(), origin="lower", aspect="auto")
 ax2.plot(waveforms[0].cpu().detach())

 IPython.display.Audio(waveforms[0:1].cpu(), rate=22050)