Fix encoding g722 format (#3373)

Summary: The g722 format only supports a 16 kHz sample rate, but AVCodec does not list this in its supported sample rates. The implementation does not insert resampling, so when the source sample rate is not 16 kHz the resulting audio can be slowed down or sped up. Pull Request resolved: https://github.com/pytorch/audio/pull/3373 Reviewed By: hwangjeff Differential Revision: D46233181 Pulled By: mthrok fbshipit-source-id: 902b3f862a8f7269dc35bc871e868b0e78326c6c

Fix encoding g722 format (#3373)
Summary: The g722 format only supports a 16 kHz sample rate, but AVCodec does not list this in its supported sample rates. The implementation does not insert resampling, so when the source sample rate is not 16 kHz the resulting audio can be slowed down or sped up. Pull Request resolved: https://github.com/pytorch/audio/pull/3373 Reviewed By: hwangjeff Differential Revision: D46233181 Pulled By: mthrok fbshipit-source-id: 902b3f862a8f7269dc35bc871e868b0e78326c6c
1b05ca7e · moto · Facebook GitHub Bot · c120f316 · 1b05ca7e · 1b05ca7e
Commit 1b05ca7e authored May 26, 2023 by moto Committed by Facebook GitHub Bot May 26, 2023
2 changed files
--- a/test/torchaudio_unittest/io/stream_writer_test.py
+++ b/test/torchaudio_unittest/io/stream_writer_test.py
@@ -389,6 +389,22 @@ class StreamWriterCorrectnessTest(TempDirMixin, TorchaudioTestCase):
            return
        self.assertEqual(saved.shape, data.shape)

+    def test_g722_sample_rate(self):
+        """Encoding to G.722 stores the stream at 16000 Hz even when the source sample rate differs"""
+        filename = "test.g722"
+        sample_rate = 41000  # not 16000, so the check below is meaningful; NOTE(review): possibly meant 44100 - confirm
+        data = get_sinusoid(sample_rate=sample_rate, n_channels=1, channels_first=False)
+
+        # Write the sinusoid through StreamWriter, declaring the source sample rate on the stream
+        dst = self.get_temp_path(filename)
+        w = StreamWriter(dst, format="g722")
+        w.add_audio_stream(sample_rate=sample_rate, num_channels=1)
+        with w.open():
+            w.write_audio_chunk(0, data)
+
+        r = StreamReader(src=self.get_temp_path(filename))  # reopen via the same get_temp_path(filename) call used for dst
+        self.assertEqual(r.get_src_stream_info(0).sample_rate, 16000)
+
    def test_preserve_fps(self):
        """Decimal point frame rate is properly saved


--- a/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp
+++ b/torchaudio/csrc/ffmpeg/stream_writer/encode_process.cpp
@@ -273,6 +273,20 @@ int get_enc_sr(
    int src_sample_rate,
    const c10::optional<int>& encoder_sample_rate,
    const AVCodec* codec) {
+  // G.722 only supports 16000 Hz, but it does not list the sample rate in
+  // supported_samplerates so we hard code it here.
+  if (codec->id == AV_CODEC_ID_ADPCM_G722) {
+    if (encoder_sample_rate) {
+      auto val = encoder_sample_rate.value();
+      TORCH_CHECK(
+          val == 16'000,
+          codec->name,
+          " does not support sample rate ",
+          val,
+          ". Supported values are; 16000.");
+    }
+    return 16'000;
+  }
  if (encoder_sample_rate) {
    const int& encoder_sr = encoder_sample_rate.value();
    TORCH_CHECK(