Remove mad (#2428)

Summary: Remove the code related to libmad, which had been disabled in https://github.com/pytorch/audio/issues/2354. In https://github.com/pytorch/audio/issues/2419, we moved MP3 decoding to ffmpeg, but CI tests were still using libmad. This commit completely removes libmad from torchaudio. This is a BC-breaking change, as the `apply_effects_file` function can no longer handle MP3 and cannot fall back to ffmpeg. The workaround is to use `torchaudio.load` and then `apply_effects_tensor`. Pull Request resolved: https://github.com/pytorch/audio/pull/2428 Reviewed By: carolineechen Differential Revision: D36851805 Pulled By: mthrok fbshipit-source-id: f98795c59a1ac61cef511f2bbeac37f7c3c69d55

Remove mad (#2428)
Summary: Remove the code related to libmad, which had been disabled in https://github.com/pytorch/audio/issues/2354. In https://github.com/pytorch/audio/issues/2419, we moved MP3 decoding to ffmpeg, but CI tests were still using libmad. This commit completely removes libmad from torchaudio. This is a BC-breaking change, as the `apply_effects_file` function can no longer handle MP3 and cannot fall back to ffmpeg. The workaround is to use `torchaudio.load` and then `apply_effects_tensor`. Pull Request resolved: https://github.com/pytorch/audio/pull/2428 Reviewed By: carolineechen Differential Revision: D36851805 Pulled By: mthrok fbshipit-source-id: f98795c59a1ac61cef511f2bbeac37f7c3c69d55
d2ecba98 · moto · Facebook GitHub Bot · d01f5891 · d2ecba98 · d2ecba98
Commit d2ecba98 authored Jun 02, 2022 by moto Committed by Facebook GitHub Bot Jun 02, 2022
8 changed files
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -622,7 +622,6 @@ jobs:
          name: Install torchaudio
          command: .circleci/unittest/linux/scripts/install.sh
          environment:
-              BUILD_MAD: true
              USE_FFMPEG: true
      - run:
          name: Run tests
@@ -656,7 +655,7 @@ jobs:
          command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh
      - run:
          name: Install torchaudio
-          command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e USE_FFMPEG=1 -e BUILD_MAD=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
+          command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e USE_FFMPEG=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
      - run:
          name: Run tests
          environment:
@@ -770,7 +769,6 @@ jobs:
          environment:
              USE_FFMPEG: true
              USE_OPENMP: false
-              BUILD_MAD: true
      - run:
          name: Run tests
          command: .circleci/unittest/linux/scripts/run_test.sh

--- a/.circleci/config.yml.in
+++ b/.circleci/config.yml.in
@@ -622,7 +622,6 @@ jobs:
          name: Install torchaudio
          command: .circleci/unittest/linux/scripts/install.sh
          environment:
-              BUILD_MAD: true
              USE_FFMPEG: true
      - run:
          name: Run tests
@@ -656,7 +655,7 @@ jobs:
          command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh
      - run:
          name: Install torchaudio
-          command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e USE_FFMPEG=1 -e BUILD_MAD=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
+          command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e USE_FFMPEG=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
      - run:
          name: Run tests
          environment:
@@ -770,7 +769,6 @@ jobs:
          environment:
              USE_FFMPEG: true
              USE_OPENMP: false
-              BUILD_MAD: true
      - run:
          name: Run tests
          command: .circleci/unittest/linux/scripts/run_test.sh

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -57,7 +57,6 @@ endif()
 # Options
 option(BUILD_SOX "Build libsox statically" ON)
-option(BUILD_MAD "Enable libmad" OFF)
 option(BUILD_KALDI "Build kaldi statically" ON)
 option(BUILD_RNNT "Enable RNN transducer" ON)
 option(BUILD_CTC_DECODER "Build Flashlight CTC decoder" ON)

--- a/test/torchaudio_unittest/sox_effect/sox_effect_test.py
+++ b/test/torchaudio_unittest/sox_effect/sox_effect_test.py
@@ -7,7 +7,6 @@ from parameterized import parameterized
 from torchaudio import sox_effects
 from torchaudio._internal import module_utils as _mod_utils
 from torchaudio_unittest.common_utils import (
-    get_asset_path,
    get_sinusoid,
    get_wav_data,
    HttpServerMixin,
@@ -191,32 +190,6 @@ class TestFileFormats(TempDirMixin, PytorchTestCase):
        assert sr == expected_sr
        self.assertEqual(found, expected)
-    @parameterized.expand(
-        list(
-            itertools.product(
-                [8000, 16000],
-                [1, 2],
-            )
-        ),
-        name_func=lambda f, _, p: f'{f.__name__}_{"_".join(str(arg) for arg in p.args)}',
-    )
-    def test_mp3(self, sample_rate, num_channels):
-        """`apply_effects_file` works on various mp3 format"""
-        channels_first = True
-        effects = [["band", "300", "10"]]
-        input_path = self.get_temp_path("input.mp3")
-        reference_path = self.get_temp_path("reference.wav")
-        sox_utils.gen_audio_file(input_path, sample_rate, num_channels)
-        sox_utils.run_sox_effect(input_path, reference_path, effects)
-        expected, expected_sr = load_wav(reference_path)
-        found, sr = sox_effects.apply_effects_file(input_path, effects, channels_first=channels_first)
-        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
-        assert sr == expected_sr
-        self.assertEqual(found, expected, atol=1e-4, rtol=1e-8)
    @parameterized.expand(
        list(
            itertools.product(
@@ -270,32 +243,12 @@ class TestFileFormats(TempDirMixin, PytorchTestCase):
        self.assertEqual(found, expected)
-@skipIfNoSox
-class TestApplyEffectFileWithoutExtension(PytorchTestCase):
-    def test_mp3(self):
-        """Providing format allows to read mp3 without extension
-        libsox does not check header for mp3
-        https://github.com/pytorch/audio/issues/1040
-        The file was generated with the following command
-            ffmpeg -f lavfi -i "sine=frequency=1000:duration=5" -ar 16000 -f mp3 test_noext
-        """
-        effects = [["band", "300", "10"]]
-        path = get_asset_path("mp3_without_ext")
-        _, sr = sox_effects.apply_effects_file(path, effects, format="mp3")
-        assert sr == 16000
 @skipIfNoExec("sox")
 @skipIfNoSox
 class TestFileObject(TempDirMixin, PytorchTestCase):
    @parameterized.expand(
        [
            ("wav", None),
-            ("mp3", 128),
-            ("mp3", 320),
            ("flac", 0),
            ("flac", 5),
            ("flac", 8),
@@ -309,7 +262,6 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
        sample_rate = 16000
        channels_first = True
        effects = [["band", "300", "10"]]
-        format_ = ext if ext in ["mp3"] else None
        input_path = self.get_temp_path(f"input.{ext}")
        reference_path = self.get_temp_path("reference.wav")
@@ -318,7 +270,7 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
        expected, expected_sr = load_wav(reference_path)
        with open(input_path, "rb") as fileobj:
-            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first, format=format_)
+            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
        assert sr == expected_sr
        self.assertEqual(found, expected)
@@ -326,8 +278,6 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
    @parameterized.expand(
        [
            ("wav", None),
-            ("mp3", 128),
-            ("mp3", 320),
            ("flac", 0),
            ("flac", 5),
            ("flac", 8),
@@ -341,7 +291,6 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
        sample_rate = 16000
        channels_first = True
        effects = [["band", "300", "10"]]
-        format_ = ext if ext in ["mp3"] else None
        input_path = self.get_temp_path(f"input.{ext}")
        reference_path = self.get_temp_path("reference.wav")
@@ -351,7 +300,7 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
        with open(input_path, "rb") as file_:
            fileobj = io.BytesIO(file_.read())
-        found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first, format=format_)
+        found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
        assert sr == expected_sr
        self.assertEqual(found, expected)
@@ -359,8 +308,6 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
    @parameterized.expand(
        [
            ("wav", None),
-            ("mp3", 128),
-            ("mp3", 320),
            ("flac", 0),
            ("flac", 5),
            ("flac", 8),
@@ -374,7 +321,6 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
        sample_rate = 16000
        channels_first = True
        effects = [["band", "300", "10"]]
-        format_ = ext if ext in ["mp3"] else None
        audio_file = f"input.{ext}"
        input_path = self.get_temp_path(audio_file)
@@ -389,7 +335,7 @@ class TestFileObject(TempDirMixin, PytorchTestCase):
            tarobj.add(input_path, arcname=audio_file)
        with tarfile.TarFile(archive_path, "r") as tarobj:
            fileobj = tarobj.extractfile(audio_file)
-            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first, format=format_)
+            found, sr = sox_effects.apply_effects_file(fileobj, effects, channels_first=channels_first)
        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
        assert sr == expected_sr
        self.assertEqual(found, expected)
@@ -402,8 +348,6 @@ class TestFileObjectHttp(HttpServerMixin, PytorchTestCase):
    @parameterized.expand(
        [
            ("wav", None),
-            ("mp3", 128),
-            ("mp3", 320),
            ("flac", 0),
            ("flac", 5),
            ("flac", 8),
@@ -416,7 +360,6 @@ class TestFileObjectHttp(HttpServerMixin, PytorchTestCase):
        sample_rate = 16000
        channels_first = True
        effects = [["band", "300", "10"]]
-        format_ = ext if ext in ["mp3"] else None
        audio_file = f"input.{ext}"
        input_path = self.get_temp_path(audio_file)
        reference_path = self.get_temp_path("reference.wav")
@@ -427,7 +370,7 @@ class TestFileObjectHttp(HttpServerMixin, PytorchTestCase):
        url = self.get_url(audio_file)
        with requests.get(url, stream=True) as resp:
-            found, sr = sox_effects.apply_effects_file(resp.raw, effects, channels_first=channels_first, format=format_)
+            found, sr = sox_effects.apply_effects_file(resp.raw, effects, channels_first=channels_first)
        save_wav(self.get_temp_path("result.wav"), found, sr, channels_first=channels_first)
        assert sr == expected_sr
        self.assertEqual(found, expected)
--- a/third_party/patches/libmad.patch
+++ b/third_party/patches/libmad.patch
-See the followings for the origin of this patch
-http://www.linuxfromscratch.org/blfs/view/svn/multimedia/libmad.html
-http://www.linuxfromscratch.org/patches/blfs/svn/libmad-0.15.1b-fixes-1.patch
--- src/libmad/configure	2004-02-05 09:34:07.000000000 +0000
-+++ src/libmad/configure.new	2020-06-30 21:10:28.528018931 +0000
-@@ -19083,71 +19083,7 @@
- if test "$GCC" = yes
- then
-    if test -z "$arch"
-    then
-	case "$host" in
-	    i386-*)           ;;
-	    i?86-*)           arch="-march=i486" ;;
-	    arm*-empeg-*)     arch="-march=armv4 -mtune=strongarm1100" ;;
-	    armv4*-*)         arch="-march=armv4 -mtune=strongarm" ;;
-	    powerpc-*)        ;;
-	    mips*-agenda-*)   arch="-mcpu=vr4100" ;;
-	    mips*-luxsonor-*) arch="-mips1 -mcpu=r3000 -Wa,-m4010" ;;
-	esac
-    fi
-
-    case "$optimize" in
-	-O|"-O "*)
-	    optimize="-O"
-	    optimize="$optimize -fforce-mem"
-	    optimize="$optimize -fforce-addr"
-	    : #x optimize="$optimize -finline-functions"
-	    : #- optimize="$optimize -fstrength-reduce"
-	    optimize="$optimize -fthread-jumps"
-	    optimize="$optimize -fcse-follow-jumps"
-	    optimize="$optimize -fcse-skip-blocks"
-	    : #x optimize="$optimize -frerun-cse-after-loop"
-	    : #x optimize="$optimize -frerun-loop-opt"
-	    : #x optimize="$optimize -fgcse"
-	    optimize="$optimize -fexpensive-optimizations"
-	    optimize="$optimize -fregmove"
-	    : #* optimize="$optimize -fdelayed-branch"
-	    : #x optimize="$optimize -fschedule-insns"
-	    optimize="$optimize -fschedule-insns2"
-	    : #? optimize="$optimize -ffunction-sections"
-	    : #? optimize="$optimize -fcaller-saves"
-	    : #> optimize="$optimize -funroll-loops"
-	    : #> optimize="$optimize -funroll-all-loops"
-	    : #x optimize="$optimize -fmove-all-movables"
-	    : #x optimize="$optimize -freduce-all-givs"
-	    : #? optimize="$optimize -fstrict-aliasing"
-	    : #* optimize="$optimize -fstructure-noalias"
-
-	    case "$host" in
-		arm*-*)
-		    optimize="$optimize -fstrength-reduce"
-		    ;;
-		mips*-*)
-		    optimize="$optimize -fstrength-reduce"
-		    optimize="$optimize -finline-functions"
-		    ;;
-		i?86-*)
-		    optimize="$optimize -fstrength-reduce"
-		    ;;
-		powerpc-apple-*)
-		    # this triggers an internal compiler error with gcc2
-		    : #optimize="$optimize -fstrength-reduce"
-
-		    # this is really only beneficial with gcc3
-		    : #optimize="$optimize -finline-functions"
-		    ;;
-		*)
-		    # this sometimes provokes bugs in gcc 2.95.2
-		    : #optimize="$optimize -fstrength-reduce"
-		    ;;
-	    esac
-	    ;;
-    esac
-+    optimize="-O2"
- fi
- case "$host" in
-@@ -21497,6 +21433,7 @@
- then
-     case "$host" in
- 	i?86-*)     FPM="INTEL"  ;;
-+	x86_64*)    FPM="64BIT"  ;;
- 	arm*-*)     FPM="ARM"    ;;
- 	mips*-*)    FPM="MIPS"   ;;
- 	sparc*-*)   FPM="SPARC"  ;;
--- a/third_party/sox/CMakeLists.txt
+++ b/third_party/sox/CMakeLists.txt
@@ -17,25 +17,6 @@ set(envs
  "CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
 )
-if (BUILD_MAD)
-  ExternalProject_Add(mad
-    PREFIX ${CMAKE_CURRENT_BINARY_DIR}
-    DOWNLOAD_DIR ${ARCHIVE_DIR}
-    URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz
-    URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690
-    PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/
-    CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS}
-    DOWNLOAD_NO_PROGRESS ON
-    LOG_DOWNLOAD ON
-    LOG_UPDATE ON
-    LOG_CONFIGURE ON
-    LOG_BUILD ON
-    LOG_INSTALL ON
-    LOG_MERGED_STDOUTERR ON
-    LOG_OUTPUT_ON_FAILURE ON
-  )
-endif (BUILD_MAD)
 ExternalProject_Add(amr
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DOWNLOAD_DIR ${ARCHIVE_DIR}
@@ -176,6 +157,7 @@ set(SOX_OPTIONS
  --without-oss
  --without-id3tag
  --without-ladspa
+  --without-mad
  --without-magic
  --without-png
  --without-pulseaudio
@@ -205,30 +187,6 @@ set(sox_depends
  ogg flac vorbis opusfile lame amr
  )
-if (BUILD_MAD)
-  list(
-    APPEND
-    SOX_OPTIONS
-    --with-mad
-    )
-  list(
-    APPEND
-    SOX_LIBRARIES
-    ${INSTALL_DIR}/lib/libmad.a
-    )
-  list(
-    APPEND
-    sox_depends
-    mad
-    )
-else ()
-  list(
-    APPEND
-    SOX_OPTIONS
-    --without-mad
-    )  
-endif (BUILD_MAD)
 ExternalProject_Add(sox
  PREFIX ${CMAKE_CURRENT_BINARY_DIR}
  DEPENDS ${sox_depends}

--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
@@ -33,7 +33,6 @@ def _get_build(var, default=False):
 _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
-_BUILD_MAD = _get_build("BUILD_MAD", False)
 _BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
 _BUILD_RNNT = _get_build("BUILD_RNNT", True)
 _BUILD_CTC_DECODER = False if platform.system() == "Windows" else _get_build("BUILD_CTC_DECODER", True)
@@ -101,7 +100,6 @@ class CMakeBuild(build_ext):
            "-DCMAKE_VERBOSE_MAKEFILE=ON",
            f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
            f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
-            f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}",
            f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
            f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
            f"-DBUILD_CTC_DECODER:BOOL={'ON' if _BUILD_CTC_DECODER else 'OFF'}",

--- a/torchaudio/functional/functional.py
+++ b/torchaudio/functional/functional.py
@@ -1265,9 +1265,9 @@ def apply_codec(
        bytes, waveform, sample_rate, channels_first, compression, format, encoding, bits_per_sample
    )
    bytes.seek(0)
-    augmented, _ = torchaudio.sox_effects.sox_effects.apply_effects_file(
+    augmented, sr = torchaudio.backend.sox_io_backend.load(bytes, channels_first=channels_first, format=format)
-        bytes, effects=[["rate", f"{sample_rate}"]], channels_first=channels_first, format=format
+    if sr != sample_rate:
-    )
+        augmented = resample(augmented, sr, sample_rate)
    return augmented