Commit a71e3a40 authored by moto, committed by Facebook GitHub Bot
Browse files

Add BUILD_MAD option and default to OFF (#2354)

Summary:
libmad integration should be enabled only when building from source, so the new BUILD_MAD option defaults to OFF.

Pull Request resolved: https://github.com/pytorch/audio/pull/2354

Reviewed By: nateanl

Differential Revision: D36012035

Pulled By: mthrok

fbshipit-source-id: adeda8cbfd418f96245909cae6862b648a6915a7
parent 3cf7f264
...@@ -495,6 +495,7 @@ jobs: ...@@ -495,6 +495,7 @@ jobs:
name: Install torchaudio name: Install torchaudio
command: .circleci/unittest/linux/scripts/install.sh command: .circleci/unittest/linux/scripts/install.sh
environment: environment:
BUILD_MAD: true
BUILD_FFMPEG: true BUILD_FFMPEG: true
- run: - run:
name: Run tests name: Run tests
...@@ -528,7 +529,7 @@ jobs: ...@@ -528,7 +529,7 @@ jobs:
command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh
- run: - run:
name: Install torchaudio name: Install torchaudio
command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -e BUILD_MAD=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
- run: - run:
name: Run tests name: Run tests
environment: environment:
...@@ -641,6 +642,7 @@ jobs: ...@@ -641,6 +642,7 @@ jobs:
command: .circleci/unittest/linux/scripts/install.sh command: .circleci/unittest/linux/scripts/install.sh
environment: environment:
BUILD_FFMPEG: true BUILD_FFMPEG: true
BUILD_MAD: true
- run: - run:
name: Run tests name: Run tests
command: .circleci/unittest/linux/scripts/run_test.sh command: .circleci/unittest/linux/scripts/run_test.sh
......
...@@ -495,6 +495,7 @@ jobs: ...@@ -495,6 +495,7 @@ jobs:
name: Install torchaudio name: Install torchaudio
command: .circleci/unittest/linux/scripts/install.sh command: .circleci/unittest/linux/scripts/install.sh
environment: environment:
BUILD_MAD: true
BUILD_FFMPEG: true BUILD_FFMPEG: true
- run: - run:
name: Run tests name: Run tests
...@@ -528,7 +529,7 @@ jobs: ...@@ -528,7 +529,7 @@ jobs:
command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh
- run: - run:
name: Install torchaudio name: Install torchaudio
command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -e BUILD_MAD=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
- run: - run:
name: Run tests name: Run tests
environment: environment:
...@@ -641,6 +642,7 @@ jobs: ...@@ -641,6 +642,7 @@ jobs:
command: .circleci/unittest/linux/scripts/install.sh command: .circleci/unittest/linux/scripts/install.sh
environment: environment:
BUILD_FFMPEG: true BUILD_FFMPEG: true
BUILD_MAD: true
- run: - run:
name: Run tests name: Run tests
command: .circleci/unittest/linux/scripts/run_test.sh command: .circleci/unittest/linux/scripts/run_test.sh
......
...@@ -57,6 +57,7 @@ endif() ...@@ -57,6 +57,7 @@ endif()
# Options # Options
option(BUILD_SOX "Build libsox statically" ON) option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_MAD "Enable libmad" OFF)
option(BUILD_FFMPEG "Enable ffmpeg-based features" OFF) option(BUILD_FFMPEG "Enable ffmpeg-based features" OFF)
option(BUILD_KALDI "Build kaldi statically" ON) option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RNNT "Enable RNN transducer" ON) option(BUILD_RNNT "Enable RNN transducer" ON)
......
...@@ -411,7 +411,6 @@ plot_specgram(waveform, sample_rate, title="Original") ...@@ -411,7 +411,6 @@ plot_specgram(waveform, sample_rate, title="Original")
configs = [ configs = [
({"format": "wav", "encoding": "ULAW", "bits_per_sample": 8}, "8 bit mu-law"), ({"format": "wav", "encoding": "ULAW", "bits_per_sample": 8}, "8 bit mu-law"),
({"format": "gsm"}, "GSM-FR"), ({"format": "gsm"}, "GSM-FR"),
({"format": "mp3", "compression": -9}, "MP3"),
({"format": "vorbis", "compression": -1}, "Vorbis"), ({"format": "vorbis", "compression": -1}, "Vorbis"),
] ]
waveforms = [] waveforms = []
...@@ -441,19 +440,12 @@ play_audio(waveforms[0], sample_rate) ...@@ -441,19 +440,12 @@ play_audio(waveforms[0], sample_rate)
play_audio(waveforms[1], sample_rate) play_audio(waveforms[1], sample_rate)
######################################################################
# MP3:
# ~~~~
#
play_audio(waveforms[2], sample_rate)
###################################################################### ######################################################################
# Vorbis: # Vorbis:
# ~~~~~~~ # ~~~~~~~
# #
play_audio(waveforms[3], sample_rate) play_audio(waveforms[2], sample_rate)
###################################################################### ######################################################################
# Simulating a phone recording # Simulating a phone recording
......
...@@ -225,13 +225,16 @@ print(metadata) ...@@ -225,13 +225,16 @@ print(metadata)
# variable bit rate (such as MP3). # variable bit rate (such as MP3).
# - ``num_frames`` can be ``0`` for GSM-FR format. # - ``num_frames`` can be ``0`` for GSM-FR format.
# #
# .. code::
metadata = torchaudio.info(SAMPLE_MP3_PATH) #
print(metadata) # metadata = torchaudio.info(SAMPLE_MP3_PATH)
# print(metadata)
metadata = torchaudio.info(SAMPLE_GSM_PATH) #
print(metadata) # metadata = torchaudio.info(SAMPLE_GSM_PATH)
# print(metadata)
#
# >>> AudioMetaData(sample_rate=44100, num_frames=110559, num_channels=2, bits_per_sample=0, encoding=MP3)
# >>> AudioMetaData(sample_rate=8000, num_frames=0, num_channels=1, bits_per_sample=0, encoding=GSM)
###################################################################### ######################################################################
# Querying file-like object # Querying file-like object
...@@ -256,13 +259,18 @@ print(metadata) ...@@ -256,13 +259,18 @@ print(metadata)
# - Use argument ``format`` to specify the audio format of the input. # - Use argument ``format`` to specify the audio format of the input.
# - The returned metadata has ``num_frames = 0`` # - The returned metadata has ``num_frames = 0``
# #
# .. code::
print("Source:", SAMPLE_MP3_URL) #
with requests.get(SAMPLE_MP3_URL, stream=True) as response: # print("Source:", SAMPLE_MP3_URL)
metadata = torchaudio.info(response.raw, format="mp3") # with requests.get(SAMPLE_MP3_URL, stream=True) as response:
# metadata = torchaudio.info(response.raw, format="mp3")
print(f"Fetched {response.raw.tell()} bytes.") #
print(metadata) # print(f"Fetched {response.raw.tell()} bytes.")
# print(metadata)
#
# >>> Source: https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.mp3
# >>> Fetched 8192 bytes.
# >>> AudioMetaData(sample_rate=44100, num_frames=0, num_channels=2, bits_per_sample=0, encoding=MP3)
###################################################################### ######################################################################
# Loading audio data into Tensor # Loading audio data into Tensor
...@@ -411,7 +419,6 @@ inspect_file(path) ...@@ -411,7 +419,6 @@ inspect_file(path)
waveform, sample_rate = get_sample(resample=8000) waveform, sample_rate = get_sample(resample=8000)
formats = [ formats = [
"mp3",
"flac", "flac",
"vorbis", "vorbis",
"sph", "sph",
......
...@@ -3,11 +3,8 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") ...@@ -3,11 +3,8 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
################################################################################ ################################################################################
# sox # sox
################################################################################ ################################################################################
add_library(libsox INTERFACE)
if (BUILD_SOX) if (BUILD_SOX)
add_subdirectory(sox) add_subdirectory(sox)
target_include_directories(libsox INTERFACE ${SOX_INCLUDE_DIR})
target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES})
endif() endif()
################################################################################ ################################################################################
......
...@@ -17,22 +17,24 @@ set(envs ...@@ -17,22 +17,24 @@ set(envs
"CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}" "CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
) )
ExternalProject_Add(mad if (BUILD_MAD)
PREFIX ${CMAKE_CURRENT_BINARY_DIR} ExternalProject_Add(mad
DOWNLOAD_DIR ${ARCHIVE_DIR} PREFIX ${CMAKE_CURRENT_BINARY_DIR}
URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz DOWNLOAD_DIR ${ARCHIVE_DIR}
URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690 URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz
PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/ URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS} PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/
DOWNLOAD_NO_PROGRESS ON CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS}
LOG_DOWNLOAD ON DOWNLOAD_NO_PROGRESS ON
LOG_UPDATE ON LOG_DOWNLOAD ON
LOG_CONFIGURE ON LOG_UPDATE ON
LOG_BUILD ON LOG_CONFIGURE ON
LOG_INSTALL ON LOG_BUILD ON
LOG_MERGED_STDOUTERR ON LOG_INSTALL ON
LOG_OUTPUT_ON_FAILURE ON LOG_MERGED_STDOUTERR ON
) LOG_OUTPUT_ON_FAILURE ON
)
endif (BUILD_MAD)
ExternalProject_Add(amr ExternalProject_Add(amr
PREFIX ${CMAKE_CURRENT_BINARY_DIR} PREFIX ${CMAKE_CURRENT_BINARY_DIR}
...@@ -166,7 +168,6 @@ set(SOX_OPTIONS ...@@ -166,7 +168,6 @@ set(SOX_OPTIONS
--with-amrwb --with-amrwb
--with-flac --with-flac
--with-lame --with-lame
--with-mad
--with-oggvorbis --with-oggvorbis
--with-opus --with-opus
--without-alsa --without-alsa
...@@ -190,7 +191,6 @@ set(SOX_LIBRARIES ...@@ -190,7 +191,6 @@ set(SOX_LIBRARIES
${INSTALL_DIR}/lib/libsox.a ${INSTALL_DIR}/lib/libsox.a
${INSTALL_DIR}/lib/libopencore-amrnb.a ${INSTALL_DIR}/lib/libopencore-amrnb.a
${INSTALL_DIR}/lib/libopencore-amrwb.a ${INSTALL_DIR}/lib/libopencore-amrwb.a
${INSTALL_DIR}/lib/libmad.a
${INSTALL_DIR}/lib/libmp3lame.a ${INSTALL_DIR}/lib/libmp3lame.a
${INSTALL_DIR}/lib/libFLAC.a ${INSTALL_DIR}/lib/libFLAC.a
${INSTALL_DIR}/lib/libopusfile.a ${INSTALL_DIR}/lib/libopusfile.a
...@@ -201,9 +201,37 @@ set(SOX_LIBRARIES ...@@ -201,9 +201,37 @@ set(SOX_LIBRARIES
${INSTALL_DIR}/lib/libogg.a ${INSTALL_DIR}/lib/libogg.a
) )
set(sox_depends
ogg flac vorbis opusfile lame amr
)
if (BUILD_MAD)
list(
APPEND
SOX_OPTIONS
--with-mad
)
list(
APPEND
SOX_LIBRARIES
${INSTALL_DIR}/lib/libmad.a
)
list(
APPEND
sox_depends
mad
)
else ()
list(
APPEND
SOX_OPTIONS
--without-mad
)
endif (BUILD_MAD)
ExternalProject_Add(sox ExternalProject_Add(sox
PREFIX ${CMAKE_CURRENT_BINARY_DIR} PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ogg flac vorbis opusfile lame mad amr DEPENDS ${sox_depends}
DOWNLOAD_DIR ${ARCHIVE_DIR} DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2 URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
...@@ -220,6 +248,7 @@ ExternalProject_Add(sox ...@@ -220,6 +248,7 @@ ExternalProject_Add(sox
LOG_OUTPUT_ON_FAILURE ON LOG_OUTPUT_ON_FAILURE ON
) )
add_library(libsox INTERFACE)
add_dependencies(libsox sox) add_dependencies(libsox sox)
set(SOX_INCLUDE_DIR ${INSTALL_DIR}/include PARENT_SCOPE) target_include_directories(libsox INTERFACE ${INSTALL_DIR}/include)
set(SOX_LIBRARIES ${SOX_LIBRARIES} PARENT_SCOPE) target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES})
...@@ -33,6 +33,7 @@ def _get_build(var, default=False): ...@@ -33,6 +33,7 @@ def _get_build(var, default=False):
_BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True) _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
_BUILD_MAD = _get_build("BUILD_MAD", False)
_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True) _BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True) _BUILD_RNNT = _get_build("BUILD_RNNT", True)
_BUILD_CTC_DECODER = False if platform.system() == "Windows" else _get_build("BUILD_CTC_DECODER", True) _BUILD_CTC_DECODER = False if platform.system() == "Windows" else _get_build("BUILD_CTC_DECODER", True)
...@@ -95,6 +96,7 @@ class CMakeBuild(build_ext): ...@@ -95,6 +96,7 @@ class CMakeBuild(build_ext):
"-DCMAKE_VERBOSE_MAKEFILE=ON", "-DCMAKE_VERBOSE_MAKEFILE=ON",
f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}", f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}", f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}",
f"-DBUILD_FFMPEG:BOOL={'ON' if _BUILD_FFMPEG else 'OFF'}", f"-DBUILD_FFMPEG:BOOL={'ON' if _BUILD_FFMPEG else 'OFF'}",
f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}", f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}", f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment