Commit a71e3a40 authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Add BUILD_MAD option and default to OFF (#2354)

Summary:
libmad integration should be enabled only when building from source

Pull Request resolved: https://github.com/pytorch/audio/pull/2354

Reviewed By: nateanl

Differential Revision: D36012035

Pulled By: mthrok

fbshipit-source-id: adeda8cbfd418f96245909cae6862b648a6915a7
parent 3cf7f264
......@@ -495,6 +495,7 @@ jobs:
name: Install torchaudio
command: .circleci/unittest/linux/scripts/install.sh
environment:
BUILD_MAD: true
BUILD_FFMPEG: true
- run:
name: Run tests
......@@ -528,7 +529,7 @@ jobs:
command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh
- run:
name: Install torchaudio
command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -e BUILD_MAD=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
- run:
name: Run tests
environment:
......@@ -641,6 +642,7 @@ jobs:
command: .circleci/unittest/linux/scripts/install.sh
environment:
BUILD_FFMPEG: true
BUILD_MAD: true
- run:
name: Run tests
command: .circleci/unittest/linux/scripts/run_test.sh
......
......@@ -495,6 +495,7 @@ jobs:
name: Install torchaudio
command: .circleci/unittest/linux/scripts/install.sh
environment:
BUILD_MAD: true
BUILD_FFMPEG: true
- run:
name: Run tests
......@@ -528,7 +529,7 @@ jobs:
command: docker run -t --gpus all -e PYTHON_VERSION -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/setup_env.sh
- run:
name: Install torchaudio
command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
command: docker run -t --gpus all -e UPLOAD_CHANNEL -e CONDA_CHANNEL_FLAGS -e BUILD_FFMPEG=1 -e BUILD_MAD=1 -v $PWD:$PWD -w $PWD "${image_name}" .circleci/unittest/linux/scripts/install.sh
- run:
name: Run tests
environment:
......@@ -641,6 +642,7 @@ jobs:
command: .circleci/unittest/linux/scripts/install.sh
environment:
BUILD_FFMPEG: true
BUILD_MAD: true
- run:
name: Run tests
command: .circleci/unittest/linux/scripts/run_test.sh
......
......@@ -57,6 +57,7 @@ endif()
# Options
option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_MAD "Enable libmad" OFF)
option(BUILD_FFMPEG "Enable ffmpeg-based features" OFF)
option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RNNT "Enable RNN transducer" ON)
......
......@@ -411,7 +411,6 @@ plot_specgram(waveform, sample_rate, title="Original")
configs = [
({"format": "wav", "encoding": "ULAW", "bits_per_sample": 8}, "8 bit mu-law"),
({"format": "gsm"}, "GSM-FR"),
({"format": "mp3", "compression": -9}, "MP3"),
({"format": "vorbis", "compression": -1}, "Vorbis"),
]
waveforms = []
......@@ -441,19 +440,12 @@ play_audio(waveforms[0], sample_rate)
play_audio(waveforms[1], sample_rate)
######################################################################
# MP3:
# ~~~~
#
play_audio(waveforms[2], sample_rate)
######################################################################
# Vorbis:
# ~~~~~~~
#
play_audio(waveforms[3], sample_rate)
play_audio(waveforms[2], sample_rate)
######################################################################
# Simulating a phone recording
......
......@@ -225,13 +225,16 @@ print(metadata)
# variable bit rate (such as MP3).
# - ``num_frames`` can be ``0`` for GSM-FR format.
#
metadata = torchaudio.info(SAMPLE_MP3_PATH)
print(metadata)
metadata = torchaudio.info(SAMPLE_GSM_PATH)
print(metadata)
# .. code::
#
# metadata = torchaudio.info(SAMPLE_MP3_PATH)
# print(metadata)
#
# metadata = torchaudio.info(SAMPLE_GSM_PATH)
# print(metadata)
#
# >>> AudioMetaData(sample_rate=44100, num_frames=110559, num_channels=2, bits_per_sample=0, encoding=MP3)
# >>> AudioMetaData(sample_rate=8000, num_frames=0, num_channels=1, bits_per_sample=0, encoding=GSM)
######################################################################
# Querying file-like object
......@@ -256,13 +259,18 @@ print(metadata)
# - Use argument ``format`` to specify the audio format of the input.
# - The returned metadata has ``num_frames = 0``
#
print("Source:", SAMPLE_MP3_URL)
with requests.get(SAMPLE_MP3_URL, stream=True) as response:
metadata = torchaudio.info(response.raw, format="mp3")
print(f"Fetched {response.raw.tell()} bytes.")
print(metadata)
# .. code::
#
# print("Source:", SAMPLE_MP3_URL)
# with requests.get(SAMPLE_MP3_URL, stream=True) as response:
# metadata = torchaudio.info(response.raw, format="mp3")
#
# print(f"Fetched {response.raw.tell()} bytes.")
# print(metadata)
#
# >>> Source: https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.mp3
# >>> Fetched 8192 bytes.
# >>> AudioMetaData(sample_rate=44100, num_frames=0, num_channels=2, bits_per_sample=0, encoding=MP3)
######################################################################
# Loading audio data into Tensor
......@@ -411,7 +419,6 @@ inspect_file(path)
waveform, sample_rate = get_sample(resample=8000)
formats = [
"mp3",
"flac",
"vorbis",
"sph",
......
......@@ -3,11 +3,8 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden")
################################################################################
# sox
################################################################################
add_library(libsox INTERFACE)
if (BUILD_SOX)
add_subdirectory(sox)
target_include_directories(libsox INTERFACE ${SOX_INCLUDE_DIR})
target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES})
endif()
################################################################################
......
......@@ -17,22 +17,24 @@ set(envs
"CFLAGS=-I${INSTALL_DIR}/include -fvisibility=hidden $ENV{CFLAGS}"
)
ExternalProject_Add(mad
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz
URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690
PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS}
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
if (BUILD_MAD)
ExternalProject_Add(mad
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://downloads.sourceforge.net/project/mad/libmad/0.15.1b/libmad-0.15.1b.tar.gz
URL_HASH SHA256=bbfac3ed6bfbc2823d3775ebb931087371e142bb0e9bb1bee51a76a6e0078690
PATCH_COMMAND patch < ${patch_dir}/libmad.patch && cp ${patch_dir}/config.guess ${patch_dir}/config.sub ${CMAKE_CURRENT_BINARY_DIR}/src/mad/
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${envs} ${CMAKE_CURRENT_BINARY_DIR}/src/mad/configure ${COMMON_ARGS}
DOWNLOAD_NO_PROGRESS ON
LOG_DOWNLOAD ON
LOG_UPDATE ON
LOG_CONFIGURE ON
LOG_BUILD ON
LOG_INSTALL ON
LOG_MERGED_STDOUTERR ON
LOG_OUTPUT_ON_FAILURE ON
)
endif (BUILD_MAD)
ExternalProject_Add(amr
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
......@@ -166,7 +168,6 @@ set(SOX_OPTIONS
--with-amrwb
--with-flac
--with-lame
--with-mad
--with-oggvorbis
--with-opus
--without-alsa
......@@ -190,7 +191,6 @@ set(SOX_LIBRARIES
${INSTALL_DIR}/lib/libsox.a
${INSTALL_DIR}/lib/libopencore-amrnb.a
${INSTALL_DIR}/lib/libopencore-amrwb.a
${INSTALL_DIR}/lib/libmad.a
${INSTALL_DIR}/lib/libmp3lame.a
${INSTALL_DIR}/lib/libFLAC.a
${INSTALL_DIR}/lib/libopusfile.a
......@@ -201,9 +201,37 @@ set(SOX_LIBRARIES
${INSTALL_DIR}/lib/libogg.a
)
set(sox_depends
ogg flac vorbis opusfile lame amr
)
if (BUILD_MAD)
list(
APPEND
SOX_OPTIONS
--with-mad
)
list(
APPEND
SOX_LIBRARIES
${INSTALL_DIR}/lib/libmad.a
)
list(
APPEND
sox_depends
mad
)
else ()
list(
APPEND
SOX_OPTIONS
--without-mad
)
endif (BUILD_MAD)
ExternalProject_Add(sox
PREFIX ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ogg flac vorbis opusfile lame mad amr
DEPENDS ${sox_depends}
DOWNLOAD_DIR ${ARCHIVE_DIR}
URL https://downloads.sourceforge.net/project/sox/sox/14.4.2/sox-14.4.2.tar.bz2
URL_HASH SHA256=81a6956d4330e75b5827316e44ae381e6f1e8928003c6aa45896da9041ea149c
......@@ -220,6 +248,7 @@ ExternalProject_Add(sox
LOG_OUTPUT_ON_FAILURE ON
)
add_library(libsox INTERFACE)
add_dependencies(libsox sox)
set(SOX_INCLUDE_DIR ${INSTALL_DIR}/include PARENT_SCOPE)
set(SOX_LIBRARIES ${SOX_LIBRARIES} PARENT_SCOPE)
target_include_directories(libsox INTERFACE ${INSTALL_DIR}/include)
target_link_libraries(libsox INTERFACE ${SOX_LIBRARIES})
......@@ -33,6 +33,7 @@ def _get_build(var, default=False):
_BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
_BUILD_MAD = _get_build("BUILD_MAD", False)
_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True)
_BUILD_CTC_DECODER = False if platform.system() == "Windows" else _get_build("BUILD_CTC_DECODER", True)
......@@ -95,6 +96,7 @@ class CMakeBuild(build_ext):
"-DCMAKE_VERBOSE_MAKEFILE=ON",
f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
f"-DBUILD_MAD:BOOL={'ON' if _BUILD_MAD else 'OFF'}",
f"-DBUILD_FFMPEG:BOOL={'ON' if _BUILD_FFMPEG else 'OFF'}",
f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment