Commit 5053aa7f authored by moto's avatar moto Committed by Facebook GitHub Bot
Browse files

Remove source for flashlight-text bundle (#3236)

Summary:
Following https://github.com/pytorch/audio/pull/3232, the static build of flashlight-text has been disabled and removed from the nightly build.

This commit removes the related sources and build configuration from the torchaudio code base.

Pull Request resolved: https://github.com/pytorch/audio/pull/3236

Reviewed By: jacobkahn

Differential Revision: D44712539

Pulled By: mthrok

fbshipit-source-id: a201c89b5046f224526309cd4e17a5105e58a949
parent ab40a3a3
...@@ -2,9 +2,3 @@ ...@@ -2,9 +2,3 @@
path = third_party/kaldi/submodule path = third_party/kaldi/submodule
url = https://github.com/kaldi-asr/kaldi url = https://github.com/kaldi-asr/kaldi
ignore = dirty ignore = dirty
[submodule "third_party/kenlm/submodule"]
path = third_party/kenlm/kenlm
url = https://github.com/kpu/kenlm
[submodule "flashlight-text"]
path = third_party/flashlight-text/submodule
url = https://github.com/flashlight/text
...@@ -56,7 +56,6 @@ option(BUILD_SOX "Build libsox statically" ON) ...@@ -56,7 +56,6 @@ option(BUILD_SOX "Build libsox statically" ON)
option(BUILD_KALDI "Build kaldi statically" ON) option(BUILD_KALDI "Build kaldi statically" ON)
option(BUILD_RIR "Enable RIR simulation" ON) option(BUILD_RIR "Enable RIR simulation" ON)
option(BUILD_RNNT "Enable RNN transducer" ON) option(BUILD_RNNT "Enable RNN transducer" ON)
option(BUILD_CTC_DECODER "Build Flashlight CTC decoder" ON)
option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF) option(BUILD_TORCHAUDIO_PYTHON_EXTENSION "Build Python extension" OFF)
option(USE_FFMPEG "Enable ffmpeg-based features" OFF) option(USE_FFMPEG "Enable ffmpeg-based features" OFF)
option(USE_CUDA "Enable CUDA support" OFF) option(USE_CUDA "Enable CUDA support" OFF)
......
# TorchAudio-specific build script for the flashlight-text decoder.
#
# How this differs from the CMakeLists shipped by upstream flashlight-text:
#
#   1. Compression libraries are built statically so that KenLM is self-contained.
#   2. KenLM is built without Boost, compiling only the parts flashlight-text uses.
#   3. KenLM and flashlight-text are built in one go
#      (not required, but a nice-to-have feature).
#   4. The bindings are placed where it's easier for the TorchAudio build process
#      to pick them up (the upstream CMakeLists.txt does not install them in the
#      same location as libflashlight-text).
#   5. The bindings are renamed to drop suffixes such as cpython-37m-darwin.

# Use default symbol visibility for the targets created below.
set(CMAKE_CXX_VISIBILITY_PRESET default)

# Needed to export symbols when building on Windows. This approach has some
# limitations, as documented in https://github.com/pytorch/pytorch/pull/3650
if(MSVC)
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()

# Every library source lives under this directory of the flashlight-text submodule.
set(fl_text_root submodule/flashlight/lib/text)

set(libflashlight_src
  # Decoders and language-model wrappers
  ${fl_text_root}/decoder/Utils.cpp
  ${fl_text_root}/decoder/lm/KenLM.cpp
  ${fl_text_root}/decoder/lm/ZeroLM.cpp
  ${fl_text_root}/decoder/lm/ConvLM.cpp
  ${fl_text_root}/decoder/LexiconDecoder.cpp
  ${fl_text_root}/decoder/LexiconFreeDecoder.cpp
  ${fl_text_root}/decoder/LexiconFreeSeq2SeqDecoder.cpp
  ${fl_text_root}/decoder/LexiconSeq2SeqDecoder.cpp
  ${fl_text_root}/decoder/Trie.cpp
  # String and dictionary helpers
  ${fl_text_root}/String.cpp
  ${fl_text_root}/dictionary/Utils.cpp
  ${fl_text_root}/dictionary/Dictionary.cpp
)

# torchaudio_library() is defined outside this file and takes positional
# arguments. NOTE(review): they appear to be name, sources, include directories,
# link libraries and compile definitions -- confirm against its definition.
torchaudio_library(
  libflashlight-text
  "${libflashlight_src}"
  "${CMAKE_CURRENT_SOURCE_DIR}/submodule"
  ""
  FL_TEXT_USE_KENLM
)

# KenLM is linked separately so that it can be a PRIVATE dependency.
# TODO: update torchaudio_library to handle private links
target_link_libraries(libflashlight-text PRIVATE kenlm)

# The Python bindings are only built when the Python extension is requested.
if(BUILD_TORCHAUDIO_PYTHON_EXTENSION)
  # Dictionary bindings; the last argument is left empty.
  torchaudio_extension(
    flashlight_lib_text_dictionary
    submodule/bindings/python/flashlight/lib/text/_dictionary.cpp
    submodule
    libflashlight-text
    ""
  )

  # Decoder bindings; FL_TEXT_USE_KENLM is passed as the last argument.
  torchaudio_extension(
    flashlight_lib_text_decoder
    submodule/bindings/python/flashlight/lib/text/_decoder.cpp
    submodule
    libflashlight-text
    FL_TEXT_USE_KENLM
  )
endif()
Subproject commit 98028c7da83d66c2aba6f5f8708c063d266ca5a4
# Static build of the subset of KenLM compiled into the `kenlm` target.
#
# KenLM uses std::binary_function, which had a backward-compatibility-breaking
# change in C++17. On Windows + GPU, torchaudio is compiled with C++17 globally,
# so C++14 is used for KenLM.
set(CMAKE_CXX_STANDARD 14)

# Sources from kenlm/util, including the bundled double-conversion code.
set(KENLM_UTIL_SOURCES
  kenlm/util/bit_packing.cc
  kenlm/util/double-conversion/bignum.cc
  kenlm/util/double-conversion/bignum-dtoa.cc
  kenlm/util/double-conversion/cached-powers.cc
  kenlm/util/double-conversion/diy-fp.cc
  kenlm/util/double-conversion/double-conversion.cc
  kenlm/util/double-conversion/fast-dtoa.cc
  kenlm/util/double-conversion/fixed-dtoa.cc
  kenlm/util/double-conversion/strtod.cc
  kenlm/util/ersatz_progress.cc
  kenlm/util/exception.cc
  kenlm/util/file.cc
  kenlm/util/file_piece.cc
  kenlm/util/float_to_string.cc
  kenlm/util/integer_to_string.cc
  kenlm/util/mmap.cc
  kenlm/util/murmur_hash.cc
  kenlm/util/pool.cc
  kenlm/util/read_compressed.cc
  kenlm/util/scoped.cc
  kenlm/util/spaces.cc
  kenlm/util/string_piece.cc
)

# Sources from kenlm/lm.
set(KENLM_SOURCES
  kenlm/lm/bhiksha.cc
  kenlm/lm/binary_format.cc
  kenlm/lm/config.cc
  kenlm/lm/lm_exception.cc
  kenlm/lm/model.cc
  kenlm/lm/quantize.cc
  kenlm/lm/read_arpa.cc
  kenlm/lm/search_hashed.cc
  kenlm/lm/search_trie.cc
  kenlm/lm/trie.cc
  kenlm/lm/trie_sort.cc
  kenlm/lm/value_build.cc
  kenlm/lm/virtual_interface.cc
  kenlm/lm/vocab.cc
)

add_library(kenlm STATIC ${KENLM_UTIL_SOURCES} ${KENLM_SOURCES})

# PUBLIC, so targets linking kenlm also get this directory on their include
# path; BEFORE prepends it instead of appending.
target_include_directories(kenlm BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")

# Baseline link dependencies and compile definitions.
set(kenlm_deps zlib bzip2)
set(kenlm_compiler_definitions KENLM_MAX_ORDER=6 HAVE_ZLIB HAVE_BZLIB)

if(MSVC)
  # To avoid warning C4003: not enough arguments for function-like macro
  # invocation 'max'
  list(APPEND kenlm_compiler_definitions NOMINMAX)
endif()

# lzma is linked, and HAVE_XZLIB defined, only when an `lzma` target exists.
if(TARGET lzma)
  list(APPEND kenlm_deps lzma)
  list(APPEND kenlm_compiler_definitions HAVE_XZLIB)
endif()

target_compile_definitions(kenlm PUBLIC ${kenlm_compiler_definitions})
target_link_libraries(kenlm PRIVATE ${kenlm_deps})
Subproject commit 5cea457db26950a73d638425c183b368c06ed7c6
...@@ -37,7 +37,6 @@ _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX" ...@@ -37,7 +37,6 @@ _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX"
_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True) _BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
_BUILD_RIR = _get_build("BUILD_RIR", True) _BUILD_RIR = _get_build("BUILD_RIR", True)
_BUILD_RNNT = _get_build("BUILD_RNNT", True) _BUILD_RNNT = _get_build("BUILD_RNNT", True)
_BUILD_CTC_DECODER = _get_build("BUILD_CTC_DECODER", False)
_USE_FFMPEG = _get_build("USE_FFMPEG", False) _USE_FFMPEG = _get_build("USE_FFMPEG", False)
_USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None) _USE_ROCM = _get_build("USE_ROCM", torch.backends.cuda.is_built() and torch.version.hip is not None)
_USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None) _USE_CUDA = _get_build("USE_CUDA", torch.backends.cuda.is_built() and torch.version.hip is None)
...@@ -57,14 +56,6 @@ def get_ext_modules(): ...@@ -57,14 +56,6 @@ def get_ext_modules():
Extension(name="torchaudio.lib._torchaudio_sox", sources=[]), Extension(name="torchaudio.lib._torchaudio_sox", sources=[]),
] ]
) )
if _BUILD_CTC_DECODER:
modules.extend(
[
Extension(name="torchaudio.lib.libflashlight-text", sources=[]),
Extension(name="torchaudio.lib.flashlight_lib_text_decoder", sources=[]),
Extension(name="torchaudio.lib.flashlight_lib_text_dictionary", sources=[]),
]
)
if _USE_FFMPEG: if _USE_FFMPEG:
modules.extend( modules.extend(
[ [
...@@ -119,7 +110,6 @@ class CMakeBuild(build_ext): ...@@ -119,7 +110,6 @@ class CMakeBuild(build_ext):
f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}", f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}", f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}",
f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}", f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
f"-DBUILD_CTC_DECODER:BOOL={'ON' if _BUILD_CTC_DECODER else 'OFF'}",
"-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON", "-DBUILD_TORCHAUDIO_PYTHON_EXTENSION:BOOL=ON",
f"-DUSE_ROCM:BOOL={'ON' if _USE_ROCM else 'OFF'}", f"-DUSE_ROCM:BOOL={'ON' if _USE_ROCM else 'OFF'}",
f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}", f"-DUSE_CUDA:BOOL={'ON' if _USE_CUDA else 'OFF'}",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment