Clean-up ComputeKaldiPitch residue (#3403)

Summary: Follow-up to https://github.com/pytorch/audio/pull/3368. Removes files and lines that are no longer used.
Pull Request resolved: https://github.com/pytorch/audio/pull/3403
Differential Revision: D46441462
Pulled By: mthrok
fbshipit-source-id: 11b881ec4b24fa0d625c6aee9f4bd91f637f9923

Clean-up ComputeKaldiPitch residue (#3403)
Summary: Follow-up to https://github.com/pytorch/audio/pull/3368. Removes files and lines that are no longer used.
Pull Request resolved: https://github.com/pytorch/audio/pull/3403
Differential Revision: D46441462
Pulled By: mthrok
fbshipit-source-id: 11b881ec4b24fa0d625c6aee9f4bd91f637f9923
c076d1a8 · moto · Facebook GitHub Bot · e9083571 · c076d1a8 · c076d1a8
Commit c076d1a8, authored Jun 05, 2023 by moto; committed by Facebook GitHub Bot on Jun 05, 2023
7 changed files
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -74,7 +74,6 @@ python setup.py develop
 Some environmnet variables that change the build behavior
 - `BUILD_SOX`: Deteremines whether build and bind libsox in non-Windows environments. (no effect in Windows as libsox integration is not available) Default value is 1 (build and bind). Use 0 for disabling it.
 - `USE_CUDA`: Determines whether build the custom CUDA kernel. Default to the availability of CUDA-compatible GPUs.
- `BUILD_KALDI`: Determines whether build Kaldi extension. This is required for `kaldi_pitch` function. Default value is 1 on Linux/macOS and 0 on Windows.
 - `BUILD_RNNT`: Determines whether build RNN-T loss function. Default value is 1.
 - `BUILD_CUDA_CTC_DECODER`: Determines whether build decoder features based on CUDA CTC decoder. Default value is 1. (`USE_CUDA` has to be 1.)


--- a/docs/source/functional.rst
+++ b/docs/source/functional.rst
@@ -80,7 +80,6 @@ Feature Extractions
   compute_deltas
   detect_pitch_frequency
   sliding_window_cmn
-   compute_kaldi_pitch
   spectral_centroid

 Multi-channel

--- a/tools/setup_helpers/extension.py
+++ b/tools/setup_helpers/extension.py
@@ -34,7 +34,6 @@ def _get_build(var, default=False):


 _BUILD_SOX = False if platform.system() == "Windows" else _get_build("BUILD_SOX", True)
-_BUILD_KALDI = False if platform.system() == "Windows" else _get_build("BUILD_KALDI", True)
 _BUILD_RIR = _get_build("BUILD_RIR", True)
 _BUILD_RNNT = _get_build("BUILD_RNNT", True)
 _USE_FFMPEG = _get_build("USE_FFMPEG", False)
@@ -117,7 +116,6 @@ class CMakeBuild(build_ext):
            "-DCMAKE_VERBOSE_MAKEFILE=ON",
            f"-DPython_INCLUDE_DIR={distutils.sysconfig.get_python_inc()}",
            f"-DBUILD_SOX:BOOL={'ON' if _BUILD_SOX else 'OFF'}",
-            f"-DBUILD_KALDI:BOOL={'ON' if _BUILD_KALDI else 'OFF'}",
            f"-DBUILD_RIR:BOOL={'ON' if _BUILD_RIR else 'OFF'}",
            f"-DBUILD_RNNT:BOOL={'ON' if _BUILD_RNNT else 'OFF'}",
            f"-DBUILD_ALIGN:BOOL={'ON' if _BUILD_ALIGN else 'OFF'}",

--- a/torchaudio/csrc/kaldi.cpp
+++ b/torchaudio/csrc/kaldi.cpp
-#include <torch/script.h>
-#include "feat/pitch-functions.h"
-
-namespace torchaudio {
-namespace kaldi {
-
-namespace {
-
-torch::Tensor denormalize(const torch::Tensor& t) {
-  auto ret = t;
-  auto pos = t > 0, neg = t < 0;
-  ret.index_put({pos}, t.index({pos}) * 32767);
-  ret.index_put({neg}, t.index({neg}) * 32768);
-  return ret;
-}
-
-torch::Tensor compute_kaldi_pitch(
-    const torch::Tensor& wave,
-    const ::kaldi::PitchExtractionOptions& opts) {
-  ::kaldi::VectorBase<::kaldi::BaseFloat> input(wave);
-  ::kaldi::Matrix<::kaldi::BaseFloat> output;
-  ::kaldi::ComputeKaldiPitch(opts, input, &output);
-  return output.tensor_;
-}
-
-} // namespace
-
-torch::Tensor ComputeKaldiPitch(
-    const torch::Tensor& wave,
-    double sample_frequency,
-    double frame_length,
-    double frame_shift,
-    double min_f0,
-    double max_f0,
-    double soft_min_f0,
-    double penalty_factor,
-    double lowpass_cutoff,
-    double resample_frequency,
-    double delta_pitch,
-    double nccf_ballast,
-    int64_t lowpass_filter_width,
-    int64_t upsample_filter_width,
-    int64_t max_frames_latency,
-    int64_t frames_per_chunk,
-    bool simulate_first_pass_online,
-    int64_t recompute_frame,
-    bool snip_edges) {
-  TORCH_CHECK(wave.ndimension() == 2, "Input tensor must be 2 dimentional.");
-  TORCH_CHECK(wave.device().is_cpu(), "Input tensor must be on CPU.");
-  TORCH_CHECK(
-      wave.dtype() == torch::kFloat32, "Input tensor must be float32 type.");
-
-  ::kaldi::PitchExtractionOptions opts;
-  opts.samp_freq = static_cast<::kaldi::BaseFloat>(sample_frequency);
-  opts.frame_shift_ms = static_cast<::kaldi::BaseFloat>(frame_shift);
-  opts.frame_length_ms = static_cast<::kaldi::BaseFloat>(frame_length);
-  opts.min_f0 = static_cast<::kaldi::BaseFloat>(min_f0);
-  opts.max_f0 = static_cast<::kaldi::BaseFloat>(max_f0);
-  opts.soft_min_f0 = static_cast<::kaldi::BaseFloat>(soft_min_f0);
-  opts.penalty_factor = static_cast<::kaldi::BaseFloat>(penalty_factor);
-  opts.lowpass_cutoff = static_cast<::kaldi::BaseFloat>(lowpass_cutoff);
-  opts.resample_freq = static_cast<::kaldi::BaseFloat>(resample_frequency);
-  opts.delta_pitch = static_cast<::kaldi::BaseFloat>(delta_pitch);
-  opts.lowpass_filter_width = static_cast<::kaldi::int32>(lowpass_filter_width);
-  opts.upsample_filter_width =
-      static_cast<::kaldi::int32>(upsample_filter_width);
-  opts.max_frames_latency = static_cast<::kaldi::int32>(max_frames_latency);
-  opts.frames_per_chunk = static_cast<::kaldi::int32>(frames_per_chunk);
-  opts.simulate_first_pass_online = simulate_first_pass_online;
-  opts.recompute_frame = static_cast<::kaldi::int32>(recompute_frame);
-  opts.snip_edges = snip_edges;
-
-  // Kaldi's float type expects value range of int16 expressed as float
-  torch::Tensor wave_ = denormalize(wave);
-
-  auto batch_size = wave_.size(0);
-  std::vector<torch::Tensor> results(batch_size);
-  at::parallel_for(0, batch_size, 1, [&](int64_t begin, int64_t end) {
-    for (auto i = begin; i < end; ++i) {
-      results[i] = compute_kaldi_pitch(wave_.index({i}), opts);
-    }
-  });
-  return torch::stack(results, 0);
-}
-
-TORCH_LIBRARY_FRAGMENT(torchaudio, m) {
-  m.def(
-      "torchaudio::kaldi_ComputeKaldiPitch",
-      &torchaudio::kaldi::ComputeKaldiPitch);
-}
-
-} // namespace kaldi
-} // namespace torchaudio
--- a/torchaudio/csrc/pybind/pybind.cpp
+++ b/torchaudio/csrc/pybind/pybind.cpp
@@ -5,7 +5,6 @@ namespace torchaudio {
 namespace {

 PYBIND11_MODULE(_torchaudio, m) {
-  m.def("is_kaldi_available", &is_kaldi_available, "");
  m.def("is_rir_available", &is_rir_available, "");
  m.def("is_align_available", &is_align_available, "");
  m.def("cuda_version", &cuda_version, "");

--- a/torchaudio/csrc/utils.cpp
+++ b/torchaudio/csrc/utils.cpp
@@ -7,14 +7,6 @@

 namespace torchaudio {

-bool is_kaldi_available() {
-#ifdef INCLUDE_KALDI
-  return true;
-#else
-  return false;
-#endif
-}
-
 bool is_rir_available() {
 #ifdef INCLUDE_RIR
  return true;

--- a/torchaudio/csrc/utils.h
+++ b/torchaudio/csrc/utils.h
@@ -2,7 +2,6 @@
 #include <torch/torch.h>

 namespace torchaudio {
-bool is_kaldi_available();
 bool is_rir_available();
 bool is_align_available();
 c10::optional<int64_t> cuda_version();