effects.cpp 3.76 KB
Newer Older
1
2
3
#include <libtorchaudio/sox/effects.h>
#include <libtorchaudio/sox/effects_chain.h>
#include <libtorchaudio/sox/utils.h>
4
5
#include <sox.h>

Moto Hira's avatar
Moto Hira committed
6
namespace torchaudio::sox {
7
8
9
10
namespace {

/// Lifecycle of the libsox effects resource that initialize_sox_effects()
/// and shutdown_sox_effects() manage. The only transitions performed in this
/// file are NotInitialized -> Initialized -> ShutDown.
enum SoxEffectsResourceState { NotInitialized, Initialized, ShutDown };

/// Current lifecycle state; starts out as NotInitialized.
SoxEffectsResourceState SOX_RESOURCE_STATE = NotInitialized;

/// Locked by initialize_sox_effects() and shutdown_sox_effects() for the
/// whole time they inspect or update SOX_RESOURCE_STATE.
std::mutex SOX_RESOUCE_STATE_MUTEX;

} // namespace

void initialize_sox_effects() {
moto's avatar
moto committed
16
17
18
19
  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);

  switch (SOX_RESOURCE_STATE) {
    case NotInitialized:
20
21
      TORCH_CHECK(
          sox_init() == SOX_SUCCESS, "Failed to initialize sox effects.");
moto's avatar
moto committed
22
      SOX_RESOURCE_STATE = Initialized;
hwangjeff's avatar
hwangjeff committed
23
      break;
moto's avatar
moto committed
24
25
26
    case Initialized:
      break;
    case ShutDown:
27
28
      TORCH_CHECK(
          false, "SoX Effects has been shut down. Cannot initialize again.");
29
30
31
32
  }
};

void shutdown_sox_effects() {
moto's avatar
moto committed
33
34
35
36
  const std::lock_guard<std::mutex> lock(SOX_RESOUCE_STATE_MUTEX);

  switch (SOX_RESOURCE_STATE) {
    case NotInitialized:
37
      TORCH_CHECK(false, "SoX Effects is not initialized. Cannot shutdown.");
moto's avatar
moto committed
38
    case Initialized:
39
40
      TORCH_CHECK(
          sox_quit() == SOX_SUCCESS, "Failed to initialize sox effects.");
moto's avatar
moto committed
41
      SOX_RESOURCE_STATE = ShutDown;
hwangjeff's avatar
hwangjeff committed
42
      break;
moto's avatar
moto committed
43
44
    case ShutDown:
      break;
45
  }
moto's avatar
moto committed
46
47
}

hwangjeff's avatar
hwangjeff committed
48
auto apply_effects_tensor(
49
50
    torch::Tensor waveform,
    int64_t sample_rate,
hwangjeff's avatar
hwangjeff committed
51
52
    const std::vector<std::vector<std::string>>& effects,
    bool channels_first) -> std::tuple<torch::Tensor, int64_t> {
53
  validate_input_tensor(waveform);
moto's avatar
moto committed
54
55

  // Create SoxEffectsChain
56
  const auto dtype = waveform.dtype();
Moto Hira's avatar
Moto Hira committed
57
  SoxEffectsChain chain(
58
59
      /*input_encoding=*/get_tensor_encodinginfo(dtype),
      /*output_encoding=*/get_tensor_encodinginfo(dtype));
moto's avatar
moto committed
60
61
62

  // Prepare output buffer
  std::vector<sox_sample_t> out_buffer;
63
  out_buffer.reserve(waveform.numel());
moto's avatar
moto committed
64
65

  // Build and run effects chain
66
  chain.addInputTensor(&waveform, sample_rate, channels_first);
moto's avatar
moto committed
67
68
  for (const auto& effect : effects) {
    chain.addEffect(effect);
69
  }
moto's avatar
moto committed
70
71
72
73
74
75
76
77
78
  chain.addOutputBuffer(&out_buffer);
  chain.run();

  // Create tensor from buffer
  auto out_tensor = convert_to_tensor(
      /*buffer=*/out_buffer.data(),
      /*num_samples=*/out_buffer.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
hwangjeff's avatar
hwangjeff committed
79
      /*normalize=*/false,
moto's avatar
moto committed
80
81
      channels_first);

82
83
  return std::tuple<torch::Tensor, int64_t>(
      out_tensor, chain.getOutputSampleRate());
84
85
}

hwangjeff's avatar
hwangjeff committed
86
87
88
auto apply_effects_file(
    const std::string& path,
    const std::vector<std::vector<std::string>>& effects,
89
90
91
    std::optional<bool> normalize,
    std::optional<bool> channels_first,
    const std::optional<std::string>& format)
92
    -> std::tuple<torch::Tensor, int64_t> {
moto's avatar
moto committed
93
94
95
96
97
  // Open input file
  SoxFormat sf(sox_open_read(
      path.c_str(),
      /*signal=*/nullptr,
      /*encoding=*/nullptr,
98
      /*filetype=*/format.has_value() ? format.value().c_str() : nullptr));
moto's avatar
moto committed
99

100
  validate_input_file(sf, path);
moto's avatar
moto committed
101
102
103
104
105
106
107
108

  const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);

  // Prepare output
  std::vector<sox_sample_t> out_buffer;
  out_buffer.reserve(sf->signal.length);

  // Create and run SoxEffectsChain
Moto Hira's avatar
Moto Hira committed
109
  SoxEffectsChain chain(
moto's avatar
moto committed
110
      /*input_encoding=*/sf->encoding,
111
      /*output_encoding=*/get_tensor_encodinginfo(dtype));
moto's avatar
moto committed
112
113
114
115

  chain.addInputFile(sf);
  for (const auto& effect : effects) {
    chain.addEffect(effect);
116
  }
moto's avatar
moto committed
117
118
119
120
  chain.addOutputBuffer(&out_buffer);
  chain.run();

  // Create tensor from buffer
121
  bool channels_first_ = channels_first.value_or(true);
moto's avatar
moto committed
122
123
124
125
126
  auto tensor = convert_to_tensor(
      /*buffer=*/out_buffer.data(),
      /*num_samples=*/out_buffer.size(),
      /*num_channels=*/chain.getOutputNumChannels(),
      dtype,
127
128
      normalize.value_or(true),
      channels_first_);
moto's avatar
moto committed
129

130
131
  return std::tuple<torch::Tensor, int64_t>(
      tensor, chain.getOutputSampleRate());
132
}
Moto Hira's avatar
Moto Hira committed
133
} // namespace torchaudio::sox