utils.h 3.68 KB
Newer Older
moto's avatar
moto committed
1
2
3
4
#ifndef TORCHAUDIO_SOX_UTILS_H
#define TORCHAUDIO_SOX_UTILS_H

#include <sox.h>
5
#include <torch/types.h>
moto's avatar
moto committed
6

Moto Hira's avatar
Moto Hira committed
7
namespace torchaudio::sox {
moto's avatar
moto committed
8

moto's avatar
moto committed
9
10
11
12
13
14
15
16
17
18
19
20
21
////////////////////////////////////////////////////////////////////////////////
// APIs for Python interaction
////////////////////////////////////////////////////////////////////////////////

/// Set sox global options
///
/// Set the seed option.
/// NOTE(review): presumably the seed of libsox's random number generator —
/// confirm against the implementation.
/// @param seed Seed value to apply.
void set_seed(const int64_t seed);

/// Set the verbosity option.
/// @param verbosity Verbosity level to apply. NOTE(review): the accepted
/// range is not visible in this header — confirm.
void set_verbosity(const int64_t verbosity);

/// Set the use-threads option.
/// @param use_threads Value to apply to the option.
void set_use_threads(const bool use_threads);

/// Set the buffer-size option.
/// @param buffer_size Buffer size to apply. NOTE(review): the unit (bytes vs.
/// samples) is not visible in this header — confirm.
void set_buffer_size(const int64_t buffer_size);

22
23
/// Get the buffer-size option.
/// NOTE(review): presumably returns the value last applied through
/// set_buffer_size — confirm against the implementation.
int64_t get_buffer_size();

moto's avatar
moto committed
24
25
/// List sox effects.
/// @return One inner vector of strings per listed entry. NOTE(review): the
/// layout of each inner vector (e.g. name followed by usage text) is not
/// visible in this header — confirm against the implementation.
std::vector<std::vector<std::string>> list_effects();

26
27
28
/// List formats for reading.
/// @return Format names as strings. NOTE(review): presumably the formats
/// libsox can decode — confirm against the implementation.
std::vector<std::string> list_read_formats();

/// List formats for writing.
/// @return Format names as strings. NOTE(review): presumably the formats
/// libsox can encode — confirm against the implementation.
std::vector<std::string> list_write_formats();
moto's avatar
moto committed
29
30
31
32
33
34
35
36

////////////////////////////////////////////////////////////////////////////////
// Utilities for sox_io / sox_effects implementations
////////////////////////////////////////////////////////////////////////////////

/// Names of sox effects that are treated as unsupported.
///
/// Declared `inline` so that every translation unit including this header
/// shares a single set; a plain namespace-scope `const` object has internal
/// linkage and would be constructed once per translation unit.
inline const std::unordered_set<std::string> UNSUPPORTED_EFFECTS =
    {"input", "output", "spectrogram", "noiseprof", "noisered", "splice"};

moto's avatar
moto committed
37
38
39
40
41
42
43
44
45
/// Helper class to automatically close sox_format_t*.
///
/// Copy and move construction/assignment are all deleted, so an instance can
/// be neither copied nor moved after it is constructed.
struct SoxFormat {
  /// Wrap the given handle.
  /// NOTE(review): presumably takes ownership of `fd`, and `fd` may be null
  /// when opening failed — confirm against the definition and its callers.
  explicit SoxFormat(sox_format_t* fd) noexcept;
  SoxFormat(const SoxFormat& other) = delete;
  SoxFormat(SoxFormat&& other) = delete;
  SoxFormat& operator=(const SoxFormat& other) = delete;
  SoxFormat& operator=(SoxFormat&& other) = delete;
  ~SoxFormat();

  /// Member access on the wrapped sox_format_t*.
  sox_format_t* operator->() const noexcept;

  /// Implicit conversion to sox_format_t*, so an instance can be passed
  /// where a raw sox_format_t* is expected.
  operator sox_format_t*() const noexcept;

  /// Close the wrapped handle explicitly.
  /// NOTE(review): presumably safe to call before destruction without a
  /// double close — confirm against the definition.
  void close();

 private:
  sox_format_t* fd_;
};

54
55
56
57
///
/// Verify that input file is found, has known encoding, and not empty
/// @param sf Handle for the input file to check.
/// @param path Path of the input file. NOTE(review): presumably used only to
/// build the error message — confirm against the implementation.
/// NOTE(review): how a failed check is reported (e.g. a thrown exception) is
/// not visible in this header — confirm.
void validate_input_file(const SoxFormat& sf, const std::string& path);

58
59
///
/// Verify that input Tensor is 2D, CPU and either uin8, int16, int32 or float32
Moto Hira's avatar
Moto Hira committed
60
void validate_input_tensor(const torch::Tensor&);
61

moto's avatar
moto committed
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
///
/// Get target dtype for the given encoding and precision.
/// @param encoding sox encoding of the audio data.
/// @param precision Precision of the audio data. NOTE(review): presumably
/// bits per sample — confirm against the implementation.
/// @return The dtype selected for the given encoding/precision pair.
/// NOTE(review): behavior for unsupported combinations is not visible in
/// this header — confirm.
caffe2::TypeMeta get_dtype(
    const sox_encoding_t encoding,
    const unsigned precision);

///
/// Convert sox_sample_t buffer to uint8/int16/int32/float32 Tensor
/// NOTE: This function might modify the values in the input buffer to
/// reduce the number of memory copies.
/// @param buffer Pointer to buffer that contains audio data.
/// @param num_samples The number of samples to read.
/// @param num_channels The number of channels. Used to reshape the resulting
/// Tensor.
/// @param dtype Target dtype. Determines the output dtype and value range in
/// conjunction with normalization.
/// @param normalize Perform normalization. Only effective when dtype is not
/// kFloat32. When effective, the output tensor is kFloat32 type and value range
/// is [-1.0, 1.0]
/// @param channels_first When True, output Tensor has shape of [num_channels,
/// num_frames].
/// @return Tensor holding the converted audio data.
torch::Tensor convert_to_tensor(
    sox_sample_t* buffer,
    const int32_t num_samples,
    const int32_t num_channels,
    const caffe2::TypeMeta dtype,
    const bool normalize,
    const bool channels_first);

91
/// Extract extension from file path
/// @param path File path to inspect.
/// @return The extension extracted from `path`. NOTE(review): whether the
/// leading dot is stripped and how a path without an extension is handled is
/// not visible in this header — confirm against the implementation.
const std::string get_filetype(const std::string& path);
93
94
95

/// Get sox_signalinfo_t for passing a torch::Tensor object.
/// @param waveform Pointer to the Tensor to describe. NOTE(review):
/// presumably must be non-null — confirm against the implementation.
/// @param sample_rate Sample rate of the audio.
/// @param filetype File type of the audio. NOTE(review): how it influences
/// the resulting signal info is not visible in this header — confirm.
/// @param channels_first NOTE(review): presumably indicates that `waveform`
/// is laid out as [num_channels, num_frames] — confirm.
/// @return Signal info describing the given waveform.
sox_signalinfo_t get_signalinfo(
    const torch::Tensor* waveform,
    const int64_t sample_rate,
    const std::string& filetype,
    const bool channels_first);
100

101
102
/// Get sox_encodinginfo_t for Tensor I/O
/// @param dtype Tensor dtype the encoding info is derived from.
/// @return Encoding info for the given dtype. NOTE(review): behavior for
/// unsupported dtypes is not visible in this header — confirm.
sox_encodinginfo_t get_tensor_encodinginfo(const caffe2::TypeMeta dtype);
103

104
105
/// Get sox_encodinginfo_t for saving to file/file object
/// @param format Name of the output format.
/// @param dtype dtype of the Tensor being saved.
/// @param compression Optional compression setting. NOTE(review): its
/// meaning presumably depends on `format` — confirm.
/// @param encoding Optional encoding name.
/// @param bits_per_sample Optional bit depth.
/// @return Encoding info to use for the save operation. NOTE(review): the
/// defaults applied when an optional is empty, and the handling of
/// unsupported combinations, are not visible in this header — confirm.
sox_encodinginfo_t get_encodinginfo_for_save(
    const std::string& format,
    const caffe2::TypeMeta& dtype,
    const c10::optional<double>& compression,
    const c10::optional<std::string>& encoding,
    const c10::optional<int64_t>& bits_per_sample);
111

Moto Hira's avatar
Moto Hira committed
112
} // namespace torchaudio::sox
moto's avatar
moto committed
113
#endif