"llm/vscode:/vscode.git/clone" did not exist on "b886bec3f93294ce37f09253ef7e669ab05eb949"
utils.h 3.61 KB
Newer Older
moto's avatar
moto committed
1
2
3
4
#ifndef TORCHAUDIO_SOX_UTILS_H
#define TORCHAUDIO_SOX_UTILS_H

#include <sox.h>
5
#include <torch/types.h>
moto's avatar
moto committed
6

Moto Hira's avatar
Moto Hira committed
7
namespace torchaudio::sox {
moto's avatar
moto committed
8

moto's avatar
moto committed
9
10
11
12
13
14
15
16
17
18
19
20
21
////////////////////////////////////////////////////////////////////////////////
// APIs for Python interaction
////////////////////////////////////////////////////////////////////////////////

// Setters for sox global options.
// NOTE(review): the definitions live outside this header; presumably each
// one writes the corresponding libsox global setting -- confirm there.

/// Set the sox global seed option.
void set_seed(int64_t seed);

/// Set the sox global verbosity option.
void set_verbosity(int64_t verbosity);

/// Set the sox global option controlling the use of threads.
void set_use_threads(bool use_threads);

/// Set the sox global buffer size option.
void set_buffer_size(int64_t buffer_size);

22
23
/// Get the sox buffer size option.
/// NOTE(review): presumably returns the value configured through
/// set_buffer_size(); the definition is outside this header -- confirm.
int64_t get_buffer_size();

moto's avatar
moto committed
24
25
/// List sox effects as a vector of string vectors.
/// NOTE(review): the meaning of the inner vector's elements (presumably one
/// inner vector per effect) is not visible from this header -- confirm
/// against the implementation.
std::vector<std::vector<std::string>> list_effects();

26
27
28
/// List format names for reading.
/// NOTE(review): presumably the formats the linked libsox can decode --
/// confirm against the implementation.
std::vector<std::string> list_read_formats();

/// List format names for writing.
/// NOTE(review): presumably the formats the linked libsox can encode --
/// confirm against the implementation.
std::vector<std::string> list_write_formats();
moto's avatar
moto committed
29
30
31
32
33

////////////////////////////////////////////////////////////////////////////////
// Utilities for sox_io / sox_effects implementations
////////////////////////////////////////////////////////////////////////////////

moto-meta's avatar
moto-meta committed
34
/// Set of effect names; declared here, defined in another translation unit.
/// NOTE(review): presumably the sox effects that the sox_effects
/// implementation refuses to apply -- confirm at the use sites.
extern const std::unordered_set<std::string> UNSUPPORTED_EFFECTS;
moto's avatar
moto committed
35

moto's avatar
moto committed
36
37
38
39
40
41
42
43
44
/// helper class to automatically close sox_format_t*
///
/// Wraps a raw `sox_format_t*`. All copy and move operations are deleted, so
/// an instance can neither be duplicated nor transferred.
/// NOTE(review): the member definitions are outside this header; presumably
/// the destructor releases the handle the same way close() does -- confirm.
struct SoxFormat {
  /// Wrap the given handle.
  explicit SoxFormat(sox_format_t* fd) noexcept;
  SoxFormat(const SoxFormat& other) = delete;
  SoxFormat(SoxFormat&& other) = delete;
  SoxFormat& operator=(const SoxFormat& other) = delete;
  SoxFormat& operator=(SoxFormat&& other) = delete;
  ~SoxFormat();
  /// Member access on the wrapped `sox_format_t`.
  sox_format_t* operator->() const noexcept;
  /// Implicit conversion to the wrapped pointer, so the object can be passed
  /// where a `sox_format_t*` is expected.
  operator sox_format_t*() const noexcept;

  /// Close the wrapped handle.
  /// NOTE(review): whether calling this more than once is safe is not
  /// visible from this declaration -- confirm in the implementation.
  void close();

 private:
  sox_format_t* fd_;
};

53
54
55
56
///
/// Verify that the input file is found, has a known encoding, and is not empty.
/// @param sf Wrapper around the sox format handle of the input.
/// @param path Path of the input file. NOTE(review): presumably only used
/// for error reporting -- confirm in the implementation.
/// NOTE(review): how a failed check is reported is not visible from this
/// declaration.
void validate_input_file(const SoxFormat& sf, const std::string& path);

57
58
///
/// Verify that input Tensor is 2D, CPU and either uin8, int16, int32 or float32
Moto Hira's avatar
Moto Hira committed
59
void validate_input_tensor(const torch::Tensor&);
60

moto's avatar
moto committed
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
///
/// Look up the target dtype for a given encoding and precision.
/// @param encoding sox encoding of the audio data.
/// @param precision Precision of the audio data. NOTE(review): presumably
/// expressed in bits per sample -- confirm.
/// @return The dtype matching the encoding/precision pair.
caffe2::TypeMeta get_dtype(sox_encoding_t encoding, unsigned precision);

///
/// Build a uint8/int16/int32/float32 Tensor from a sox_sample_t buffer.
/// NOTE: To reduce the number of memory copies, this function might modify
/// the values stored in the input buffer.
/// @param buffer Pointer to the buffer holding the audio data.
/// @param num_samples The number of samples to read.
/// @param num_channels The number of channels; used to reshape the resulting
/// Tensor.
/// @param dtype Target dtype. Together with normalization it determines the
/// output dtype and value range.
/// @param normalize Perform normalization. Only effective when dtype is not
/// kFloat32. When effective, the output tensor is kFloat32 type and value range
/// is [-1.0, 1.0]
/// @param channels_first When True, output Tensor has shape of [num_channels,
/// num_frames].
torch::Tensor convert_to_tensor(
    sox_sample_t* buffer,
    int32_t num_samples,
    int32_t num_channels,
    caffe2::TypeMeta dtype,
    bool normalize,
    bool channels_first);

90
/// Extract extension from file path
/// @param path File path to inspect.
/// @return The extension of `path`. NOTE(review): whether the result keeps
/// the leading dot or is case-normalized is not visible from this
/// declaration -- confirm in the implementation.
const std::string get_filetype(const std::string& path);
92
93
94

/// Get sox_signalinfo_t for passing a torch::Tensor object.
/// @param waveform Pointer to the Tensor the signal info describes.
/// @param sample_rate Sample rate of the waveform.
/// @param filetype File type. NOTE(review): presumably the value produced
/// by get_filetype() or a format name -- confirm at the call sites.
/// @param channels_first NOTE(review): presumably tells whether `waveform`
/// is laid out as [num_channels, num_frames] -- confirm.
sox_signalinfo_t get_signalinfo(
    const torch::Tensor* waveform,
    int64_t sample_rate,
    const std::string& filetype,
    bool channels_first);
99

100
101
/// Build the sox_encodinginfo_t used for Tensor I/O.
/// @param dtype dtype of the Tensor being read or written.
sox_encodinginfo_t get_tensor_encodinginfo(caffe2::TypeMeta dtype);
102

103
104
/// Get sox_encodinginfo_t for saving to file/file object
/// @param format Name of the output format.
/// @param dtype dtype of the Tensor being saved.
/// @param compression Optional compression setting. NOTE(review): its
/// meaning presumably depends on `format` -- confirm in the implementation.
/// @param encoding Optional encoding name.
/// @param bits_per_sample Optional bit depth.
/// NOTE(review): the behavior when an optional argument is empty is not
/// visible from this declaration.
sox_encodinginfo_t get_encodinginfo_for_save(
    const std::string& format,
    const caffe2::TypeMeta& dtype,
    const c10::optional<double>& compression,
    const c10::optional<std::string>& encoding,
    const c10::optional<int64_t>& bits_per_sample);
110

Moto Hira's avatar
Moto Hira committed
111
} // namespace torchaudio::sox
moto's avatar
moto committed
112
#endif