Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
08f188b2
Unverified
Commit
08f188b2
authored
Feb 02, 2021
by
Prabhat Roy
Committed by
GitHub
Feb 02, 2021
Browse files
Restructure C++ code to allow per file registration of custom ops (#1221)
Co-authored-by:
Prabhat Roy
<
prabhatroy@fb.com
>
parent
4608a5b2
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
122 additions
and
173 deletions
+122
-173
torchaudio/__init__.py
torchaudio/__init__.py
+1
-1
torchaudio/backend/sox_io_backend.py
torchaudio/backend/sox_io_backend.py
+2
-5
torchaudio/csrc/sox/effects.cpp
torchaudio/csrc/sox/effects.cpp
+29
-14
torchaudio/csrc/sox/effects.h
torchaudio/csrc/sox/effects.h
+6
-4
torchaudio/csrc/sox/effects_chain.cpp
torchaudio/csrc/sox/effects_chain.cpp
+15
-9
torchaudio/csrc/sox/effects_chain.h
torchaudio/csrc/sox/effects_chain.h
+4
-1
torchaudio/csrc/sox/io.cpp
torchaudio/csrc/sox/io.cpp
+26
-7
torchaudio/csrc/sox/io.h
torchaudio/csrc/sox/io.h
+1
-1
torchaudio/csrc/sox/register.cpp
torchaudio/csrc/sox/register.cpp
+0
-81
torchaudio/csrc/sox/utils.cpp
torchaudio/csrc/sox/utils.cpp
+30
-25
torchaudio/csrc/sox/utils.h
torchaudio/csrc/sox/utils.h
+4
-19
torchaudio/sox_effects/sox_effects.py
torchaudio/sox_effects/sox_effects.py
+4
-6
No files found.
torchaudio/__init__.py
View file @
08f188b2
...
...
@@ -6,7 +6,7 @@ from torchaudio import (
kaldi_io
,
utils
,
sox_effects
,
transforms
transforms
,
)
USE_SOUNDFILE_LEGACY_INTERFACE
=
None
...
...
torchaudio/backend/sox_io_backend.py
View file @
08f188b2
...
...
@@ -162,12 +162,9 @@ def load(
if
hasattr
(
filepath
,
'read'
):
return
torchaudio
.
_torchaudio
.
load_audio_fileobj
(
filepath
,
frame_offset
,
num_frames
,
normalize
,
channels_first
,
format
)
signal
=
torch
.
ops
.
torchaudio
.
sox_io_load_audio_file
(
os
.
fspath
(
filepath
),
frame_offset
,
num_frames
,
normalize
,
channels_first
,
format
)
return
signal
.
get_tensor
(),
signal
.
get_sample_rate
()
signal
=
torch
.
ops
.
torchaudio
.
sox_io_load_audio_file
(
filepath
=
os
.
fspath
(
filepath
)
return
torch
.
ops
.
torchaudio
.
sox_io_load_audio_file
(
filepath
,
frame_offset
,
num_frames
,
normalize
,
channels_first
,
format
)
return
signal
.
get_tensor
(),
signal
.
get_sample_rate
()
@
torch
.
jit
.
unused
...
...
torchaudio/csrc/sox/effects.cpp
View file @
08f188b2
...
...
@@ -50,24 +50,25 @@ void shutdown_sox_effects() {
}
}
c10
::
intrusive_ptr
<
TensorSignal
>
apply_effects_tensor
(
const
c10
::
intrusive_ptr
<
TensorSignal
>&
input_signal
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
)
{
auto
in_tensor
=
input_signal
->
getTensor
();
validate_input_tensor
(
in_tensor
);
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
apply_effects_tensor
(
torch
::
Tensor
waveform
,
int64_t
sample_rate
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
,
bool
channels_first
)
{
validate_input_tensor
(
waveform
);
// Create SoxEffectsChain
const
auto
dtype
=
in_tens
or
.
dtype
();
const
auto
dtype
=
wavef
or
m
.
dtype
();
torchaudio
::
sox_effects_chain
::
SoxEffectsChain
chain
(
/*input_encoding=*/
get_encodinginfo
(
"wav"
,
dtype
),
/*output_encoding=*/
get_encodinginfo
(
"wav"
,
dtype
));
// Prepare output buffer
std
::
vector
<
sox_sample_t
>
out_buffer
;
out_buffer
.
reserve
(
in_tens
or
.
numel
());
out_buffer
.
reserve
(
wavef
or
m
.
numel
());
// Build and run effects chain
chain
.
addInputTensor
(
input_signal
.
get
()
);
chain
.
addInputTensor
(
&
waveform
,
sample_rate
,
channels_first
);
for
(
const
auto
&
effect
:
effects
)
{
chain
.
addEffect
(
effect
);
}
...
...
@@ -75,7 +76,6 @@ c10::intrusive_ptr<TensorSignal> apply_effects_tensor(
chain
.
run
();
// Create tensor from buffer
const
auto
channels_first
=
input_signal
->
getChannelsFirst
();
auto
out_tensor
=
convert_to_tensor
(
/*buffer=*/
out_buffer
.
data
(),
/*num_samples=*/
out_buffer
.
size
(),
...
...
@@ -84,11 +84,11 @@ c10::intrusive_ptr<TensorSignal> apply_effects_tensor(
/*normalize=*/
false
,
channels_first
);
return
c10
::
make_intrusive
<
TensorSignal
>
(
out_tensor
,
chain
.
getOutputSampleRate
()
,
channels_first
);
return
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
(
out_tensor
,
chain
.
getOutputSampleRate
());
}
c10
::
intrusive_ptr
<
TensorSignal
>
apply_effects_file
(
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
apply_effects_file
(
const
std
::
string
path
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
,
c10
::
optional
<
bool
>&
normalize
,
...
...
@@ -131,8 +131,8 @@ c10::intrusive_ptr<TensorSignal> apply_effects_file(
normalize
.
value_or
(
true
),
channels_first_
);
return
c10
::
make_intrusive
<
TensorSignal
>
(
tensor
,
chain
.
getOutputSampleRate
()
,
channels_first_
);
return
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
(
tensor
,
chain
.
getOutputSampleRate
());
}
#ifdef TORCH_API_INCLUDE_EXTENSION_H
...
...
@@ -238,5 +238,20 @@ std::tuple<torch::Tensor, int64_t> apply_effects_fileobj(
#endif // TORCH_API_INCLUDE_EXTENSION_H
TORCH_LIBRARY_FRAGMENT
(
torchaudio
,
m
)
{
m
.
def
(
"torchaudio::sox_effects_initialize_sox_effects"
,
&
torchaudio
::
sox_effects
::
initialize_sox_effects
);
m
.
def
(
"torchaudio::sox_effects_shutdown_sox_effects"
,
&
torchaudio
::
sox_effects
::
shutdown_sox_effects
);
m
.
def
(
"torchaudio::sox_effects_apply_effects_tensor"
,
&
torchaudio
::
sox_effects
::
apply_effects_tensor
);
m
.
def
(
"torchaudio::sox_effects_apply_effects_file"
,
&
torchaudio
::
sox_effects
::
apply_effects_file
);
}
}
// namespace sox_effects
}
// namespace torchaudio
torchaudio/csrc/sox/effects.h
View file @
08f188b2
...
...
@@ -15,11 +15,13 @@ void initialize_sox_effects();
void
shutdown_sox_effects
();
c10
::
intrusive_ptr
<
torchaudio
::
sox_utils
::
TensorSignal
>
apply_effects_tensor
(
const
c10
::
intrusive_ptr
<
torchaudio
::
sox_utils
::
TensorSignal
>&
input_signal
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
);
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
apply_effects_tensor
(
torch
::
Tensor
waveform
,
int64_t
sample_rate
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
,
bool
channels_first
);
c10
::
intrusive_ptr
<
torchaudio
::
sox_utils
::
TensorSignal
>
apply_effects_file
(
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
apply_effects_file
(
const
std
::
string
path
,
std
::
vector
<
std
::
vector
<
std
::
string
>>
effects
,
c10
::
optional
<
bool
>&
normalize
,
...
...
torchaudio/csrc/sox/effects_chain.cpp
View file @
08f188b2
...
...
@@ -36,12 +36,14 @@ struct SoxEffect {
/// helper classes for passing the location of input tensor and output buffer
///
/// drain/flow callback functions require plain C style function signature and
/// the way to pass extra data is to attach data to sox_
f
ffect_t::priv pointer.
/// The following structs will be assigned to sox_
f
ffect_t::priv pointer which
/// the way to pass extra data is to attach data to sox_
e
ffect_t::priv pointer.
/// The following structs will be assigned to sox_
e
ffect_t::priv pointer which
/// gives sox_effect_t an access to input Tensor and output buffer object.
struct
TensorInputPriv
{
size_t
index
;
TensorSignal
*
signal
;
torch
::
Tensor
*
waveform
;
int64_t
sample_rate
;
bool
channels_first
;
};
struct
TensorOutputPriv
{
std
::
vector
<
sox_sample_t
>*
buffer
;
...
...
@@ -55,8 +57,7 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
// Retrieve the input Tensor and current index
auto
priv
=
static_cast
<
TensorInputPriv
*>
(
effp
->
priv
);
auto
index
=
priv
->
index
;
auto
signal
=
priv
->
signal
;
auto
tensor
=
signal
->
getTensor
();
auto
tensor
=
*
(
priv
->
waveform
);
auto
num_channels
=
effp
->
out_signal
.
channels
;
// Adjust the number of samples to read
...
...
@@ -71,7 +72,7 @@ int tensor_input_drain(sox_effect_t* effp, sox_sample_t* obuf, size_t* osamp) {
const
auto
tensor_
=
[
&
]()
{
auto
i_frame
=
index
/
num_channels
;
auto
num_frames
=
*
osamp
/
num_channels
;
auto
t
=
(
signal
->
getC
hannels
F
irst
()
)
auto
t
=
(
priv
->
c
hannels
_f
irst
)
?
tensor
.
index
({
Slice
(),
Slice
(
i_frame
,
i_frame
+
num_frames
)}).
t
()
:
tensor
.
index
({
Slice
(
i_frame
,
i_frame
+
num_frames
),
Slice
()});
return
unnormalize_wav
(
t
.
reshape
({
-
1
})).
contiguous
();
...
...
@@ -193,13 +194,18 @@ void SoxEffectsChain::run() {
sox_flow_effects
(
sec_
,
NULL
,
NULL
);
}
void
SoxEffectsChain
::
addInputTensor
(
TensorSignal
*
signal
)
{
in_sig_
=
get_signalinfo
(
signal
,
"wav"
);
void
SoxEffectsChain
::
addInputTensor
(
torch
::
Tensor
*
waveform
,
int64_t
sample_rate
,
bool
channels_first
)
{
in_sig_
=
get_signalinfo
(
waveform
,
sample_rate
,
"wav"
,
channels_first
);
interm_sig_
=
in_sig_
;
SoxEffect
e
(
sox_create_effect
(
get_tensor_input_handler
()));
auto
priv
=
static_cast
<
TensorInputPriv
*>
(
e
->
priv
);
priv
->
signal
=
signal
;
priv
->
index
=
0
;
priv
->
waveform
=
waveform
;
priv
->
sample_rate
=
sample_rate
;
priv
->
channels_first
=
channels_first
;
if
(
sox_add_effect
(
sec_
,
e
,
&
interm_sig_
,
&
in_sig_
)
!=
SOX_SUCCESS
)
{
throw
std
::
runtime_error
(
"Internal Error: Failed to add effect: input_tensor"
);
...
...
torchaudio/csrc/sox/effects_chain.h
View file @
08f188b2
...
...
@@ -30,7 +30,10 @@ class SoxEffectsChain {
SoxEffectsChain
&
operator
=
(
SoxEffectsChain
&&
other
)
=
delete
;
~
SoxEffectsChain
();
void
run
();
void
addInputTensor
(
torchaudio
::
sox_utils
::
TensorSignal
*
signal
);
void
addInputTensor
(
torch
::
Tensor
*
waveform
,
int64_t
sample_rate
,
bool
channels_first
);
void
addInputFile
(
sox_format_t
*
sf
);
void
addOutputBuffer
(
std
::
vector
<
sox_sample_t
>*
output_buffer
);
void
addOutputFile
(
sox_format_t
*
sf
);
...
...
torchaudio/csrc/sox/io.cpp
View file @
08f188b2
...
...
@@ -131,7 +131,7 @@ std::vector<std::vector<std::string>> get_effects(
}
// namespace
c10
::
intrusive_ptr
<
TensorSignal
>
load_audio_file
(
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
load_audio_file
(
const
std
::
string
&
path
,
c10
::
optional
<
int64_t
>&
frame_offset
,
c10
::
optional
<
int64_t
>&
num_frames
,
...
...
@@ -153,7 +153,6 @@ void save_audio_file(
c10
::
optional
<
std
::
string
>
dtype
)
{
validate_input_tensor
(
tensor
);
auto
signal
=
TensorSignal
(
tensor
,
sample_rate
,
channels_first
);
if
(
tensor
.
dtype
()
!=
torch
::
kFloat32
&&
dtype
.
has_value
())
{
throw
std
::
runtime_error
(
"dtype conversion only supported for float32 tensors"
);
...
...
@@ -174,7 +173,8 @@ void save_audio_file(
num_channels
==
1
,
"amr-nb format only supports single channel audio."
);
tensor
=
(
unnormalize_wav
(
tensor
)
/
65536
).
to
(
torch
::
kInt16
);
}
const
auto
signal_info
=
get_signalinfo
(
&
signal
,
filetype
);
const
auto
signal_info
=
get_signalinfo
(
&
tensor
,
sample_rate
,
filetype
,
channels_first
);
const
auto
encoding_info
=
get_encodinginfo
(
filetype
,
tgt_dtype
,
compression
);
SoxFormat
sf
(
sox_open_write
(
...
...
@@ -192,7 +192,7 @@ void save_audio_file(
torchaudio
::
sox_effects_chain
::
SoxEffectsChain
chain
(
/*input_encoding=*/
get_encodinginfo
(
"wav"
,
tensor
.
dtype
()),
/*output_encoding=*/
sf
->
encoding
);
chain
.
addInputTensor
(
&
signal
);
chain
.
addInputTensor
(
&
tensor
,
sample_rate
,
channels_first
);
chain
.
addOutputFile
(
sf
);
chain
.
run
();
}
...
...
@@ -294,7 +294,6 @@ void save_audio_fileobj(
c10
::
optional
<
std
::
string
>
dtype
)
{
validate_input_tensor
(
tensor
);
auto
signal
=
TensorSignal
(
tensor
,
sample_rate
,
channels_first
);
if
(
tensor
.
dtype
()
!=
torch
::
kFloat32
&&
dtype
.
has_value
())
{
throw
std
::
runtime_error
(
"dtype conversion only supported for float32 tensors"
);
...
...
@@ -312,7 +311,8 @@ void save_audio_fileobj(
}
tensor
=
(
unnormalize_wav
(
tensor
)
/
65536
).
to
(
torch
::
kInt16
);
}
const
auto
signal_info
=
get_signalinfo
(
&
signal
,
filetype
);
const
auto
signal_info
=
get_signalinfo
(
&
tensor
,
sample_rate
,
filetype
,
channels_first
);
const
auto
encoding_info
=
get_encodinginfo
(
filetype
,
tgt_dtype
,
compression
);
AutoReleaseBuffer
buffer
;
...
...
@@ -333,7 +333,7 @@ void save_audio_fileobj(
torchaudio
::
sox_effects_chain
::
SoxEffectsChain
chain
(
/*input_encoding=*/
get_encodinginfo
(
"wav"
,
tensor
.
dtype
()),
/*output_encoding=*/
sf
->
encoding
);
chain
.
addInputTensor
(
&
signal
);
chain
.
addInputTensor
(
&
tensor
,
sample_rate
,
channels_first
);
chain
.
addOutputFileObj
(
sf
,
&
buffer
.
ptr
,
&
buffer
.
size
,
&
fileobj
);
chain
.
run
();
...
...
@@ -346,5 +346,24 @@ void save_audio_fileobj(
#endif // TORCH_API_INCLUDE_EXTENSION_H
TORCH_LIBRARY_FRAGMENT
(
torchaudio
,
m
)
{
m
.
class_
<
torchaudio
::
sox_io
::
SignalInfo
>
(
"SignalInfo"
)
.
def
(
"get_sample_rate"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getSampleRate
)
.
def
(
"get_num_channels"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getNumChannels
)
.
def
(
"get_num_frames"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getNumFrames
)
.
def
(
"get_bits_per_sample"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getBitsPerSample
)
.
def
(
"get_encoding"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getEncoding
);
m
.
def
(
"torchaudio::sox_io_get_info"
,
&
torchaudio
::
sox_io
::
get_info_file
);
m
.
def
(
"torchaudio::sox_io_load_audio_file"
,
&
torchaudio
::
sox_io
::
load_audio_file
);
m
.
def
(
"torchaudio::sox_io_save_audio_file"
,
&
torchaudio
::
sox_io
::
save_audio_file
);
}
}
// namespace sox_io
}
// namespace torchaudio
torchaudio/csrc/sox/io.h
View file @
08f188b2
...
...
@@ -35,7 +35,7 @@ c10::intrusive_ptr<SignalInfo> get_info_file(
const
std
::
string
&
path
,
c10
::
optional
<
std
::
string
>&
format
);
c10
::
intrusive_ptr
<
torchaudio
::
sox_utils
::
TensorSignal
>
load_audio_file
(
std
::
tuple
<
torch
::
Tensor
,
int64_t
>
load_audio_file
(
const
std
::
string
&
path
,
c10
::
optional
<
int64_t
>&
frame_offset
,
c10
::
optional
<
int64_t
>&
num_frames
,
...
...
torchaudio/csrc/sox/register.cpp
deleted
100644 → 0
View file @
4608a5b2
#include <torchaudio/csrc/sox/effects.h>
#include <torchaudio/csrc/sox/io.h>
#include <torchaudio/csrc/sox/utils.h>
TORCH_LIBRARY_FRAGMENT
(
torchaudio
,
m
)
{
//////////////////////////////////////////////////////////////////////////////
// sox_utils.h
//////////////////////////////////////////////////////////////////////////////
m
.
class_
<
torchaudio
::
sox_utils
::
TensorSignal
>
(
"TensorSignal"
)
.
def
(
torch
::
init
<
torch
::
Tensor
,
int64_t
,
bool
>
())
.
def
(
"get_tensor"
,
&
torchaudio
::
sox_utils
::
TensorSignal
::
getTensor
)
.
def
(
"get_sample_rate"
,
&
torchaudio
::
sox_utils
::
TensorSignal
::
getSampleRate
)
.
def
(
"get_channels_first"
,
&
torchaudio
::
sox_utils
::
TensorSignal
::
getChannelsFirst
);
m
.
def
(
"torchaudio::sox_utils_set_seed"
,
&
torchaudio
::
sox_utils
::
set_seed
);
m
.
def
(
"torchaudio::sox_utils_set_verbosity"
,
&
torchaudio
::
sox_utils
::
set_verbosity
);
m
.
def
(
"torchaudio::sox_utils_set_use_threads"
,
&
torchaudio
::
sox_utils
::
set_use_threads
);
m
.
def
(
"torchaudio::sox_utils_set_buffer_size"
,
&
torchaudio
::
sox_utils
::
set_buffer_size
);
m
.
def
(
"torchaudio::sox_utils_list_effects"
,
&
torchaudio
::
sox_utils
::
list_effects
);
m
.
def
(
"torchaudio::sox_utils_list_read_formats"
,
&
torchaudio
::
sox_utils
::
list_read_formats
);
m
.
def
(
"torchaudio::sox_utils_list_write_formats"
,
&
torchaudio
::
sox_utils
::
list_write_formats
);
//////////////////////////////////////////////////////////////////////////////
// sox_io.h
//////////////////////////////////////////////////////////////////////////////
m
.
class_
<
torchaudio
::
sox_io
::
SignalInfo
>
(
"SignalInfo"
)
.
def
(
"get_sample_rate"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getSampleRate
)
.
def
(
"get_num_channels"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getNumChannels
)
.
def
(
"get_num_frames"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getNumFrames
)
.
def
(
"get_bits_per_sample"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getBitsPerSample
)
.
def
(
"get_encoding"
,
&
torchaudio
::
sox_io
::
SignalInfo
::
getEncoding
);
m
.
def
(
"torchaudio::sox_io_get_info"
,
&
torchaudio
::
sox_io
::
get_info_file
);
m
.
def
(
"torchaudio::sox_io_load_audio_file("
"str path,"
"int? frame_offset=None,"
"int? num_frames=None,"
"bool? normalize=True,"
"bool? channels_first=False,"
"str? format=None"
") -> __torch__.torch.classes.torchaudio.TensorSignal"
,
&
torchaudio
::
sox_io
::
load_audio_file
);
m
.
def
(
"torchaudio::sox_io_save_audio_file"
,
&
torchaudio
::
sox_io
::
save_audio_file
);
//////////////////////////////////////////////////////////////////////////////
// sox_effects.h
//////////////////////////////////////////////////////////////////////////////
m
.
def
(
"torchaudio::sox_effects_initialize_sox_effects"
,
&
torchaudio
::
sox_effects
::
initialize_sox_effects
);
m
.
def
(
"torchaudio::sox_effects_shutdown_sox_effects"
,
&
torchaudio
::
sox_effects
::
shutdown_sox_effects
);
m
.
def
(
"torchaudio::sox_effects_apply_effects_tensor"
,
&
torchaudio
::
sox_effects
::
apply_effects_tensor
);
m
.
def
(
"torchaudio::sox_effects_apply_effects_file"
,
&
torchaudio
::
sox_effects
::
apply_effects_file
);
}
torchaudio/csrc/sox/utils.cpp
View file @
08f188b2
...
...
@@ -61,24 +61,6 @@ std::vector<std::string> list_read_formats() {
return
formats
;
}
TensorSignal
::
TensorSignal
(
torch
::
Tensor
tensor_
,
int64_t
sample_rate_
,
bool
channels_first_
)
:
tensor
(
tensor_
),
sample_rate
(
sample_rate_
),
channels_first
(
channels_first_
){};
torch
::
Tensor
TensorSignal
::
getTensor
()
const
{
return
tensor
;
}
int64_t
TensorSignal
::
getSampleRate
()
const
{
return
sample_rate
;
}
bool
TensorSignal
::
getChannelsFirst
()
const
{
return
channels_first
;
}
SoxFormat
::
SoxFormat
(
sox_format_t
*
fd
)
noexcept
:
fd_
(
fd
)
{}
SoxFormat
::~
SoxFormat
()
{
close
();
...
...
@@ -297,15 +279,16 @@ unsigned get_precision(
}
sox_signalinfo_t
get_signalinfo
(
const
TensorSignal
*
signal
,
const
std
::
string
filetype
)
{
auto
tensor
=
signal
->
getTensor
();
const
torch
::
Tensor
*
waveform
,
const
int64_t
sample_rate
,
const
std
::
string
filetype
,
const
bool
channels_first
)
{
return
sox_signalinfo_t
{
/*rate=*/
static_cast
<
sox_rate_t
>
(
s
ignal
->
getS
ample
R
ate
()
),
/*rate=*/
static_cast
<
sox_rate_t
>
(
sample
_r
ate
),
/*channels=*/
static_cast
<
unsigned
>
(
tensor
.
size
(
signal
->
getC
hannels
F
irst
()
?
0
:
1
)),
/*precision=*/
get_precision
(
filetype
,
tensor
.
dtype
()),
/*length=*/
static_cast
<
uint64_t
>
(
tensor
.
numel
())};
static_cast
<
unsigned
>
(
waveform
->
size
(
c
hannels
_f
irst
?
0
:
1
)),
/*precision=*/
get_precision
(
filetype
,
waveform
->
dtype
()),
/*length=*/
static_cast
<
uint64_t
>
(
waveform
->
numel
())};
}
sox_encodinginfo_t
get_encodinginfo
(
...
...
@@ -364,5 +347,27 @@ uint64_t read_fileobj(py::object* fileobj, const uint64_t size, char* buffer) {
#endif // TORCH_API_INCLUDE_EXTENSION_H
TORCH_LIBRARY_FRAGMENT
(
torchaudio
,
m
)
{
m
.
def
(
"torchaudio::sox_utils_set_seed"
,
&
torchaudio
::
sox_utils
::
set_seed
);
m
.
def
(
"torchaudio::sox_utils_set_verbosity"
,
&
torchaudio
::
sox_utils
::
set_verbosity
);
m
.
def
(
"torchaudio::sox_utils_set_use_threads"
,
&
torchaudio
::
sox_utils
::
set_use_threads
);
m
.
def
(
"torchaudio::sox_utils_set_buffer_size"
,
&
torchaudio
::
sox_utils
::
set_buffer_size
);
m
.
def
(
"torchaudio::sox_utils_list_effects"
,
&
torchaudio
::
sox_utils
::
list_effects
);
m
.
def
(
"torchaudio::sox_utils_list_read_formats"
,
&
torchaudio
::
sox_utils
::
list_read_formats
);
m
.
def
(
"torchaudio::sox_utils_list_write_formats"
,
&
torchaudio
::
sox_utils
::
list_write_formats
);
}
}
// namespace sox_utils
}
// namespace torchaudio
torchaudio/csrc/sox/utils.h
View file @
08f188b2
...
...
@@ -30,23 +30,6 @@ std::vector<std::string> list_read_formats();
std
::
vector
<
std
::
string
>
list_write_formats
();
/// Class for exchanging signal information (tensor + metadata) between
/// C++ and Python for read/write operation.
struct
TensorSignal
:
torch
::
CustomClassHolder
{
torch
::
Tensor
tensor
;
int64_t
sample_rate
;
bool
channels_first
;
TensorSignal
(
torch
::
Tensor
tensor_
,
int64_t
sample_rate_
,
bool
channels_first_
);
torch
::
Tensor
getTensor
()
const
;
int64_t
getSampleRate
()
const
;
bool
getChannelsFirst
()
const
;
};
////////////////////////////////////////////////////////////////////////////////
// Utilities for sox_io / sox_effects implementations
////////////////////////////////////////////////////////////////////////////////
...
...
@@ -120,8 +103,10 @@ const std::string get_filetype(const std::string path);
/// Get sox_signalinfo_t for passing a torch::Tensor object.
sox_signalinfo_t
get_signalinfo
(
const
TensorSignal
*
signal
,
const
std
::
string
filetype
);
const
torch
::
Tensor
*
waveform
,
const
int64_t
sample_rate
,
const
std
::
string
filetype
,
const
bool
channels_first
);
/// Get sox_encodinginfo_t for saving audio file
sox_encodinginfo_t
get_encodinginfo
(
...
...
torchaudio/sox_effects/sox_effects.py
View file @
08f188b2
...
...
@@ -63,7 +63,7 @@ def apply_effects_tensor(
Note:
This function works in the way very similar to ``sox`` command, however there are slight
differences. For example, ``sox`` comm
n
ad adds certain effects automatically (such as
differences. For example, ``sox`` comma
n
d adds certain effects automatically (such as
``rate`` effect after ``speed`` and ``pitch`` and other effects), but this function
only applies the given effects. (Therefore, to actually apply ``speed`` effect, you also
need to give ``rate`` effect with desired sampling rate.)
...
...
@@ -149,9 +149,8 @@ def apply_effects_tensor(
>>> waveform, sample_rate = transform(waveform, input_sample_rate)
>>> assert sample_rate == 8000
"""
in_signal
=
torch
.
classes
.
torchaudio
.
TensorSignal
(
tensor
,
sample_rate
,
channels_first
)
out_signal
=
torch
.
ops
.
torchaudio
.
sox_effects_apply_effects_tensor
(
in_signal
,
effects
)
return
out_signal
.
get_tensor
(),
out_signal
.
get_sample_rate
()
return
torch
.
ops
.
torchaudio
.
sox_effects_apply_effects_tensor
(
tensor
,
sample_rate
,
effects
,
channels_first
)
@
_mod_utils
.
requires_module
(
'torchaudio._torchaudio'
)
...
...
@@ -268,6 +267,5 @@ def apply_effects_file(
return
torchaudio
.
_torchaudio
.
apply_effects_fileobj
(
path
,
effects
,
normalize
,
channels_first
,
format
)
path
=
os
.
fspath
(
path
)
signal
=
torch
.
ops
.
torchaudio
.
sox_effects_apply_effects_file
(
return
torch
.
ops
.
torchaudio
.
sox_effects_apply_effects_file
(
path
,
effects
,
normalize
,
channels_first
,
format
)
return
signal
.
get_tensor
(),
signal
.
get_sample_rate
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment