Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Torchaudio
Commits
4d52106f
Unverified
Commit
4d52106f
authored
Jun 05, 2020
by
moto
Committed by
GitHub
Jun 05, 2020
Browse files
Move all SoX I/O functions to _sox_backend for better modularity (#695)
parent
e5eb4857
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
196 additions
and
191 deletions
+196
-191
torchaudio/__init__.py
torchaudio/__init__.py
+8
-187
torchaudio/_sox_backend.py
torchaudio/_sox_backend.py
+188
-4
No files found.
torchaudio/__init__.py
View file @
4d52106f
import
os.path
from
pathlib
import
Path
from
typing
import
Any
,
Callable
,
Optional
,
Tuple
,
Union
import
torch
from
torch
import
Tensor
from
torchaudio
import
(
compliance
,
...
...
@@ -16,6 +14,14 @@ from torchaudio._backend import (
get_audio_backend
,
set_audio_backend
,
)
from
torchaudio._sox_backend
import
(
save_encinfo
,
sox_signalinfo_t
,
sox_encodinginfo_t
,
get_sox_option_t
,
get_sox_encoding_t
,
get_sox_bool
,
)
from
torchaudio._soundfile_backend
import
SignalInfo
,
EncodingInfo
from
torchaudio._internal
import
(
module_utils
as
_mod_utils
,
...
...
@@ -130,77 +136,6 @@ def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, chan
)
@_mod_utils.requires_module('torchaudio._torchaudio')
def save_encinfo(filepath: str,
                 src: Tensor,
                 channels_first: bool = True,
                 signalinfo: Optional[SignalInfo] = None,
                 encodinginfo: Optional[EncodingInfo] = None,
                 filetype: Optional[str] = None) -> None:
    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.

    Args:
        filepath (str): Path to audio file
        src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
            the number of audio frames, C is the number of channels. A 1D tensor is
            treated as a mono signal. The caller's tensor is not modified.
        channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``)
        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
            audio type cannot be automatically determined. Its ``rate`` and ``precision``
            attributes are coerced in place to ``float`` and ``int``. (Default: ``None``)
        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
            audio type cannot be automatically determined (Default: ``None``).
        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
            automatically. Only used when ``filepath`` has no extension. (Default: ``None``)

    Raises:
        OSError: If the directory containing ``filepath`` does not exist.
        ValueError: If ``src`` has more than 2 dimensions or more than 16 channels.
        TypeError: If ``signalinfo.rate`` or ``signalinfo.precision`` holds a
            non-integral value of the wrong type.

    Example
        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> torchaudio.save('foo.wav', data, sample_rate)
    """
    ch_idx = 0 if channels_first else 1

    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))

    # check that src is a CPU tensor
    _misc_ops.check_input(src)

    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals. The out-of-place
        # ``unsqueeze`` is used so the caller's tensor keeps its shape.
        src = src.unsqueeze(ch_idx)
    elif src.dim() > 2 or src.size(ch_idx) > 16:
        # NOTE(review): this check permits exactly 16 channels although the
        # message says C < 16 -- confirm which bound is intended.
        raise ValueError(
            "Expected format where C < 16, but found {}".format(src.size()))

    # sox stores the sample rate as a float, though practically sample rates are almost always integers
    # convert integers to floats
    if signalinfo:
        if signalinfo.rate and not isinstance(signalinfo.rate, float):
            if float(signalinfo.rate) == signalinfo.rate:
                signalinfo.rate = float(signalinfo.rate)
            else:
                raise TypeError('Sample rate should be a float or int')
        # check if the bit precision (i.e. bits per sample) is an integer
        if signalinfo.precision and not isinstance(signalinfo.precision, int):
            if int(signalinfo.precision) == signalinfo.precision:
                signalinfo.precision = int(signalinfo.precision)
            else:
                raise TypeError('Bit precision should be an integer')

    # programs such as librosa normalize the signal, unnormalize if detected
    if src.min() >= -1.0 and src.max() <= 1.0:
        src = src * (1 << 31)
        src = src.long()

    # set filetype and allow for files with no extensions; an extension
    # present in ``filepath`` takes precedence over the ``filetype`` argument
    extension = os.path.splitext(filepath)[1]
    filetype = extension[1:] if len(extension) > 0 else filetype

    # transpose from C x L -> L x C
    if channels_first:
        src = src.transpose(1, 0)

    # save data to file
    src = src.contiguous()
    _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
def
info
(
filepath
:
str
)
->
Tuple
[
SignalInfo
,
EncodingInfo
]:
r
"""Gets metadata from an audio file without loading the signal.
...
...
@@ -216,117 +151,3 @@ def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
>>> rate, channels, encoding = si.rate, si.channels, ei.encoding
"""
return
_get_audio_backend_module
().
info
(
filepath
)
@_mod_utils.requires_module('torchaudio._torchaudio')
def sox_signalinfo_t() -> SignalInfo:
    r"""Create a sox_signalinfo_t object.

    The object can be used to set the sample rate, number of channels, length,
    bit precision and headroom multiplier, primarily for effects.

    Returns: sox_signalinfo_t(object)
        - rate (float), sample rate as a float, practically will likely be an integer float
        - channels (int), number of audio channels
        - precision (int), bit precision
        - length (int), length of audio in samples * channels, 0 for unspecified and -1 for unknown
        - mult (float, optional), headroom multiplier for effects and ``None`` for no multiplier

    Example
        >>> si = torchaudio.sox_signalinfo_t()
        >>> si.channels = 1
        >>> si.rate = 16000.
        >>> si.precision = 16
        >>> si.length = 0
    """
    signal_info = _torchaudio.sox_signalinfo_t()
    return signal_info
@_mod_utils.requires_module('torchaudio._torchaudio')
def sox_encodinginfo_t() -> EncodingInfo:
    r"""Create a sox_encodinginfo_t object.

    The object can be used to set the encoding type, bit precision, compression
    factor, reverse bytes, reverse nibbles, reverse bits and endianness. This can
    be used in an effects chain to encode the final output or to save a file with
    a specific encoding. For example, one could use the sox ulaw encoding to do
    8-bit ulaw encoding. Note in a tensor output the result will be a 32-bit
    number, but number of unique values will be determined by the bit precision.

    The three ``reverse_*`` fields of the returned object are pre-set to
    ``get_sox_option_t(2)``.

    Returns: sox_encodinginfo_t(object)
        - encoding (sox_encoding_t), output encoding
        - bits_per_sample (int), bit precision, same as `precision` in sox_signalinfo_t
        - compression (float), compression for lossy formats, 0.0 for default compression
        - reverse_bytes (sox_option_t), reverse bytes, use sox_option_default
        - reverse_nibbles (sox_option_t), reverse nibbles, use sox_option_default
        - reverse_bits (sox_option_t), reverse bits, use sox_option_default
        - opposite_endian (sox_bool), change endianness, use sox_false

    Example
        >>> ei = torchaudio.sox_encodinginfo_t()
        >>> ei.encoding = torchaudio.get_sox_encoding_t(1)
        >>> ei.bits_per_sample = 16
        >>> ei.compression = 0
        >>> ei.reverse_bytes = torchaudio.get_sox_option_t(2)
        >>> ei.reverse_nibbles = torchaudio.get_sox_option_t(2)
        >>> ei.reverse_bits = torchaudio.get_sox_option_t(2)
        >>> ei.opposite_endian = torchaudio.get_sox_bool(0)
    """
    encoding_info = _torchaudio.sox_encodinginfo_t()
    default_option = get_sox_option_t(2)  # sox_default_option
    for field_name in ('reverse_bytes', 'reverse_nibbles', 'reverse_bits'):
        setattr(encoding_info, field_name, default_option)
    return encoding_info
@_mod_utils.requires_module('torchaudio._torchaudio')
def get_sox_encoding_t(i: Optional[int] = None) -> Any:
    r"""Get enum of sox_encoding_t for sox encodings.

    Args:
        i (int, optional): Integer value of the encoding to select. When ``None``,
            ``sox_encoding_t`` itself is returned; use its ``__members__``
            attribute to see all options. (Default: ``None``)

    Returns:
        sox_encoding_t: A sox_encoding_t type for output encoding, or
        ``sox_encoding_t`` itself when ``i`` is ``None``.
    """
    if i is None:
        # one can see all possible values using the .__members__ attribute
        return _torchaudio.sox_encoding_t
    return _torchaudio.sox_encoding_t(i)
@_mod_utils.requires_module('torchaudio._torchaudio')
def get_sox_option_t(i: Optional[int] = 2) -> Any:
    r"""Get enum of sox_option_t for sox encodinginfo options.

    Args:
        i (int, optional): Integer value of the option to select. Pass ``None``
            explicitly to get ``sox_option_t`` itself; use its ``__members__``
            attribute to see all options.
            (Default: ``sox_option_default`` or ``2``)

    Returns:
        sox_option_t: A sox_option_t type, or ``sox_option_t`` itself when
        ``i`` is ``None``.
    """
    if i is None:
        return _torchaudio.sox_option_t
    return _torchaudio.sox_option_t(i)
@_mod_utils.requires_module('torchaudio._torchaudio')
def get_sox_bool(i: Optional[int] = 0) -> Any:
    r"""Get enum of sox_bool for sox encodinginfo options.

    Args:
        i (int, optional): Integer value of the boolean to select. Pass ``None``
            explicitly to get ``sox_bool`` itself; use its ``__members__``
            attribute to see all options. (Default: ``sox_false`` or ``0``)

    Returns:
        sox_bool: A sox_bool type, or ``sox_bool`` itself when ``i`` is ``None``.
    """
    if i is None:
        return _torchaudio.sox_bool
    return _torchaudio.sox_bool(i)
torchaudio/_sox_backend.py
View file @
4d52106f
import
os.path
from
typing
import
Optional
,
Tuple
from
typing
import
Any
,
Optional
,
Tuple
import
torch
from
torch
import
Tensor
import
torchaudio
from
torchaudio._internal
import
(
module_utils
as
_mod_utils
,
misc_ops
as
_misc_ops
,
...
...
@@ -65,16 +64,201 @@ def load(filepath: str,
def save(filepath: str, src: Tensor, sample_rate: int, precision: int = 16, channels_first: bool = True) -> None:
    r"""See torchaudio.save

    Builds a sox_signalinfo_t from ``sample_rate``, ``precision`` and the shape
    of ``src``, then delegates the actual write to ``save_encinfo``.
    """
    # Use the module-local helpers directly rather than going through the
    # ``torchaudio`` package namespace; the previous body created the signal
    # info twice and carried a second, unreachable ``return``.
    si = sox_signalinfo_t()
    ch_idx = 0 if channels_first else 1
    si.rate = sample_rate
    # a 1D tensor is a mono signal; otherwise read the channel dimension
    si.channels = 1 if src.dim() == 1 else src.size(ch_idx)
    si.length = src.numel()
    si.precision = precision
    return save_encinfo(filepath, src, channels_first, si)
@_mod_utils.requires_module('torchaudio._torchaudio')
def info(filepath: str) -> Tuple[SignalInfo, EncodingInfo]:
    r"""See torchaudio.info

    Returns whatever ``_torchaudio.get_info`` reports for ``filepath``.
    """
    file_info = _torchaudio.get_info(filepath)
    return file_info
@_mod_utils.requires_module('torchaudio._torchaudio')
def save_encinfo(filepath: str,
                 src: Tensor,
                 channels_first: bool = True,
                 signalinfo: Optional[SignalInfo] = None,
                 encodinginfo: Optional[EncodingInfo] = None,
                 filetype: Optional[str] = None) -> None:
    r"""Saves a tensor of an audio signal to disk as a standard format like mp3, wav, etc.

    Args:
        filepath (str): Path to audio file
        src (Tensor): An input 2D tensor of shape `[C x L]` or `[L x C]` where L is
            the number of audio frames, C is the number of channels. A 1D tensor is
            treated as a mono signal. The caller's tensor is not modified.
        channels_first (bool, optional): Set channels first or length first in result. (Default: ``True``)
        signalinfo (sox_signalinfo_t, optional): A sox_signalinfo_t type, which could be helpful if the
            audio type cannot be automatically determined. Its ``rate`` and ``precision``
            attributes are coerced in place to ``float`` and ``int``. (Default: ``None``)
        encodinginfo (sox_encodinginfo_t, optional): A sox_encodinginfo_t type, which could be set if the
            audio type cannot be automatically determined (Default: ``None``).
        filetype (str, optional): A filetype or extension to be set if sox cannot determine it
            automatically. Only used when ``filepath`` has no extension. (Default: ``None``)

    Raises:
        OSError: If the directory containing ``filepath`` does not exist.
        ValueError: If ``src`` has more than 2 dimensions or more than 16 channels.
        TypeError: If ``signalinfo.rate`` or ``signalinfo.precision`` holds a
            non-integral value of the wrong type.

    Example
        >>> data, sample_rate = torchaudio.load('foo.mp3')
        >>> torchaudio.save('foo.wav', data, sample_rate)
    """
    ch_idx = 0 if channels_first else 1

    # check if save directory exists
    abs_dirpath = os.path.dirname(os.path.abspath(filepath))
    if not os.path.isdir(abs_dirpath):
        raise OSError("Directory does not exist: {}".format(abs_dirpath))

    # check that src is a CPU tensor
    _misc_ops.check_input(src)

    # Check/Fix shape of source data
    if src.dim() == 1:
        # 1d tensors are assumed to be mono signals. The out-of-place
        # ``unsqueeze`` is used so the caller's tensor keeps its shape.
        src = src.unsqueeze(ch_idx)
    elif src.dim() > 2 or src.size(ch_idx) > 16:
        # NOTE(review): this check permits exactly 16 channels although the
        # message says C < 16 -- confirm which bound is intended.
        raise ValueError(
            "Expected format where C < 16, but found {}".format(src.size()))

    # sox stores the sample rate as a float, though practically sample rates are almost always integers
    # convert integers to floats
    if signalinfo:
        if signalinfo.rate and not isinstance(signalinfo.rate, float):
            if float(signalinfo.rate) == signalinfo.rate:
                signalinfo.rate = float(signalinfo.rate)
            else:
                raise TypeError('Sample rate should be a float or int')
        # check if the bit precision (i.e. bits per sample) is an integer
        if signalinfo.precision and not isinstance(signalinfo.precision, int):
            if int(signalinfo.precision) == signalinfo.precision:
                signalinfo.precision = int(signalinfo.precision)
            else:
                raise TypeError('Bit precision should be an integer')

    # programs such as librosa normalize the signal, unnormalize if detected
    if src.min() >= -1.0 and src.max() <= 1.0:
        src = src * (1 << 31)
        src = src.long()

    # set filetype and allow for files with no extensions; an extension
    # present in ``filepath`` takes precedence over the ``filetype`` argument
    extension = os.path.splitext(filepath)[1]
    filetype = extension[1:] if len(extension) > 0 else filetype

    # transpose from C x L -> L x C
    if channels_first:
        src = src.transpose(1, 0)

    # save data to file
    src = src.contiguous()
    _torchaudio.write_audio_file(filepath, src, signalinfo, encodinginfo, filetype)
@_mod_utils.requires_module('torchaudio._torchaudio')
def sox_signalinfo_t() -> SignalInfo:
    r"""Create a sox_signalinfo_t object.

    The object can be used to set the sample rate, number of channels, length,
    bit precision and headroom multiplier, primarily for effects.

    Returns: sox_signalinfo_t(object)
        - rate (float), sample rate as a float, practically will likely be an integer float
        - channels (int), number of audio channels
        - precision (int), bit precision
        - length (int), length of audio in samples * channels, 0 for unspecified and -1 for unknown
        - mult (float, optional), headroom multiplier for effects and ``None`` for no multiplier

    Example
        >>> si = torchaudio.sox_signalinfo_t()
        >>> si.channels = 1
        >>> si.rate = 16000.
        >>> si.precision = 16
        >>> si.length = 0
    """
    signal_info = _torchaudio.sox_signalinfo_t()
    return signal_info
@_mod_utils.requires_module('torchaudio._torchaudio')
def sox_encodinginfo_t() -> EncodingInfo:
    r"""Create a sox_encodinginfo_t object.

    The object can be used to set the encoding type, bit precision, compression
    factor, reverse bytes, reverse nibbles, reverse bits and endianness. This can
    be used in an effects chain to encode the final output or to save a file with
    a specific encoding. For example, one could use the sox ulaw encoding to do
    8-bit ulaw encoding. Note in a tensor output the result will be a 32-bit
    number, but number of unique values will be determined by the bit precision.

    The three ``reverse_*`` fields of the returned object are pre-set to
    ``get_sox_option_t(2)``.

    Returns: sox_encodinginfo_t(object)
        - encoding (sox_encoding_t), output encoding
        - bits_per_sample (int), bit precision, same as `precision` in sox_signalinfo_t
        - compression (float), compression for lossy formats, 0.0 for default compression
        - reverse_bytes (sox_option_t), reverse bytes, use sox_option_default
        - reverse_nibbles (sox_option_t), reverse nibbles, use sox_option_default
        - reverse_bits (sox_option_t), reverse bits, use sox_option_default
        - opposite_endian (sox_bool), change endianness, use sox_false

    Example
        >>> ei = torchaudio.sox_encodinginfo_t()
        >>> ei.encoding = torchaudio.get_sox_encoding_t(1)
        >>> ei.bits_per_sample = 16
        >>> ei.compression = 0
        >>> ei.reverse_bytes = torchaudio.get_sox_option_t(2)
        >>> ei.reverse_nibbles = torchaudio.get_sox_option_t(2)
        >>> ei.reverse_bits = torchaudio.get_sox_option_t(2)
        >>> ei.opposite_endian = torchaudio.get_sox_bool(0)
    """
    encoding_info = _torchaudio.sox_encodinginfo_t()
    default_option = get_sox_option_t(2)  # sox_default_option
    for field_name in ('reverse_bytes', 'reverse_nibbles', 'reverse_bits'):
        setattr(encoding_info, field_name, default_option)
    return encoding_info
@_mod_utils.requires_module('torchaudio._torchaudio')
def get_sox_encoding_t(i: Optional[int] = None) -> Any:
    r"""Get enum of sox_encoding_t for sox encodings.

    Args:
        i (int, optional): Integer value of the encoding to select. When ``None``,
            ``sox_encoding_t`` itself is returned; use its ``__members__``
            attribute to see all options. (Default: ``None``)

    Returns:
        sox_encoding_t: A sox_encoding_t type for output encoding, or
        ``sox_encoding_t`` itself when ``i`` is ``None``.
    """
    if i is None:
        # one can see all possible values using the .__members__ attribute
        return _torchaudio.sox_encoding_t
    return _torchaudio.sox_encoding_t(i)
@_mod_utils.requires_module('torchaudio._torchaudio')
def get_sox_option_t(i: Optional[int] = 2) -> Any:
    r"""Get enum of sox_option_t for sox encodinginfo options.

    Args:
        i (int, optional): Integer value of the option to select. Pass ``None``
            explicitly to get ``sox_option_t`` itself; use its ``__members__``
            attribute to see all options.
            (Default: ``sox_option_default`` or ``2``)

    Returns:
        sox_option_t: A sox_option_t type, or ``sox_option_t`` itself when
        ``i`` is ``None``.
    """
    if i is None:
        return _torchaudio.sox_option_t
    return _torchaudio.sox_option_t(i)
@_mod_utils.requires_module('torchaudio._torchaudio')
def get_sox_bool(i: Optional[int] = 0) -> Any:
    r"""Get enum of sox_bool for sox encodinginfo options.

    Args:
        i (int, optional): Integer value of the boolean to select. Pass ``None``
            explicitly to get ``sox_bool`` itself; use its ``__members__``
            attribute to see all options. (Default: ``sox_false`` or ``0``)

    Returns:
        sox_bool: A sox_bool type, or ``sox_bool`` itself when ``i`` is ``None``.
    """
    if i is None:
        return _torchaudio.sox_bool
    return _torchaudio.sox_bool(i)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment