Commit 47716772 authored by moto, committed by Facebook GitHub Bot
Browse files

Fix style to prep #3414 (#3415)

Summary: Pull Request resolved: https://github.com/pytorch/audio/pull/3415

Differential Revision: D46526437

Pulled By: mthrok

fbshipit-source-id: f78d19c19d7e68f67712412de35d9ed50f47263b
parent 91db978b
......@@ -20,35 +20,15 @@ import torchaudio
print(torch.__version__)
print(torchaudio.__version__)
######################################################################
#
try:
from torchaudio.io import StreamReader
except ModuleNotFoundError:
try:
import google.colab
print(
"""
To enable running this notebook in Google Colab, install the requisite
third party libraries by running the following code:
!add-apt-repository -y ppa:savoury1/ffmpeg4
!apt-get -qq install -y ffmpeg
"""
)
except ModuleNotFoundError:
pass
raise
import IPython
import matplotlib.pyplot as plt
from torchaudio.io import StreamReader
base_url = "https://download.pytorch.org/torchaudio/tutorial-assets"
AUDIO_URL = f"{base_url}/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"
VIDEO_URL = f"{base_url}/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4.mp4"
######################################################################
# Audio / Video device input
# --------------------------
......
......@@ -65,29 +65,8 @@ import torchaudio
print(torch.__version__)
print(torchaudio.__version__)
######################################################################
#
try:
from torchaudio.io import StreamReader
except ModuleNotFoundError:
try:
import google.colab
print(
"""
To enable running this notebook in Google Colab, install the requisite
third party libraries by running the following code:
!add-apt-repository -y ppa:savoury1/ffmpeg4
!apt-get -qq install -y ffmpeg
"""
)
except ModuleNotFoundError:
pass
raise
import matplotlib.pyplot as plt
from torchaudio.io import StreamReader
base_url = "https://download.pytorch.org/torchaudio/tutorial-assets"
AUDIO_URL = f"{base_url}/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav"
......
......@@ -74,7 +74,9 @@ from torchaudio.io import StreamWriter
from torchaudio.utils import download_asset
AUDIO_PATH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
VIDEO_PATH = download_asset("tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4")
VIDEO_PATH = download_asset(
"tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4"
)
######################################################################
#
......@@ -140,7 +142,7 @@ s.add_audio_stream(sample_rate, num_channels, format="s16")
# Write audio to the device
with s.open():
for i in range(0, num_frames, 256):
s.write_audio_chunk(0, waveform[i:i+256])
s.write_audio_chunk(0, waveform[i : i + 256])
######################################################################
#
......@@ -186,8 +188,12 @@ width, height = 640, 360
# a background thread and give chunks
running = True
def video_streamer(path, frames_per_chunk):
import queue, threading
import queue
import threading
from torchaudio.io import StreamReader
q = queue.Queue()
......@@ -196,9 +202,9 @@ def video_streamer(path, frames_per_chunk):
def _streamer():
streamer = StreamReader(path)
streamer.add_basic_video_stream(
frames_per_chunk, format="rgb24",
frame_rate=frame_rate, width=width, height=height)
for (chunk_, ) in streamer.stream():
frames_per_chunk, format="rgb24", frame_rate=frame_rate, width=width, height=height
)
for (chunk_,) in streamer.stream():
q.put(chunk_)
if not running:
break
......
......@@ -51,27 +51,7 @@ import torchaudio
print(torch.__version__)
print(torchaudio.__version__)
######################################################################
#
try:
from torchaudio.io import StreamWriter
except ImportError:
try:
import google.colab
print(
"""
To enable running this notebook in Google Colab, install nightly
torch and torchaudio builds by adding the following code block to the top
of the notebook before running it:
!pip3 uninstall -y torch torchvision torchaudio
!pip3 install --pre torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/nightly/cpu
"""
)
except ModuleNotFoundError:
pass
raise
from torchaudio.io import StreamWriter
print("FFmpeg library versions")
for k, v in torchaudio.utils.ffmpeg_utils.get_versions().items():
......@@ -84,9 +64,10 @@ import io
import os
import tempfile
from torchaudio.utils import download_asset
from IPython.display import Audio, Video
from torchaudio.utils import download_asset
SAMPLE_PATH = download_asset("tutorial-assets/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav")
WAVEFORM, SAMPLE_RATE = torchaudio.load(SAMPLE_PATH, channels_first=False)
NUM_FRAMES, NUM_CHANNELS = WAVEFORM.shape
......@@ -503,6 +484,8 @@ print(f"{bytes2[:10]}...{bytes2[-10:]}\n")
assert bytes1 == bytes2
import matplotlib.pyplot as plt
######################################################################
#
# Example - Spectrum Visualizer
......@@ -517,7 +500,6 @@ assert bytes1 == bytes2
# then use StreamWriter to convert them to video with the original audio.
import torchaudio.transforms as T
import matplotlib.pyplot as plt
######################################################################
#
......@@ -548,7 +530,7 @@ specs = trans(WAVEFORM.T)[0].T
#
spec_db = T.AmplitudeToDB(stype="magnitude", top_db=80)(specs.T)
_ = plt.imshow(spec_db, aspect="auto", origin='lower')
_ = plt.imshow(spec_db, aspect="auto", origin="lower")
######################################################################
#
......@@ -569,21 +551,27 @@ ncols, nrows = fig.canvas.get_width_height()
def _plot(data):
ax.clear()
x = list(range(len(data)))
R, G, B = 238/255, 76/255, 44/255
R, G, B = 238 / 255, 76 / 255, 44 / 255
for coeff, alpha in [(0.8, 0.7), (1, 1)]:
d = data ** coeff
d = data**coeff
ax.fill_between(x, d, -d, color=[R, G, B, alpha])
xlim = n_fft // 2 + 1
ax.set_xlim([-1, n_fft // 2 + 1])
ax.set_ylim([-1, 1])
ax.text(
xlim, 0.95,
xlim,
0.95,
f"Created with TorchAudio\n{torchaudio.__version__}",
color="white", ha="right", va="top", backgroundcolor="black")
color="white",
ha="right",
va="top",
backgroundcolor="black",
)
fig.canvas.draw()
frame = torch.frombuffer(fig.canvas.tostring_rgb(), dtype=torch.uint8)
return frame.reshape(nrows, ncols, 3).permute(2, 0, 1)
# sphinx_gallery_defer_figures
######################################################################
......@@ -604,10 +592,10 @@ with s.open():
# Process by second
for t in range(0, NUM_FRAMES, SAMPLE_RATE):
# Write audio chunk
s.write_audio_chunk(0, WAVEFORM[t:t + SAMPLE_RATE, :])
s.write_audio_chunk(0, WAVEFORM[t : t + SAMPLE_RATE, :])
# write 1 second of video chunk
frames = [_plot(spec) for spec in specs[i:i+frame_rate]]
frames = [_plot(spec) for spec in specs[i : i + frame_rate]]
if frames:
s.write_video_chunk(1, torch.stack(frames))
i += frame_rate
......
......@@ -33,17 +33,14 @@ print(torchaudio.__version__)
#
try:
from torchaudio.prototype.functional import (
sinc_impulse_response,
frequency_impulse_response,
filter_waveform,
)
from torchaudio.prototype.functional import filter_waveform, frequency_impulse_response, sinc_impulse_response
except ModuleNotFoundError:
print(
"Failed to import prototype DSP features. "
"Please install torchaudio nightly builds. "
"Please refer to https://pytorch.org/get-started/locally "
"for instructions to install a nightly build.")
"for instructions to install a nightly build."
)
raise
import matplotlib.pyplot as plt
......@@ -67,7 +64,7 @@ SAMPLE_RATE = 16_000
duration = 4
num_frames = int(duration * SAMPLE_RATE)
noise = torch.rand((num_frames, )) - 0.5
noise = torch.rand((num_frames,)) - 0.5
######################################################################
......@@ -80,6 +77,7 @@ def plot_input():
axes[1].specgram(noise, Fs=SAMPLE_RATE)
Audio(noise, rate=SAMPLE_RATE)
plot_input()
######################################################################
......@@ -101,7 +99,7 @@ num_filters = 64 * duration
window_size = 2049
f_cutoff = torch.linspace(0.0, 0.8, num_filters)
kernel = sinc_impulse_response(f_cutoff , window_size)
kernel = sinc_impulse_response(f_cutoff, window_size)
######################################################################
#
......@@ -116,6 +114,7 @@ filtered = filter_waveform(noise, kernel)
# Let's look at the spectrogram of the resulting audio and listen to it.
#
def plot_sinc_ir(waveform, cutoff, sample_rate, vol=0.2):
num_frames = waveform.size(0)
duration = num_frames / sample_rate
......@@ -160,7 +159,7 @@ f_cutoff = f_cutoff_base + f_cutoff_osci
######################################################################
#
kernel = sinc_impulse_response(f_cutoff , window_size)
kernel = sinc_impulse_response(f_cutoff, window_size)
filtered = filter_waveform(noise, kernel)
######################################################################
......@@ -182,7 +181,7 @@ f_cutoff = 0.07 + 0.06 * torch.sin(torch.cumsum(f_lfo, dim=0))
######################################################################
#
kernel = sinc_impulse_response(f_cutoff , window_size)
kernel = sinc_impulse_response(f_cutoff, window_size)
filtered = filter_waveform(noise, kernel)
######################################################################
......@@ -200,13 +199,14 @@ plot_sinc_ir(filtered, f_cutoff, SAMPLE_RATE)
#
magnitudes = torch.sin(torch.linspace(0, 10, 64))**4.0
magnitudes = torch.sin(torch.linspace(0, 10, 64)) ** 4.0
kernel = frequency_impulse_response(magnitudes)
filtered = filter_waveform(noise, kernel.unsqueeze(0))
######################################################################
#
def plot_waveform(magnitudes, filtered, sample_rate):
nyquist = sample_rate / 2
num_samples = filtered.size(-1)
......@@ -218,8 +218,10 @@ def plot_waveform(magnitudes, filtered, sample_rate):
offsets = duration * interval
# Select N magnitudes for overlays
mags = torch.stack(
[magnitudes for _ in range(N)] if magnitudes.ndim == 1 else
[magnitudes[int(i * magnitudes.size(0))] for i in interval])
[magnitudes for _ in range(N)]
if magnitudes.ndim == 1
else [magnitudes[int(i * magnitudes.size(0))] for i in interval]
)
mag_x = offsets.unsqueeze(-1) + 0.1 * mags
mag_y = torch.linspace(0, nyquist, magnitudes.size(-1)).tile((N, 1))
......@@ -229,6 +231,7 @@ def plot_waveform(magnitudes, filtered, sample_rate):
ax.specgram(filtered, Fs=sample_rate)
return Audio(filtered, rate=sample_rate)
######################################################################
#
plot_waveform(magnitudes, filtered, SAMPLE_RATE)
......@@ -237,8 +240,7 @@ plot_waveform(magnitudes, filtered, SAMPLE_RATE)
#
# It is also possible to make a non-stationary filter.
magnitudes = torch.stack(
[torch.linspace(0.0, w, 1000) for w in torch.linspace(4.0, 40.0, 250)])
magnitudes = torch.stack([torch.linspace(0.0, w, 1000) for w in torch.linspace(4.0, 40.0, 250)])
magnitudes = torch.sin(magnitudes) ** 4.0
######################################################################
......
......@@ -155,7 +155,10 @@ def _main():
long_description=long_description,
long_description_content_type="text/markdown",
url="https://github.com/pytorch/audio",
author="Soumith Chintala, David Pollack, Sean Naren, Peter Goldsborough, Moto Hira, Caroline Chen, Jeff Hwang, Zhaoheng Ni, Xiaohui Zhang",
author=(
"Soumith Chintala, David Pollack, Sean Naren, Peter Goldsborough, "
"Moto Hira, Caroline Chen, Jeff Hwang, Zhaoheng Ni, Xiaohui Zhang"
),
author_email="soumith@pytorch.org",
maintainer="Moto Hira, Caroline Chen, Jeff Hwang, Zhaoheng Ni, Xiaohui Zhang",
maintainer_email="moto@meta.com",
......
......@@ -76,8 +76,9 @@ def _main():
conf = cfg["model"]
del conf["w2v_path"]
keep = ["_name", "task", "model"]
for key in list(k for k in conf["w2v_args"] if k not in keep):
del conf["w2v_args"][key]
# Prune every top-level "w2v_args" entry that is not listed in `keep`.
# Iterate over a snapshot of the keys: deleting entries while iterating the
# mapping itself raises "RuntimeError: dictionary changed size during
# iteration" on a plain dict.
for key in list(conf["w2v_args"]):
    if key not in keep:
        del conf["w2v_args"][key]
conf["data"] = "/foo/bar/"
conf["w2v_args"]["task"]["data"] = "/foo/bar"
conf["w2v_args"]["task"]["labels"] = []
......
......@@ -10,71 +10,69 @@ _CHECKSUMS = {
"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b": "209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4", # noqa: E501
"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols": "408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027", # noqa: E501
}
_PUNCTUATIONS = set(
[
"!EXCLAMATION-POINT",
'"CLOSE-QUOTE',
'"DOUBLE-QUOTE',
'"END-OF-QUOTE',
'"END-QUOTE',
'"IN-QUOTES',
'"QUOTE',
'"UNQUOTE',
"#HASH-MARK",
"#POUND-SIGN",
"#SHARP-SIGN",
"%PERCENT",
"&AMPERSAND",
"'END-INNER-QUOTE",
"'END-QUOTE",
"'INNER-QUOTE",
"'QUOTE",
"'SINGLE-QUOTE",
"(BEGIN-PARENS",
"(IN-PARENTHESES",
"(LEFT-PAREN",
"(OPEN-PARENTHESES",
"(PAREN",
"(PARENS",
"(PARENTHESES",
")CLOSE-PAREN",
")CLOSE-PARENTHESES",
")END-PAREN",
")END-PARENS",
")END-PARENTHESES",
")END-THE-PAREN",
")PAREN",
")PARENS",
")RIGHT-PAREN",
")UN-PARENTHESES",
"+PLUS",
",COMMA",
"--DASH",
"-DASH",
"-HYPHEN",
"...ELLIPSIS",
".DECIMAL",
".DOT",
".FULL-STOP",
".PERIOD",
".POINT",
"/SLASH",
":COLON",
";SEMI-COLON",
";SEMI-COLON(1)",
"?QUESTION-MARK",
"{BRACE",
"{LEFT-BRACE",
"{OPEN-BRACE",
"}CLOSE-BRACE",
"}RIGHT-BRACE",
]
)
_PUNCTUATIONS = {
"!EXCLAMATION-POINT",
'"CLOSE-QUOTE',
'"DOUBLE-QUOTE',
'"END-OF-QUOTE',
'"END-QUOTE',
'"IN-QUOTES',
'"QUOTE',
'"UNQUOTE',
"#HASH-MARK",
"#POUND-SIGN",
"#SHARP-SIGN",
"%PERCENT",
"&AMPERSAND",
"'END-INNER-QUOTE",
"'END-QUOTE",
"'INNER-QUOTE",
"'QUOTE",
"'SINGLE-QUOTE",
"(BEGIN-PARENS",
"(IN-PARENTHESES",
"(LEFT-PAREN",
"(OPEN-PARENTHESES",
"(PAREN",
"(PARENS",
"(PARENTHESES",
")CLOSE-PAREN",
")CLOSE-PARENTHESES",
")END-PAREN",
")END-PARENS",
")END-PARENTHESES",
")END-THE-PAREN",
")PAREN",
")PARENS",
")RIGHT-PAREN",
")UN-PARENTHESES",
"+PLUS",
",COMMA",
"--DASH",
"-DASH",
"-HYPHEN",
"...ELLIPSIS",
".DECIMAL",
".DOT",
".FULL-STOP",
".PERIOD",
".POINT",
"/SLASH",
":COLON",
";SEMI-COLON",
";SEMI-COLON(1)",
"?QUESTION-MARK",
"{BRACE",
"{LEFT-BRACE",
"{OPEN-BRACE",
"}CLOSE-BRACE",
"}RIGHT-BRACE",
}
def _parse_dictionary(lines: Iterable[str], exclude_punctuations: bool) -> List[str]:
_alt_re = re.compile(r"\([0-9]+\)")
cmudict: List[Tuple[str, List[str]]] = list()
cmudict: List[Tuple[str, List[str]]] = []
for line in lines:
if not line or line.startswith(";;;"): # ignore comments
continue
......
......@@ -1339,6 +1339,9 @@ def apply_codec(
return augmented
_CPU = torch.device("cpu")
def _get_sinc_resample_kernel(
orig_freq: int,
new_freq: int,
......@@ -1347,7 +1350,7 @@ def _get_sinc_resample_kernel(
rolloff: float = 0.99,
resampling_method: str = "sinc_interp_hann",
beta: Optional[float] = None,
device: torch.device = torch.device("cpu"),
device: torch.device = _CPU,
dtype: Optional[torch.dtype] = None,
):
if not (int(orig_freq) == orig_freq and int(new_freq) == new_freq):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment