Commit ffeba11a authored by mayp777's avatar mayp777
Browse files

UPDATE

parent 29deb085
import torchaudio
from torchaudio.prototype.pipelines import VGGISH
def test_vggish():
    """Run the VGGISH bundle end to end on an MP3 asset and check the tensor shapes."""
    target_rate = VGGISH.sample_rate
    preprocess = VGGISH.get_input_processor()
    vggish = VGGISH.get_model()

    asset = torchaudio.utils.download_asset("test-assets/Chopin_Ballade_-1_In_G_Minor,_Op._23_excerpt.mp3")
    audio, source_rate = torchaudio.load(asset, backend="ffmpeg")
    # Average over axis 0 (presumably the channel axis of the loaded tensor -- confirm),
    # then resample to the rate the bundle expects.
    mono = torchaudio.functional.resample(audio.mean(axis=0), source_rate, target_rate)

    frames = preprocess(mono)
    assert frames.shape == (62, 1, 96, 64)

    embeddings = vggish(frames)
    assert embeddings.shape == (62, 128)
import pytest
import torchaudio
from torchaudio.pipelines import EMFORMER_RNNT_BASE_LIBRISPEECH
from torchaudio.prototype.pipelines import EMFORMER_RNNT_BASE_MUSTC, EMFORMER_RNNT_BASE_TEDLIUM3
@pytest.mark.parametrize(
"bundle,lang,expected",
[
(EMFORMER_RNNT_BASE_LIBRISPEECH, "en", "i have that curiosity beside me at this moment"),
(EMFORMER_RNNT_BASE_MUSTC, "en", "I had that curiosity beside me at this moment."),
(EMFORMER_RNNT_BASE_TEDLIUM3, "en", "i had that curiosity beside me at this moment"),
],
)
def test_rnnt(bundle, sample_speech, expected):
......
import pytest
import torchaudio
from torchaudio.pipelines import SQUIM_OBJECTIVE, SQUIM_SUBJECTIVE
@pytest.mark.parametrize(
    "lang,expected",
    [
        ("en", [0.9978380799293518, 4.23893404006958, 24.217193603515625]),
    ],
)
def test_squim_objective_pretrained_weights(lang, expected, sample_speech):
    """Check that the SQUIM_OBJECTIVE bundle reproduces the expected metric scores.

    ``lang`` is not read in the body; presumably it selects the clip served by
    the ``sample_speech`` fixture (defined outside this file -- confirm).
    """
    model = SQUIM_OBJECTIVE.get_model()
    # Load the sample speech clip; the returned sample rate is not needed here.
    waveform, _ = torchaudio.load(sample_speech)
    scores = model(waveform)
    # Compare every expected value against the score at the same position,
    # instead of hard-coding the number of scores.
    for idx, reference in enumerate(expected):
        assert abs(scores[idx].item() - reference) < 1e-5
@pytest.mark.parametrize(
    "task,expected",
    [
        ("speech_separation", [3.9257140159606934, 3.9391300678253174]),
    ],
)
def test_squim_subjective_pretrained_weights(task, expected, mixture_source, clean_sources):
    """Check that the SQUIM_SUBJECTIVE bundle reproduces the expected scores.

    The mixture is scored once against each clean reference, and each score is
    compared with the expected value at the same index. ``task`` is not read in
    the body; presumably the fixtures consume it (confirm).
    """
    scorer = SQUIM_SUBJECTIVE.get_model()
    # The mixture is loaded once and reused for every reference.
    mixture, _ = torchaudio.load(mixture_source)
    for idx, reference_path in enumerate(clean_sources):
        reference, _ = torchaudio.load(reference_path)
        estimate = scorer(mixture, reference)
        deviation = abs(estimate.item() - expected[idx])
        assert deviation < 1e-5
import os
import pytest
import torchaudio
from torchaudio.pipelines import (
......@@ -24,6 +26,11 @@ from torchaudio.pipelines import (
WAV2VEC2_LARGE,
WAV2VEC2_LARGE_LV60K,
WAV2VEC2_XLSR53,
WAV2VEC2_XLSR_1B,
WAV2VEC2_XLSR_300M,
WAVLM_BASE,
WAVLM_BASE_PLUS,
WAVLM_LARGE,
)
......@@ -37,6 +44,9 @@ from torchaudio.pipelines import (
HUBERT_BASE,
HUBERT_LARGE,
HUBERT_XLARGE,
WAVLM_BASE,
WAVLM_BASE_PLUS,
WAVLM_LARGE,
],
)
def test_pretraining_models(bundle):
......@@ -44,6 +54,19 @@ def test_pretraining_models(bundle):
bundle.get_model()
@pytest.mark.skipif("CI" not in os.environ, reason="Run tests only in CI environment.")
@pytest.mark.parametrize("bundle", [WAV2VEC2_XLSR_300M, WAV2VEC2_XLSR_1B])
def test_xlsr_pretraining_models(bundle):
    """Smoke-test the XLS-R pretraining bundles by building each model.

    Skipped unless the ``CI`` environment variable is set.
    """
    bundle.get_model()
@pytest.mark.parametrize(
"bundle,lang,expected",
[
......@@ -53,7 +76,7 @@ def test_pretraining_models(bundle):
(WAV2VEC2_ASR_LARGE_10M, "en", "I|HAD|THAT|CURIOUSITY|BESIDE|ME|AT|THIS|MOMENT|"),
(WAV2VEC2_ASR_LARGE_100H, "en", "I|HAD|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|"),
(WAV2VEC2_ASR_LARGE_960H, "en", "I|HAD|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|"),
(WAV2VEC2_ASR_LARGE_LV60K_10M, "en", "I|HAD|THAT|CURIOUSSITY|BESID|ME|AT|THISS|MOMENT|"),
(WAV2VEC2_ASR_LARGE_LV60K_10M, "en", "I|HAD|THAT|CURIOUSITY|BESID|ME|AT|THISS|MOMENT|"),
(WAV2VEC2_ASR_LARGE_LV60K_100H, "en", "I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|"),
(WAV2VEC2_ASR_LARGE_LV60K_960H, "en", "I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|"),
(HUBERT_ASR_LARGE, "en", "I|HAVE|THAT|CURIOSITY|BESIDE|ME|AT|THIS|MOMENT|"),
......
#!/usr/bin/env python3
"""Run smoke tests"""
import argparse
import logging
import torchaudio # noqa: F401
import torchaudio.compliance.kaldi # noqa: F401
import torchaudio.datasets # noqa: F401
import torchaudio.functional # noqa: F401
import torchaudio.models # noqa: F401
import torchaudio.pipelines # noqa: F401
import torchaudio.sox_effects # noqa: F401
import torchaudio.transforms # noqa: F401
import torchaudio.utils # noqa: F401
from torchaudio.io import StreamReader # noqa: F401
def base_smoke_test():
    """Import torchaudio and its core submodules.

    The imports are the whole check: an import error propagates to the caller.
    """
    import torchaudio  # noqa: F401
    import torchaudio.compliance.kaldi  # noqa: F401
    import torchaudio.datasets  # noqa: F401
    import torchaudio.functional  # noqa: F401
    import torchaudio.models  # noqa: F401
    import torchaudio.pipelines  # noqa: F401
    import torchaudio.sox_effects  # noqa: F401
    import torchaudio.transforms  # noqa: F401
    import torchaudio.utils  # noqa: F401
def ffmpeg_test():
    """Import ``StreamReader`` from ``torchaudio.io``; this import is the ffmpeg check."""
    from torchaudio.io import StreamReader  # noqa: F401
def _run_smoke_test(check_ffmpeg):
    """Run the base import checks, then the ffmpeg check unless it is disabled.

    Args:
        check_ffmpeg: When truthy, also run ``ffmpeg_test``; otherwise print a
            skip notice instead.
    """
    base_smoke_test()
    if check_ffmpeg:
        ffmpeg_test()
    else:
        print("Skipping ffmpeg test.")
    print("Smoke test passed.")
def main(args=None) -> None:
    """Parse the command line, optionally enable debug logging, and run the smoke test.

    Args:
        args: Argument list forwarded to ``_parse_args``. ``None`` makes
            argparse read the process command line.
    """
    opts = _parse_args(args)
    if opts.debug:
        logging.basicConfig(level=logging.DEBUG, format="%(levelname)s: %(message)s")
    # Leave the source checkout before importing anything from torchaudio.
    _chdir()
    _run_smoke_test(check_ffmpeg=opts.ffmpeg)
def _parse_args(args):
    """Parse the smoke-test command line.

    Args:
        args: Sequence of argument strings, or ``None`` to let argparse read
            the process command line.

    Returns:
        argparse.Namespace with ``ffmpeg`` (``True`` unless ``--no-ffmpeg`` is
        given) and ``debug`` (``False`` unless ``--debug`` is given).
    """
    parser = argparse.ArgumentParser()
    # The warning used to live only in a source comment; putting it in the help
    # text makes it visible to anyone running ``--help``.
    parser.add_argument(
        "--no-ffmpeg",
        dest="ffmpeg",
        action="store_false",
        help="Skip the ffmpeg check. Should not be widely used; only use it when absolutely necessary.",
    )
    parser.add_argument("--debug", action="store_true", help="Enable debug logging.")
    return parser.parse_args(args)
def _chdir():
    """Change the working directory to the directory containing this script.

    The smoke test should not be performed in the root directory of the
    checked-out source code, so the run is aborted when the new working
    directory contains an entry named ``torchaudio``.

    Raises:
        AssertionError: If ``torchaudio`` is listed in the script's directory.
    """
    import os
    from pathlib import Path

    os.chdir(Path(__file__).parent)
    # Raise explicitly rather than using ``assert`` so the guard is not
    # stripped when Python runs with ``-O``, and so the failure explains itself.
    if "torchaudio" in os.listdir(os.getcwd()):
        raise AssertionError(
            f"Smoke test must not run from the source checkout: found 'torchaudio' in {os.getcwd()}"
        )
# Run the smoke test with the process command line when executed as a script.
if __name__ == "__main__":
    main()
from smoke_test import main
# Run the smoke test with the ffmpeg check disabled.
main(["--no-ffmpeg"])
# Update the system
sudo apt-get update
sudo apt-get upgrade -y
# Install required dependencies (liblame-dev -> libmp3lame-dev)
sudo apt-get install -y libmad0 libmad0-dev libid3tag0 libid3tag0-dev libmp3lame-dev libflac-dev libvorbis-dev yasm nasm
# Install Python packages
# NOTE(review): every install below uses a plain-HTTP index URL together with
# --trusted-host; confirm this is intentional for the target environment.
# NOTE(review): both `pip3` and `pip` are used; verify they resolve to the same interpreter.
pip3 install pytorch-lightning==1.9.3 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install unidecode -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install inflect -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install pytest==7.3.1 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install expecttest -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install parameterized -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install scipy -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install scikit-learn -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install urllib3==1.26.14 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install SentencePiece -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install deep-phonemizer -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install librosa -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# pip install demucs -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install Pillow -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install flashlight -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install kaldi_io -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install flashlight-text -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install tinytag -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# Set PKG_CONFIG_PATH so pkg-config also searches /usr/local/lib/pkgconfig
export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH
# Each section below downloads a source tarball, unpacks it, builds it with
# ./configure and make, installs it with `sudo make install`, and returns to
# the previous directory with `cd -`.
# NOTE(review): several downloads pass --no-check-certificate or use plain HTTP,
# and no `set -e` is visible, so a failed step does not stop the script.
# NOTE(review): `make -j32` hard-codes 32 parallel jobs regardless of the host.
# ffmpeg ffmpeg-4.1.11 -> ffmpeg-4.4.4 --enable-libmp3lame
wget --no-check-certificate https://www.ffmpeg.org/releases/ffmpeg-4.4.4.tar.gz
tar xzf ffmpeg-4.4.4.tar.gz
cd ffmpeg-4.4.4
./configure --enable-shared --enable-libmp3lame
make -j32
sudo make install
cd -
# opencore-amr-0.1.6
wget --no-check-certificate https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.6.tar.gz
tar xzf opencore-amr-0.1.6.tar.gz
cd opencore-amr-0.1.6
./configure
make -j32
sudo make install
cd -
# amrnb-11.0.0.0 (build currently disabled)
#wget http://www.penguin.cz/~utx/ftp/amr/amrnb-11.0.0.0.tar.bz2
#tar xf amrnb-11.0.0.0.tar.bz2
#cd amrnb-11.0.0.0
#./configure
#make -j32
#sudo make install
#cd -
# amrwb-11.0.0.0 (build currently disabled)
#wget http://www.penguin.cz/~utx/ftp/amr/amrwb-11.0.0.0.tar.bz2
#tar xf amrwb-11.0.0.0.tar.bz2
#cd amrwb-11.0.0.0
#./configure
#make -j32
#sudo make install
#cd -
# libao-1.2.0
wget http://downloads.xiph.org/releases/ao/libao-1.2.0.tar.gz
tar xzf libao-1.2.0.tar.gz
cd libao-1.2.0
./configure
make -j32
sudo make install
cd -
# libogg-1.3.5
wget http://downloads.xiph.org/releases/ogg/libogg-1.3.5.tar.xz
tar xf libogg-1.3.5.tar.xz
cd libogg-1.3.5
./configure --disable-static
make -j32
sudo make install
cd -
# opus-1.4
wget --no-check-certificate http://downloads.xiph.org/releases/opus/opus-1.4.tar.gz
tar xzf opus-1.4.tar.gz
cd opus-1.4
./configure
make -j32
sudo make install
cd -
# opusfile-0.12
wget http://downloads.xiph.org/releases/opus/opusfile-0.12.tar.gz
tar xzf opusfile-0.12.tar.gz
cd opusfile-0.12
./configure
make -j32
sudo make install
cd -
# libopusenc-0.2.1
wget --no-check-certificate https://archive.mozilla.org/pub/opus/libopusenc-0.2.1.tar.gz
tar xzf libopusenc-0.2.1.tar.gz
cd libopusenc-0.2.1
./configure
make -j32
sudo make install
cd -
# opus-tools-0.2 (build currently disabled)
# wget https://archive.mozilla.org/pub/opus/opus-tools-0.2.tar.gz
# tar xzf opus-tools-0.2.tar.gz
# cd opus-tools-0.2
# ./configure
# make -j32
# sudo make install
# cd -
# Add /usr/local/lib to the dynamic linker search path (presumably so the
# libraries installed above are found by the remaining builds -- confirm).
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
# wavpack-4.80.0
wget --no-check-certificate https://www.wavpack.com/wavpack-4.80.0.tar.bz2
tar -jxf wavpack-4.80.0.tar.bz2
cd wavpack-4.80.0
./configure
make -j32
sudo make install
cd -
# sox-14.4.2
wget --no-check-certificate https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz
tar xzf sox-14.4.2.tar.gz
cd sox-14.4.2
./configure
make -j32
sudo make install
cd -
# Update the system
# NOTE(review): --assumeno answers "no" to every prompt, so this command lists
# pending updates without applying them; confirm that is the intent.
sudo dnf update --assumeno
sudo dnf clean all
sudo dnf makecache
sudo dnf install epel-release -y
# Install required dependencies
sudo dnf install -y \
libmad \
libmad-devel \
libid3tag \
libid3tag-devel \
lame-devel \
flac \
flac-devel \
libvorbis \
libvorbis-devel \
opus \
opus-devel \
yasm \
nasm
# Install Python packages
# NOTE(review): every install below uses a plain-HTTP index URL together with
# --trusted-host; confirm this is intentional for the target environment.
# NOTE(review): both `pip3` and `pip` are used; verify they resolve to the same interpreter.
pip install numpy==1.24.3 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install pytorch-lightning==1.9.3 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install unidecode -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install inflect -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install pytest==7.3.1 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install expecttest -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install parameterized -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install scipy -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install scikit-learn -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip3 install urllib3==1.26.14 -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install SentencePiece -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install deep-phonemizer -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install librosa -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install Pillow -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install flashlight -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install kaldi_io -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install flashlight-text -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
pip install tinytag -i http://mirrors.aliyun.com/pypi/simple --trusted-host mirrors.aliyun.com
# Set PKG_CONFIG_PATH so pkg-config also searches /usr/local/lib/pkgconfig
export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig:$PKG_CONFIG_PATH
# Each section below downloads a source tarball, unpacks it, builds it with
# ./configure and make, installs it with `sudo make install`, and returns to
# the previous directory with `cd -`.
# NOTE(review): several downloads pass --no-check-certificate or use plain HTTP,
# and no `set -e` is visible, so a failed step does not stop the script.
# NOTE(review): `make -j32` hard-codes 32 parallel jobs regardless of the host.
# ffmpeg ffmpeg-4.1.11 -> ffmpeg-4.4.4 --enable-libmp3lame
wget --no-check-certificate https://www.ffmpeg.org/releases/ffmpeg-4.4.4.tar.gz
tar xzf ffmpeg-4.4.4.tar.gz
cd ffmpeg-4.4.4
./configure --enable-shared --enable-libmp3lame
make -j32
sudo make install
cd -
# opencore-amr-0.1.6
wget --no-check-certificate https://sourceforge.net/projects/opencore-amr/files/opencore-amr/opencore-amr-0.1.6.tar.gz
tar xzf opencore-amr-0.1.6.tar.gz
cd opencore-amr-0.1.6
./configure
make -j32
sudo make install
cd -
# libao-1.2.0
wget http://downloads.xiph.org/releases/ao/libao-1.2.0.tar.gz
tar xzf libao-1.2.0.tar.gz
cd libao-1.2.0
./configure
make -j32
sudo make install
cd -
# libogg-1.3.5
wget http://downloads.xiph.org/releases/ogg/libogg-1.3.5.tar.xz
tar xf libogg-1.3.5.tar.xz
cd libogg-1.3.5
./configure --disable-static
make -j32
sudo make install
cd -
# opus-1.4
wget --no-check-certificate http://downloads.xiph.org/releases/opus/opus-1.4.tar.gz
tar xzf opus-1.4.tar.gz
cd opus-1.4
./configure
make -j32
sudo make install
cd -
# opusfile-0.12
wget http://downloads.xiph.org/releases/opus/opusfile-0.12.tar.gz
tar xzf opusfile-0.12.tar.gz
cd opusfile-0.12
./configure
make -j32
sudo make install
cd -
# libopusenc-0.2.1
wget --no-check-certificate https://archive.mozilla.org/pub/opus/libopusenc-0.2.1.tar.gz
tar xzf libopusenc-0.2.1.tar.gz
cd libopusenc-0.2.1
./configure
make -j32
sudo make install
cd -
# opus-tools-0.2
wget https://archive.mozilla.org/pub/opus/opus-tools-0.2.tar.gz
tar xzf opus-tools-0.2.tar.gz
cd opus-tools-0.2
./configure
make -j32
sudo make install
cd -
# Add /usr/local/lib to the dynamic linker search path (presumably so the
# libraries installed above are found by the remaining builds -- confirm).
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
# wavpack-4.80.0
wget --no-check-certificate https://www.wavpack.com/wavpack-4.80.0.tar.bz2
tar -jxf wavpack-4.80.0.tar.bz2
cd wavpack-4.80.0
./configure
make -j32
sudo make install
cd -
# sox-14.4.2
wget --no-check-certificate https://sourceforge.net/projects/sox/files/sox/14.4.2/sox-14.4.2.tar.gz
tar xzf sox-14.4.2.tar.gz
cd sox-14.4.2
./configure
make -j32
sudo make install
cd -
* RATRACE_wave_f_nm_np1_fr_goo_37.avi
* Source: HMDB-51 dataset ("wave" subset)
https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/#Downloads
* License: Creative Commons Attribution 4.0 International License.
* Note: This file does not have proper PTS (presentation timestamp) values, which makes it useful for testing seek on such files.
......@@ -76,8 +76,9 @@ def _main():
conf = cfg["model"]
del conf["w2v_path"]
keep = ["_name", "task", "model"]
# Drop every entry of conf["w2v_args"] whose key is not listed in `keep`.
# Iterate over a snapshot of the keys: deleting from a dict while iterating
# over it directly raises "RuntimeError: dictionary changed size during iteration".
for key in [k for k in conf["w2v_args"] if k not in keep]:
    del conf["w2v_args"][key]
conf["data"] = "/foo/bar/"
conf["w2v_args"]["task"]["data"] = "/foo/bar"
conf["w2v_args"]["task"]["labels"] = []
......
{
"_name": "wav2vec2",
"extractor_mode": "layer_norm",
"encoder_layers": 48,
"encoder_embed_dim": 1280,
"encoder_ffn_embed_dim": 5120,
"encoder_attention_heads": 16,
"activation_fn": "gelu",
"dropout": 0.0,
"attention_dropout": 0.0,
"activation_dropout": 0.0,
"encoder_layerdrop": 0.0,
"dropout_input": 0.1,
"dropout_features": 0.1,
"final_dim": 1024,
"layer_norm_first": true,
"conv_feature_layers": "[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]",
"conv_bias": true,
"logit_temp": 0.1,
"quantize_targets": true,
"quantize_input": false,
"same_quantizer": false,
"target_glu": false,
"feature_grad_mult": 1.0,
"latent_vars": 320,
"latent_groups": 2,
"latent_dim": 0,
"mask_length": 10,
"mask_prob": 0.65,
"mask_selection": "static",
"mask_other": 0.0,
"no_mask_overlap": false,
"mask_min_space": 1,
"mask_channel_length": 10,
"mask_channel_prob": 0.0,
"mask_channel_selection": "static",
"mask_channel_other": 0.0,
"no_mask_channel_overlap": false,
"mask_channel_min_space": 1,
"num_negatives": 100,
"negatives_from_everywhere": false,
"cross_sample_negatives": 0,
"codebook_negatives": 0,
"conv_pos": 128,
"conv_pos_groups": 16,
"latent_temp": [
2.0,
0.1,
0.999995
]
}
{
"_name": "wav2vec2",
"extractor_mode": "layer_norm",
"encoder_layers": 48,
"encoder_embed_dim": 1920,
"encoder_ffn_embed_dim": 7680,
"encoder_attention_heads": 16,
"activation_fn": "gelu",
"dropout": 0.0,
"attention_dropout": 0.0,
"activation_dropout": 0.0,
"encoder_layerdrop": 0.0,
"dropout_input": 0.1,
"dropout_features": 0.1,
"final_dim": 1024,
"layer_norm_first": true,
"conv_feature_layers": "[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]",
"conv_bias": true,
"logit_temp": 0.1,
"quantize_targets": true,
"quantize_input": false,
"same_quantizer": false,
"target_glu": false,
"feature_grad_mult": 1.0,
"latent_vars": 320,
"latent_groups": 2,
"latent_dim": 0,
"mask_length": 10,
"mask_prob": 0.65,
"mask_selection": "static",
"mask_other": 0.0,
"no_mask_overlap": false,
"mask_min_space": 1,
"mask_channel_length": 10,
"mask_channel_prob": 0.0,
"mask_channel_selection": "static",
"mask_channel_other": 0.0,
"no_mask_channel_overlap": false,
"mask_channel_min_space": 1,
"num_negatives": 100,
"negatives_from_everywhere": false,
"cross_sample_negatives": 0,
"codebook_negatives": 0,
"conv_pos": 128,
"conv_pos_groups": 16,
"latent_temp": [
2.0,
0.1,
0.999995
]
}
{
"_name": "wav2vec2",
"extractor_mode": "layer_norm",
"encoder_layers": 24,
"encoder_embed_dim": 1024,
"encoder_ffn_embed_dim": 4096,
"encoder_attention_heads": 16,
"activation_fn": "gelu",
"dropout": 0.0,
"attention_dropout": 0.0,
"activation_dropout": 0.0,
"encoder_layerdrop": 0.0,
"dropout_input": 0.0,
"dropout_features": 0.0,
"final_dim": 768,
"layer_norm_first": true,
"conv_feature_layers": "[(512, 10, 5)] + [(512, 3, 2)] * 4 + [(512,2,2)] + [(512,2,2)]",
"conv_bias": true,
"logit_temp": 0.1,
"quantize_targets": true,
"quantize_input": false,
"same_quantizer": false,
"target_glu": false,
"feature_grad_mult": 1.0,
"latent_vars": 320,
"latent_groups": 2,
"latent_dim": 0,
"mask_length": 10,
"mask_prob": 0.65,
"mask_selection": "static",
"mask_other": 0.0,
"no_mask_overlap": false,
"mask_min_space": 1,
"mask_channel_length": 10,
"mask_channel_prob": 0.0,
"mask_channel_selection": "static",
"mask_channel_other": 0.0,
"no_mask_channel_overlap": false,
"mask_channel_min_space": 1,
"num_negatives": 100,
"negatives_from_everywhere": false,
"cross_sample_negatives": 0,
"codebook_negatives": 0,
"conv_pos": 128,
"conv_pos_groups": 16,
"latent_temp": [
2.0,
0.1,
0.999995
]
}
{
"activation_dropout": 0.0,
"apply_spec_augment": true,
"architectures": [
"Wav2Vec2Model"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"codevector_dim": 1024,
"contrastive_logits_temperature": 0.1,
"conv_bias": true,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "sum",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": true,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_dropout": 0.0,
"feat_extract_norm": "layer",
"feat_proj_dropout": 0.1,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.0,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_size": 1280,
"initializer_range": 0.02,
"intermediate_size": 5120,
"layer_norm_eps": 1e-05,
"layerdrop": 0.1,
"mask_feature_length": 10,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_prob": 0.075,
"model_type": "wav2vec2",
"num_attention_heads": 16,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 48,
"num_negatives": 100,
"pad_token_id": 0,
"proj_codevector_dim": 1024,
"torch_dtype": "float32",
"transformers_version": "4.12.0.dev0",
"use_weighted_layer_sum": false
}
{
"activation_dropout": 0.0,
"apply_spec_augment": true,
"architectures": [
"Wav2Vec2Model"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"codevector_dim": 1024,
"contrastive_logits_temperature": 0.1,
"conv_bias": true,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "sum",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": true,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_dropout": 0.0,
"feat_extract_norm": "layer",
"feat_proj_dropout": 0.1,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.0,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_size": 1920,
"initializer_range": 0.02,
"intermediate_size": 7680,
"layer_norm_eps": 1e-05,
"layerdrop": 0.1,
"mask_feature_length": 10,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_prob": 0.075,
"model_type": "wav2vec2",
"num_attention_heads": 16,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 48,
"num_negatives": 100,
"pad_token_id": 0,
"proj_codevector_dim": 1024,
"torch_dtype": "float32",
"transformers_version": "4.12.0.dev0"
}
{
"activation_dropout": 0.0,
"apply_spec_augment": true,
"architectures": [
"Wav2Vec2Model"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"codevector_dim": 768,
"contrastive_logits_temperature": 0.1,
"conv_bias": true,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "sum",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": true,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_dropout": 0.0,
"feat_extract_norm": "layer",
"feat_proj_dropout": 0.1,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.0,
"gradient_checkpointing": false,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"layerdrop": 0.1,
"mask_feature_length": 10,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_prob": 0.075,
"model_type": "wav2vec2",
"num_attention_heads": 16,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_feat_extract_layers": 7,
"num_hidden_layers": 24,
"num_negatives": 100,
"pad_token_id": 0,
"proj_codevector_dim": 768,
"torch_dtype": "float32",
"transformers_version": "4.12.0.dev0",
"use_weighted_layer_sum": false
}
{
"activation_dropout": 0.0,
"adapter_kernel_size": 3,
"adapter_stride": 2,
"add_adapter": false,
"apply_spec_augment": true,
"architectures": [
"WavLMModel"
],
"attention_dropout": 0.1,
"bos_token_id": 1,
"classifier_proj_size": 256,
"codevector_dim": 256,
"contrastive_logits_temperature": 0.1,
"conv_bias": false,
"conv_dim": [
512,
512,
512,
512,
512,
512,
512
],
"conv_kernel": [
10,
3,
3,
3,
3,
2,
2
],
"conv_stride": [
5,
2,
2,
2,
2,
2,
2
],
"ctc_loss_reduction": "sum",
"ctc_zero_infinity": false,
"diversity_loss_weight": 0.1,
"do_stable_layer_norm": false,
"eos_token_id": 2,
"feat_extract_activation": "gelu",
"feat_extract_norm": "group",
"feat_proj_dropout": 0.1,
"feat_quantizer_dropout": 0.0,
"final_dropout": 0.0,
"freeze_feat_extract_train": true,
"hidden_act": "gelu",
"hidden_dropout": 0.1,
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-05,
"layerdrop": 0.05,
"mask_channel_length": 10,
"mask_channel_min_space": 1,
"mask_channel_other": 0.0,
"mask_channel_prob": 0.0,
"mask_channel_selection": "static",
"mask_feature_length": 10,
"mask_feature_min_masks": 0,
"mask_feature_prob": 0.0,
"mask_time_length": 10,
"mask_time_min_masks": 2,
"mask_time_min_space": 1,
"mask_time_other": 0.0,
"mask_time_prob": 0.05,
"mask_time_selection": "static",
"max_bucket_distance": 800,
"model_type": "wavlm",
"no_mask_channel_overlap": false,
"no_mask_time_overlap": false,
"num_adapter_layers": 3,
"num_attention_heads": 12,
"num_buckets": 320,
"num_codevector_groups": 2,
"num_codevectors_per_group": 320,
"num_conv_pos_embedding_groups": 16,
"num_conv_pos_embeddings": 128,
"num_ctc_classes": 80,
"num_feat_extract_layers": 7,
"num_hidden_layers": 12,
"num_negatives": 100,
"output_hidden_size": 768,
"pad_token_id": 0,
"proj_codevector_dim": 256,
"tokenizer_class": "Wav2Vec2CTCTokenizer",
"torch_dtype": "float32",
"transformers_version": "4.15.0.dev0",
"use_weighted_layer_sum": false,
"vocab_size": 32
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment