Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
7fb2a8b3
"...git@developer.sourcefind.cn:chenpangpang/diffusers.git" did not exist on "21bbc633c4d7b9bb7f74caf4b248c6a4079a85c6"
Unverified
Commit
7fb2a8b3
authored
Oct 14, 2021
by
Patrick von Platen
Committed by
GitHub
Oct 14, 2021
Browse files
up (#14008)
parent
7604557e
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
35 additions
and
35 deletions
+35
-35
docs/source/model_doc/speech_to_text.rst
docs/source/model_doc/speech_to_text.rst
+2
-2
docs/source/model_doc/speech_to_text_2.rst
docs/source/model_doc/speech_to_text_2.rst
+2
-2
examples/pytorch/test_examples.py
examples/pytorch/test_examples.py
+2
-2
examples/research_projects/wav2vec2/README.md
examples/research_projects/wav2vec2/README.md
+2
-2
examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py
...les/research_projects/wav2vec2/test_wav2vec2_deepspeed.py
+1
-1
src/transformers/models/hubert/modeling_hubert.py
src/transformers/models/hubert/modeling_hubert.py
+2
-2
src/transformers/models/hubert/modeling_tf_hubert.py
src/transformers/models/hubert/modeling_tf_hubert.py
+2
-2
src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
...speech_encoder_decoder/modeling_speech_encoder_decoder.py
+1
-1
src/transformers/models/speech_to_text/modeling_speech_to_text.py
...sformers/models/speech_to_text/modeling_speech_to_text.py
+1
-1
src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py
src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py
+3
-3
src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
+2
-2
src/transformers/models/wav2vec2/modeling_wav2vec2.py
src/transformers/models/wav2vec2/modeling_wav2vec2.py
+4
-4
tests/test_modeling_flax_wav2vec2.py
tests/test_modeling_flax_wav2vec2.py
+1
-1
tests/test_modeling_hubert.py
tests/test_modeling_hubert.py
+1
-1
tests/test_modeling_speech_to_text.py
tests/test_modeling_speech_to_text.py
+1
-1
tests/test_modeling_tf_hubert.py
tests/test_modeling_tf_hubert.py
+1
-1
tests/test_modeling_tf_wav2vec2.py
tests/test_modeling_tf_wav2vec2.py
+1
-1
tests/test_modeling_wav2vec2.py
tests/test_modeling_wav2vec2.py
+1
-1
tests/test_pipelines_audio_classification.py
tests/test_pipelines_audio_classification.py
+1
-1
tests/test_pipelines_automatic_speech_recognition.py
tests/test_pipelines_automatic_speech_recognition.py
+4
-4
No files found.
docs/source/model_doc/speech_to_text.rst
View file @
7fb2a8b3
...
@@ -66,7 +66,7 @@ be installed as follows: ``apt install libsndfile1-dev``
...
@@ -66,7 +66,7 @@ be installed as follows: ``apt install libsndfile1-dev``
... batch["speech"] = speech
... batch["speech"] = speech
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
...
@@ -98,7 +98,7 @@ be installed as follows: ``apt install libsndfile1-dev``
...
@@ -98,7 +98,7 @@ be installed as follows: ``apt install libsndfile1-dev``
... batch["speech"] = speech
... batch["speech"] = speech
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
...
...
docs/source/model_doc/speech_to_text_2.rst
View file @
7fb2a8b3
...
@@ -68,7 +68,7 @@ predicted token ids.
...
@@ -68,7 +68,7 @@ predicted token ids.
... batch["speech"] = speech
... batch["speech"] = speech
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
...
@@ -86,7 +86,7 @@ predicted token ids.
...
@@ -86,7 +86,7 @@ predicted token ids.
>>> from datasets import load_dataset
>>> from datasets import load_dataset
>>> from transformers import pipeline
>>> from transformers import pipeline
>>> librispeech_en = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> librispeech_en = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> asr = pipeline("automatic-speech-recognition", model="facebook/s2t-wav2vec2-large-en-de", feature_extractor="facebook/s2t-wav2vec2-large-en-de")
>>> asr = pipeline("automatic-speech-recognition", model="facebook/s2t-wav2vec2-large-en-de", feature_extractor="facebook/s2t-wav2vec2-large-en-de")
>>> translation_de = asr(librispeech_en[0]["file"])
>>> translation_de = asr(librispeech_en[0]["file"])
...
...
examples/pytorch/test_examples.py
View file @
7fb2a8b3
...
@@ -391,7 +391,7 @@ class ExamplesTests(TestCasePlus):
...
@@ -391,7 +391,7 @@ class ExamplesTests(TestCasePlus):
run_speech_recognition_ctc.py
run_speech_recognition_ctc.py
--output_dir
{
tmp_dir
}
--output_dir
{
tmp_dir
}
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
--dataset_name
patrickvonplaten
/librispeech_asr_dummy
--dataset_name
hf-internal-testing
/librispeech_asr_dummy
--dataset_config_name clean
--dataset_config_name clean
--train_split_name validation
--train_split_name validation
--eval_split_name validation
--eval_split_name validation
...
@@ -460,7 +460,7 @@ class ExamplesTests(TestCasePlus):
...
@@ -460,7 +460,7 @@ class ExamplesTests(TestCasePlus):
run_wav2vec2_pretraining_no_trainer.py
run_wav2vec2_pretraining_no_trainer.py
--output_dir
{
tmp_dir
}
--output_dir
{
tmp_dir
}
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
--dataset_name
patrickvonplaten
/librispeech_asr_dummy
--dataset_name
hf-internal-testing
/librispeech_asr_dummy
--dataset_config_names clean
--dataset_config_names clean
--dataset_split_names validation
--dataset_split_names validation
--learning_rate 1e-4
--learning_rate 1e-4
...
...
examples/research_projects/wav2vec2/README.md
View file @
7fb2a8b3
...
@@ -155,7 +155,7 @@ run_asr.py \
...
@@ -155,7 +155,7 @@ run_asr.py \
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
--dataset_name=
patrickvonplaten
/librispeech_asr_dummy --dataset_config_name=clean \
--dataset_name=
hf-internal-testing
/librispeech_asr_dummy --dataset_config_name=clean \
--train_split_name=validation --validation_split_name=validation --orthography=timit \
--train_split_name=validation --validation_split_name=validation --orthography=timit \
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
--deepspeed ds_config_wav2vec2_zero2.json
--deepspeed ds_config_wav2vec2_zero2.json
...
@@ -179,7 +179,7 @@ run_asr.py \
...
@@ -179,7 +179,7 @@ run_asr.py \
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
--dataset_name=
patrickvonplaten
/librispeech_asr_dummy --dataset_config_name=clean \
--dataset_name=
hf-internal-testing
/librispeech_asr_dummy --dataset_config_name=clean \
--train_split_name=validation --validation_split_name=validation --orthography=timit \
--train_split_name=validation --validation_split_name=validation --orthography=timit \
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
--deepspeed ds_config_wav2vec2_zero3.json
--deepspeed ds_config_wav2vec2_zero3.json
...
...
examples/research_projects/wav2vec2/test_wav2vec2_deepspeed.py
View file @
7fb2a8b3
...
@@ -155,7 +155,7 @@ class TestDeepSpeedWav2Vec2(TestCasePlus):
...
@@ -155,7 +155,7 @@ class TestDeepSpeedWav2Vec2(TestCasePlus):
output_dir
=
self
.
get_auto_remove_tmp_dir
(
"./xxx"
,
after
=
False
)
output_dir
=
self
.
get_auto_remove_tmp_dir
(
"./xxx"
,
after
=
False
)
args
=
f
"""
args
=
f
"""
--model_name_or_path
{
model_name
}
--model_name_or_path
{
model_name
}
--dataset_name
patrickvonplaten
/librispeech_asr_dummy
--dataset_name
hf-internal-testing
/librispeech_asr_dummy
--dataset_config_name clean
--dataset_config_name clean
--train_split_name validation
--train_split_name validation
--validation_split_name validation
--validation_split_name validation
...
...
src/transformers/models/hubert/modeling_hubert.py
View file @
7fb2a8b3
...
@@ -953,7 +953,7 @@ class HubertModel(HubertPreTrainedModel):
...
@@ -953,7 +953,7 @@ class HubertModel(HubertPreTrainedModel):
... batch["speech"] = speech
... batch["speech"] = speech
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
...
@@ -1059,7 +1059,7 @@ class HubertForCTC(HubertPreTrainedModel):
...
@@ -1059,7 +1059,7 @@ class HubertForCTC(HubertPreTrainedModel):
... batch["speech"] = speech
... batch["speech"] = speech
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
...
...
src/transformers/models/hubert/modeling_tf_hubert.py
View file @
7fb2a8b3
...
@@ -1412,7 +1412,7 @@ class TFHubertModel(TFHubertPreTrainedModel):
...
@@ -1412,7 +1412,7 @@ class TFHubertModel(TFHubertPreTrainedModel):
... batch["speech"] = speech
... batch["speech"] = speech
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
...
@@ -1522,7 +1522,7 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
...
@@ -1522,7 +1522,7 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
... batch["speech"] = speech
... batch["speech"] = speech
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
...
...
src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
View file @
7fb2a8b3
...
@@ -414,7 +414,7 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
...
@@ -414,7 +414,7 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
...
...
src/transformers/models/speech_to_text/modeling_speech_to_text.py
View file @
7fb2a8b3
...
@@ -1306,7 +1306,7 @@ class Speech2TextForConditionalGeneration(Speech2TextPreTrainedModel):
...
@@ -1306,7 +1306,7 @@ class Speech2TextForConditionalGeneration(Speech2TextPreTrainedModel):
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_features = processor(ds["speech"][0], sampling_rate=16000, return_tensors="pt").input_features # Batch size 1
>>> input_features = processor(ds["speech"][0], sampling_rate=16000, return_tensors="pt").input_features # Batch size 1
...
...
src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py
View file @
7fb2a8b3
...
@@ -944,7 +944,7 @@ FLAX_WAV2VEC2_MODEL_DOCSTRING = """
...
@@ -944,7 +944,7 @@ FLAX_WAV2VEC2_MODEL_DOCSTRING = """
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
...
@@ -1045,7 +1045,7 @@ FLAX_WAV2VEC2_FOR_CTC_DOCSTRING = """
...
@@ -1045,7 +1045,7 @@ FLAX_WAV2VEC2_FOR_CTC_DOCSTRING = """
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
...
@@ -1233,7 +1233,7 @@ FLAX_WAV2VEC2_FOR_PRETRAINING_DOCSTRING = """
...
@@ -1233,7 +1233,7 @@ FLAX_WAV2VEC2_FOR_PRETRAINING_DOCSTRING = """
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="np").input_values # Batch size 1
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="np").input_values # Batch size 1
...
...
src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py
View file @
7fb2a8b3
...
@@ -1406,7 +1406,7 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel):
...
@@ -1406,7 +1406,7 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel):
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
...
@@ -1516,7 +1516,7 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel):
...
@@ -1516,7 +1516,7 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel):
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
...
...
src/transformers/models/wav2vec2/modeling_wav2vec2.py
View file @
7fb2a8b3
...
@@ -1146,7 +1146,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
...
@@ -1146,7 +1146,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
...
@@ -1280,7 +1280,7 @@ class Wav2Vec2ForPreTraining(Wav2Vec2PreTrainedModel):
...
@@ -1280,7 +1280,7 @@ class Wav2Vec2ForPreTraining(Wav2Vec2PreTrainedModel):
... return batch
... return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
...
@@ -1442,7 +1442,7 @@ class Wav2Vec2ForMaskedLM(Wav2Vec2PreTrainedModel):
...
@@ -1442,7 +1442,7 @@ class Wav2Vec2ForMaskedLM(Wav2Vec2PreTrainedModel):
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
...
@@ -1536,7 +1536,7 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
...
@@ -1536,7 +1536,7 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
>>> batch["speech"] = speech
>>> batch["speech"] = speech
>>> return batch
>>> return batch
>>> ds = load_dataset("
patrickvonplaten
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = load_dataset("
hf-internal-testing
/librispeech_asr_dummy", "clean", split="validation")
>>> ds = ds.map(map_to_array)
>>> ds = ds.map(map_to_array)
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
...
...
tests/test_modeling_flax_wav2vec2.py
View file @
7fb2a8b3
...
@@ -366,7 +366,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
...
@@ -366,7 +366,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
batch
[
"speech"
]
=
speech
batch
[
"speech"
]
=
speech
return
batch
return
batch
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
...
...
tests/test_modeling_hubert.py
View file @
7fb2a8b3
...
@@ -623,7 +623,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
...
@@ -623,7 +623,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
batch
[
"speech"
]
=
speech
batch
[
"speech"
]
=
speech
return
batch
return
batch
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
...
...
tests/test_modeling_speech_to_text.py
View file @
7fb2a8b3
...
@@ -723,7 +723,7 @@ class Speech2TextModelIntegrationTests(unittest.TestCase):
...
@@ -723,7 +723,7 @@ class Speech2TextModelIntegrationTests(unittest.TestCase):
batch
[
"speech"
]
=
speech
batch
[
"speech"
]
=
speech
return
batch
return
batch
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
ds
.
sort
(
"id"
).
select
(
range
(
num_samples
)).
map
(
map_to_array
)
ds
=
ds
.
sort
(
"id"
).
select
(
range
(
num_samples
)).
map
(
map_to_array
)
return
ds
[
"speech"
][:
num_samples
]
return
ds
[
"speech"
][:
num_samples
]
...
...
tests/test_modeling_tf_hubert.py
View file @
7fb2a8b3
...
@@ -489,7 +489,7 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
...
@@ -489,7 +489,7 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
batch
[
"speech"
]
=
speech
batch
[
"speech"
]
=
speech
return
batch
return
batch
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
...
...
tests/test_modeling_tf_wav2vec2.py
View file @
7fb2a8b3
...
@@ -489,7 +489,7 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
...
@@ -489,7 +489,7 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
batch
[
"speech"
]
=
speech
batch
[
"speech"
]
=
speech
return
batch
return
batch
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
...
...
tests/test_modeling_wav2vec2.py
View file @
7fb2a8b3
...
@@ -910,7 +910,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
...
@@ -910,7 +910,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
batch
[
"speech"
]
=
speech
batch
[
"speech"
]
=
speech
return
batch
return
batch
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
ds
=
ds
.
filter
(
lambda
x
:
x
[
"id"
]
in
ids
).
sort
(
"id"
).
map
(
map_to_array
)
...
...
tests/test_pipelines_audio_classification.py
View file @
7fb2a8b3
...
@@ -62,7 +62,7 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
...
@@ -62,7 +62,7 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
)
)
# test with a local file
# test with a local file
dataset
=
datasets
.
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
dataset
=
datasets
.
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
)
filename
=
dataset
[
0
][
"file"
]
filename
=
dataset
[
0
][
"file"
]
output
=
audio_classifier
(
filename
)
output
=
audio_classifier
(
filename
)
self
.
assertEqual
(
self
.
assertEqual
(
...
...
tests/test_pipelines_automatic_speech_recognition.py
View file @
7fb2a8b3
...
@@ -74,7 +74,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
...
@@ -74,7 +74,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
from
datasets
import
load_dataset
from
datasets
import
load_dataset
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
filename
=
ds
[
40
][
"file"
]
filename
=
ds
[
40
][
"file"
]
output
=
speech_recognizer
(
filename
)
output
=
speech_recognizer
(
filename
)
self
.
assertEqual
(
output
,
{
"text"
:
"A MAN SAID TO THE UNIVERSE SIR I EXIST"
})
self
.
assertEqual
(
output
,
{
"text"
:
"A MAN SAID TO THE UNIVERSE SIR I EXIST"
})
...
@@ -92,7 +92,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
...
@@ -92,7 +92,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
from
datasets
import
load_dataset
from
datasets
import
load_dataset
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
filename
=
ds
[
40
][
"file"
]
filename
=
ds
[
40
][
"file"
]
output
=
speech_recognizer
(
filename
)
output
=
speech_recognizer
(
filename
)
self
.
assertEqual
(
output
,
{
"text"
:
'Ein Mann sagte zum Universum : " Sir, ich existiert! "'
})
self
.
assertEqual
(
output
,
{
"text"
:
'Ein Mann sagte zum Universum : " Sir, ich existiert! "'
})
...
@@ -114,7 +114,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
...
@@ -114,7 +114,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
output
=
asr
(
waveform
)
output
=
asr
(
waveform
)
self
.
assertEqual
(
output
,
{
"text"
:
""
})
self
.
assertEqual
(
output
,
{
"text"
:
""
})
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
filename
=
ds
[
40
][
"file"
]
filename
=
ds
[
40
][
"file"
]
output
=
asr
(
filename
)
output
=
asr
(
filename
)
self
.
assertEqual
(
output
,
{
"text"
:
"A MAN SAID TO THE UNIVERSE SIR I EXIST"
})
self
.
assertEqual
(
output
,
{
"text"
:
"A MAN SAID TO THE UNIVERSE SIR I EXIST"
})
...
@@ -144,7 +144,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
...
@@ -144,7 +144,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
output
=
asr
(
waveform
)
output
=
asr
(
waveform
)
self
.
assertEqual
(
output
,
{
"text"
:
"(Applausi)"
})
self
.
assertEqual
(
output
,
{
"text"
:
"(Applausi)"
})
ds
=
load_dataset
(
"
patrickvonplaten
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
ds
=
load_dataset
(
"
hf-internal-testing
/librispeech_asr_dummy"
,
"clean"
,
split
=
"validation"
).
sort
(
"id"
)
filename
=
ds
[
40
][
"file"
]
filename
=
ds
[
40
][
"file"
]
output
=
asr
(
filename
)
output
=
asr
(
filename
)
self
.
assertEqual
(
output
,
{
"text"
:
"Un uomo disse all'universo:
\"
Signore, io esisto."
})
self
.
assertEqual
(
output
,
{
"text"
:
"Un uomo disse all'universo:
\"
Signore, io esisto."
})
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment