Commit e215e6de (unverified), authored by Matthijs Hollemans, committed by GitHub
Browse files

make SpeechT5 doc examples deterministic (#21470)

* make doc examples deterministic

* add IGNORE_RESULT
parent 182afb7d
...@@ -2296,7 +2296,9 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel): ...@@ -2296,7 +2296,9 @@ class SpeechT5ForSpeechToText(SpeechT5PreTrainedModel):
>>> from transformers import SpeechT5Processor, SpeechT5ForSpeechToText >>> from transformers import SpeechT5Processor, SpeechT5ForSpeechToText
>>> from datasets import load_dataset >>> from datasets import load_dataset
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = load_dataset(
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
... ) # doctest: +IGNORE_RESULT
>>> dataset = dataset.sort("id") >>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate >>> sampling_rate = dataset.features["audio"].sampling_rate
...@@ -2570,7 +2572,7 @@ class SpeechT5ForTextToSpeech(SpeechT5PreTrainedModel): ...@@ -2570,7 +2572,7 @@ class SpeechT5ForTextToSpeech(SpeechT5PreTrainedModel):
Example: Example:
```python ```python
>>> from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan >>> from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan, set_seed
>>> import torch >>> import torch
>>> processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts") >>> processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
...@@ -2580,10 +2582,12 @@ class SpeechT5ForTextToSpeech(SpeechT5PreTrainedModel): ...@@ -2580,10 +2582,12 @@ class SpeechT5ForTextToSpeech(SpeechT5PreTrainedModel):
>>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt") >>> inputs = processor(text="Hello, my dog is cute", return_tensors="pt")
>>> speaker_embeddings = torch.zeros((1, 512)) # or load xvectors from a file >>> speaker_embeddings = torch.zeros((1, 512)) # or load xvectors from a file
>>> set_seed(555) # make deterministic
>>> # generate speech >>> # generate speech
>>> speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder) >>> speech = model.generate_speech(inputs["input_ids"], speaker_embeddings, vocoder=vocoder)
>>> speech.shape >>> speech.shape
torch.Size([15872]) torch.Size([16384])
``` ```
""" """
return_dict = return_dict if return_dict is not None else self.config.use_return_dict return_dict = return_dict if return_dict is not None else self.config.use_return_dict
...@@ -2764,11 +2768,13 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel): ...@@ -2764,11 +2768,13 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel):
Example: Example:
```python ```python
>>> from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan >>> from transformers import SpeechT5Processor, SpeechT5ForSpeechToSpeech, SpeechT5HifiGan, set_seed
>>> from datasets import load_dataset >>> from datasets import load_dataset
>>> import torch >>> import torch
>>> dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation") >>> dataset = load_dataset(
... "hf-internal-testing/librispeech_asr_demo", "clean", split="validation"
... ) # doctest: +IGNORE_RESULT
>>> dataset = dataset.sort("id") >>> dataset = dataset.sort("id")
>>> sampling_rate = dataset.features["audio"].sampling_rate >>> sampling_rate = dataset.features["audio"].sampling_rate
...@@ -2781,10 +2787,12 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel): ...@@ -2781,10 +2787,12 @@ class SpeechT5ForSpeechToSpeech(SpeechT5PreTrainedModel):
>>> speaker_embeddings = torch.zeros((1, 512)) # or load xvectors from a file >>> speaker_embeddings = torch.zeros((1, 512)) # or load xvectors from a file
>>> set_seed(555) # make deterministic
>>> # generate speech >>> # generate speech
>>> speech = model.generate_speech(inputs["input_values"], speaker_embeddings, vocoder=vocoder) >>> speech = model.generate_speech(inputs["input_values"], speaker_embeddings, vocoder=vocoder)
>>> speech.shape >>> speech.shape
torch.Size([77312]) torch.Size([77824])
``` ```
""" """
return_dict = return_dict if return_dict is not None else self.config.use_return_dict return_dict = return_dict if return_dict is not None else self.config.use_return_dict
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment