Unverified Commit f21bc421 authored by Santiago Castro, committed by GitHub

Use tqdm.auto in Pipeline docs (#14920)

It works better in, e.g., notebooks.
parent f012c00a
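For context, `tqdm.auto` picks the right progress-bar frontend at import time: the ipywidgets-based bar when running inside a Jupyter notebook (if ipywidgets is installed), and the plain console bar otherwise. A minimal sketch of the import style this commit switches the docs to (the `range(10)` loop is just a stand-in, not from the docs):

```python
# tqdm.auto resolves to the notebook widget bar under Jupyter and falls back
# to the standard console bar in a terminal.
from tqdm.auto import tqdm

for _ in tqdm(range(10)):
    pass  # placeholder work; the docs iterate over pipeline outputs instead
```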
@@ -79,14 +79,14 @@ GPU. If it doesn't don't hesitate to create an issue.
 import datasets
 from transformers import pipeline
 from transformers.pipelines.base import KeyDataset
-import tqdm
+from tqdm.auto import tqdm

 pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
 dataset = datasets.load_dataset("superb", name="asr", split="test")

 # KeyDataset (only *pt*) will simply return the item in the dict returned by the dataset item
 # as we're not interested in the *target* part of the dataset.
-for out in tqdm.tqdm(pipe(KeyDataset(dataset, "file"))):
+for out in tqdm(pipe(KeyDataset(dataset, "file"))):
     print(out)
     # {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
     # {"text": ....}
@@ -130,7 +130,6 @@ whenever the pipeline uses its streaming ability (so when passing lists or `Data
 from transformers import pipeline
 from transformers.pipelines.base import KeyDataset
 import datasets
-import tqdm

 dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
 pipe = pipeline("text-classification", device=0)
@@ -153,8 +152,7 @@ Example where it's mostly a speedup:
 ```python
 from transformers import pipeline
 from torch.utils.data import Dataset
-import tqdm
+from tqdm.auto import tqdm

 pipe = pipeline("text-classification", device=0)
@@ -172,7 +170,7 @@ dataset = MyDataset()
 for batch_size in [1, 8, 64, 256]:
     print("-" * 30)
     print(f"Streaming batch_size={batch_size}")
-    for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
+    for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
         pass
 ```
@@ -228,7 +226,7 @@ Streaming batch_size=256
 0%|          | 0/1000 [00:00<?, ?it/s]
 Traceback (most recent call last):
   File "/home/nicolas/src/transformers/test.py", line 42, in <module>
-    for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
+    for out in tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
 ....
     q = q / math.sqrt(dim_per_head)  # (bs, n_heads, q_length, dim_per_head)
 RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)
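The traceback in the hunk above is the docs' own illustration that a large `batch_size` can exhaust GPU memory. As a rough sketch of how a caller might cope with that (this is not part of the commit or of the docs; the stand-in dataset, the fallback batch sizes, and the retry loop are assumptions for illustration):

```python
import torch
from torch.utils.data import Dataset
from tqdm.auto import tqdm
from transformers import pipeline

pipe = pipeline("text-classification", device=0)


class MyDataset(Dataset):
    # Tiny stand-in dataset so the snippet is self-contained.
    def __len__(self):
        return 1000

    def __getitem__(self, i):
        return "This is a test"


dataset = MyDataset()

# Try the largest batch size first and fall back to smaller ones on CUDA OOM.
for batch_size in (256, 64, 8, 1):
    try:
        for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
            pass
        break
    except RuntimeError as e:  # CUDA OOM surfaces as a RuntimeError
        if "out of memory" not in str(e):
            raise
        torch.cuda.empty_cache()
        print(f"batch_size={batch_size} hit CUDA OOM, retrying with a smaller batch")
```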