Unverified Commit f21bc421 authored by Santiago Castro's avatar Santiago Castro Committed by GitHub
Browse files

Use tqdm.auto in Pipeline docs (#14920)

It's better for e.g. notebooks.
parent f012c00a
......@@ -79,14 +79,14 @@ GPU. If it doesn't, don't hesitate to create an issue.
import datasets
from transformers import pipeline
from transformers.pipelines.base import KeyDataset
import tqdm
from tqdm.auto import tqdm
pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
dataset = datasets.load_dataset("superb", name="asr", split="test")
# KeyDataset (only *pt*) will simply return the item in the dict returned by the dataset item
# as we're not interested in the *target* part of the dataset.
for out in tqdm.tqdm(pipe(KeyDataset(dataset, "file"))):
for out in tqdm(pipe(KeyDataset(dataset, "file"))):
print(out)
# {"text": "NUMBER TEN FRESH NELLY IS WAITING ON YOU GOOD NIGHT HUSBAND"}
# {"text": ....}
......@@ -130,7 +130,6 @@ whenever the pipeline uses its streaming ability (so when passing lists or `Data
from transformers import pipeline
from transformers.pipelines.base import KeyDataset
import datasets
import tqdm
dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
pipe = pipeline("text-classification", device=0)
......@@ -153,8 +152,7 @@ Example where it's mostly a speedup:
```python
from transformers import pipeline
from torch.utils.data import Dataset
import tqdm
from tqdm.auto import tqdm
pipe = pipeline("text-classification", device=0)
......@@ -172,7 +170,7 @@ dataset = MyDataset()
for batch_size in [1, 8, 64, 256]:
print("-" * 30)
print(f"Streaming batch_size={batch_size}")
for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
for out in tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
pass
```
......@@ -228,7 +226,7 @@ Streaming batch_size=256
0%| | 0/1000 [00:00<?, ?it/s]
Traceback (most recent call last):
File "/home/nicolas/src/transformers/test.py", line 42, in <module>
for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
for out in tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
....
q = q / math.sqrt(dim_per_head) # (bs, n_heads, q_length, dim_per_head)
RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment