"...py_test/git@developer.sourcefind.cn:change/sglang.git" did not exist on "96a5e4dd795b675210b0d18f5e9fab69ec69bb6e"
Unverified commit be236361 authored by Nicolas Patry, committed by GitHub

Adding `batch_size` support for (almost) all pipelines (#13724)



* Tentative enabling of `batch_size` for pipelines.

* Add systematic test for pipeline batching.

* Enabling batch_size on almost all pipelines

- Not `zero-shot` (it's already passing things as batched, so it's trickier)
- Not `QA` (preprocess uses squad features; we need to switch to real
tensors at this boundary).

* Adding `min_length_for_response` for conversational.

* Making CTC and speech mappings available regardless of framework.

* Attempt at fixing automatic tests (ffmpeg not enabled for fast tests)

* Removing ffmpeg dependency in tests.

* Small fixes.

* Slight cleanup.

* Adding docs

and addressing comments.

* Quality.

* Update docs/source/main_classes/pipelines.rst
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/question_answering.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/zero_shot_classification.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Improving docs.

* Update docs/source/main_classes/pipelines.rst
Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>

* N -> observed_batch_size

softmax trick.

* Follow `padding_side`.

* Supporting image pipeline batching (and padding).

* Rename `unbatch` -> `loader_batch`.

* Forgot `unbatch_size`.

* Custom padding for offset mappings.

* Attempt to remove librosa.

* Adding require_audio.

* torchaudio.

* Back to using datasets librosa.

* Adding help to set a pad_token on the tokenizer.

* Update src/transformers/pipelines/base.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/base.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Update src/transformers/pipelines/base.py
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

* Quality.
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
Co-authored-by: Philipp Schmid <32632186+philschmid@users.noreply.github.com>
parent 4469010c
......@@ -71,6 +71,11 @@ GPU. If it doesn't don't hesitate to create an issue.
.. code-block::
import datasets
from transformers import pipeline
from transformers.pipelines.base import KeyDataset
import tqdm
pipe = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h", device=0)
dataset = datasets.load_dataset("superb", name="asr", split="test")
......@@ -85,6 +90,144 @@ GPU. If it doesn't don't hesitate to create an issue.
.. autofunction:: transformers.pipeline
Pipeline batching
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
All pipelines (except `zero-shot-classification` and `question-answering` currently) can use batching. This will work
whenever the pipeline uses its streaming ability (so when passing lists or :obj:`Dataset`).
.. code-block::
from transformers import pipeline
from transformers.pipelines.base import KeyDataset
import datasets
import tqdm
dataset = datasets.load_dataset("imdb", name="plain_text", split="unsupervised")
pipe = pipeline("text-classification", device=0)
for out in pipe(KeyDataset(dataset, "text"), batch_size=8, truncation="only_first"):
print(out)
# [{'label': 'POSITIVE', 'score': 0.9998743534088135}]
# Exactly the same output as before, but the contents are passed
# as batches to the model
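Batching also works when simply passing a plain Python list, since lists go through the same streaming path. A minimal
sketch of that case (the example sentences below are made up for illustration):
.. code-block::

    from transformers import pipeline

    pipe = pipeline("text-classification", device=0)
    # Hypothetical inputs, only to illustrate the call.
    sentences = ["I loved this movie", "This was a waste of time", "Decent, not great"] * 4

    # With a list and a PyTorch model, the pipeline streams the data through a DataLoader internally;
    # `batch_size` controls how many sentences are forwarded through the model at once.
    for out in pipe(sentences, batch_size=4, truncation="only_first"):
        print(out)
    # {'label': 'POSITIVE', 'score': ...} for each sentence, same outputs as without batching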
.. warning::
However, this is not automatically a win for performance. It can be either a 10x speedup or a 5x slowdown depending
on hardware, data, and the actual model being used.
Example where it's mostly a speedup:
.. code-block::
from transformers import pipeline
from torch.utils.data import Dataset
import tqdm
pipe = pipeline("text-classification", device=0)
class MyDataset(Dataset):
def __len__(self):
return 5000
def __getitem__(self, i):
return "This is a test"
dataset = MyDataset()
for batch_size in [1, 8, 64, 256]:
print("-" * 30)
print(f"Streaming batch_size={batch_size}")
for out in tqdm.tqdm(pipe(dataset, batch_size=batch_size), total=len(dataset)):
pass
.. code-block::
# On GTX 970
------------------------------
Streaming no batching
100%|██████████████████████████████████████████████████████████████████████| 5000/5000 [00:26<00:00, 187.52it/s]
------------------------------
Streaming batch_size=8
100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:04<00:00, 1205.95it/s]
------------------------------
Streaming batch_size=64
100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:02<00:00, 2478.24it/s]
------------------------------
Streaming batch_size=256
100%|█████████████████████████████████████████████████████████████████████| 5000/5000 [00:01<00:00, 2554.43it/s]
(diminishing returns, saturated the GPU)
Example where it's mostly a slowdown:
.. code-block::
class MyDataset(Dataset):
def __len__(self):
return 5000
def __getitem__(self, i):
if i % 64 == 0:
n = 100
else:
n = 1
return "This is a test" * n
This dataset has an occasional very long sentence compared to the others. In that case, the **whole** batch will need
to be 400 tokens long, so the whole batch will be [64, 400] instead of [64, 4], leading to the high slowdown. Even
worse, on bigger batches, the program simply crashes.
.. code-block::
------------------------------
Streaming no batching
100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:05<00:00, 183.69it/s]
------------------------------
Streaming batch_size=8
100%|█████████████████████████████████████████████████████████████████████| 1000/1000 [00:03<00:00, 265.74it/s]
------------------------------
Streaming batch_size=64
100%|██████████████████████████████████████████████████████████████████████| 1000/1000 [00:26<00:00, 37.80it/s]
------------------------------
Streaming batch_size=256
0%| | 0/1000 [00:00<?, ?it/s]
Traceback (most recent call last):
File "/home/nicolas/src/transformers/test.py", line 42, in <module>
for out in tqdm.tqdm(pipe(dataset, batch_size=256), total=len(dataset)):
....
q = q / math.sqrt(dim_per_head) # (bs, n_heads, q_length, dim_per_head)
RuntimeError: CUDA out of memory. Tried to allocate 376.00 MiB (GPU 0; 3.95 GiB total capacity; 1.72 GiB already allocated; 354.88 MiB free; 2.46 GiB reserved in total by PyTorch)
There are no good (general) solutions for this problem, and your mileage may vary depending on your use case.
For users, a rule of thumb is:
- **Measure performance on your load, with your hardware. Measure, measure, and keep measuring. Real numbers are the
only way to go.**
- If you are latency constrained (live product doing inference), don't batch.
- If you are using CPU, don't batch.
- If you are optimizing for throughput (you want to run your model on a bunch of static data) on GPU, then:
- If you have no clue about the size of the sequence_length ("natural" data), don't batch by default; measure and
tentatively try to add it, and add OOM checks to recover when it fails (and it will fail at some point if you don't
control the sequence_length).
- If your sequence_length is super regular, then batching is more likely to be VERY interesting; measure and push
it until you get OOMs.
- The larger the GPU, the more likely batching is going to be interesting.
- As soon as you enable batching, make sure you can handle OOMs nicely.
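To illustrate the last point, one possible (purely illustrative) way of handling OOMs is to catch the error and retry
with a smaller batch. The ``run_with_oom_fallback`` helper and the halving strategy below are assumptions for the
sketch, not part of the pipeline API:
.. code-block::

    import torch
    from transformers import pipeline

    pipe = pipeline("text-classification", device=0)

    def run_with_oom_fallback(pipe, data, batch_size=64):
        # Halve the batch size on CUDA OOM until the data fits (down to batch_size=1).
        while batch_size >= 1:
            try:
                return [out for out in pipe(data, batch_size=batch_size)]
            except RuntimeError as e:
                if "out of memory" not in str(e) or batch_size == 1:
                    raise
                torch.cuda.empty_cache()
                batch_size //= 2

    results = run_with_oom_fallback(pipe, ["This is a test"] * 1000, batch_size=256)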
Implementing a pipeline
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -584,6 +584,7 @@ if is_torch_available():
[
"MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING",
"MODEL_FOR_CAUSAL_LM_MAPPING",
"MODEL_FOR_CTC_MAPPING",
"MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING",
"MODEL_FOR_IMAGE_SEGMENTATION_MAPPING",
"MODEL_FOR_MASKED_LM_MAPPING",
......@@ -594,6 +595,7 @@ if is_torch_available():
"MODEL_FOR_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING",
"MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING",
"MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING",
"MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING",
"MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_MAPPING",
......@@ -2430,6 +2432,7 @@ if TYPE_CHECKING:
from .models.auto import (
MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING,
MODEL_FOR_CAUSAL_LM_MAPPING,
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING,
MODEL_FOR_IMAGE_SEGMENTATION_MAPPING,
MODEL_FOR_MASKED_LM_MAPPING,
......@@ -2440,6 +2443,7 @@ if TYPE_CHECKING:
MODEL_FOR_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING,
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING,
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING,
MODEL_MAPPING,
......
......@@ -169,6 +169,10 @@ class AutomaticSpeechRecognitionPipeline(Pipeline):
elif model_class in MODEL_FOR_CTC_MAPPING.values():
outputs = self.model(**model_inputs)
tokens = outputs.logits.squeeze(0).argmax(dim=-1)
else:
logger.warning("This is an unknown class, treating it as CTC.")
outputs = self.model(**model_inputs)
tokens = outputs.logits.squeeze(0).argmax(dim=-1)
return tokens
def postprocess(self, model_outputs):
......
......@@ -25,6 +25,7 @@ from contextlib import contextmanager
from os.path import abspath, exists
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union
import numpy as np
from packaging import version
from ..feature_extraction_utils import PreTrainedFeatureExtractor
......@@ -59,12 +60,80 @@ if TYPE_CHECKING:
logger = logging.get_logger(__name__)
def collate_fn(items):
def no_collate_fn(items):
if len(items) != 1:
raise ValueError("This collate_fn is meant to be used with batch_size=1")
return items[0]
def _pad(items, key, padding_value, padding_side):
batch_size = len(items)
if isinstance(items[0][key], torch.Tensor):
# Others include `attention_mask` etc...
shape = items[0][key].shape
dim = len(shape)
if dim == 4:
# This is probably an image, so padding shouldn't be necessary
# B, C, H, W
return torch.cat([item[key] for item in items], dim=0)
max_length = max(item[key].shape[1] for item in items)
dtype = items[0][key].dtype
if dim == 2:
tensor = torch.zeros((batch_size, max_length), dtype=dtype) + padding_value
elif dim == 3:
tensor = torch.zeros((batch_size, max_length, shape[-1]), dtype=dtype) + padding_value
for i, item in enumerate(items):
if dim == 2:
if padding_side == "left":
tensor[i, -len(item[key][0]) :] = item[key][0].clone()
else:
tensor[i, : len(item[key][0])] = item[key][0].clone()
elif dim == 3:
if padding_side == "left":
tensor[i, -len(item[key][0]) :, :] = item[key][0].clone()
else:
tensor[i, : len(item[key][0]), :] = item[key][0].clone()
return tensor
else:
return [item[key] for item in items]
def pad_collate_fn(tokenizer, feature_extractor):
padding_side = "right"
if tokenizer is None and feature_extractor is None:
raise ValueError("Pipeline without tokenizer or feature_extractor cannot do batching")
if tokenizer is not None:
if tokenizer.pad_token_id is None:
raise ValueError(
"Pipeline with tokenizer without pad_token cannot do batching. You can try to set it with "
"`pipe.tokenizer.pad_token_id = model.config.eos_token_id`."
)
else:
padding_value = tokenizer.pad_token_id
padding_side = tokenizer.padding_side
if feature_extractor is not None:
# Feature extractor can be images, where no padding is expected
padding_value = getattr(feature_extractor, "padding_value", None)
padding_side = getattr(feature_extractor, "padding_side", None)
def inner(items):
keys = set(items[0].keys())
for item in items:
if set(item.keys()) != keys:
raise ValueError(
f"The elements of the batch contain different keys. Cannot batch them ({set(item.keys())} != {keys})"
)
# input_values, input_pixels, input_ids, ...
padded = {
key: _pad(items, key, padding_value if key.startswith("input_") else 0, padding_side) for key in keys
}
return padded
return inner
def infer_framework_load_model(
model,
config: AutoConfig,
......@@ -591,6 +660,13 @@ PIPELINE_INIT_ARGS = r"""
is provided.
task (:obj:`str`, defaults to :obj:`""`):
A task-identifier for the pipeline.
num_workers (:obj:`int`, `optional`, defaults to 8):
When the pipeline will use `DataLoader` (when passing a dataset, on GPU for a Pytorch model), the number of
workers to be used.
batch_size (:obj:`int`, `optional`, defaults to 1):
When the pipeline will use `DataLoader` (when passing a dataset, on GPU for a Pytorch model), the size of
the batch to use; for inference this is not always beneficial, please read `Batching with pipelines
<https://huggingface.co/transformers/main_classes/pipelines.html#pipeline-batching>`_ .
args_parser (:class:`~transformers.pipelines.ArgumentHandler`, `optional`):
Reference to the object in charge of parsing supplied pipeline parameters.
device (:obj:`int`, `optional`, defaults to -1):
......@@ -617,10 +693,44 @@ if is_torch_available():
return processed
class PipelineIterator(IterableDataset):
def __init__(self, loader, infer, params):
def __init__(self, loader, infer, params, loader_batch_size=None):
"""
Roughly equivalent to
.. code-block::
for item in loader:
yield infer(item, **params)
Arguments:
loader (:obj:`torch.utils.data.DataLoader` or any iterator):
The iterator that will be used to apply :obj:`infer` on.
infer (any function):
The function to apply on each element of :obj:`loader`.
params (:obj:`dict`):
The parameters passed to :obj:`infer` along with every item.
loader_batch_size (:obj:`int`, `optional`):
If specified, the items of :obj:`loader` are supposed to come as batches, and are loader_batched here,
making it roughly behave as
.. code-block::
for items in loader:
for i in range(loader_batch_size):
item = items[i]
yield infer(item, **params)
"""
self.loader = loader
self.infer = infer
self.params = params
if loader_batch_size == 1:
# Let's spare some time by deactivating altogether
loader_batch_size = None
self.loader_batch_size = loader_batch_size
# Internal bookkeeping
self._loader_batch_index = None
self._loader_batch_data = None
def __len__(self):
return len(self.loader)
......@@ -629,10 +739,49 @@ if is_torch_available():
self.iterator = iter(self.loader)
return self
def loader_batch_item(self):
if isinstance(self._loader_batch_data, torch.Tensor):
result = self._loader_batch_data[self._loader_batch_index]
else:
loader_batched = {}
for k, element in self._loader_batch_data.items():
if k == "past_key_values":
continue
if isinstance(element[self._loader_batch_index], torch.Tensor):
loader_batched[k] = element[self._loader_batch_index].unsqueeze(0)
elif isinstance(element[self._loader_batch_index], np.ndarray):
loader_batched[k] = np.expand_dims(element[self._loader_batch_index], 0)
else:
loader_batched[k] = element[self._loader_batch_index]
result = self._loader_batch_data.__class__(loader_batched)
self._loader_batch_index += 1
return result
def __next__(self):
if self._loader_batch_index is not None and self._loader_batch_index < self.loader_batch_size:
return self.loader_batch_item()
item = next(self.iterator)
processed = self.infer(item, **self.params)
return processed
if self.loader_batch_size is not None:
if isinstance(processed, torch.Tensor):
first_tensor = processed
else:
key = list(processed.keys())[0]
first_tensor = processed[key]
if isinstance(first_tensor, list):
observed_batch_size = len(first_tensor)
else:
observed_batch_size = first_tensor.shape[0]
if 0 < observed_batch_size < self.loader_batch_size:
# Could be last batch so we can't unroll as many
# elements.
self.loader_batch_size = observed_batch_size
self._loader_batch_data = processed
self._loader_batch_index = 0
return self.loader_batch_item()
else:
return processed
class KeyDataset(Dataset):
def __init__(self, dataset: Dataset, key: str):
......@@ -881,17 +1030,20 @@ class Pipeline(_ScikitCompat):
raise ValueError(f"Framework {self.framework} is not supported")
return model_outputs
def get_iterator(self, inputs, num_workers: int, preprocess_params, forward_params, postprocess_params):
def get_iterator(
self, inputs, num_workers: int, batch_size: int, preprocess_params, forward_params, postprocess_params
):
if "TOKENIZERS_PARALLELISM" not in os.environ:
logger.info("Disabling tokenizer parallelism, we're using DataLoader multithreading already")
os.environ["TOKENIZERS_PARALLELISM"] = "false"
dataset = PipelineDataset(inputs, self.preprocess, preprocess_params)
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=1, collate_fn=collate_fn)
model_iterator = PipelineIterator(dataloader, self.forward, forward_params)
collate_fn = no_collate_fn if batch_size == 1 else pad_collate_fn(self.tokenizer, self.feature_extractor)
dataloader = DataLoader(dataset, num_workers=num_workers, batch_size=batch_size, collate_fn=collate_fn)
model_iterator = PipelineIterator(dataloader, self.forward, forward_params, loader_batch_size=batch_size)
final_iterator = PipelineIterator(model_iterator, self.postprocess, postprocess_params)
return final_iterator
def __call__(self, inputs, *args, num_workers=8, **kwargs):
def __call__(self, inputs, *args, num_workers=0, batch_size=1, **kwargs):
if args:
logger.warning(f"Ignoring args : {args}")
preprocess_params, forward_params, postprocess_params = self._sanitize_parameters(**kwargs)
......@@ -910,14 +1062,16 @@ class Pipeline(_ScikitCompat):
if isinstance(inputs, list):
if self.framework == "pt":
final_iterator = self.get_iterator(
inputs, num_workers, preprocess_params, forward_params, postprocess_params
inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
)
outputs = [output for output in final_iterator]
return outputs
else:
return self.run_multi(inputs, preprocess_params, forward_params, postprocess_params)
elif Dataset is not None and isinstance(inputs, Dataset):
return self.get_iterator(inputs, num_workers, preprocess_params, forward_params, postprocess_params)
return self.get_iterator(
inputs, num_workers, batch_size, preprocess_params, forward_params, postprocess_params
)
else:
return self.run_single(inputs, preprocess_params, forward_params, postprocess_params)
......
......@@ -243,7 +243,7 @@ class ConversationalPipeline(Pipeline):
return outputs[0]
return outputs
def preprocess(self, conversation: Conversation) -> Dict[str, Any]:
def preprocess(self, conversation: Conversation, min_length_for_response=32) -> Dict[str, Any]:
if not isinstance(conversation, Conversation):
raise ValueError("ConversationalPipeline, expects Conversation as inputs")
if conversation.new_user_input is None:
......@@ -274,18 +274,18 @@ class ConversationalPipeline(Pipeline):
if "attention_mask" in model_inputs:
model_inputs["attention_mask"] = model_inputs["attention_mask"][:, -trim:]
conversation = model_inputs.pop("conversation")
model_inputs["max_length"] = max_length
generate_kwargs["max_length"] = max_length
output_ids = self.model.generate(**model_inputs, **generate_kwargs)
if self.model.config.is_encoder_decoder:
start_position = 1
else:
start_position = n
return {"output_ids": output_ids[0, start_position:], "conversation": conversation}
return {"output_ids": output_ids[:, start_position:], "conversation": conversation}
def postprocess(self, model_outputs, clean_up_tokenization_spaces=True):
output_ids = model_outputs["output_ids"]
answer = self.tokenizer.decode(
output_ids,
output_ids[0],
skip_special_tokens=True,
clean_up_tokenization_spaces=clean_up_tokenization_spaces,
)
......
......@@ -89,14 +89,14 @@ class FillMaskPipeline(Pipeline):
def _forward(self, model_inputs):
model_outputs = self.model(**model_inputs)
model_outputs["input_ids"] = model_inputs["input_ids"][0]
model_outputs["input_ids"] = model_inputs["input_ids"]
return model_outputs
def postprocess(self, model_outputs, top_k=5, target_ids=None):
# Cap top_k if there are targets
if target_ids is not None and target_ids.shape[0] < top_k:
top_k = target_ids.shape[0]
input_ids = model_outputs["input_ids"]
input_ids = model_outputs["input_ids"][0]
outputs = model_outputs["logits"]
result = []
......
......@@ -114,11 +114,12 @@ class ObjectDetectionPipeline(Pipeline):
def _forward(self, model_inputs):
target_size = model_inputs.pop("target_size")
outputs = self.model(**model_inputs)
model_outputs = {"outputs": outputs, "target_size": target_size}
model_outputs = outputs.__class__({"target_size": target_size, **outputs})
return model_outputs
def postprocess(self, model_outputs, threshold=0.9):
raw_annotations = self.feature_extractor.post_process(model_outputs["outputs"], model_outputs["target_size"])
target_size = model_outputs["target_size"]
raw_annotations = self.feature_extractor.post_process(model_outputs, target_size)
raw_annotation = raw_annotations[0]
keep = raw_annotation["scores"] > threshold
scores = raw_annotation["scores"][keep]
......
......@@ -8,9 +8,12 @@ from ..data import SquadExample, SquadFeatures, squad_convert_examples_to_featur
from ..file_utils import PaddingStrategy, add_end_docstrings, is_tf_available, is_torch_available
from ..modelcard import ModelCard
from ..tokenization_utils import PreTrainedTokenizer
from ..utils import logging
from .base import PIPELINE_INIT_ARGS, ArgumentHandler, Pipeline
logger = logging.get_logger(__name__)
if TYPE_CHECKING:
from ..modeling_tf_utils import TFPreTrainedModel
from ..modeling_utils import PreTrainedModel
......@@ -241,6 +244,9 @@ class QuestionAnsweringPipeline(Pipeline):
- **end** (:obj:`int`) -- The character end index of the answer (in the tokenized version of the input).
- **answer** (:obj:`str`) -- The answer to the question.
"""
if kwargs.get("batch_size", 1) > 1:
logger.error("Batch_size > 1 is not supported for question answering pipeline, setting it to 1.")
kwargs["batch_size"] = 1
# Convert inputs to features
examples = self._args_parser(*args, **kwargs)
......
......@@ -204,12 +204,12 @@ class TokenClassificationPipeline(Pipeline):
offset_mapping = model_inputs.pop("offset_mapping", None)
sentence = model_inputs.pop("sentence")
if self.framework == "tf":
outputs = self.model(model_inputs.data)[0][0]
logits = self.model(model_inputs.data)[0]
else:
outputs = self.model(**model_inputs)[0][0]
logits = self.model(**model_inputs)[0]
return {
"outputs": outputs,
"logits": logits,
"special_tokens_mask": special_tokens_mask,
"offset_mapping": offset_mapping,
"sentence": sentence,
......@@ -217,13 +217,16 @@ class TokenClassificationPipeline(Pipeline):
}
def postprocess(self, model_outputs, aggregation_strategy=AggregationStrategy.NONE):
outputs = model_outputs["outputs"].numpy()
logits = model_outputs["logits"][0].numpy()
sentence = model_outputs["sentence"]
input_ids = model_outputs["input_ids"][0]
offset_mapping = model_outputs["offset_mapping"][0] if model_outputs["offset_mapping"] is not None else None
special_tokens_mask = model_outputs["special_tokens_mask"][0].numpy()
scores = np.exp(outputs) / np.exp(outputs).sum(-1, keepdims=True)
maxes = np.max(logits, axis=-1, keepdims=True)
shifted_exp = np.exp(logits - maxes)
scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
pre_entities = self.gather_pre_entities(
sentence, input_ids, scores, offset_mapping, special_tokens_mask, aggregation_strategy
)
......
......@@ -183,6 +183,9 @@ class ZeroShotClassificationPipeline(Pipeline):
- **labels** (:obj:`List[str]`) -- The labels sorted by order of likelihood.
- **scores** (:obj:`List[float]`) -- The probabilities for each of the labels.
"""
if kwargs.get("batch_size", 1) > 1:
logger.error("Batch size > 1 is not supported for zero-shot pipeline, setting batch_size=1.")
kwargs["batch_size"] = 1
if len(args) == 0:
pass
......
......@@ -313,6 +313,9 @@ MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING = None
MODEL_FOR_CAUSAL_LM_MAPPING = None
MODEL_FOR_CTC_MAPPING = None
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING = None
......@@ -343,6 +346,9 @@ MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING = None
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING = None
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING = None
MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING = None
......
......@@ -24,6 +24,7 @@ from transformers.testing_utils import (
require_datasets,
require_tf,
require_torch,
require_torchaudio,
slow,
)
......@@ -35,15 +36,16 @@ from .test_pipelines_common import ANY, PipelineTestCaseMeta
class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_AUDIO_CLASSIFICATION_MAPPING
@require_datasets
@slow
def run_pipeline_test(self, model, tokenizer, feature_extractor):
import datasets
def get_test_pipeline(self, model, tokenizer, feature_extractor):
audio_classifier = AudioClassificationPipeline(model=model, feature_extractor=feature_extractor)
# test with a raw waveform
audio = np.zeros((34000,))
audio2 = np.zeros((14000,))
return audio_classifier, [audio2, audio]
def run_pipeline_test(self, audio_classifier, examples):
audio2, audio = examples
output = audio_classifier(audio)
# by default a model is initialized with num_labels=2
self.assertEqual(
......@@ -61,10 +63,17 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
],
)
self.run_torchaudio(audio_classifier)
@require_datasets
@require_torchaudio
def run_torchaudio(self, audio_classifier):
import datasets
# test with a local file
dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
filename = dataset[0]["file"]
output = audio_classifier(filename)
audio = dataset[0]["audio"]["array"]
output = audio_classifier(audio)
self.assertEqual(
output,
[
......
......@@ -14,11 +14,28 @@
import unittest
import numpy as np
import pytest
from transformers import AutoFeatureExtractor, AutoTokenizer, Speech2TextForConditionalGeneration, Wav2Vec2ForCTC
from transformers import (
MODEL_FOR_CTC_MAPPING,
MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING,
AutoFeatureExtractor,
AutoTokenizer,
Speech2TextForConditionalGeneration,
Wav2Vec2ForCTC,
)
from transformers.pipelines import AutomaticSpeechRecognitionPipeline, pipeline
from transformers.testing_utils import is_pipeline_test, require_datasets, require_torch, require_torchaudio, slow
from transformers.testing_utils import (
is_pipeline_test,
require_datasets,
require_tf,
require_torch,
require_torchaudio,
slow,
)
from .test_pipelines_common import ANY, PipelineTestCaseMeta
# We can't use this mixin because it assumes TF support.
......@@ -26,14 +43,42 @@ from transformers.testing_utils import is_pipeline_test, require_datasets, requi
@is_pipeline_test
class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = {
k: v
for k, v in (list(MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING.items()) if MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING else [])
+ (MODEL_FOR_CTC_MAPPING.items() if MODEL_FOR_CTC_MAPPING else [])
}
def get_test_pipeline(self, model, tokenizer, feature_extractor):
if tokenizer is None:
# Side effect of no Fast Tokenizer class for these models, so skipping
# But the slow tokenizer test should still run as they're quite small
self.skipTest("No tokenizer available")
return
# return None, None
speech_recognizer = AutomaticSpeechRecognitionPipeline(
model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
)
# test with a raw waveform
audio = np.zeros((34000,))
audio2 = np.zeros((14000,))
return speech_recognizer, [audio, audio2]
def run_pipeline_test(self, speech_recognizer, examples):
audio = np.zeros((34000,))
outputs = speech_recognizer(audio)
self.assertEqual(outputs, {"text": ANY(str)})
@require_torch
@slow
def test_pt_defaults(self):
pipeline("automatic-speech-recognition", framework="pt")
@require_torch
def test_torch_small(self):
def test_small_model_pt(self):
import numpy as np
speech_recognizer = pipeline(
......@@ -46,6 +91,10 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
output = speech_recognizer(waveform)
self.assertEqual(output, {"text": "(Applaudissements)"})
@require_tf
def test_small_model_tf(self):
self.skipTest("Tensorflow not supported yet.")
@require_torch
def test_torch_small_no_tokenizer_files(self):
# test that model without tokenizer file cannot be loaded
......
......@@ -12,8 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import importlib
import logging
import random
import string
import unittest
from abc import abstractmethod
......@@ -21,6 +23,7 @@ from functools import lru_cache
from unittest import skipIf
from transformers import FEATURE_EXTRACTOR_MAPPING, TOKENIZER_MAPPING, AutoFeatureExtractor, AutoTokenizer, pipeline
from transformers.pipelines.base import _pad
from transformers.testing_utils import is_pipeline_test, require_torch
......@@ -73,6 +76,12 @@ def get_tiny_config_from_class(configuration_class):
@lru_cache(maxsize=100)
def get_tiny_tokenizer_from_checkpoint(checkpoint):
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
if tokenizer.vocab_size < 300:
# Wav2Vec2ForCTC for instance
# ByT5Tokenizer
# all are already small enough and have no Fast version that can
# be retrained
return tokenizer
logger.info("Training new from iterator ...")
vocabulary = string.ascii_letters + string.digits + " "
tokenizer = tokenizer.train_new_from_iterator(vocabulary, vocab_size=len(vocabulary), show_progress=False)
......@@ -87,6 +96,12 @@ def get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config):
feature_extractor = None
if hasattr(tiny_config, "image_size") and feature_extractor:
feature_extractor = feature_extractor.__class__(size=tiny_config.image_size, crop_size=tiny_config.image_size)
# Speech2TextModel specific.
if hasattr(tiny_config, "input_feat_per_channel") and feature_extractor:
feature_extractor = feature_extractor.__class__(
feature_size=tiny_config.input_feat_per_channel, num_mel_bins=tiny_config.input_feat_per_channel
)
return feature_extractor
......@@ -136,7 +151,26 @@ class PipelineTestCaseMeta(type):
else:
tokenizer = None
feature_extractor = get_tiny_feature_extractor_from_checkpoint(checkpoint, tiny_config)
self.run_pipeline_test(model, tokenizer, feature_extractor)
pipeline, examples = self.get_test_pipeline(model, tokenizer, feature_extractor)
if pipeline is None:
# The test can disable itself, but it should be very marginal
# Concerns: Wav2Vec2ForCTC without tokenizer test (FastTokenizer doesn't exist)
return
self.run_pipeline_test(pipeline, examples)
def run_batch_test(pipeline, examples):
# Need to copy because `Conversation` objects are stateful
if pipeline.tokenizer is not None and pipeline.tokenizer.pad_token_id is None:
return # No batching for this and it's OK
# 10 examples with batch size 4 means there needs to be an unfinished batch
# which is important for the unbatcher
dataset = [copy.deepcopy(random.choice(examples)) for i in range(10)]
for item in pipeline(dataset, batch_size=4):
pass
run_batch_test(pipeline, examples)
return test
......@@ -211,3 +245,85 @@ class CommonPipelineTest(unittest.TestCase):
dataset = MyDataset()
for output in text_classifier(dataset):
self.assertEqual(output, {"label": ANY(str), "score": ANY(float)})
@is_pipeline_test
class PipelinePadTest(unittest.TestCase):
@require_torch
def test_pipeline_padding(self):
import torch
items = [
{
"label": "label1",
"input_ids": torch.LongTensor([[1, 23, 24, 2]]),
"attention_mask": torch.LongTensor([[0, 1, 1, 0]]),
},
{
"label": "label2",
"input_ids": torch.LongTensor([[1, 23, 24, 43, 44, 2]]),
"attention_mask": torch.LongTensor([[0, 1, 1, 1, 1, 0]]),
},
]
self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
self.assertTrue(
torch.allclose(
_pad(items, "input_ids", 10, "right"),
torch.LongTensor([[1, 23, 24, 2, 10, 10], [1, 23, 24, 43, 44, 2]]),
)
)
self.assertTrue(
torch.allclose(
_pad(items, "input_ids", 10, "left"),
torch.LongTensor([[10, 10, 1, 23, 24, 2], [1, 23, 24, 43, 44, 2]]),
)
)
self.assertTrue(
torch.allclose(
_pad(items, "attention_mask", 0, "right"), torch.LongTensor([[0, 1, 1, 0, 0, 0], [0, 1, 1, 1, 1, 0]])
)
)
@require_torch
def test_pipeline_image_padding(self):
import torch
items = [
{
"label": "label1",
"pixel_values": torch.zeros((1, 3, 10, 10)),
},
{
"label": "label2",
"pixel_values": torch.zeros((1, 3, 10, 10)),
},
]
self.assertEqual(_pad(items, "label", 0, "right"), ["label1", "label2"])
self.assertTrue(
torch.allclose(
_pad(items, "pixel_values", 10, "right"),
torch.zeros((2, 3, 10, 10)),
)
)
@require_torch
def test_pipeline_offset_mapping(self):
import torch
items = [
{
"offset_mappings": torch.zeros([1, 11, 2], dtype=torch.long),
},
{
"offset_mappings": torch.zeros([1, 4, 2], dtype=torch.long),
},
]
self.assertTrue(
torch.allclose(
_pad(items, "offset_mappings", 0, "right"),
torch.zeros((2, 11, 2), dtype=torch.long),
),
)
......@@ -54,8 +54,11 @@ class ConversationalPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseM
else []
)
def run_pipeline_test(self, model, tokenizer, feature_extractor):
def get_test_pipeline(self, model, tokenizer, feature_extractor):
conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
return conversation_agent, [Conversation("Hi there!")]
def run_pipeline_test(self, conversation_agent, _):
# Simple
outputs = conversation_agent(Conversation("Hi there!"))
self.assertEqual(outputs, Conversation(past_user_inputs=["Hi there!"], generated_responses=[ANY(str)]))
......
......@@ -14,7 +14,15 @@
import unittest
from transformers import MODEL_MAPPING, TF_MODEL_MAPPING, CLIPConfig, FeatureExtractionPipeline, LxmertConfig, pipeline
from transformers import (
MODEL_MAPPING,
TF_MODEL_MAPPING,
CLIPConfig,
FeatureExtractionPipeline,
LxmertConfig,
Wav2Vec2Config,
pipeline,
)
from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch
from .test_pipelines_common import PipelineTestCaseMeta
......@@ -61,12 +69,12 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
raise ValueError("We expect lists of floats, nothing else")
return shape
def run_pipeline_test(self, model, tokenizer, feature_extractor):
def get_test_pipeline(self, model, tokenizer, feature_extractor):
if tokenizer is None:
self.skipTest("No tokenizer")
return
elif isinstance(model.config, (LxmertConfig, CLIPConfig)):
elif isinstance(model.config, (LxmertConfig, CLIPConfig, Wav2Vec2Config)):
self.skipTest(
"This is an Lxmert bimodal model, we need to find a more consistent way to switch on those models."
)
......@@ -81,11 +89,12 @@ class FeatureExtractionPipelineTests(unittest.TestCase, metaclass=PipelineTestCa
)
return
feature_extractor = FeatureExtractionPipeline(
model=model, tokenizer=tokenizer, feature_extractor=feature_extractor
)
return feature_extractor, ["This is a test", "This is another test"]
def run_pipeline_test(self, feature_extractor, examples):
outputs = feature_extractor("This is a test")
shape = self.get_shape(outputs)
......
......@@ -159,22 +159,32 @@ class FillMaskPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="pt")
unmasker.tokenizer.pad_token_id = None
unmasker.tokenizer.pad_token = None
self.run_pipeline_test(unmasker.model, unmasker.tokenizer, None)
self.run_pipeline_test(unmasker, [])
@require_tf
def test_model_no_pad_tf(self):
unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="tf")
unmasker.tokenizer.pad_token_id = None
unmasker.tokenizer.pad_token = None
self.run_pipeline_test(unmasker.model, unmasker.tokenizer, None)
self.run_pipeline_test(unmasker, [])
def run_pipeline_test(self, model, tokenizer, feature_extractor):
def get_test_pipeline(self, model, tokenizer, feature_extractor):
if tokenizer is None or tokenizer.mask_token_id is None:
self.skipTest("The provided tokenizer has no mask token, (probably reformer or wav2vec2)")
fill_masker = FillMaskPipeline(model=model, tokenizer=tokenizer)
examples = [
f"This is another {tokenizer.mask_token} test",
]
return fill_masker, examples
outputs = fill_masker(f"This is a {tokenizer.mask_token}")
def run_pipeline_test(self, fill_masker, examples):
tokenizer = fill_masker.tokenizer
model = fill_masker.model
outputs = fill_masker(
f"This is a {tokenizer.mask_token}",
)
self.assertEqual(
outputs,
[
......
......@@ -44,9 +44,17 @@ else:
class ImageClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING
@require_datasets
def run_pipeline_test(self, model, tokenizer, feature_extractor):
def get_test_pipeline(self, model, tokenizer, feature_extractor):
image_classifier = ImageClassificationPipeline(model=model, feature_extractor=feature_extractor)
examples = [
Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png"),
"http://images.cocodataset.org/val2017/000000039769.jpg",
]
return image_classifier, examples
@require_datasets
def run_pipeline_test(self, image_classifier, examples):
outputs = image_classifier("./tests/fixtures/tests_samples/COCO/000000039769.png")
self.assertEqual(
......
......@@ -53,9 +53,12 @@ else:
class ObjectDetectionPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_OBJECT_DETECTION_MAPPING
@require_datasets
def run_pipeline_test(self, model, tokenizer, feature_extractor):
def get_test_pipeline(self, model, tokenizer, feature_extractor):
object_detector = ObjectDetectionPipeline(model=model, feature_extractor=feature_extractor)
return object_detector, ["./tests/fixtures/tests_samples/COCO/000000039769.png"]
@require_datasets
def run_pipeline_test(self, object_detector, examples):
outputs = object_detector("./tests/fixtures/tests_samples/COCO/000000039769.png", threshold=0.0)
self.assertGreater(len(outputs), 0)
......
......@@ -32,13 +32,20 @@ class QAPipelineTests(unittest.TestCase, metaclass=PipelineTestCaseMeta):
model_mapping = MODEL_FOR_QUESTION_ANSWERING_MAPPING
tf_model_mapping = TF_MODEL_FOR_QUESTION_ANSWERING_MAPPING
def run_pipeline_test(self, model, tokenizer, feature_extractor):
def get_test_pipeline(self, model, tokenizer, feature_extractor):
if isinstance(model.config, LxmertConfig):
# This is a bimodal model, we need to find a more consistent way
# to switch on those models.
return
return None, None
question_answerer = QuestionAnsweringPipeline(model, tokenizer)
examples = [
{"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
{"question": "In what field is HuggingFace ?", "context": "HuggingFace is an AI startup."},
]
return question_answerer, examples
def run_pipeline_test(self, question_answerer, _):
outputs = question_answerer(
question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris."
)
......