Unverified Commit 3ab68203 authored by Funtowicz Morgan, committed by GitHub

Onnx fix test (#10663)



* Allow passing kwargs to the model's from_pretrained when using pipeline.

* Disable the use of past_key_values for GPT2 when exporting to ONNX.

* style

* Remove comment.

* Appease the documentation gods

* Fix style
Co-authored-by: Lysandre <lysandre.debut@reseau.eseo.fr>
parent a637ae00
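
For reference, a minimal usage sketch of the keyword forwarding this commit introduces, assuming the convert entry point from transformers.convert_graph_to_onnx; the output path is only illustrative:

from pathlib import Path

from transformers.convert_graph_to_onnx import convert

# Extra keyword arguments (here use_cache=False) are forwarded to the model's
# from_pretrained(...) through the pipeline, so GPT-2 is exported without past key/values.
convert(
    framework="pt",
    model="gpt2",
    output=Path("onnx/gpt2.onnx"),  # illustrative output location
    opset=12,
    use_cache=False,
)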
@@ -222,7 +222,9 @@ def infer_shapes(nlp: Pipeline, framework: str) -> Tuple[List[str], List[str], D
     return input_vars, output_names, dynamic_axes, tokens


-def load_graph_from_args(pipeline_name: str, framework: str, model: str, tokenizer: Optional[str] = None) -> Pipeline:
+def load_graph_from_args(
+    pipeline_name: str, framework: str, model: str, tokenizer: Optional[str] = None, **models_kwargs
+) -> Pipeline:
     """
     Convert the set of arguments provided through the CLI to an actual pipeline reference (tokenizer + model
@@ -248,7 +250,7 @@ def load_graph_from_args(pipeline_name: str, framework: str, model: str, tokeniz
     print(f"Loading pipeline (model: {model}, tokenizer: {tokenizer})")

     # Allocate tokenizer and model
-    return pipeline(pipeline_name, model=model, tokenizer=tokenizer, framework=framework)
+    return pipeline(pipeline_name, model=model, tokenizer=tokenizer, framework=framework, model_kwargs=models_kwargs)


 def convert_pytorch(nlp: Pipeline, opset: int, output: Path, use_external_format: bool):
@@ -335,6 +337,7 @@ def convert(
     tokenizer: Optional[str] = None,
     use_external_format: bool = False,
     pipeline_name: str = "feature-extraction",
+    **model_kwargs
 ):
     """
     Convert the pipeline object to the ONNX Intermediate Representation (IR) format
@@ -347,6 +350,7 @@ def convert(
         tokenizer: The name of the model to load for the pipeline, default to the model's name if not provided
         use_external_format: Split the model definition from its parameters to allow model bigger than 2GB (PyTorch only)
         pipeline_name: The kind of pipeline to instantiate (ner, question-answering, etc.)
+        model_kwargs: Keyword arguments to be forwarded to the model constructor

     Returns:
@@ -354,7 +358,7 @@ def convert(
     print(f"ONNX opset version set to: {opset}")

     # Load the pipeline
-    nlp = load_graph_from_args(pipeline_name, framework, model, tokenizer)
+    nlp = load_graph_from_args(pipeline_name, framework, model, tokenizer, **model_kwargs)

     if not output.parent.exists():
         print(f"Creating folder {output.parent}")
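
A rough sketch of the forwarding chain these hunks establish, using the names from the diff; the concrete model class is selected by the pipeline task, so the last step is only a stand-in:

# convert(framework, model, output, opset, tokenizer, **model_kwargs)
#   -> load_graph_from_args(pipeline_name, framework, model, tokenizer, **models_kwargs)
#     -> pipeline(pipeline_name, model=model, tokenizer=tokenizer, framework=framework,
#                 model_kwargs=models_kwargs)
#       -> <task-specific model class>.from_pretrained(model, **model_kwargs)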
@@ -246,6 +246,7 @@ def pipeline(
     framework: Optional[str] = None,
     revision: Optional[str] = None,
     use_fast: bool = True,
+    model_kwargs: Dict[str, Any] = {},
     **kwargs
 ) -> Pipeline:
     """
@@ -307,6 +308,9 @@ def pipeline(
             artifacts on huggingface.co, so ``revision`` can be any identifier allowed by git.
         use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether or not to use a Fast tokenizer if possible (a :class:`~transformers.PreTrainedTokenizerFast`).
+        model_kwargs:
+            Additional dictionary of keyword arguments passed along to the model's :obj:`from_pretrained(...,
+            **model_kwargs)` function.
         kwargs:
             Additional keyword arguments passed along to the specific pipeline init (see the documentation for the
             corresponding pipeline class for possible values).
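
A short usage sketch of the model_kwargs parameter documented above; the model name is only illustrative:

from transformers import pipeline

# model_kwargs is forwarded to the underlying model's from_pretrained(...);
# here it disables GPT-2's cached past key/values.
nlp = pipeline("feature-extraction", model="gpt2", model_kwargs={"use_cache": False})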
@@ -383,7 +387,6 @@ def pipeline(
     # Instantiate model if needed
     if isinstance(model, str):
         # Handle transparent TF/PT model conversion
-        model_kwargs = {}
         if framework == "pt" and model.endswith(".h5"):
             model_kwargs["from_tf"] = True
             logger.warning(
@@ -38,19 +38,23 @@ class FuncNonContiguousArgs:

 class OnnxExportTestCase(unittest.TestCase):
-    MODEL_TO_TEST = ["bert-base-cased", "gpt2", "roberta-base"]
+    MODEL_TO_TEST = [
+        # (model_name, model_kwargs)
+        ("bert-base-cased", {}),
+        ("gpt2", {"use_cache": False}),  # We don't support exporting GPT2 past keys anymore
+    ]

     @require_tf
     @slow
     def test_export_tensorflow(self):
-        for model in OnnxExportTestCase.MODEL_TO_TEST:
-            self._test_export(model, "tf", 12)
+        for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
+            self._test_export(model, "tf", 12, **model_kwargs)

     @require_torch
     @slow
     def test_export_pytorch(self):
-        for model in OnnxExportTestCase.MODEL_TO_TEST:
-            self._test_export(model, "pt", 12)
+        for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
+            self._test_export(model, "pt", 12, **model_kwargs)

     @require_torch
     @slow
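
To illustrate why the GPT-2 entry passes use_cache=False, here is a quick check outside the test suite, assuming a local PyTorch install:

import torch
from transformers import GPT2Model, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2Model.from_pretrained("gpt2", use_cache=False)

with torch.no_grad():
    outputs = model(**tokenizer("Hello", return_tensors="pt"))

# With use_cache=False the forward pass returns no past key/values, so the
# ONNX export no longer has to handle that extra tuple of outputs.
assert outputs.past_key_values is None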
@@ -71,8 +75,8 @@ class OnnxExportTestCase(unittest.TestCase):
     @require_tf
     @slow
     def test_quantize_tf(self):
-        for model in OnnxExportTestCase.MODEL_TO_TEST:
-            path = self._test_export(model, "tf", 12)
+        for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
+            path = self._test_export(model, "tf", 12, **model_kwargs)
             quantized_path = quantize(Path(path))

             # Ensure the actual quantized model is not bigger than the original one
@@ -82,15 +86,15 @@ class OnnxExportTestCase(unittest.TestCase):
     @require_torch
     @slow
     def test_quantize_pytorch(self):
-        for model in OnnxExportTestCase.MODEL_TO_TEST:
-            path = self._test_export(model, "pt", 12)
+        for model, model_kwargs in OnnxExportTestCase.MODEL_TO_TEST:
+            path = self._test_export(model, "pt", 12, **model_kwargs)
             quantized_path = quantize(path)

             # Ensure the actual quantized model is not bigger than the original one
             if quantized_path.stat().st_size >= Path(path).stat().st_size:
                 self.fail("Quantized model is bigger than initial ONNX model")

-    def _test_export(self, model, framework, opset, tokenizer=None):
+    def _test_export(self, model, framework, opset, tokenizer=None, **model_kwargs):
         try:
             # Compute path
             with TemporaryDirectory() as tempdir:
@@ -101,7 +105,7 @@ class OnnxExportTestCase(unittest.TestCase):
                 path.parent.rmdir()

             # Export
-            convert(framework, model, path, opset, tokenizer)
+            convert(framework, model, path, opset, tokenizer, **model_kwargs)

             return path
         except Exception as e: