Unverified commit 243439a8, authored by lewtun and committed by GitHub

Fix ONNX tests for ONNX Runtime v1.13.1 (#19950)



* Fix ONNX tests for ONNX Runtime v1.13.1
Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
parent 0b294c23
@@ -435,6 +435,7 @@ def quantize(onnx_model_path: Path) -> Path:
     Returns: The Path generated for the quantized
     """
     import onnx
+    import onnxruntime
     from onnx.onnx_pb import ModelProto
     from onnxruntime.quantization import QuantizationMode
     from onnxruntime.quantization.onnx_quantizer import ONNXQuantizer
@@ -454,19 +455,37 @@ def quantize(onnx_model_path: Path) -> Path:
     copy_model.CopyFrom(onnx_model)

     # Construct quantizer
-    quantizer = ONNXQuantizer(
-        model=copy_model,
-        per_channel=False,
-        reduce_range=False,
-        mode=QuantizationMode.IntegerOps,
-        static=False,
-        weight_qType=True,
-        input_qType=False,
-        tensors_range=None,
-        nodes_to_quantize=None,
-        nodes_to_exclude=None,
-        op_types_to_quantize=list(IntegerOpsRegistry),
-    )
+    # onnxruntime renamed input_qType to activation_qType in v1.13.1, so we
+    # check the onnxruntime version to ensure backward compatibility.
+    # See also: https://github.com/microsoft/onnxruntime/pull/12873
+    if parse(onnxruntime.__version__) < parse("1.13.1"):
+        quantizer = ONNXQuantizer(
+            model=copy_model,
+            per_channel=False,
+            reduce_range=False,
+            mode=QuantizationMode.IntegerOps,
+            static=False,
+            weight_qType=True,
+            input_qType=False,
+            tensors_range=None,
+            nodes_to_quantize=None,
+            nodes_to_exclude=None,
+            op_types_to_quantize=list(IntegerOpsRegistry),
+        )
+    else:
+        quantizer = ONNXQuantizer(
+            model=copy_model,
+            per_channel=False,
+            reduce_range=False,
+            mode=QuantizationMode.IntegerOps,
+            static=False,
+            weight_qType=True,
+            activation_qType=False,
+            tensors_range=None,
+            nodes_to_quantize=None,
+            nodes_to_exclude=None,
+            op_types_to_quantize=list(IntegerOpsRegistry),
+        )

     # Quantize and export
     quantizer.quantize_model()
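
The duplicated ONNXQuantizer call above could also be collapsed by selecting the keyword name at runtime. A minimal sketch of that alternative, assuming the packaging library is installed (transformers already imports parse from it) and using a hypothetical helper name that is not part of this PR:

import onnxruntime
from packaging.version import parse

def activation_qtype_kwarg(value):
    """Return the activation-dtype kwarg under the name the installed
    onnxruntime expects (hypothetical helper, not part of this commit)."""
    # onnxruntime v1.13.1 renamed input_qType to activation_qType
    # (https://github.com/microsoft/onnxruntime/pull/12873).
    if parse(onnxruntime.__version__) < parse("1.13.1"):
        return {"input_qType": value}
    return {"activation_qType": value}

# Usage inside quantize(), keeping every other argument identical:
# quantizer = ONNXQuantizer(model=copy_model, ..., **activation_qtype_kwarg(False))

The commit instead duplicates the full call in both branches, which is more verbose but keeps each branch a plain, greppable call signature.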