Fix ONNX tests for ONNX Runtime v1.13.1 (#19950)

* Fix ONNX tests for ONNX Runtime v1.13.1

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>

Fix ONNX tests for ONNX Runtime v1.13.1 (#19950)
* Fix ONNX tests for ONNX Runtime v1.13.1

Co-authored-by: Sylvain Gugger <35901082+sgugger@users.noreply.github.com>
243439a8 · lewtun · GitHub · 0b294c23 · 243439a8
Unverified commit 243439a8, authored Oct 31, 2022 by lewtun; committed by GitHub on Oct 31, 2022.
Show whitespace changes
Inline Side-by-side

Showing with 32 additions and 13 deletions

src/transformers/convert_graph_to_onnx.py src/transformers/convert_graph_to_onnx.py +32 -13

No files found.
--- a/src/transformers/convert_graph_to_onnx.py
+++ b/src/transformers/convert_graph_to_onnx.py
@@ -435,6 +435,7 @@ def quantize(onnx_model_path: Path) -> Path:
    Returns: The Path generated for the quantized
    """
    import onnx
+    import onnxruntime
    from onnx.onnx_pb import ModelProto
    from onnxruntime.quantization import QuantizationMode
    from onnxruntime.quantization.onnx_quantizer import ONNXQuantizer
@@ -454,6 +455,10 @@ def quantize(onnx_model_path: Path) -> Path:
    copy_model.CopyFrom(onnx_model)

    # Construct quantizer
+    # onnxruntime renamed input_qType to activation_qType in v1.13.1, so we
+    # check the onnxruntime version to ensure backward compatibility.
+    # See also: https://github.com/microsoft/onnxruntime/pull/12873
+    if parse(onnxruntime.__version__) < parse("1.13.1"):
        quantizer = ONNXQuantizer(
            model=copy_model,
            per_channel=False,
@@ -467,6 +472,20 @@ def quantize(onnx_model_path: Path) -> Path:
            nodes_to_exclude=None,
            op_types_to_quantize=list(IntegerOpsRegistry),
        )
+    else:
+        quantizer = ONNXQuantizer(
+            model=copy_model,
+            per_channel=False,
+            reduce_range=False,
+            mode=QuantizationMode.IntegerOps,
+            static=False,
+            weight_qType=True,
+            activation_qType=False,
+            tensors_range=None,
+            nodes_to_quantize=None,
+            nodes_to_exclude=None,
+            op_types_to_quantize=list(IntegerOpsRegistry),
+        )

    # Quantize and export
    quantizer.quantize_model()