Update onnx proto and onnx parser to handle fp8 types (#2493)

Updated onnx.proto files to be same as onnx==1.14.1 that is used by MIGraphX. Added more switch cases in onnx parser to handle FP8 dtypes.

Update onnx proto and onnx parser to handle fp8 types (#2493)
Updated onnx.proto files to be same as onnx==1.14.1 that is used by MIGraphX. Added more switch cases in onnx parser to handle FP8 dtypes.
9c46821c · Umang Yadav · GitHub · 02f7405a · 9c46821c · 9c46821c
Unverified Commit 9c46821c authored Dec 04, 2023 by Umang Yadav Committed by GitHub Dec 04, 2023
Show whitespace changes
Inline Side-by-side

Showing with 216 additions and 61 deletions

src/onnx/onnx.proto src/onnx/onnx.proto +193 -61

src/onnx/onnx_parser.cpp src/onnx/onnx_parser.cpp +23 -0

No files found.
--- a/src/onnx/onnx.proto
+++ b/src/onnx/onnx.proto
@@ -3,8 +3,8 @@
 //


-// Copyright (c) ONNX Project Contributors.
-// Licensed under the MIT license.
+// SPDX-License-Identifier: Apache-2.0
+

 syntax = "proto2";

@@ -27,13 +27,6 @@ package onnx_for_migraphx;

 // Notes
 //
-// Release
-//
-// We are still in the very early stage of defining ONNX. The current
-// version of ONNX is a starting point. While we are actively working
-// towards a complete spec, we would like to get the community involved
-// by sharing our working version of ONNX.
-//
 // Protobuf compatibility
 //
 // To simplify framework compatibility, ONNX is defined using the subset of protobuf
@@ -92,15 +85,28 @@ enum Version {
  //   - Add sparse initializers
  IR_VERSION_2019_9_19 = 0x0000000000000006;

-  // IR VERSION 7 published on <TBD>
+  // IR VERSION 7 published on May 8, 2020
+  // - Add support to allow function body graph to rely on multiple external operator sets.
  // - Add a list to promote inference graph's initializers to global and
  //   mutable variables. Global variables are visible in all graphs of the
  //   stored models.
  // - Add message TrainingInfoProto to store initialization
  //   method and training algorithm. The execution of TrainingInfoProto
  //   can modify the values of mutable variables.
-  // - Make inference graph callable from TrainingInfoProto via GraphCall operator.
-  IR_VERSION = 0x0000000000000007;
+  // - Implicitly add inference graph into each TrainingInfoProto's algorithm.
+  IR_VERSION_2020_5_8 = 0x0000000000000007;
+
+  // IR VERSION 8 published on July 30, 2021
+  // Introduce TypeProto.SparseTensor
+  // Introduce TypeProto.Optional
+  // Added a list of FunctionProtos local to the model
+  // Deprecated since_version and operator status from FunctionProto
+  IR_VERSION_2021_7_30 = 0x0000000000000008;
+
+  // IR VERSION 9 published on TBD
+  // Added AttributeProto to FunctionProto so that default attribute values can be set.
+  // Added FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ.
+  IR_VERSION = 0x0000000000000009;
 }

 // Attributes
@@ -121,6 +127,7 @@ message AttributeProto {
    TENSOR = 4;
    GRAPH = 5;
    SPARSE_TENSOR = 11;
+    TYPE_PROTO = 13;

    FLOATS = 6;
    INTS = 7;
@@ -128,6 +135,7 @@ message AttributeProto {
    TENSORS = 9;
    GRAPHS = 10;
    SPARSE_TENSORS = 12;
+    TYPE_PROTOS = 14;
  }

  // The name field MUST be present for this version of the IR.
@@ -159,6 +167,7 @@ message AttributeProto {
  optional SparseTensorProto sparse_tensor = 22;  // sparse tensor value
  // Do not use field below, it's deprecated.
  // optional ValueProto v = 12;         // value - subsumes everything but graph
+  optional TypeProto tp = 14;          // type proto

  repeated float floats = 7;          // list of floats
  repeated int64 ints = 8;            // list of ints
@@ -166,6 +175,7 @@ message AttributeProto {
  repeated TensorProto tensors = 10;  // list of tensors
  repeated GraphProto graphs = 11;    // list of graph
  repeated SparseTensorProto sparse_tensors = 23; // list of sparse tensors
+  repeated TypeProto type_protos = 15;// list of type protos
 }

 // Defines information on value, including the name, the type, and
@@ -211,7 +221,7 @@ message NodeProto {
 // TrainingInfoProto stores information for training a model.
 // In particular, this defines two functionalities: an initialization-step
 // and a training-algorithm-step. Initialization resets the model
-// back to its original state as if no training has been consumed.
+// back to its original state as if no training has been performed.
 // Training algorithm improves the model based on input data.
 //
 // The semantics of the initialization-step is that the initializers
@@ -224,8 +234,8 @@ message NodeProto {
 // training algorithm's step. After the execution of a
 // TrainingInfoProto.algorithm, the initializers specified by "update_binding"
 // may be immediately updated. If the targeted training algorithm contains
-// consecutive update stages (such as block coordinate descent methods),
-// the user needs to create a TrainingInfoProto for each stage.
+// consecutive update steps (such as block coordinate descent methods),
+// the user needs to create a TrainingInfoProto for each step.
 message TrainingInfoProto {
  // This field describes a graph to compute the initial tensors
  // upon starting the training process. Initialization graph has no input
@@ -239,20 +249,38 @@ message TrainingInfoProto {
  // iteration to zero.
  //
  // By default, this field is an empty graph and its evaluation does not
-  // produce any output.
+  // produce any output. Thus, no initializer would be changed by default.
  optional GraphProto initialization = 1;

  // This field represents a training algorithm step. Given required inputs,
  // it computes outputs to update initializers in its own or inference graph's
-  // initializer lists. In general, this graph contains loss node, gradient node,
-  // optimizer node, increment of iteration count, and some calls to the inference
-  // graph.
+  // initializer lists. In general, this field contains loss node, gradient node,
+  // optimizer node, increment of iteration count.
  //
-  // The field algorithm.node is the only place the user can use GraphCall
-  // operator. The only callable graph is the one stored in ModelProto.graph.
+  // An execution of the training algorithm step is performed by executing the
+  // graph obtained by combining the inference graph (namely "ModelProto.graph")
+  // and the "algorithm" graph. That is, the actual
+  // input/initializer/output/node/value_info/sparse_initializer list of
+  // the training graph is the concatenation of
+  // "ModelProto.graph.input/initializer/output/node/value_info/sparse_initializer"
+  // and "algorithm.input/initializer/output/node/value_info/sparse_initializer"
+  // in that order. This combined graph must satisfy the normal ONNX conditions.
+  // Now, let's provide a visualization of graph combination for clarity.
+  // Let the inference graph (i.e., "ModelProto.graph") be
+  //    tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d
+  // and the "algorithm" graph be
+  //    tensor_d -> Add -> tensor_e
+  // The combination process results in
+  //    tensor_a, tensor_b -> MatMul -> tensor_c -> Sigmoid -> tensor_d -> Add -> tensor_e
+  //
+  // Notice that an input of a node in the "algorithm" graph may reference the
+  // output of a node in the inference graph (but not the other way round). Also, inference
+  // node cannot reference inputs of "algorithm". With these restrictions, inference graph
+  // can always be run independently without training information.
  //
  // By default, this field is an empty graph and its evaluation does not
-  // produce any output.
+  // produce any output. Evaluating the default training step never
+  // updates any initializers.
  optional GraphProto algorithm = 2;

  // This field specifies the bindings from the outputs of "initialization" to
@@ -284,23 +312,16 @@ message TrainingInfoProto {
  // be multiple key-value pairs in "update_binding".
  //
  // The initializers appears as keys in "update_binding" are considered
-  // mutable and globally-visible variables. This implies some behaviors
+  // mutable variables. This implies some behaviors
  // as described below.
  //
-  //  1. We have only unique keys in all "update_binding"s so that two global
+  //  1. We have only unique keys in all "update_binding"s so that two
  //     variables may not have the same name. This ensures that one
-  //     global variable is assigned up to once.
+  //     variable is assigned up to once.
  //  2. The keys must appear in names of "ModelProto.graph.initializer" or
  //     "TrainingInfoProto.algorithm.initializer".
-  //  3. The values must be output names of "algorithm".
-  //  4. If an optional input of a graph is omitted when using GraphCall, the
-  //     global variable with the same name may be used.
-  //  5. When using GraphCall, the users always can pass values to optional 
-  //     inputs of the called graph even if the associated initializers appears
-  //     as keys in "update_binding"s.
-  //  6. The graphs in TrainingInfoProto's can use global variables as
-  //     their operator inputs.
-  //  7. Mutable variables are initialized to the value specified by the
+  //  3. The values must be output names of "algorithm" or "ModelProto.graph.output".
+  //  4. Mutable variables are initialized to the value specified by the
  //     corresponding initializer, and then potentially updated by
  //     "initializer_binding"s and "update_binding"s in "TrainingInfoProto"s.
  //
@@ -375,13 +396,31 @@ message ModelProto {
  //
  // If this field is empty, the training behavior of the model is undefined.
  repeated TrainingInfoProto training_info = 20;
+
+  // A list of function protos local to the model.
+  //
+  // Name of the function "FunctionProto.name" should be unique within the domain "FunctionProto.domain".
+  // In case of any conflicts the behavior (whether the model local functions are given higher priority,
+  // or standard operator sets are given higher priority or this is treated as error) is defined by
+  // the runtimes.
+  //
+  // The operator sets imported by FunctionProto should be compatible with the ones
+  // imported by ModelProto and other model local FunctionProtos.
+  // Example, if same operator set say 'A' is imported by a FunctionProto and ModelProto
+  // or by 2 FunctionProtos then versions for the operator set may be different but,
+  // the operator schema returned for op_type, domain, version combination
+  // for both the versions should be same for every node in the function body.
+  //
+  // One FunctionProto can reference other FunctionProto in the model, however, recursive reference
+  // is not allowed.
+  repeated FunctionProto functions = 25;
 };

 // StringStringEntryProto follows the pattern for cross-proto-version maps.
 // See https://developers.google.com/protocol-buffers/docs/proto3#maps
 message StringStringEntryProto {
  optional string key = 1;
-  optional string value= 2;
+  optional string value = 2;
 };

 message TensorAnnotation {
@@ -409,8 +448,9 @@ message GraphProto {
  optional string name = 2;   // namespace Graph

  // A list of named tensor values, used to specify constant inputs of the graph.
-  // Each TensorProto entry must have a distinct name (within the list) that
-  // MAY also appear in the input list.
+  // Each initializer (both TensorProto as well as SparseTensorProto) MUST have a name.
+  // The name MUST be unique across both initializer and sparse_initializer,
+  // but the name MAY also appear in the input list.
  repeated TensorProto initializer = 5;

  // Initializers (see above) stored in sparse format.
@@ -433,13 +473,8 @@ message GraphProto {
  // which means, tensor 'a_scale' and tensor 'a_zero_point' are scale and zero point of tensor 'a' in the model.
  repeated TensorAnnotation quantization_annotation = 14;

-  // DO NOT USE the following fields, they were deprecated from earlier versions.
-  // repeated string input = 3;
-  // repeated string output = 4;
-  // optional int64 ir_version = 6;
-  // optional int64 producer_version = 7;
-  // optional string producer_tag = 8;
-  // optional string domain = 9;
+  reserved 3, 4, 6 to 9;
+  reserved "ir_version", "producer_version", "producer_tag", "domain";
 }

 // Tensors
@@ -474,6 +509,17 @@ message TensorProto {
    // This format has 1 sign bit, 8 exponent bits, and 7 mantissa bits.
    BFLOAT16 = 16;

+    // Non-IEEE floating-point format based on papers
+    // FP8 Formats for Deep Learning, https://arxiv.org/abs/2209.05433,
+    // 8-bit Numerical Formats For Deep Neural Networks, https://arxiv.org/pdf/2206.02915.pdf.
+    // Operators supporting FP8 are Cast, CastLike, QuantizeLinear, DequantizeLinear.
+    // The computation usually happens inside a block quantize / dequantize
+    // fused by the runtime.
+    FLOAT8E4M3FN = 17;    // float 8, mostly used for coefficients, supports nan, not inf 
+    FLOAT8E4M3FNUZ = 18;  // float 8, mostly used for coefficients, supports nan, not inf, no negative zero 
+    FLOAT8E5M2 = 19;      // follows IEEE 754, supports nan, inf, mostly used for gradients
+    FLOAT8E5M2FNUZ = 20;  // follows IEEE 754, supports nan, not inf, mostly used for gradients, no negative zero
+
    // Future extensions go here.
  }

@@ -507,11 +553,11 @@ message TensorProto {
  // When this field is present, the data_type field MUST be FLOAT or COMPLEX64.
  repeated float float_data = 4 [packed = true];

-  // For int32, uint8, int8, uint16, int16, bool, and float16 values
-  // float16 values must be bit-wise converted to an uint16_t prior
+  // For int32, uint8, int8, uint16, int16, bool, float8, and float16 values
+  // float16 and float8 values must be bit-wise converted to uint16_t and uint8_t, respectively, prior
  // to writing to the buffer.
  // When this field is present, the data_type field MUST be
-  // INT32, INT16, INT8, UINT16, UINT8, BOOL, or FLOAT16
+  // INT32, INT16, INT8, UINT16, UINT8, BOOL, FLOAT16, BFLOAT16, FLOAT8E4M3FN, FLOAT8E4M3FNUZ, FLOAT8E5M2, FLOAT8E5M2FNUZ
  repeated int32 int32_data = 5 [packed = true];

  // For strings.
@@ -589,6 +635,8 @@ message TensorProto {
 message SparseTensorProto {
  // The sequence of non-default values are encoded as a tensor of shape [NNZ].
  // The default-value is zero for numeric tensors, and empty-string for string tensors.
+  // values must have a non-empty name present which serves as a name for SparseTensorProto
+  // when used in sparse_initializer list.
  optional TensorProto values = 1;

  // The indices of the non-default values, which may be stored in one of two formats.
@@ -619,7 +667,7 @@ message TensorShapeProto {
    // Standard denotation can optionally be used to denote tensor
    // dimensions with standard semantic descriptions to ensure
    // that operations are applied to the correct axis of a tensor.
-    // Refer to https://github.com/onnx/onnx/blob/master/docs/DimensionDenotation.md#denotation-definition
+    // Refer to https://github.com/onnx/onnx/blob/main/docs/DimensionDenotation.md#denotation-definition
    // for pre-defined dimension denotations.
    optional string denotation = 3;
  };
@@ -656,6 +704,23 @@ message TypeProto {
    optional TypeProto value_type = 2;
  };

+  // wrapper for Tensor, Sequence, or Map
+  message Optional {
+    // The type and optional shape of the element wrapped.
+    // This field MUST be present for this version of the IR.
+    // Possible values correspond to OptionalProto.DataType enum
+    optional TypeProto elem_type = 1;
+  };
+
+
+  message SparseTensor {
+    // This field MUST NOT have the value of UNDEFINED
+    // This field MUST have a valid TensorProto.DataType value
+    // This field MUST be present for this version of the IR.
+    optional int32 elem_type = 1;
+    optional TensorShapeProto shape = 2;
+  }
+

  oneof value {
    // The type of a tensor.
@@ -672,11 +737,18 @@ message TypeProto {
    // The type of a map.
    Map map_type = 5;

+    // The type of an optional.
+    Optional optional_type = 9;
+
+
+    // Type of the sparse tensor
+    SparseTensor sparse_tensor_type = 8;
+
  }

  // An optional denotation can be used to denote the whole
  // type with a standard semantic description as to what is
-  // stored inside. Refer to https://github.com/onnx/onnx/blob/master/docs/TypeDenotation.md#type-denotation-definition
+  // stored inside. Refer to https://github.com/onnx/onnx/blob/main/docs/TypeDenotation.md#type-denotation-definition
  // for pre-defined type denotations.
  optional string denotation = 6;
 }
@@ -696,7 +768,67 @@ message OperatorSetIdProto {
  optional int64 version = 2;
 }

+// Operator/function status.
+enum OperatorStatus {
+    EXPERIMENTAL = 0;
+    STABLE = 1;
+}
+
+message FunctionProto {
+  // The name of the function, similar usage of op_type in OperatorProto.
+  // Combined with FunctionProto.domain, this forms the unique identity of
+  // the FunctionProto.
+  optional string name = 1;
+
+  // Deprecated since IR Version 8
+  // optional int64 since_version = 2;
+  reserved 2;
+  reserved "since_version";
+
+  // Deprecated since IR Version 8
+  // optional OperatorStatus status = 3;
+  reserved 3;
+  reserved "status";
+
+  // The inputs and outputs of the function.
+  repeated string input = 4;
+  repeated string output = 5;
+
+  // The attribute parameters of the function.
+  // It is for function parameters without default values.
+  repeated string attribute = 6;
+
+  // The attribute protos of the function.
+  // It is for function attributes with default values.
+  // A function attribute shall be represented either as
+  // a string attribute or an AttributeProto, not both.
+  repeated AttributeProto attribute_proto = 11;
+
+  // The nodes in the function.
+  repeated NodeProto node = 7;
+  // A human-readable documentation for this function. Markdown is allowed.
+  optional string doc_string = 8;
+
+  // The OperatorSets this function body (graph) relies on.
+  //
+  // All nodes in the function body (graph) will bind against the operator
+  // with the same-domain/same-op_type operator with the HIGHEST version
+  // in the referenced operator sets. This means at most one version can be relied
+  // on for one domain.
+  //
+  // The operator sets imported by FunctionProto should be compatible with the ones
+  // imported by ModelProto. Example, if same operator set say 'A' is imported by FunctionProto
+  // and ModelProto then versions for the operator set may be different but,
+  // the operator schema returned for op_type, domain, version combination
+  // for both the versions should be same.
+
+  repeated OperatorSetIdProto opset_import = 9;
+
+  // The domain which this function belongs to. Combined with FunctionProto.name, this forms the unique identity of
+  // the FunctionProto.
+  optional string domain = 10;
+}
+

 // For using protobuf-lite
 option optimize_for = LITE_RUNTIME;
\ No newline at end of file
-
--- a/src/onnx/onnx_parser.cpp
+++ b/src/onnx/onnx_parser.cpp
@@ -34,7 +34,9 @@
 #include <migraphx/file_buffer.hpp>
 #include <migraphx/filesystem.hpp>
 #include <migraphx/op/unknown.hpp>
+#include <migraphx/float8.hpp>
 #include <migraphx/env.hpp>
+#include <onnx.pb.h>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -484,6 +486,8 @@ literal onnx_parser::parse_value(const onnx::AttributeProto& attr) const
    case onnx::AttributeProto::TENSORS:
    case onnx::AttributeProto::SPARSE_TENSOR:
    case onnx::AttributeProto::SPARSE_TENSORS:
+    case onnx::AttributeProto::TYPE_PROTOS:
+    case onnx::AttributeProto::TYPE_PROTO:
    case onnx::AttributeProto::GRAPHS: return {};
    }
    MIGRAPHX_THROW("PARSE_VALUE: Invalid attribute type " + std::to_string(attr.type()));
@@ -545,6 +549,18 @@ literal onnx_parser::parse_tensor(const onnx::TensorProto& t) const
    case onnx::TensorProto::DOUBLE:
        return create_literal(shape::double_type, dims, t.double_data());
    case onnx::TensorProto::FLOAT: return create_literal(shape::float_type, dims, t.float_data());
+    case onnx::TensorProto::FLOAT8E4M3FNUZ: {
+        std::vector<int32_t> data_int32(t.int32_data().begin(), t.int32_data().end());
+        std::vector<migraphx::fp8::fp8e4m3fnuz> data_fp8;
+        std::transform(data_int32.begin(),
+                       data_int32.end(),
+                       std::back_inserter(data_fp8),
+                       [](float raw_val) { return migraphx::fp8::fp8e4m3fnuz{raw_val}; });
+        return create_literal(shape::fp8e4m3fnuz_type, dims, data_fp8);
+    }
+    case onnx::TensorProto::FLOAT8E5M2FNUZ:
+    case onnx::TensorProto::FLOAT8E5M2:
+    case onnx::TensorProto::FLOAT8E4M3FN:
    case onnx::TensorProto::UNDEFINED:
    case onnx::TensorProto::STRING:
    case onnx::TensorProto::COMPLEX64:
@@ -609,6 +625,13 @@ shape::type_t get_type(int dtype)
    case 11: return shape::double_type;
    case 12: return shape::uint32_type;
    case 13: return shape::uint64_type;
+    case 18: return shape::fp8e4m3fnuz_type;
+    case 14:
+    case 15:
+    case 16:
+    case 17:
+    case 19:
+    case 20:
    default: {
        MIGRAPHX_THROW("Prototensor data type " + std::to_string(dtype) + " not supported");
    }