Commit 961f0e1b authored by charlie

Debugging edits

Softmax fp32
propagate_constant fp64
layernorm fp32
parent ecfd3834
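
All three edits apply the same debugging pattern: a half-precision computation is promoted to a wider float type, recomputed there, and the result is converted back, so only the numerics of that single op change while the rest of the graph keeps its fp16 interface. A minimal, self-contained sketch of this convert "sandwich" (illustrative only, not part of the commit; it reuses the same make_op/add_instruction calls that appear in the hunks below, with softmax standing in for the wrapped op):

#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/shape.hpp>

int main()
{
    migraphx::program p;
    auto& m = *p.get_main_module();
    auto x = m.add_parameter("x", migraphx::shape{migraphx::shape::half_type, {4, 4}});
    // Promote to fp32, run the op in the wider type, then narrow back to fp16.
    auto wide = m.add_instruction(
        migraphx::make_op("convert", {{"target_type", migraphx::shape::float_type}}), x);
    auto soft = m.add_instruction(migraphx::make_op("softmax", {{"axis", 1}}), wide);
    m.add_instruction(
        migraphx::make_op("convert", {{"target_type", migraphx::shape::half_type}}), soft);
    return 0;
}

Because convert is an ordinary instruction, the wrapped op sees fp32 inputs and produces an fp32 output; the trailing convert restores the original fp16 type for downstream consumers.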
@@ -37,6 +37,9 @@ inline namespace MIGRAPHX_INLINE_NS {
std::vector<argument> run_ref(program p, const parameter_map& inputs)
{
// DEBUG
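// Quantize the whole program to fp16 so the "ref" target reproduces the
// reduced-precision numerics being debugged.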
quantize_fp16(p);
p.compile(migraphx::make_target("ref"));
auto out = p.eval(inputs);
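// Print the compiled (quantized) program so the instruction stream can be inspected.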
std::cout << p << std::endl;
@@ -555,7 +555,6 @@ literal onnx_parser::parse_tensor(const onnx::TensorProto& t) const
shape onnx_parser::parse_type(const onnx::TypeProto& t) const
{
shape::type_t shape_type = get_type(t.tensor_type().elem_type());
std::vector<shape::dynamic_dimension> dynamic_dims;
auto&& tensor_dims = t.tensor_type().shape().dim();
std::transform(tensor_dims.begin(),
@@ -57,7 +57,14 @@ struct parse_softmax : op_parser<parse_softmax>
axis = parser.parse_value(info.attributes.at("axis")).at<int>();
}
// previous version
return info.add_instruction(make_op(opd.op_name, {{"axis", axis}}), args);
// converted version
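// (fp32 softmax experiment, currently disabled: promotes the inputs to fp32,
// runs softmax in the wider type, then narrows the result back to fp16)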
// auto convert0 = info.add_instruction(
//     make_op("convert", {{"target_type", shape::float_type}}), args);
// auto softmax_ins =
//     info.add_instruction(make_op(opd.op_name, {{"axis", axis}}), convert0);
// return info.add_instruction(
//     make_op("convert", {{"target_type", shape::half_type}}), softmax_ins);
}
};
@@ -29,6 +29,7 @@
#include <migraphx/par_for.hpp>
#include <migraphx/env.hpp>
#include <unordered_set>
#include <migraphx/make_op.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
@@ -79,9 +80,43 @@ void propagate_constant::apply(module& m) const
// Compute literals in parallel
std::vector<instruction_ref> const_instrs_vec{const_instrs.begin(), const_instrs.end()};
std::vector<argument> literals(const_instrs_vec.size());
// Original parallel evaluation:
// par_for(const_instrs_vec.size(), 1, [&](const auto i) {
//     literals[i] = const_instrs_vec[i]->eval();
// });
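// DEBUG: evaluate serially, since the fp64 re-run below adds and removes
// instructions in the module and is therefore not safe under par_for.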
for(std::size_t i = 0; i < const_instrs_vec.size(); ++i)
{
// DEBUG
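// Re-evaluate fp16 constants through a temporary fp64 copy of the
// instruction, then convert the result back to the original type.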
auto ins = const_instrs_vec[i];
if(ins->get_shape().type() == shape::half_type)
{
auto inputs = ins->inputs();
std::vector<instruction_ref> new_inputs(inputs.size());
std::vector<instruction_ref> added_instructions;
std::transform(inputs.begin(), inputs.end(), new_inputs.begin(), [&](auto input) {
auto input_type = input->get_shape().type();
if(input_type != shape::half_type and input_type != shape::float_type)
return input;
auto ai = m.add_instruction(
make_op("convert", {{"target_type", shape::double_type}}), input);
added_instructions.push_back(ai);
return ai;
});
auto new_ins = m.add_instruction(ins->get_operator(), new_inputs);
added_instructions.push_back(new_ins);
auto after_convert = m.add_instruction(
make_op("convert", {{"target_type", ins->get_shape().type()}}), new_ins);
added_instructions.push_back(after_convert);
literals[i] = after_convert->eval();
// Remove the temporary instructions in reverse order so an instruction's
// inputs are never erased before the instruction that uses them.
for(auto it = added_instructions.rbegin(); it != added_instructions.rend(); ++it)
{
    m.remove_instruction(*it);
}
}
else
{
literals[i] = const_instrs_vec[i]->eval();
}
// Original
// literals[i] = const_instrs_vec[i]->eval();
}
// Replace instructions in m
for(size_t i = 0; i < const_instrs_vec.size(); i++)
@@ -56,10 +56,18 @@ static void quantize_module(module& m, const std::vector<std::string>& ins_names
auto inputs = ins->inputs();
std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
auto input_type = input->get_shape().type();
// ORIGINAL
// if(input_type != shape::float_type and input_type != shape::double_type)
//     return input;
// return m.insert_instruction(
//     ins, make_op("convert", {{"target_type", shape::half_type}}), input);
// DEBUG hack: widen to fp32 at least
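// With this change the pass converts fp16 inputs up to fp32 instead of
// narrowing fp32/fp64 inputs down to fp16.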
if(input_type != shape::half_type)
    return input;
return m.insert_instruction(
    ins, make_op("convert", {{"target_type", shape::float_type}}), input);
});
// Insert quantized ins
@@ -99,6 +99,19 @@ struct find_layernorm
if(contains(r.instructions, "eps"))
eps = r.instructions["eps"]->eval().at<float>();
// DEBUG
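// fp32 layernorm experiment, currently disabled: for fp16 outputs it would
// run layernorm in fp32 and convert the result back to fp16.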
// if(ins->get_shape().type() == shape::half_type)
// {
//     auto bconvert = m.insert_instruction(
//         ins, make_op("convert", {{"target_type", shape::float_type}}), x_ins);
//     auto ln_ins = m.insert_instruction(ins, layernorm{eps}, bconvert);
//     m.replace_instruction(
//         ins, make_op("convert", {{"target_type", shape::half_type}}), ln_ins);
// }
// else
// {
//     m.replace_instruction(ins, layernorm{eps}, x_ins);
// }
m.replace_instruction(ins, layernorm{eps}, x_ins);
}
};