Commit 5482a441 authored by Shucai Xiao

Merge branch 'fp32_fp16_convert' into ins_fp32_fp16

parents af00eea8 45dddfa7
@@ -18,6 +18,7 @@ add_library(migraphx
generate.cpp
instruction.cpp
program.cpp
quantization.cpp
shape.cpp
schedule.cpp
simplify_algebra.cpp
@@ -27,6 +27,7 @@ struct instruction
void replace(const shape& r);
void recompute_shape();
void recompute_ins_shape();
void clear_arguments();
@@ -67,7 +68,10 @@ struct instruction
static void backreference(instruction_ref ref);
static void replace_argument(instruction_ref ins, instruction_ref old, instruction_ref new_ins);
static void replace_argument(instruction_ref ins,
instruction_ref old,
instruction_ref new_ins,
bool recompute_shape = true);
static void
replace(instruction_ref ins, operation o, const shape& r, std::vector<instruction_ref> args);
#ifndef MIGRAPHX_GUARD_OPERATORS_FP_CONVERSION_HPP
#define MIGRAPHX_GUARD_OPERATORS_FP_CONVERSION_HPP
#include <array>
#include <migraphx/op/binary.hpp>
#include <migraphx/operation.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/streamutils.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/shape_for_each.hpp>
#include <migraphx/config.hpp>
#include <cmath>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
struct fp_conversion
{
bool reduce_precision = true;
std::string name() const { return "fp_conversion"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this}.has(1);
if(reduce_precision)
{
if(inputs.front().type() != shape::float_type)
{
MIGRAPHX_THROW("FP_CONVERSION: input arguments must be type float");
}
return {shape::half_type, inputs.front().lens()};
}
else
{
if(inputs.front().type() != shape::half_type)
{
MIGRAPHX_THROW("FP_CONVERSION: input arguments must be type fp16");
}
return {shape::float_type, inputs.front().lens()};
}
}
argument compute(const shape& output_shape, std::vector<argument> args) const
{
argument result{output_shape};
result.visit([&](auto output) {
args.front().visit(
[&](auto input) { std::copy(input.begin(), input.end(), output.begin()); });
});
return result;
}
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
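For reference, a minimal sketch of how the new op could be exercised through the public program API. The literal values and the add_literal/add_instruction calls are taken from the surrounding code base and are illustrative only, not part of this diff.

#include <migraphx/program.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/op/fp_conversion.hpp>

int main()
{
    migraphx::program p;
    migraphx::shape s{migraphx::shape::float_type, {4}};
    auto x = p.add_literal(migraphx::literal{s, {0.25f, 0.5f, 1.0f, 2.0f}});
    // default-constructed op reduces precision: float -> half
    auto h = p.add_instruction(migraphx::op::fp_conversion{}, x);
    // reduce_precision = false goes the other way: half -> float
    p.add_instruction(migraphx::op::fp_conversion{false}, h);
    return 0;
}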
@@ -22,6 +22,7 @@
#include <migraphx/op/elu.hpp>
#include <migraphx/op/exp.hpp>
#include <migraphx/op/flatten.hpp>
#include <migraphx/op/fp_conversion.hpp>
#include <migraphx/op/gather.hpp>
#include <migraphx/op/gru.hpp>
#include <migraphx/op/identity.hpp>
#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_QUANTIZATION_HPP
#define MIGRAPHX_GUARD_MIGRAPHLIB_QUANTIZATION_HPP
#include <list>
#include <unordered_map>
#include <migraphx/operation.hpp>
#include <migraphx/literal.hpp>
#include <migraphx/builtin.hpp>
#include <migraphx/instruction_ref.hpp>
#include <migraphx/target.hpp>
#include <migraphx/tracer.hpp>
#include <migraphx/env.hpp>
#include <migraphx/config.hpp>
#include <algorithm>
#include <iostream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
void quantize(program& prog);
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
@@ -30,6 +30,13 @@ void instruction::replace(const shape& r)
void instruction::recompute_shape() { replace(compute_shape(op, arguments)); }
void instruction::recompute_ins_shape()
{
auto r = compute_shape(op, arguments);
if(r != result)
result = r;
}
void instruction::clear_arguments()
{
for(auto&& arg : arguments)
@@ -126,10 +133,12 @@ void instruction::backreference(instruction_ref ref)
void instruction::replace_argument(instruction_ref ins,
instruction_ref old,
instruction_ref new_ins)
instruction_ref new_ins,
bool recompute_shape)
{
ins->replace_argument(old, new_ins);
backreference(ins);
if(recompute_shape)
ins->recompute_shape();
}
@@ -177,7 +177,7 @@ void memory_coloring_impl::build()
void memory_coloring_impl::rewrite()
{
std::vector<std::size_t> dims;
dims.push_back(required_bytes / sizeof(float));
dims.push_back((required_bytes + sizeof(float) - 1) / sizeof(float));
shape s = {shape::float_type, dims};
instruction_ref scratch_param = p_program->add_parameter("scratch", s);
for(auto ins : iterator_for(*p_program))
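The scratch size computation above now rounds up to a whole number of floats instead of truncating. A quick check of the arithmetic, using a made-up byte count:

#include <cstddef>
// old:  10 / 4          = 2 floats ->  8 bytes (smaller than requested)
// new: (10 + 4 - 1) / 4 = 3 floats -> 12 bytes (covers the request)
std::size_t required_bytes = 10; // illustrative value only
std::size_t n_floats = (required_bytes + sizeof(float) - 1) / sizeof(float); // == 3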
@@ -2,6 +2,7 @@
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <migraphx/program.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/stringutils.hpp>
@@ -181,6 +182,7 @@ PYBIND11_MODULE(migraphx, m)
});
m.def("generate_argument", &migraphx::generate_argument, py::arg("s"), py::arg("seed") = 0);
m.def("quantize", &migraphx::quantize);
#ifdef HAVE_GPU
m.def("allocate_gpu", &migraphx::gpu::allocate_gpu, py::arg("s"), py::arg("host") = false);
#include <migraphx/program.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/op/fp_conversion.hpp>
#include <migraphx/target.hpp>
#include <migraphx/env.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/time.hpp>
#include <migraphx/iterator_for.hpp>
#include <iostream>
#include <sstream>
#include <algorithm>
#include <utility>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
instruction_ref convert_fp32_fp16(program& prog, instruction_ref& ins)
{
assert(ins->get_shape().type() == shape::float_type);
assert(ins->name().front() == '@');
instruction_ref ins_fp16{};
if(ins->name() == "@literal")
{
std::vector<float> values;
auto l_fp32 = ins->get_literal();
shape s = ins->get_shape();
l_fp32.visit([&](auto val) { values.assign(val.begin(), val.end()); });
ins_fp16 = prog.add_literal(literal({shape::half_type, s.lens()}, values));
}
else if(ins->name() == "@param")
{
if(ins == std::prev(prog.end()))
{
ins_fp16 = prog.add_instruction(op::fp_conversion{}, ins);
}
else
{
ins_fp16 = prog.insert_instruction(std::next(ins), op::fp_conversion{}, ins);
}
}
return ins_fp16;
}
void quantize(program& prog)
{
bool reduced_precision = false;
for(auto ins : iterator_for(prog))
{
// convert float_type to half_type
if(ins->name().front() == '@' && ins->get_shape().type() == shape::float_type)
{
auto ins_fp16 = convert_fp32_fp16(prog, ins);
auto outputs = ins->outputs();
for(auto output : outputs)
{
if(output != ins_fp16)
{
instruction::replace_argument(output, ins, ins_fp16, false);
}
}
reduced_precision = true;
}
}
// append a final instruction to convert the fp16 result back to fp32
if(reduced_precision)
{
for(auto ins : iterator_for(prog))
{
if(ins->name().front() != '@')
{
ins->recompute_ins_shape();
}
}
auto ins = std::prev(prog.end());
if(ins->get_shape().type() == shape::half_type)
{
prog.add_instruction(op::fp_conversion{false}, ins);
}
}
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
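For context, a rough end-to-end sketch of how the new pass might be driven. parse_onnx and cpu::target come from the rest of the code base and are assumptions here, not something this commit adds.

#include <migraphx/onnx.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/cpu/target.hpp>

int main()
{
    migraphx::program p = migraphx::parse_onnx("model.onnx"); // assumed ONNX front end
    migraphx::quantize(p);              // rewrite fp32 literals/params to fp16
    p.compile(migraphx::cpu::target{}); // lowering maps fp_conversion to cpu_fp_conversion
    return 0;
}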
@@ -713,6 +713,18 @@ struct cpu_logsoftmax
}
};
struct cpu_fp_conversion
{
op::fp_conversion op;
std::string name() const { return "cpu_fp_conversion"; }
shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
return op.compute(output_shape, std::move(args));
}
};
struct add_op
{
std::string name() const { return "add"; }
@@ -823,6 +835,7 @@ struct cpu_apply
apply_map["pad"] = extend_op<cpu_pad, op::pad>();
apply_map["concat"] = extend_op<cpu_concat, op::concat>();
apply_map["gather"] = extend_op<cpu_gather, op::gather>();
apply_map["fp_conversion"] = extend_op<cpu_fp_conversion, op::fp_conversion>();
apply_map["logsoftmax"] = extend_op<cpu_logsoftmax, op::logsoftmax>();
apply_map["leaky_relu"] = extend_op<cpu_unary<leaky_relu_op>, op::leaky_relu>();
apply_map["elu"] = extend_op<cpu_unary<elu_op>, op::elu>();
@@ -27,6 +27,7 @@ add_library(migraphx_device
device/add_relu.cpp
device/contiguous.cpp
device/logsoftmax.cpp
device/fp_conversion.cpp
device/mul.cpp
device/concat.cpp
device/pad.cpp
@@ -50,6 +51,7 @@ add_library(migraphx_gpu
convolution.cpp
softmax.cpp
logsoftmax.cpp
fp_conversion.cpp
contiguous.cpp
concat.cpp
relu.cpp
#include <migraphx/gpu/device/fp_conversion.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument
fp_conversion(hipStream_t stream, const shape& output_shape, const std::vector<argument>& args)
{
args.back().visit([&](auto output) {
args.front().visit([&](auto input) {
const auto* input_ptr = device_cast(input.data());
auto* output_ptr = device_cast(output.data());
gs_launch(stream,
output_shape.elements())([=](auto i) { output_ptr[i] = input_ptr[i]; });
});
});
return args.back();
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/fp_conversion.hpp>
#include <migraphx/gpu/device/fp_conversion.hpp>
#include <migraphx/gpu/context.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
shape hip_fp_conversion::compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.compute_shape(inputs);
}
argument hip_fp_conversion::compute(context& ctx,
const shape& output_shape,
const std::vector<argument>& args) const
{
return device::fp_conversion(ctx.get_stream().get(), output_shape, args);
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_FP_CONVERSION_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_FP_CONVERSION_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
argument
fp_conversion(hipStream_t stream, const shape& output_shape, const std::vector<argument>& args);
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_FP_CONVERSION_HPP
#define MIGRAPHX_GUARD_RTGLIB_FP_CONVERSION_HPP
#include <migraphx/shape.hpp>
#include <migraphx/op/fp_conversion.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
struct hip_fp_conversion
{
op::fp_conversion op;
std::string name() const { return "gpu::fp_conversion"; }
shape compute_shape(std::vector<shape> inputs) const;
argument
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
int output_alias(const std::vector<shape>& shapes) const { return shapes.size() - 1; }
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
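The output_alias above reflects the GPU convention that lowering appends a preallocated output buffer as the last argument, which is why compute_shape pops the final shape before delegating to the op. A loose sketch of that shape wiring; the function name and dimensions are illustrative, not part of this diff.

#include <migraphx/gpu/fp_conversion.hpp>
#include <vector>

migraphx::shape example_output_shape()
{
    // real input plus the trailing allocation that lowering appends
    std::vector<migraphx::shape> shapes = {
        {migraphx::shape::float_type, {2, 3}},  // actual fp32 input
        {migraphx::shape::half_type, {2, 3}}};  // preallocated output buffer
    migraphx::gpu::hip_fp_conversion conv{};
    // pops the allocation, checks the fp32 input, returns {half_type, {2, 3}};
    // output_alias(shapes) == 1, so the result aliases that last buffer
    return conv.compute_shape(shapes);
}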
@@ -45,6 +45,7 @@
#include <migraphx/gpu/pad.hpp>
#include <migraphx/gpu/gather.hpp>
#include <migraphx/gpu/lrn.hpp>
#include <migraphx/gpu/fp_conversion.hpp>
#include <utility>
#include <functional>
#include <algorithm>
@@ -101,6 +102,7 @@ struct miopen_apply
add_extend_op<hip_logsoftmax, op::logsoftmax>("logsoftmax");
add_extend_op<hip_gather, op::gather>("gather");
add_extend_op<hip_pad, op::pad>("pad");
add_extend_op<hip_fp_conversion, op::fp_conversion>("fp_conversion");
add_lrn_op();
add_convolution_op();