Add flags to driver to run quantization (#361)

* Add flags to quantize in driver
* Formatting
* Fix compile error

Add flags to driver to run quantization (#361)
* Add flags to quantize in driver
* Formatting
* Fix compile error
f445d962 · Paul Fultz II · mvermeulen · ef5e7ce0 · f445d962 · f445d962
Commit f445d962, authored Sep 16, 2019 by Paul Fultz II; committed by mvermeulen on Sep 16, 2019.
5 changed files
--- a/src/driver/main.cpp
+++ b/src/driver/main.cpp
@@ -7,6 +7,7 @@
 #include <migraphx/onnx.hpp>
 #include <migraphx/stringutils.hpp>
+#include <migraphx/quantization.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/generate.hpp>
 #include <migraphx/dead_code_elimination.hpp>
@@ -79,32 +80,47 @@ struct loader
 struct compiler
 {
+    static const int q_fp16 = 1;
+    static const int q_int8 = 2;
    loader l;
-    bool gpu = true;
+    bool gpu     = true;
+    int quantize = 0;
    std::vector<std::string> fill1;
    void parse(argument_parser& ap)
    {
        l.parse(ap);
        ap(gpu, {"--gpu"}, ap.help("Compile on the gpu"), ap.set_value(true));
        ap(gpu, {"--cpu"}, ap.help("Compile on the cpu"), ap.set_value(false));
+        ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(q_fp16));
+        ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(q_int8));
        ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append());
    }
-    program compile()
+    auto params(const program& p, bool use_gpu = true)
-    {
-        auto p = l.load();
-        compile_program(p, gpu);
-        return p;
-    }
-    auto params(const program& p)
    {
        program::parameter_map m;
        for(auto&& s : fill1)
            m[s] = fill_argument(p.get_parameter_shape(s), 1);
-        fill_param_map(m, p, gpu);
+        fill_param_map(m, p, use_gpu && gpu);
        return m;
    }
+    program compile()
+    {
+        auto p = l.load();
+        auto t = get_target(gpu);
+        if(quantize == q_fp16)
+        {
+            quantize_fp16(p);
+        }
+        else if(quantize == q_int8)
+        {
+            quantize_int8(p, t, {params(p, false)});
+        }
+        p.compile(t);
+        return p;
+    }
 };
 struct read : command<read>

--- a/src/driver/perf.cpp
+++ b/src/driver/perf.cpp
@@ -45,6 +45,22 @@ program::parameter_map create_param_map(const program& p, bool gpu)
    return m;
 }
+target get_target(bool gpu)
+{
+    if(gpu)
+    {
+#ifdef HAVE_GPU
+        return gpu::target{};
+#else
+        MIGRAPHX_THROW("Gpu not supported.");
+#endif
+    }
+    else
+    {
+        return cpu::target{};
+    }
+}
 void compile_program(program& p, bool gpu)
 {
    if(gpu)

--- a/src/driver/perf.hpp
+++ b/src/driver/perf.hpp
@@ -9,6 +9,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 program::parameter_map fill_param_map(program::parameter_map& m, const program& p, bool gpu);
 program::parameter_map create_param_map(const program& p, bool gpu = true);
+target get_target(bool gpu);
 void compile_program(program& p, bool gpu = true);
 } // namespace MIGRAPHX_INLINE_NS

--- a/src/include/migraphx/quantization.hpp
+++ b/src/include/migraphx/quantization.hpp
@@ -38,7 +38,7 @@ capture_arguments(program& prog, T&& t, const std::vector<std::string>& ins_name
 void quantize_int8(program& prog,
                   const target& t,
-                   std::vector<program::parameter_map>& calibration,
+                   const std::vector<program::parameter_map>& calibration,
                   const std::vector<std::string>& ins_names = {"dot", "convolution"});
 void quantize_int8_impl(program& prog,
                        const std::vector<std::pair<float, float>>& quant_params,

--- a/src/quantization.cpp
+++ b/src/quantization.cpp
@@ -414,7 +414,7 @@ void quantize_int8_impl(program& prog,
 void quantize_int8(program& prog,
                   const target& t,
-                   std::vector<program::parameter_map>& calibration,
+                   const std::vector<program::parameter_map>& calibration,
                   const std::vector<std::string>& ins_names)
 {
    // insert capture operator
@@ -433,8 +433,8 @@ void quantize_int8(program& prog,
        {
            if(arg.count(x.first) > 0)
            {
-                assert(x.second == arg[x.first].get_shape());
+                assert(x.second == arg.at(x.first).get_shape());
-                m[x.first] = t.copy_to(arg[x.first]);
+                m[x.first] = t.copy_to(arg.at(x.first));
            }
            else
            {