Merge changes from branch int8_quantization.

75f5ed4a · Shucai Xiao · 3119fa01 · 2363d06c · 75f5ed4a · 75f5ed4a
Commit 75f5ed4a authored Jun 10, 2019 by Shucai Xiao
7 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,8 +39,6 @@ else()
    set(MIGRAPHX_ENABLE_GPU Off CACHE BOOL "")
 endif()
-set(MIGRAPHX_ENABLE_TF Off CACHE BOOL "")
 add_compile_options(-std=c++14)
 list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

--- a/requirements.txt
+++ b/requirements.txt
-google/protobuf -DCMAKE_POSITION_INDEPENDENT_CODE=On
+google/protobuf@v3.8.0 -DCMAKE_POSITION_INDEPENDENT_CODE=On -X subdir -Dprotobuf_BUILD_TESTS=Off
 RadeonOpenCompute/rocm-cmake@42f6740 --build
 ROCmSoftwarePlatform/rocBLAS@30a992ae02fda568688bcd190edd5e277d6674d9
 ROCmSoftwarePlatform/MIOpen@1.8.0

--- a/src/include/migraphx/op/convert.hpp
+++ b/src/include/migraphx/op/convert.hpp
@@ -42,10 +42,10 @@ struct convert : unary<convert>
            float res = scale * x + shift;
            if(target_type == shape::int8_type)
            {
-                int factor = (res > 0) ? 1 : -1;
+                int factor = (res >= 0.0f) ? 1 : -1;
                res        = res + factor * 0.5f;
-                res        = res > 127.0 ? 127.0 : res;
+                res        = res > 127.0f ? 127.0f : res;
-                res        = res < -128.0 ? -128.0 : res;
+                res        = res < -128.0f ? -128.0f : res;
            }
            return res;

--- a/src/py/CMakeLists.txt
+++ b/src/py/CMakeLists.txt
@@ -12,12 +12,7 @@ if(MIGRAPHX_ENABLE_PYTHON)
        C_VISIBILITY_PRESET hidden
        CXX_VISIBILITY_PRESET hidden
    )
-    if(MIGRAPHX_ENABLE_TF)
+    target_link_libraries(migraphx_py PRIVATE migraphx migraphx_tf migraphx_onnx migraphx_cpu)
-        target_link_libraries(migraphx_py PRIVATE migraphx migraphx_tf migraphx_cpu)
-        target_compile_definitions(migraphx_py PRIVATE -DENABLE_TF)
-    else()
-        target_link_libraries(migraphx_py PRIVATE migraphx migraphx_onnx migraphx_cpu)
-    endif()
    if(MIGRAPHX_ENABLE_GPU)
        target_link_libraries(migraphx_py PRIVATE migraphx_gpu)
        target_compile_definitions(migraphx_py PRIVATE -DHAVE_GPU)

--- a/src/py/migraphx_py.cpp
+++ b/src/py/migraphx_py.cpp
@@ -6,11 +6,8 @@
 #include <migraphx/generate.hpp>
 #include <migraphx/cpu/target.hpp>
 #include <migraphx/stringutils.hpp>
-#ifdef ENABLE_TF
 #include <migraphx/tf.hpp>
-#else
 #include <migraphx/onnx.hpp>
-#endif
 #ifdef HAVE_GPU
 #include <migraphx/gpu/target.hpp>
@@ -162,16 +159,13 @@ PYBIND11_MODULE(migraphx, m)
        .def("__ne__", std::not_equal_to<migraphx::program>{})
        .def("__repr__", [](const migraphx::program& p) { return migraphx::to_string(p); });
-#ifdef ENABLE_TF
    m.def("parse_tf",
          &migraphx::parse_tf,
          "Parse tf protobuf (default format is nhwc)",
          py::arg("filename"),
          py::arg("is_nhwc") = true);
-#else
    m.def("parse_onnx", &migraphx::parse_onnx);
-#endif
    m.def("get_target", [](const std::string& name) -> migraphx::target {
        if(name == "cpu")
            return migraphx::cpu::target{};

--- a/src/quantization.cpp
+++ b/src/quantization.cpp
@@ -327,9 +327,7 @@ void quantize_int8(program& prog,
                ins,
                op::quant_convolution{padding, stride, dilation, padding_mode, group},
                converted_inputs);
-            auto fp_conv = prog.insert_instruction(
+            prog.replace_instruction(ins, op::convert{orig_type, adjust_factor, 0.0f}, quant_conv);
-                ins, op::convert{shape::float_type, adjust_factor, 0.0f}, quant_conv);
-            prog.replace_instruction(ins, op::convert{orig_type, 1.0f, 0.0f}, fp_conv);
        }
        else
        {

--- a/src/targets/gpu/device/convert.cpp
+++ b/src/targets/gpu/device/convert.cpp
@@ -21,7 +21,7 @@ void convert(hipStream_t stream,
            {
                gs_launch(stream, result.get_shape().elements())([=](auto i) {
                    float res     = input_ptr[i] * scale + shift;
-                    int factor    = (res > 0) ? 1 : -1;
+                    int factor    = (res >= 0.0f) ? 1 : -1;
                    output_ptr[i] = static_cast<int8_t>(
                        std::min<float>(std::max<float>(-128.0f, res + factor * 0.5), 127.0f));
                });