Commit 5cd9877d authored by Khalique

added more operators, started tests for perf and verify

parent ee55f2c8
CMakeLists.txt
@@ -33,12 +33,12 @@
 add_executable(read_tf read_tf.cpp)
 rocm_clang_tidy_check(read_tf)
 target_link_libraries(read_tf migraphx_tf)
-# add_executable(verify_onnx verify_onnx.cpp)
-# rocm_clang_tidy_check(verify_onnx)
-# target_link_libraries(verify_onnx migraphx_onnx migraphx_cpu migraphx_gpu)
-# add_executable(perf_onnx perf_onnx.cpp)
-# rocm_clang_tidy_check(perf_onnx)
-# target_link_libraries(perf_onnx migraphx_onnx migraphx_cpu migraphx_gpu)
-# endif()
+if(MIGRAPHX_ENABLE_GPU)
+add_executable(verify_tf verify_tf.cpp)
+rocm_clang_tidy_check(verify_tf)
+target_link_libraries(verify_tf migraphx_tf migraphx_cpu migraphx_gpu)
+add_executable(perf_tf perf_tf.cpp)
+rocm_clang_tidy_check(perf_tf)
+target_link_libraries(perf_tf migraphx_tf migraphx_cpu migraphx_gpu)
+endif()
perf_tf.cpp
#include <migraphx/tf.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/verify.hpp>
#include <cstring>
#include <iostream>
#include <string>

migraphx::program::parameter_map create_param_map(const migraphx::program& p, bool gpu = true)
{
    migraphx::program::parameter_map m;
    for(auto&& x : p.get_parameter_shapes())
    {
        if(gpu)
            m[x.first] = migraphx::gpu::to_gpu(migraphx::generate_argument(x.second));
        else
            m[x.first] = migraphx::generate_argument(x.second);
    }
    return m;
}

int main(int argc, char const* argv[])
{
    if(argc > 1)
    {
        bool is_nhwc = true;
        if(argc > 2)
        {
            if(strcmp(argv[2], "nchw") == 0)
                is_nhwc = false;
        }
        std::string file = argv[1];
        std::size_t n    = argc > 3 ? std::stoul(argv[3]) : 50;
        auto p           = migraphx::parse_tf(file, is_nhwc);
        std::cout << "Compiling ... " << std::endl;
        p.compile(migraphx::gpu::target{});
        std::cout << "Allocating params ... " << std::endl;
        auto m = create_param_map(p);
        std::cout << "Running performance report ... " << std::endl;
        p.perf_report(std::cout, n, m);
    }
}
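As a rough illustration of what the report above measures, a hand-rolled timing loop over p.eval could look like the sketch below. This is a hypothetical helper, not part of the commit; it reuses the create_param_map defined above, and since GPU evaluation may be asynchronous the wall-clock average is only approximate.

#include <chrono>
#include <iostream>

// Sketch only: average wall-clock time per evaluation of a compiled program.
void time_program(migraphx::program& p, std::size_t n)
{
    auto m = create_param_map(p); // same helper as above
    p.eval(m);                    // warm-up run
    auto start = std::chrono::steady_clock::now();
    for(std::size_t i = 0; i < n; i++)
        p.eval(m);
    auto stop = std::chrono::steady_clock::now();
    std::chrono::duration<double, std::milli> elapsed = stop - start;
    std::cout << "Average: " << elapsed.count() / n << " ms" << std::endl;
}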
@@ -36,20 +36,34 @@ struct tf_parser
     std::unordered_map<std::string, op_func> ops;

+    void nhwc_to_nchw(std::size_t& dim)
+    {
+        switch(dim)
+        {
+        case 0: dim = 0; break;
+        case 1: dim = 2; break;
+        case 2: dim = 3; break;
+        case 3: dim = 1; break;
+        }
+    }
+
     tf_parser()
     {
         add_generic_op("Identity", op::identity{});
         add_generic_op("Relu", op::relu{});
-        add_binary_op("BiasAdd", op::add{});
+        // add_binary_op("BiasAdd", op::add{});
         add_mem_op("AvgPool", &tf_parser::parse_pooling);
-        // add_mem_op("ConcatV2", &tf_parser::parse_concat);
+        add_mem_op("BiasAdd", &tf_parser::parse_biasadd);
+        add_mem_op("ConcatV2", &tf_parser::parse_concat);
         add_mem_op("Const", &tf_parser::parse_constant);
         add_mem_op("Conv2D", &tf_parser::parse_conv);
         add_mem_op("FusedBatchNorm", &tf_parser::parse_batchnorm);
         add_mem_op("MaxPool", &tf_parser::parse_pooling);
-        // add_mem_op("Reshape", &tf_parser::parse_reshape);
+        add_mem_op("Reshape", &tf_parser::parse_reshape);
+        add_mem_op("Softmax", &tf_parser::parse_softmax);
+        add_mem_op("Squeeze", &tf_parser::parse_squeeze);
     }

     template <class F>
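For orientation, the nhwc_to_nchw switch above and the transpose permutations used later in this commit encode the same layout change; a standalone sketch with hypothetical names, not part of the parser:

#include <array>
#include <cstddef>

// Hypothetical illustration: NHWC axis i lands at nhwc_axis_in_nchw[i]
// once the tensor is stored as NCHW (N stays put, C moves to axis 1).
constexpr std::array<std::size_t, 4> nhwc_axis_in_nchw = {{0, 2, 3, 1}};

// The matching tensor permutations used by the parser:
//   op::transpose{{0, 3, 1, 2}} converts NHWC to NCHW,
//   op::transpose{{0, 2, 3, 1}} is its inverse, NCHW back to NHWC.
static_assert(nhwc_axis_in_nchw[3] == 1, "channels move to axis 1");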
@@ -76,10 +90,18 @@ struct tf_parser
     template <class T>
     void add_binary_op(std::string name, T x)
     {
-        add_op(name, [this, x](attribute_map, std::vector<instruction_ref> args) {
+        add_op(name, [this, x](attribute_map attributes, std::vector<instruction_ref> args) {
             if(args.size() != 2)
                 MIGRAPHX_THROW("binary operators should have 2 operands");
-            return add_broadcastable_binary_op(args[0], args[1], x);
+            auto l0 = args[1];
+            if(contains(attributes, "data_format"))
+            {
+                if(is_nhwc)
+                {
+                    l0 = prog.add_instruction(op::transpose{{0, 3, 1, 2}}, args[1]);
+                }
+            }
+            return add_broadcastable_binary_op(args[0], l0, x);
         });
     }
@@ -138,10 +160,10 @@ struct tf_parser
     instruction_ref
     parse_batchnorm(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
-        float epsilon  = 1e-4f;
-        float momentum = 1.f;
+        float epsilon  = 1e-5f;
+        float momentum = 0.9f;
         op::batch_norm_inference::bn_infer_mode_t bn_mode =
-            op::batch_norm_inference::per_activation;
+            op::batch_norm_inference::spatial;
         if(contains(attributes, "epsilon"))
         {
             epsilon = attributes.at("epsilon").f();
@@ -151,16 +173,30 @@ struct tf_parser
         return prog.add_instruction(op, std::move(args));
     }

+    instruction_ref
+    parse_biasadd(const std::string&, attribute_map, std::vector<instruction_ref> args)
+    {
+        // assume second arg is bias
+        std::vector<int64_t> dims;
+        copy(args[0]->get_shape().lens(), std::back_inserter(dims));
+        auto l0 = prog.add_instruction(op::reshape{dims}, args[1]);
+        return prog.add_instruction(op::add{}, args[0], l0);
+    }
+
     instruction_ref
     parse_concat(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
     {
         // get index for axis within args
         std::size_t axis_idx = attributes.at("N").i();
         std::size_t axis     = args[axis_idx]->eval().at<int64_t>();
+        if(is_nhwc and axis < 4)
+        {
+            nhwc_to_nchw(axis);
+        }
         op::concat op{axis};
         // return only first N arguments (assuming last index is the axis value)
         return prog.add_instruction(
-            op, std::vector<instruction_ref>(args.begin(), args.begin() + axis));
+            op, std::vector<instruction_ref>(args.begin(), args.begin() + args.size() - 1));
     }

     instruction_ref parse_constant(const std::string&,
@@ -330,6 +366,30 @@ struct tf_parser
         }
     }

+    instruction_ref
+    parse_softmax(const std::string&, const attribute_map&, std::vector<instruction_ref> args)
+    {
+        auto dims = args.front()->get_shape().lens();
+        auto r =
+            prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1]), 1, 1}}, args.front());
+        auto s = prog.add_instruction(op::softmax{}, r);
+        return prog.add_instruction(op::reshape{{long(dims[0]), long(dims[1])}}, s);
+    }
+
+    instruction_ref
+    parse_squeeze(const std::string&, attribute_map attributes, std::vector<instruction_ref> args)
+    {
+        op::squeeze op;
+        auto axes = attributes.at("squeeze_dims").list().i();
+        copy(axes, std::back_inserter(op.axes));
+        auto l0 = args[0];
+        if(is_nhwc)
+        {
+            l0 = prog.add_instruction(op::transpose{{0, 2, 3, 1}}, args[0]);
+        }
+        return prog.add_instruction(op, l0);
+    }
+
     void parse_graph(const tensorflow::GraphDef& graph)
     {
         nodes = get_nodes(graph, input_nodes);
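The reshape/softmax/reshape sequence in parse_softmax suggests op::softmax is only defined on 4-D input here (an assumption read off the code): a 2-D logit tensor {N, C} is viewed as {N, C, 1, 1}, softmaxed, then viewed back. Numerically the result should match this plain reference, shown as a self-contained sketch:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Reference softmax over the channel axis of a row-major {n, c} tensor,
// with the usual max subtraction for numerical stability.
std::vector<float> softmax_2d(const std::vector<float>& x, std::size_t n, std::size_t c)
{
    std::vector<float> y(x.size());
    for(std::size_t i = 0; i < n; i++)
    {
        float m   = *std::max_element(x.begin() + i * c, x.begin() + (i + 1) * c);
        float sum = 0;
        for(std::size_t j = 0; j < c; j++)
        {
            y[i * c + j] = std::exp(x[i * c + j] - m);
            sum += y[i * c + j];
        }
        for(std::size_t j = 0; j < c; j++)
            y[i * c + j] /= sum;
    }
    return y;
}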
verify_tf.cpp
#include <migraphx/tf.hpp>
#include <migraphx/cpu/target.hpp>
#include <migraphx/gpu/target.hpp>
#include <migraphx/gpu/hip.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/verify_args.hpp>
#include <migraphx/instruction.hpp>
#include <algorithm>
#include <functional>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

template <class T>
auto get_hash(const T& x)
{
    return std::hash<T>{}(x);
}

template <class F>
migraphx::argument run_cpu(F f)
{
    auto p = f();
    p.compile(migraphx::cpu::target{});
    migraphx::program::parameter_map m;
    for(auto&& x : p.get_parameter_shapes())
    {
        m[x.first] = migraphx::generate_argument(x.second, get_hash(x.first));
    }
    auto out = p.eval(m);
    std::cout << p << std::endl;
    return out;
}

template <class F>
migraphx::argument run_gpu(F f)
{
    auto p = f();
    p.compile(migraphx::gpu::target{});
    migraphx::program::parameter_map m;
    for(auto&& x : p.get_parameter_shapes())
    {
        m[x.first] =
            migraphx::gpu::to_gpu(migraphx::generate_argument(x.second, get_hash(x.first)));
    }
    auto out = p.eval(m);
    std::cout << p << std::endl;
    // copy the result back to the host once
    return migraphx::gpu::from_gpu(out);
}

template <class F>
void verify_program(const std::string& name, F f, double tolerance = 100)
{
    auto x = run_cpu(f);
    auto y = run_gpu(f);
    migraphx::verify_args(name, x, y, tolerance);
    // std::cout << "cpu: " << x << std::endl;
    // std::cout << "gpu: " << y << std::endl;
}

void verify_instructions(const migraphx::program& prog, double tolerance = 80)
{
    for(auto&& ins : prog)
    {
        // skip internal instructions and pure data-movement ops
        if(ins.name().front() == '@')
            continue;
        if(ins.name() == "broadcast")
            continue;
        if(ins.name() == "transpose")
            continue;
        if(ins.name() == "reshape")
            continue;
        // rebuild a one-instruction program with the same literals and
        // parameter shapes as the original instruction's inputs
        auto create_program = [&] {
            migraphx::program p;
            std::vector<migraphx::instruction_ref> inputs;
            for(auto&& arg : ins.inputs())
            {
                if(arg->name() == "@literal")
                    inputs.push_back(p.add_literal(arg->get_literal()));
                else
                    inputs.push_back(
                        p.add_parameter(std::to_string(inputs.size()), arg->get_shape()));
            }
            p.add_instruction(ins.get_operator(), inputs);
            return p;
        };
        try
        {
            std::cout << "Verify: " << ins.name() << std::endl;
            std::cout << create_program() << std::endl;
            verify_program(ins.name(), create_program, tolerance);
        }
        catch(...)
        {
            std::cout << "Instruction " << ins.name() << " threw an exception." << std::endl;
            throw;
        }
    }
}

template <class F>
void verify_reduced(F f, int n, double tolerance = 80)
{
    // drop the last n + 1 instructions so a shorter prefix of the program is verified
    auto create_program = [&] {
        migraphx::program p = f();
        auto last           = std::prev(p.end(), n + 1);
        p.remove_instructions(last, p.end());
        return p;
    };
    std::cout << "Verify: " << std::endl;
    std::cout << create_program() << std::endl;
    verify_program(std::to_string(n), create_program, tolerance);
}

template <class F>
void verify_reduced_program(F f, double tolerance = 80)
{
    migraphx::program p = f();
    auto n              = std::distance(p.begin(), p.end());
    for(std::ptrdiff_t i = 0; i < n; i++)
    {
        verify_reduced(f, i, tolerance);
    }
}

int main(int argc, char const* argv[])
{
    std::vector<std::string> args(argv + 1, argv + argc);
    if(not args.empty())
    {
        bool is_nhwc = true;
        if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "nchw"; }))
        {
            is_nhwc = false;
        }
        std::string file = args.front();
        auto p           = migraphx::parse_tf(file, is_nhwc);
        std::cout << p << std::endl;
        if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "-i"; }))
        {
            verify_instructions(p);
        }
        else if(std::any_of(args.begin(), args.end(), [](const auto& s) { return s == "-r"; }))
        {
            verify_reduced_program([&] { return migraphx::parse_tf(file, is_nhwc); });
        }
        else
        {
            verify_program(file, [&] { return migraphx::parse_tf(file, is_nhwc); });
        }
    }
}
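Going by the argument handling in main: verify_tf takes the frozen-graph file as its first argument, an optional nchw to parse in NCHW layout, -i to verify each instruction of the parsed program in isolation, or -r to verify progressively truncated copies of the program; with no flag the whole program is verified against the CPU reference.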