Split cpu and reference implementation (#671)

* Add all_targets cmake target
* Rename target
* Add ref target
* Rename tests
* Refactor compiler target
* Formatting
* Verify for every target
* Formatting
* Add verify test suite
* Formatting
* Add initial test programs
* Formatting
* Add rnn tests
* Formatting
* Validate gpu
* Formatting
* Remove old gpu tests
* Fix gpu tests
* Fix ref error
* Fix tidy issues
* Formatting
* Tidy fixes
* Fix header in python api
* Rename to ref
* Use ref in verify_onnx
* Fix tidy issue
* Build with verbose on
* Fix typo
* Remove verbose
* Rename some cpu prefixes to ref

Co-authored-by: Shucai Xiao <Shucai.Xiao@amd.com>

Split cpu and reference implementation (#671)
* Add all_targets cmake target
* Rename target
* Add ref target
* Rename tests
* Refactor compiler target
* Formatting
* Verify for every target
* Formatting
* Add verify test suite
* Formatting
* Add initial test programs
* Formatting
* Add rnn tests
* Formatting
* Validate gpu
* Formatting
* Remove old gpu tests
* Fix gpu tests
* Fix ref error
* Fix tidy issues
* Formatting
* Tidy fixes
* Fix header in python api
* Rename to ref
* Use ref in verify_onnx
* Fix tidy issue
* Build with verbose on
* Fix typo
* Remove verbose
* Rename some cpu prefixes to ref

Co-authored-by: Shucai Xiao <Shucai.Xiao@amd.com>
500d9441 · Paul Fultz II · GitHub · ba33d25c · 500d9441 · 500d9441
Unverified Commit 500d9441 authored Nov 04, 2020 by Paul Fultz II Committed by GitHub Nov 04, 2020
20 changed files
--- a/src/targets/ref/include/migraphx/ref/gemm.hpp
+++ b/src/targets/ref/include/migraphx/ref/gemm.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_REF_GEMM_HPP
+#define MIGRAPHX_GUARD_RTGLIB_REF_GEMM_HPP
+
+#include <cstdint>
+#include <migraphx/argument.hpp>
+#include <migraphx/config.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace ref {
+
+// GEMM entry points of the ref target. They are only declared here; the definitions live in
+// another translation unit.
+// NOTE(review): presumably computes c = alpha * (a x b) + beta * c and writes the result through
+// c_arg even though it is taken by const reference -- confirm against the definition.
+
+// Overload taking floating-point scaling factors.
+void migemm(
+    const argument& c_arg, const argument& a_arg, const argument& b_arg, float alpha, float beta);
+
+// Overload taking 32-bit integer scaling factors.
+void migemm(const argument& c_arg,
+            const argument& a_arg,
+            const argument& b_arg,
+            int32_t alpha,
+            int32_t beta);
+
+} // namespace ref
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/ref/include/migraphx/ref/lowering.hpp
+++ b/src/targets/ref/include/migraphx/ref/lowering.hpp
+#ifndef MIGRAPHX_GUARD_RTGLIB_REF_LOWERING_HPP
+#define MIGRAPHX_GUARD_RTGLIB_REF_LOWERING_HPP
+
+#include <string>
+#include <migraphx/program.hpp>
+#include <migraphx/config.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace ref {
+
+// Compiler pass of the ref target, reported under the name "ref::lowering".
+struct lowering
+{
+    // Name under which this pass identifies itself.
+    std::string name() const { return "ref::lowering"; }
+    // Runs the pass on program `p`; declared here, defined out of line.
+    // NOTE(review): presumably rewrites generic operators into ref implementations -- confirm
+    // against lowering.cpp.
+    void apply(program& p) const;
+};
+
+} // namespace ref
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/ref/include/migraphx/ref/target.hpp
+++ b/src/targets/ref/include/migraphx/ref/target.hpp
+#ifndef MIGRAPHX_GUARD_MIGRAPHLIB_REF_TARGET_HPP
+#define MIGRAPHX_GUARD_MIGRAPHLIB_REF_TARGET_HPP
+
+#include <string>
+#include <vector>
+#include <migraphx/program.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/compile_options.hpp>
+#include <migraphx/ref/context.hpp>
+#include <migraphx/config.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+struct pass;
+namespace ref {
+
+// Target description for the ref implementation.
+struct target
+{
+    // Name of this target; defined out of line.
+    std::string name() const;
+    // List of passes used to compile a program for this target; defined out of line.
+    std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
+    // Returns a default-constructed context (the name is looked up inside namespace ref).
+    migraphx::context get_context() const { return context{}; }
+
+    // Both copy directions hand the argument back unchanged.
+    argument copy_to(const argument& arg) const { return arg; }
+    argument copy_from(const argument& arg) const { return arg; }
+    // Produces an argument for shape `s`; defined out of line.
+    argument allocate(const shape& s) const;
+};
+
+// NOTE(review): presumably makes this target discoverable by name -- confirm in
+// register_target.hpp.
+MIGRAPHX_REGISTER_TARGET(target);
+
+} // namespace ref
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+
+#endif
--- a/src/targets/ref/lowering.cpp
+++ b/src/targets/ref/lowering.cpp
--- a/src/targets/ref/target.cpp
+++ b/src/targets/ref/target.cpp
+
+#include <migraphx/ref/target.hpp>
+#include <migraphx/ref/lowering.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/pass.hpp>
+#include <migraphx/auto_contiguous.hpp>
+#include <migraphx/rewrite_rnn.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/generate.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace ref {
+
+std::string target::name() const
+{
+    // Tests in this change request the target through the string "ref".
+    return "ref";
+}
+
+// Builds the pass list for the ref target. Neither the context nor the compile options are
+// consulted (both parameters are unnamed); the order of the stages below is the order returned.
+std::vector<pass> target::get_passes(migraphx::context&, const compile_options&) const
+{
+    std::vector<pass> pipeline;
+    pipeline.emplace_back(rewrite_rnn{});
+    pipeline.emplace_back(dead_code_elimination{});
+    pipeline.emplace_back(auto_contiguous{});
+    pipeline.emplace_back(dead_code_elimination{});
+    pipeline.emplace_back(lowering{});
+    pipeline.emplace_back(dead_code_elimination{});
+    return pipeline;
+}
+
+argument target::allocate(const shape& s) const
+{
+    // Delegates to fill_argument with a fill value of 0 for the requested shape.
+    return fill_argument(s, 0);
+}
+
+MIGRAPHX_REGISTER_TARGET(target);
+
+} // namespace ref
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/verify_args.cpp
+++ b/src/verify_args.cpp
@@ -5,75 +5,75 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

 bool verify_args(const std::string& name,
-                 const argument& cpu_arg,
-                 const argument& gpu_arg,
+                 const argument& ref_arg,
+                 const argument& target_arg,
                 double tolerance)
 {
    bool passed = true;
-    visit_all(cpu_arg, gpu_arg)([&](auto cpu, auto gpu) {
+    visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
        double error;
-        passed = verify_range(cpu, gpu, tolerance, &error);
+        passed = verify_range(ref, target, tolerance, &error);
        if(not passed)
        {
            // TODO: Check for nans
            std::cout << "FAILED: " << name << std::endl;
            std::cout << "error: " << error << std::endl;
-            if(cpu.size() < 32)
-                std::cout << "cpu:" << cpu << std::endl;
-            if(gpu.size() < 32)
-                std::cout << "gpu:" << gpu << std::endl;
-            if(range_zero(cpu))
-                std::cout << "Cpu data is all zeros" << std::endl;
-            if(range_zero(gpu))
-                std::cout << "Gpu data is all zeros" << std::endl;
+            if(ref.size() < 32)
+                std::cout << "ref:" << ref << std::endl;
+            if(target.size() < 32)
+                std::cout << "target:" << target << std::endl;
+            if(range_zero(ref))
+                std::cout << "Ref data is all zeros" << std::endl;
+            if(range_zero(target))
+                std::cout << "Target data is all zeros" << std::endl;

-            auto mxdiff = max_diff(cpu, gpu);
+            auto mxdiff = max_diff(ref, target);
            std::cout << "Max diff: " << mxdiff << std::endl;

-            auto idx = mismatch_idx(cpu, gpu, float_equal);
-            if(idx < range_distance(cpu))
+            auto idx = mismatch_idx(ref, target, float_equal);
+            if(idx < range_distance(ref))
            {
-                std::cout << "Mismatch at " << idx << ": " << cpu[idx] << " != " << gpu[idx]
+                std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
                          << std::endl;
            }

-            auto cpu_nan_idx = find_idx(cpu, not_finite);
-            if(cpu_nan_idx >= 0)
-                std::cout << "Non finite number found in cpu at " << cpu_nan_idx << ": "
-                          << cpu[cpu_nan_idx] << std::endl;
+            auto ref_nan_idx = find_idx(ref, not_finite);
+            if(ref_nan_idx >= 0)
+                std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
+                          << ref[ref_nan_idx] << std::endl;

-            auto gpu_nan_idx = find_idx(gpu, not_finite);
-            if(gpu_nan_idx >= 0)
-                std::cout << "Non finite number found in gpu at " << gpu_nan_idx << ": "
-                          << gpu[gpu_nan_idx] << std::endl;
+            auto target_nan_idx = find_idx(target, not_finite);
+            if(target_nan_idx >= 0)
+                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
+                          << target[target_nan_idx] << std::endl;
            std::cout << std::endl;
        }
        else
        {
-            if(range_zero(cpu))
-                std::cout << "Cpu data is all zeros" << std::endl;
-            if(range_zero(gpu))
-                std::cout << "Gpu data is all zeros" << std::endl;
+            if(range_zero(ref))
+                std::cout << "Ref data is all zeros" << std::endl;
+            if(range_zero(target))
+                std::cout << "Target data is all zeros" << std::endl;

-            // auto mxdiff = max_diff(cpu, gpu);
+            // auto mxdiff = max_diff(ref, target);
            // std::cout << "Max diff: " << mxdiff << std::endl;

-            // auto idx = mismatch_idx(cpu, gpu, float_equal);
-            // if(idx < range_distance(cpu))
+            // auto idx = mismatch_idx(ref, target, float_equal);
+            // if(idx < range_distance(ref))
            // {
-            //     std::cout << "Mismatch at " << idx << ": " << cpu[idx] << " != " << gpu[idx]
+            //     std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
            //               << std::endl;
            // }

-            auto cpu_nan_idx = find_idx(cpu, not_finite);
-            if(cpu_nan_idx >= 0)
-                std::cout << "Non finite number found in cpu at " << cpu_nan_idx << ": "
-                          << cpu[cpu_nan_idx] << std::endl;
+            auto ref_nan_idx = find_idx(ref, not_finite);
+            if(ref_nan_idx >= 0)
+                std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
+                          << ref[ref_nan_idx] << std::endl;

-            auto gpu_nan_idx = find_idx(gpu, not_finite);
-            if(gpu_nan_idx >= 0)
-                std::cout << "Non finite number found in gpu at " << gpu_nan_idx << ": "
-                          << gpu[gpu_nan_idx] << std::endl;
+            auto target_nan_idx = find_idx(target, not_finite);
+            if(target_nan_idx >= 0)
+                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
+                          << target[target_nan_idx] << std::endl;
            // std::cout << std::endl;
        }
    });

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -67,6 +67,7 @@ function(add_test_command NAME EXE)
            add_test(NAME ${NAME} COMMAND ${EXE} ${ARGN})
        endif()
    endif()
+    set_tests_properties(${NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
 endfunction()

 function(add_test_executable TEST_NAME)
@@ -85,8 +86,7 @@ function(add_test_executable TEST_NAME)
    add_test_command(${TEST_NAME} ${TEST_COMMAND})
    add_dependencies(tests ${TEST_NAME})
    add_dependencies(check ${TEST_NAME})
-    set_tests_properties(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
-    target_link_libraries(${TEST_NAME} migraphx migraphx_cpu migraphx_onnx)
+    target_link_libraries(${TEST_NAME} migraphx migraphx_ref migraphx_onnx)
    target_include_directories(${TEST_NAME} PUBLIC include)
 endfunction(add_test_executable)

@@ -122,7 +122,7 @@ foreach(ONNX_TEST ${ONNX_TESTS})
    set(TEST_NAME test_${BASE_NAME})
    add_executable(${TEST_NAME} ${TES_ONNX_DIR}/${ONNX_TEST})
    rocm_clang_tidy_check(${TEST_NAME})
-    target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_cpu)
+    target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_ref)
    target_include_directories(${TEST_NAME} PUBLIC include)
    add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/onnx) 
    add_dependencies(tests ${TEST_NAME})
@@ -132,13 +132,14 @@ endforeach()
 # tf test
 add_executable(test_tf tf/tf_test.cpp)
 rocm_clang_tidy_check(test_tf)
-target_link_libraries(test_tf migraphx_tf migraphx_cpu)
+target_link_libraries(test_tf migraphx_tf migraphx_ref)
 target_include_directories(test_tf PUBLIC include)
 add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/tf) 
 add_dependencies(tests test_tf)
 add_dependencies(check test_tf)

 add_subdirectory(api)
+add_subdirectory(verify)
 if(MIGRAPHX_ENABLE_PYTHON)
 add_subdirectory(py)
 endif()
@@ -172,7 +173,7 @@ function(test_headers PREFIX)
 endfunction()

 test_headers(migraphx ${CMAKE_SOURCE_DIR}/src/include/migraphx/*.hpp)
-test_headers(migraphx/cpu ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp)
+test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/ref/*.hpp)
 if(MIGRAPHX_ENABLE_GPU)
 test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
 endif()
--- a/test/api/CMakeLists.txt
+++ b/test/api/CMakeLists.txt
@@ -11,7 +11,7 @@ function(add_api_test TEST_NAME TEST_SRC)
 endfunction()


-add_api_test(cpu test_cpu.cpp)
+add_api_test(ref test_cpu.cpp)
 add_api_test(save_load test_save_load.cpp)
 add_api_test(op test_op_construct.cpp)
 if(MIGRAPHX_ENABLE_GPU)

--- a/test/api/test_cpu.cpp
+++ b/test/api/test_cpu.cpp
@@ -6,7 +6,7 @@ TEST_CASE(load_and_run)
 {
    auto p             = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
    auto shapes_before = p.get_output_shapes();
-    p.compile(migraphx::target("cpu"));
+    p.compile(migraphx::target("ref"));
    auto shapes_after = p.get_output_shapes();
    CHECK(shapes_before.size() == 1);
    CHECK(shapes_before.size() == shapes_after.size());
@@ -26,7 +26,7 @@ TEST_CASE(load_and_run_init_list)
 {
    auto p             = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
    auto shapes_before = p.get_output_shapes();
-    p.compile(migraphx::target("cpu"));
+    p.compile(migraphx::target("ref"));
    auto shapes_after = p.get_output_shapes();
    CHECK(shapes_before.size() == 1);
    CHECK(shapes_before.size() == shapes_after.size());
@@ -61,7 +61,7 @@ TEST_CASE(quantize_int8)
 {
    auto p1        = migraphx::parse_onnx("gemm_ex_test.onnx");
    const auto& p2 = p1;
-    auto t         = migraphx::target("cpu");
+    auto t         = migraphx::target("ref");
    migraphx::quantize_int8_options options;
    migraphx::quantize_int8(p1, t, options);

@@ -84,7 +84,7 @@ TEST_CASE(load_and_run_user_input_shape)
    options.set_input_parameter_shape("0", {2, 3, 64, 64});
    auto p             = migraphx::parse_onnx("conv_relu_maxpool_test.onnx", options);
    auto shapes_before = p.get_output_shapes();
-    p.compile(migraphx::target("cpu"));
+    p.compile(migraphx::target("ref"));
    auto shapes_after = p.get_output_shapes();
    CHECK(shapes_before.size() == 1);
    CHECK(shapes_before.size() == shapes_after.size());
@@ -104,7 +104,7 @@ TEST_CASE(zero_parameter)
 {
    auto p             = migraphx::parse_onnx("constant_fill_test.onnx");
    auto shapes_before = p.get_output_shapes();
-    p.compile(migraphx::target("cpu"));
+    p.compile(migraphx::target("ref"));
    auto shapes_after = p.get_output_shapes();
    CHECK(shapes_before.size() == 1);
    CHECK(shapes_before.size() == shapes_after.size());

--- a/test/context_test.cpp
+++ b/test/context_test.cpp
 #include <migraphx/serialize.hpp>
 #include <migraphx/context.hpp>
-#include <migraphx/cpu/context.hpp>
+#include <migraphx/ref/context.hpp>
 #include <migraphx/functional.hpp>
 #include <test.hpp>

 TEST_CASE(context)
 {
-    migraphx::context ctx = migraphx::cpu::context{};
+    // A ref context serializes to an empty value ...
+    migraphx::context ctx = migraphx::ref::context{};
    migraphx::value v     = ctx.to_value();
    EXPECT(v.empty());

-    migraphx::context cpu_ctx = migraphx::cpu::context{};
-    cpu_ctx.from_value(v);
+    // ... and that value is fed back into a second ref context via from_value.
+    migraphx::context ref_ctx = migraphx::ref::context{};
+    ref_ctx.from_value(v);
 }


--- a/test/gpu/fast_math.cpp
+++ b/test/gpu/fast_math.cpp
 #include <test.hpp>
 #include <migraphx/quantization.hpp>
 #include <migraphx/iterator_for.hpp>
-#include "test_utils.hpp"
-#include "test.hpp"
+#include <migraphx/op/add.hpp>
+#include <migraphx/op/mul.hpp>
+#include <migraphx/op/multibroadcast.hpp>
+#include <migraphx/op/pow.hpp>
+#include <migraphx/op/tanh.hpp>
+#include <migraphx/gpu/target.hpp>
+#include <migraphx/instruction.hpp>

 migraphx::program create_gelu()
 {

--- a/test/gpu/ops_test.cpp
+++ b/test/gpu/ops_test.cpp
--- a/test/gpu/quantization.cpp
+++ b/test/gpu/quantization.cpp
--- a/test/gpu/rnn_ops_test.cpp
+++ b/test/gpu/rnn_ops_test.cpp
--- a/test/gpu/test_utils.hpp
+++ b/test/gpu/test_utils.hpp
--- a/test/onnx/verify_onnx.cpp
+++ b/test/onnx/verify_onnx.cpp
--- a/test/perf_report.cpp
+++ b/test/perf_report.cpp
--- a/test/program_test.cpp
+++ b/test/program_test.cpp
--- a/test/py/CMakeLists.txt
+++ b/test/py/CMakeLists.txt
@@ -23,7 +23,7 @@ foreach(PYTHON_VERSION ${PYTHON_VERSIONS})
    add_dependencies(check migraphx_py_${PYTHON_VERSION})
 endforeach()

-add_py_test(cpu test_cpu.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
+add_py_test(ref test_cpu.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
 add_py_test(save_load test_save_load.py WORKING_DIRECTORY ${TEST_ONNX_DIR})
 if(MIGRAPHX_ENABLE_GPU)
 add_py_test(gpu_offload test_gpu_offload.py WORKING_DIRECTORY ${TEST_ONNX_DIR})

--- a/test/py/test_cpu.py
+++ b/test/py/test_cpu.py