Commit 88bdd75a (MIGraphX), authored Jun 11, 2018 by Scott Thornton
Commit message: Added more convolution tests
Parents: bb13878f, 0005506c
Showing 6 changed files with 633 additions and 25 deletions (+633 -25):
src/targets/miopen/include/rtg/miopen/miopen_target.hpp   +19  -0
src/targets/miopen/miopen_target.cpp                       +246 -0
test/CMakeLists.txt                                        +35  -5
test/cpu_ops_test.cpp                                      +204 -15
test/include/test.hpp                                      +5   -5
test/miopen/miopen.cpp                                     +124 -0

src/targets/miopen/include/rtg/miopen/miopen_target.hpp  (new file, mode 100644)
#ifndef RTG_GUARD_RTGLIB_MIOPEN_TARGET_HPP
#define RTG_GUARD_RTGLIB_MIOPEN_TARGET_HPP

#include <rtg/program.hpp>

namespace rtg {
namespace miopen {

struct miopen_target
{
    std::string name() const;
    void apply(program& p) const;
};

} // namespace miopen
} // namespace rtg

#endif
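
Usage note (not part of the diff): the new target plugs into the existing program-compilation flow. A minimal sketch, based on how the test added later in this commit (test/miopen/miopen.cpp) drives it:

// Sketch only: compiling a program for the MIOpen target, as done in
// test/miopen/miopen.cpp in this commit.
// Assumption: compile() invokes miopen_target::apply, which rewrites
// convolution/activation instructions into miopen::* ops and inserts hip
// allocations for their outputs.
#include <rtg/program.hpp>
#include <rtg/miopen/miopen_target.hpp>

void compile_for_miopen(rtg::program& p)
{
    p.compile(rtg::miopen::miopen_target{});
}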

src/targets/miopen/miopen_target.cpp  (new file, mode 100644)
#include <rtg/miopen/miopen_target.hpp>
#include <rtg/manage_ptr.hpp>
#include <rtg/instruction.hpp>
#include <rtg/operators.hpp>
#include <miopen/miopen.h>

namespace rtg {
namespace miopen {

struct hip_allocate
{
    std::string name() const { return "hip::allocate"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(1);
        return inputs.front();
    }
    argument compute(shape output_shape, std::vector<argument>) const
    {
        char* data = nullptr;
        // TODO: Check return status
        hipMalloc(&data, output_shape.bytes());
        return {output_shape, data};
    }
};

struct hip_free
{
    std::string name() const { return "hip::free"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(1);
        return {};
    }
    argument compute(shape, std::vector<argument> args) const
    {
        // TODO: Check return status
        hipFree(args.front().data());
        return {};
    }
};

using miopen_handle          = RTG_MANAGE_PTR(miopenHandle_t, miopenDestroy);
using tensor_descriptor      = RTG_MANAGE_PTR(miopenTensorDescriptor_t, miopenDestroyTensorDescriptor);
using convolution_descriptor = RTG_MANAGE_PTR(miopenConvolutionDescriptor_t, miopenDestroyConvolutionDescriptor);
using activation_descriptor  = RTG_MANAGE_PTR(miopenActivationDescriptor_t, miopenDestroyActivationDescriptor);

template <class Result, class F, class... Ts>
Result make_obj(F f, Ts... xs)
{
    typename Result::pointer x = nullptr;
    auto status = f(&x, xs...);
    Result r{x};
    if(status != miopenStatusSuccess) RTG_THROW("MIOpen call failed");
    return r;
}

tensor_descriptor make_tensor(const rtg::shape& s)
{
    auto t = make_obj<tensor_descriptor>(&miopenCreateTensorDescriptor);
    // Convert to ints
    std::vector<int> lens(s.lens().begin(), s.lens().end());
    std::vector<int> strides(s.strides().begin(), s.strides().end());
    miopenDataType_t d;
    if(s.type() == shape::float_type)
        d = miopenFloat;
    else
        RTG_THROW("Unsupported type");
    miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());
    return t;
}

convolution_descriptor make_conv(const rtg::convolution& op)
{
    auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
    miopenInitConvolutionDescriptor(c.get(),
                                    miopenConvolution,
                                    op.padding[0],
                                    op.padding[1],
                                    op.stride[0],
                                    op.stride[1],
                                    op.dilation[0],
                                    op.dilation[1]);
    return c;
}

activation_descriptor make_relu()
{
    auto ad = make_obj<activation_descriptor>(&miopenCreateActivationDescriptor);
    miopenSetActivationDescriptor(ad.get(), miopenActivationRELU, 0, 0, 0);
    return ad;
}

struct miopen_convolution
{
    convolution op;
    shared<convolution_descriptor> cd;

    std::string name() const { return "miopen::convolution"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(4);
        return op.compute_shape({inputs.at(1), inputs.at(2)});
    }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        auto x_desc = make_tensor(args[1].get_shape());
        auto w_desc = make_tensor(args[2].get_shape());
        auto y_desc = make_tensor(output_shape);

        float alpha = 1, beta = 0;
        int algo_count;
        miopenConvAlgoPerf_t perf;
        miopenFindConvolutionForwardAlgorithm(args[0].get(),
                                              x_desc.get(),
                                              args[1].get(),
                                              w_desc.get(),
                                              args[2].get(),
                                              cd.get(),
                                              y_desc.get(),
                                              args[3].get(),
                                              1,
                                              &algo_count,
                                              &perf,
                                              nullptr,
                                              0,
                                              false);
        miopenConvolutionForward(args[0].get(),
                                 &alpha,
                                 x_desc.get(),
                                 args[1].get(),
                                 w_desc.get(),
                                 args[2].get(),
                                 cd.get(),
                                 perf.fwd_algo,
                                 &beta,
                                 y_desc.get(),
                                 args[3].get(),
                                 nullptr,
                                 0);
        return args[3];
    }
};

struct miopen_relu
{
    shared<activation_descriptor> ad;

    std::string name() const { return "miopen::relu"; }
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(3);
        return inputs.at(1);
    }
    argument compute(shape output_shape, std::vector<argument> args) const
    {
        float alpha = 1, beta = 0;
        auto x_desc = make_tensor(args[1].get_shape());
        auto y_desc = make_tensor(output_shape);
        miopenActivationForward(args[0].get(),
                                ad.get(),
                                &alpha,
                                x_desc.get(),
                                args[1].get(),
                                &beta,
                                y_desc.get(),
                                args[2].get());
        return args[2];
    }
};

struct miopen_apply
{
    program* prog = nullptr;
    instruction_ref handle{};

    void apply()
    {
        handle = prog->add_parameter("handle", shape{shape::any_type});
        for(auto it = prog->begin(); it != prog->end(); it++)
        {
            if(it->op.name() == "convolution")
            {
                apply_convolution(it);
            }
            else if(it->op.name() == "activation")
            {
                apply_activation(it);
            }
        }
    }

    instruction_ref insert_allocation(instruction_ref ins, const shape& s)
    {
        if(ins == --prog->end())
        {
            return prog->add_parameter("output", s);
        }
        else
        {
            auto is     = prog->add_outline(s);
            auto result = prog->insert_instruction(ins, hip_allocate{}, is);
            prog->insert_instruction(++ins, hip_free{}, result);
            return result;
        }
    }

    void apply_convolution(instruction_ref ins)
    {
        auto&& op   = any_cast<convolution>(ins->op);
        auto cd     = make_conv(op);
        auto output = insert_allocation(ins, ins->result);
        prog->replace_instruction(ins,
                                  miopen_convolution{op, std::move(cd)},
                                  handle,
                                  ins->arguments.at(0),
                                  ins->arguments.at(1),
                                  output);
    }

    void apply_activation(instruction_ref ins)
    {
        auto&& op = any_cast<activation>(ins->op);
        auto ad   = make_relu();
        if(op.mode == "relu")
        {
            auto output = insert_allocation(ins, ins->result);
            prog->replace_instruction(ins,
                                      miopen_relu{std::move(ad)},
                                      handle,
                                      ins->arguments.at(0),
                                      output);
        }
    }
};

std::string miopen_target::name() const { return "miopen"; }

void miopen_target::apply(program& p) const { miopen_apply{&p}.apply(); }

} // namespace miopen
} // namespace rtg
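
A note on the resource management used above (not part of the diff): make_obj relies on Result::pointer and Result taking ownership of the raw handle, which suggests RTG_MANAGE_PTR produces a std::unique_ptr-like RAII alias over the opaque MIOpen handle. A hand-rolled sketch of that assumed equivalent, for one of the descriptor types:

// Assumption: RTG_MANAGE_PTR(T, D) behaves like std::unique_ptr with D as the
// deleter; the alias name below is hypothetical and only illustrates the idea.
#include <memory>
#include <type_traits>
#include <miopen/miopen.h>

struct tensor_descriptor_deleter
{
    void operator()(miopenTensorDescriptor_t t) const { miopenDestroyTensorDescriptor(t); }
};

using tensor_descriptor_sketch =
    std::unique_ptr<std::remove_pointer_t<miopenTensorDescriptor_t>, tensor_descriptor_deleter>;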

test/CMakeLists.txt
@@ -4,9 +4,20 @@ cmake_policy(SET CMP0057 NEW)
 include(CTest)
 find_package(Threads REQUIRED)
 
-add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
+include(ProcessorCount)
+ProcessorCount(N)
+set(CTEST_PARALLEL_LEVEL ${N} CACHE STRING "CTest parallel level")
+add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j ${CTEST_PARALLEL_LEVEL} -C ${CMAKE_CFG_INTDIR})
 add_custom_target(tests)
 
+find_program(RTG_GDB gdb)
+if(RTG_GDB)
+    set(RTG_TEST_GDB On CACHE BOOL "")
+else()
+    set(RTG_TEST_GDB Off CACHE BOOL "")
+endif()
+
 set(SKIP_TESTS)
 
 function(add_test_command NAME EXE)
@@ -23,13 +34,21 @@ function(add_test_command NAME EXE)
             %1 ${ARGN}")
         add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
     else()
-        if(MIOPEN_TEST_GDB)
+        if(RTG_TEST_GDB)
+            # add_test(NAME ${NAME} COMMAND ${RTG_GDB}
+            #     --batch
+            #     --return-child-result
+            #     -ex "set disable-randomization off"
+            #     -ex run
+            #     -ex backtrace
+            #     --args $<TARGET_FILE:${EXE}> ${ARGN})
             file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmake"
                 CONTENT "
                 execute_process(COMMAND $<TARGET_FILE:${EXE}> ${ARGN} RESULT_VARIABLE RESULT)
                 if(NOT RESULT EQUAL 0)
+                    # TODO: check for core files based on pid when setting /proc/sys/kernel/core_uses_pid
                     if(EXISTS core)
-                        execute_process(COMMAND gdb $<TARGET_FILE:${EXE}> core -batch -ex bt)
+                        execute_process(COMMAND ${RTG_GDB} $<TARGET_FILE:${EXE}> core -batch -ex bt)
                     endif()
                     message(FATAL_ERROR \"Test failed\")
                 endif()
@@ -59,8 +78,8 @@ function(add_test_executable TEST_NAME)
     add_dependencies(tests ${TEST_NAME})
     add_dependencies(check ${TEST_NAME})
     set_tests_properties(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
-    target_link_libraries(${TEST_NAME} rtg)
-    target_link_libraries(${TEST_NAME} rtg_cpu)
+    target_link_libraries(${TEST_NAME} rtg rtg_cpu)
+    target_include_directories(${TEST_NAME} PUBLIC include)
 endfunction(add_test_executable)
 
 file(GLOB TESTS *.cpp)
@@ -69,3 +88,14 @@ foreach(TEST ${TESTS})
     get_filename_component(BASE_NAME ${TEST} NAME_WE)
     add_test_executable(test_${BASE_NAME} ${TEST})
 endforeach()
+
+if(RTG_ENABLE_MIOPEN)
+    # miopen tests
+    file(GLOB MIOPEN_TESTS miopen/*.cpp)
+    foreach(TEST ${MIOPEN_TESTS})
+        get_filename_component(BASE_NAME ${TEST} NAME_WE)
+        add_test_executable(test_miopen_${BASE_NAME} ${TEST})
+        target_link_libraries(test_miopen_${BASE_NAME} rtg_miopen)
+    endforeach()
+endif()

test/cpu_ops_test.cpp
@@ -5,9 +5,6 @@
 #include <rtg/operators.hpp>
 #include <rtg/cpu/cpu_target.hpp>
 
-using rtg::shape;
-using rtg::argument;
-
 void exp_test() {
     rtg::program p;
     rtg::shape s{rtg::shape::float_type, {3}};
@@ -26,37 +23,37 @@ void exp_test() {
 void gemm_test() {
     rtg::program p;
-    std::vector<float> A = {-0.00925222, 0.56250403, 0.70107397, 0.75402161, -0.505885,
+    std::vector<float> a = {-0.00925222, 0.56250403, 0.70107397, 0.75402161, -0.505885,
                             1.33628943, -0.11413, -0.31270559, 1.59336732, -0.19361027,
                             -0.91620867, 0.40108416, -0.06969921, 0.68483471, -0.39906632,
                             -1.66423624, 0.69040076, -1.31490171, -0.11282616, -0.79391814};
-    std::vector<float> B = {6.09568541e-01, -6.10527007e-01, 3.66646462e-01,
+    std::vector<float> b = {6.09568541e-01, -6.10527007e-01, 3.66646462e-01,
                             1.18951101e-01, 5.58777432e-01, -3.21296298e-01,
                             -5.95997198e-01, -5.01425721e-01, -2.84606807e-01,
                             -5.73673557e-01, -8.99430260e-01, -4.25103093e-01,
                             1.53027987e+00, -3.81407415e-04, -3.29650255e-01};
-    std::vector<float> C = {-1.56327541e+00, -7.09570140e-01, -5.37424982e-01,
+    std::vector<float> c = {-1.56327541e+00, -7.09570140e-01, -5.37424982e-01,
                             -2.22994831e-01, -2.15586437e+00, 2.09177941e-03,
                             -1.47279677e+00, 2.02627040e-01, -6.04527691e-01,
                             -1.29885596e+00, 2.16294914e+00, -1.48101497e-01};
     rtg::shape a_shape{rtg::shape::float_type, {4, 5}};
-    auto a = p.add_literal(rtg::literal{a_shape, A});
+    auto al = p.add_literal(rtg::literal{a_shape, a});
     rtg::shape b_shape{rtg::shape::float_type, {5, 3}};
-    auto b = p.add_literal(rtg::literal{b_shape, B});
-    p.add_instruction(rtg::gemm{}, a, b);
+    auto bl = p.add_literal(rtg::literal{b_shape, b});
+    p.add_instruction(rtg::gemm{}, al, bl);
     p.compile(rtg::cpu::cpu_target{});
     auto result = p.eval({});
     std::vector<float> results_vector(12);
     memcpy(results_vector.data(), result.data(), 12 * sizeof(float));
     float tol = 1e-6;
     for(int i = 0; i < results_vector.size(); i++) {
-        assert(std::abs(results_vector[i] - C[i]) < tol);
+        assert(std::abs(results_vector[i] - c[i]) < tol);
     }
 }
 
 void softmax_test() {
     rtg::program p;
-    std::vector<float> A = {-5.61869681e-01, 9.07827199e-01, 1.29255986e+00,
+    std::vector<float> a = {-5.61869681e-01, 9.07827199e-01, 1.29255986e+00,
                             3.18533443e-02, -1.22183852e-03, -2.83830553e-01,
                             -1.03245842e+00, -9.28322077e-01, -8.82696748e-01,
                             1.11327164e-01, -9.20038462e-01, 8.47388089e-01,
@@ -97,7 +94,7 @@ void softmax_test() {
                             2.93796062e-01, -6.02131486e-01, 2.70461679e-01,
                             -8.92358482e-01, 1.04388881e+00, 2.66154885e-01};
-    std::vector<float> S = {0.30191708, 0.59879845, 0.50029165, 0.24915339, 0.36823985,
+    std::vector<float> s = {0.30191708, 0.59879845, 0.50029165, 0.24915339, 0.36823985,
                             0.13190967, 0.0349741, 0.18750034, 0.21905553, 0.27000085,
                             0.0547399, 0.56318235, 0.47422904, 0.78964758, 0.91381913,
                             0.44601166, 0.47902739, 0.13120073, 0.4449684, 0.18766427,
@@ -123,21 +120,213 @@ void softmax_test() {
                             0.32632929, 0.36892858, 0.09416146, 0.26656723, 0.42914796};
     rtg::shape a_shape{rtg::shape::float_type, {5, 3, 4, 2}};
-    auto a = p.add_literal(rtg::literal{a_shape, A});
-    p.add_instruction(rtg::softmax{}, a);
+    auto al = p.add_literal(rtg::literal{a_shape, a});
+    p.add_instruction(rtg::softmax{}, al);
     p.compile(rtg::cpu::cpu_target{});
     auto result = p.eval({});
     std::vector<float> results_vector(120);
     memcpy(results_vector.data(), result.data(), 120 * sizeof(float));
     float tol = 1e-6;
     for(int i = 0; i < results_vector.size(); i++) {
-        assert(std::abs(results_vector[i] - S[i]) < tol);
+        assert(std::abs(results_vector[i] - s[i]) < tol);
     }
 }
 
+void conv2d_test() {
+    rtg::program p;
+    std::vector<float> a = {
+        2.71567607, -0.9960829, 0.91671127, 0.28140706, 0.63235772, 0.08077253, 0.80927712, -0.59108931,
+        -1.05421555, -2.76622486, -0.85044265, -0.52049929, 0.67726439, -0.65290606, 0.02345525, -0.33579525,
+        0.38901961, 1.05473483, -1.31188095, 1.8963089, -0.07265259, 0.947339, 0.41949373, -0.70814759,
+        0.25892952, 1.07311416, 1.2571274, -0.62318051, -0.19951548, -0.94232577, -0.29393643, 0.42292568,
+        -0.80230367, 1.40909171, 0.63617158, 0.13900366, 1.09253144, -0.15265895, 1.54781747, 0.72780299,
+        1.09189606, -0.38068101, 0.97057933, -0.58958799, 1.56188643, 0.21474874, 0.58725154, -1.27097559,
+        -0.03024297, 1.09437096, -0.4897908, 0.34838957, -1.31042492, -1.69069934, 0.86956722, -0.40457946,
+        0.46691212, 1.29273605, 0.26464137, 0.22073045, -1.02178168, 0.22163901, -1.84387338, 0.75522131,
+        -0.45775682, -0.42241111, -1.50944722, 1.07256448, -1.95876884, -0.28106022, 0.3341668, 2.13129425,
+        -1.14728117, -1.06555498, -0.298444, -0.88322699, -0.65866792, -2.06007552, 0.01374334, 0.45612028,
+        0.52715492, 1.01914406, -1.72659791, 0.80650896, 0.16860051, 2.24112225, -0.78620857, 0.36566174,
+        -0.07020134, -0.47976932, -0.68230027, -0.94711417, -0.54506505, 1.66504931, -0.71860826, 0.61132306};
+    std::vector<float> c = {
+        2.82721668e-02, 6.44195229e-02, 1.53499246e-02, 1.72468081e-01, -6.33238107e-02, 9.49496776e-02,
+        1.40258059e-01, -7.92879611e-02, -1.29301161e-01, 3.11307609e-03, -1.90624535e-01, 1.13238767e-01,
+        -2.80647576e-02, 3.12882811e-02, -3.52091640e-02, 3.33581865e-02, 6.43158704e-02, 7.40238279e-02,
+        -1.00106120e-01, -9.56912562e-02, 1.44342467e-01, 9.40258950e-02, 6.36333972e-02, 1.66158378e-03,
+        -8.91554281e-02, 2.58734226e-02, 1.70919895e-02, 1.78214177e-01, 8.84564668e-02, 8.98126513e-02,
+        -1.63809001e-01, 1.37802169e-01, 1.66439757e-01, -1.45631135e-02, 1.88469887e-04, 4.76950556e-02,
+        -1.91969007e-01, -1.76233292e-01, -7.70473927e-02, 1.14828631e-01, 1.76608220e-01, -1.50728196e-01,
+        1.99946314e-02, -5.88052124e-02, 1.31612435e-01, 1.61106288e-02, -1.35080189e-01, 1.49512306e-01,
+        3.86456847e-02, 1.29330024e-01, -3.22975963e-02, -5.60784787e-02, -5.41997552e-02, 4.78562862e-02};
+    std::vector<float> s = {
+        0.27039781, 0.19105849, -0.06339942, -0.65087199, 0.40867025, 0.05063812, -0.14907975, 0.49018705,
+        -0.49197209, 0.33236548, -0.39374301, 0.16012701, 0.06574871, 0.71606487, -0.55201721, -0.46427044};
+    rtg::shape a_shape{rtg::shape::float_type, {2, 3, 4, 4}};
+    auto al = p.add_literal(rtg::literal{a_shape, a});
+    rtg::shape c_shape{rtg::shape::float_type, {2, 3, 3, 3}};
+    auto cl = p.add_literal(rtg::literal{c_shape, c});
+    p.add_instruction(rtg::convolution{}, al, cl);
+    p.compile(rtg::cpu::cpu_target{});
+    auto result = p.eval({});
+    std::vector<float> results_vector(16);
+    memcpy(results_vector.data(), result.data(), 16 * sizeof(float));
+    float tol = 1e-6;
+    for(int i = 0; i < results_vector.size(); i++) {
+        assert(std::abs(results_vector[i] - s[i]) < tol);
+    }
+}
+
+void conv2d_padding_test() {
+    rtg::program p;
+    std::vector<float> a = {
+        2.71567607, -0.9960829, 0.91671127, 0.28140706, 0.63235772, 0.08077253, 0.80927712, -0.59108931,
+        -1.05421555, -2.76622486, -0.85044265, -0.52049929, 0.67726439, -0.65290606, 0.02345525, -0.33579525,
+        0.38901961, 1.05473483, -1.31188095, 1.8963089, -0.07265259, 0.947339, 0.41949373, -0.70814759,
+        0.25892952, 1.07311416, 1.2571274, -0.62318051, -0.19951548, -0.94232577, -0.29393643, 0.42292568,
+        -0.80230367, 1.40909171, 0.63617158, 0.13900366, 1.09253144, -0.15265895, 1.54781747, 0.72780299,
+        1.09189606, -0.38068101, 0.97057933, -0.58958799, 1.56188643, 0.21474874, 0.58725154, -1.27097559,
+        -0.03024297, 1.09437096, -0.4897908, 0.34838957, -1.31042492, -1.69069934, 0.86956722, -0.40457946,
+        0.46691212, 1.29273605, 0.26464137, 0.22073045, -1.02178168, 0.22163901, -1.84387338, 0.75522131,
+        -0.45775682, -0.42241111, -1.50944722, 1.07256448, -1.95876884, -0.28106022, 0.3341668, 2.13129425,
+        -1.14728117, -1.06555498, -0.298444, -0.88322699, -0.65866792, -2.06007552, 0.01374334, 0.45612028,
+        0.52715492, 1.01914406, -1.72659791, 0.80650896, 0.16860051, 2.24112225, -0.78620857, 0.36566174,
+        -0.07020134, -0.47976932, -0.68230027, -0.94711417, -0.54506505, 1.66504931, -0.71860826, 0.61132306};
+    std::vector<float> c = {
+        -0.16115488, -0.09800646, -0.05412646, 0.10475694, 0.00555485, -0.12667653,
+        0.0458357, -0.02656217, -0.16338061, 0.15037455, 0.0102711, 0.01303349,
+        0.05242859, 0.02034754, 0.04751867, -0.17038961, -0.1434752, -0.10770349,
+        0.05676742, -0.15838449, 0.10128359, -0.18958683, 0.11954515, 0.10758857,
+        -0.01058291, -0.12797487, 0.08971019, 0.18793164, -0.00881396, -0.06588994,
+        -0.13321903, -0.03300409, 0.01439607, 0.07618178, -0.11556662, 0.00764295,
+        0.12956454, -0.08937147, -0.12763587, 0.04674943, 0.05765297, 0.11336918,
+        0.14747436, -0.06199479, -0.01166052, -0.12432006, -0.04494537, -0.17581205,
+        0.09475745, 0.1149437, -0.1014564, 0.0274073, -0.01323579, -0.11092556};
+    std::vector<float> s = {
+        -0.0201216, 0.40407312, -0.39005592, -0.0631946, 0.37963012, -0.64611685, 0.1349397, -0.54113752,
+        0.28533003, 0.27667275, -0.16442731, -0.181494, 0.30564839, 0.58744538, 0.32015014, 0.24969585,
+        -0.27367792, -0.53308117, 0.41236052, 0.26136363, -0.01489828, 0.57652152, -0.38506854, 0.119615,
+        0.0437076, 0.04779706, 0.57887721, 0.23126155, 0.05695833, -0.68200272, 0.02063358, -0.10267162,
+        0.8062973, -0.38149622, -0.40134856, -0.03353126, 0.38991132, -0.3478111, 0.03661491, 0.25783631,
+        0.62772679, -0.1961118, 0.76423508, -0.36241418, -0.20994355, -0.12368261, -0.9406727, 0.02340185,
+        -0.08793129, -0.02471633, -0.58163726, -0.02211772, -0.42014724, 0.77525634, 0.504951, -0.20537445,
+        -0.20369984, -0.83037728, -1.40423918, -0.46160448, -0.22944322, 0.36074194, 0.49579027, 0.46527559};
+    rtg::shape a_shape{rtg::shape::float_type, {2, 3, 4, 4}};
+    auto al = p.add_literal(rtg::literal{a_shape, a});
+    rtg::shape c_shape{rtg::shape::float_type, {2, 3, 3, 3}};
+    auto cl = p.add_literal(rtg::literal{c_shape, c});
+    p.add_instruction(rtg::convolution{{{1, 1}}, {{1, 1}}}, al, cl);
+    p.compile(rtg::cpu::cpu_target{});
+    auto result = p.eval({});
+    std::vector<float> results_vector(64);
+    memcpy(results_vector.data(), result.data(), 64 * sizeof(float));
+    float tol = 1e-6;
+    for(int i = 0; i < results_vector.size(); i++) {
+        assert(std::abs(results_vector[i] - s[i]) < tol);
+    }
+}
+
+void conv2d_padding_stride_test() {
+    rtg::program p;
+    std::vector<float> a = {
+        2.71567607, -0.9960829, 0.91671127, 0.28140706, 0.63235772, 0.08077253, 0.80927712, -0.59108931,
+        -1.05421555, -2.76622486, -0.85044265, -0.52049929, 0.67726439, -0.65290606, 0.02345525, -0.33579525,
+        0.38901961, 1.05473483, -1.31188095, 1.8963089, -0.07265259, 0.947339, 0.41949373, -0.70814759,
+        0.25892952, 1.07311416, 1.2571274, -0.62318051, -0.19951548, -0.94232577, -0.29393643, 0.42292568,
+        -0.80230367, 1.40909171, 0.63617158, 0.13900366, 1.09253144, -0.15265895, 1.54781747, 0.72780299,
+        1.09189606, -0.38068101, 0.97057933, -0.58958799, 1.56188643, 0.21474874, 0.58725154, -1.27097559,
+        -0.03024297, 1.09437096, -0.4897908, 0.34838957, -1.31042492, -1.69069934, 0.86956722, -0.40457946,
+        0.46691212, 1.29273605, 0.26464137, 0.22073045, -1.02178168, 0.22163901, -1.84387338, 0.75522131,
+        -0.45775682, -0.42241111, -1.50944722, 1.07256448, -1.95876884, -0.28106022, 0.3341668, 2.13129425,
+        -1.14728117, -1.06555498, -0.298444, -0.88322699, -0.65866792, -2.06007552, 0.01374334, 0.45612028,
+        0.52715492, 1.01914406, -1.72659791, 0.80650896, 0.16860051, 2.24112225, -0.78620857, 0.36566174,
+        -0.07020134, -0.47976932, -0.68230027, -0.94711417, -0.54506505, 1.66504931, -0.71860826, 0.61132306};
+    std::vector<float> c = {
+        -0.14601797, -0.13000923, 0.06521662, 0.06178288, -0.11083675, 0.10154136,
+        0.09990512, 0.06030385, -0.11374587, -0.17523311, -0.14344215, 0.17802463,
+        0.06300922, -0.15325832, 0.07066704, 0.05166031, 0.00615084, -0.02606523,
+        0.08083995, -0.17913306, 0.0624622, 0.0735731, -0.04198661, -0.0164391,
+        -0.06374192, 0.16569914, 0.10681538, 0.07370754, 0.02802075, 0.00282027,
+        0.15104802, -0.11084409, -0.00197773, 0.07924436, 0.03528272, 0.04765259,
+        -0.15896152, 0.07917164, 0.12125669, -0.1154705, -0.11999125, 0.12749968,
+        -0.06269585, 0.18658121, -0.03944227, 0.0111798, -0.17731084, 0.11789055,
+        -0.09982193, 0.08142821, 0.0729029, 0.11303909, 0.12735154, 0.03885292};
+    std::vector<float> s = {
+        -0.20817225, 0.87965256, 0.14958936, -1.24887264, -0.06540672, 0.20778663, 0.40456355, -0.99900877,
+        0.4917807, 0.1994698, 0.64205718, 0.37798831, -0.25315839, 0.44276932, -0.16138598, 0.79344082};
+    rtg::shape a_shape{rtg::shape::float_type, {2, 3, 4, 4}};
+    auto al = p.add_literal(rtg::literal{a_shape, a});
+    rtg::shape c_shape{rtg::shape::float_type, {2, 3, 3, 3}};
+    auto cl = p.add_literal(rtg::literal{c_shape, c});
+    p.add_instruction(rtg::convolution{{{1, 1}}, {{2, 2}}}, al, cl);
+    p.compile(rtg::cpu::cpu_target{});
+    auto result = p.eval({});
+    std::vector<float> results_vector(16);
+    memcpy(results_vector.data(), result.data(), 16 * sizeof(float));
+    float tol = 1e-6;
+    for(int i = 0; i < results_vector.size(); i++) {
+        assert(std::abs(results_vector[i] - s[i]) < tol);
+    }
+}
+
 int main()
 {
     exp_test();
     gemm_test();
     softmax_test();
+    conv2d_test();
+    conv2d_padding_test();
+    conv2d_padding_stride_test();
 }
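
One detail worth spelling out (not stated in the diff): the element counts the new tests copy out (16, 64 and 16 floats) follow from the usual convolution output-size arithmetic. A small self-contained check, assuming rtg::convolution{} defaults to padding 0 and stride 1, which is consistent with the sizes checked above:

// Output spatial size of a 2D convolution: (in + 2*pad - kernel) / stride + 1.
// With input {2,3,4,4} and weights {2,3,3,3} the output has 2 batches and
// 2 channels, so the counts below match the results_vector sizes in the tests.
#include <cassert>

constexpr int conv_out(int in, int kernel, int pad, int stride)
{
    return (in + 2 * pad - kernel) / stride + 1;
}

int main()
{
    assert(2 * 2 * conv_out(4, 3, 0, 1) * conv_out(4, 3, 0, 1) == 16); // conv2d_test
    assert(2 * 2 * conv_out(4, 3, 1, 1) * conv_out(4, 3, 1, 1) == 64); // conv2d_padding_test
    assert(2 * 2 * conv_out(4, 3, 1, 2) * conv_out(4, 3, 1, 2) == 16); // conv2d_padding_stride_test
}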

test/test.hpp → test/include/test.hpp  (file renamed)
@@ -82,7 +82,7 @@ struct lhs_expression
     template <class U> \
     auto operator op(const U& rhs) const \
     { \
-        return make_expression(lhs, rhs, name{}); \
+        return make_expression(lhs, rhs, name{}); /* NOLINT */ \
     }
 TEST_FOREACH_OPERATOR(TEST_LHS_OPERATOR)

test/miopen/miopen.cpp  (new file, mode 100644)
#include <rtg/program.hpp>
#include <rtg/operators.hpp>
#include <rtg/cpu/cpu_target.hpp>
#include <rtg/miopen/miopen_target.hpp>
#include <rtg/manage_ptr.hpp>
#include <miopen/miopen.h>

#include <random>

#include "test.hpp"

using hip_ptr       = RTG_MANAGE_PTR(void, hipFree);
using miopen_handle = RTG_MANAGE_PTR(miopenHandle_t, miopenDestroy);

template <class Result, class F, class... Ts>
Result make_obj(F f, Ts... xs)
{
    typename Result::pointer x = nullptr;
    auto status = f(&x, xs...);
    Result r{x};
    if(status != miopenStatusSuccess) RTG_THROW("MIOpen call failed");
    return r;
}

hip_ptr hip_allocate(std::size_t sz)
{
    void* result;
    // TODO: Check status
    hipMalloc(&result, sz);
    return hip_ptr{result};
}

template <class T>
hip_ptr write(const T& x)
{
    using type  = typename T::value_type;
    auto size   = x.size() * sizeof(type);
    auto result = hip_allocate(size);
    // TODO: Check status
    hipMemcpy(result.get(), x.data(), size, hipMemcpyHostToDevice);
    return result;
}

template <class T>
std::vector<T> read(const void* x, std::size_t sz)
{
    std::vector<T> result(sz);
    // TODO: Check status
    hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
    return result;
}

rtg::program create_program()
{
    rtg::program p;
    auto input   = p.add_parameter("x", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}});
    auto weights = p.add_parameter("w", rtg::shape{rtg::shape::float_type, {4, 3, 3, 3}});
    auto conv    = p.add_instruction(rtg::convolution{}, input, weights);
    p.add_instruction(rtg::activation{"relu"}, conv);
    return p;
}

std::vector<float> get_tensor_data(rtg::shape s)
{
    std::vector<float> result(s.elements());
    std::mt19937 engine{0};
    std::uniform_real_distribution<> dist;
    std::generate(result.begin(), result.end(), [&] { return dist(engine); });
    return result;
}

rtg::argument get_tensor_argument_cpu(rtg::shape s)
{
    auto v = get_tensor_data(s);
    return {s, [v]() mutable { return reinterpret_cast<char*>(v.data()); }};
}

rtg::argument get_tensor_argument_gpu(rtg::shape s)
{
    auto v = get_tensor_data(s);
    auto p = rtg::share(write(v));
    return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
}

std::vector<float> cpu()
{
    std::vector<float> result;
    auto p = create_program();
    auto x = get_tensor_argument_cpu({rtg::shape::float_type, {4, 3, 3, 3}});
    auto w = get_tensor_argument_cpu({rtg::shape::float_type, {4, 3, 3, 3}});
    p.compile(rtg::cpu::cpu_target{});
    auto r = p.eval({{"x", x}, {"w", w}});
    r.visit([&](auto output) { result.assign(output.begin(), output.end()); });
    return result;
}

std::vector<float> gpu()
{
    std::vector<float> result;
    auto p = create_program();
    auto x = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}});
    auto w = get_tensor_argument_gpu({rtg::shape::float_type, {4, 3, 3, 3}});
    p.compile(rtg::miopen::miopen_target{});
    auto y      = get_tensor_argument_gpu(p.get_parameter_shape("output"));
    auto handle = make_obj<miopen_handle>(&miopenCreate);
    auto r = p.eval({{"x", x}, {"w", w}, {"output", y}, {"handle", {rtg::shape::any_type, handle.get()}}});
    result = read<float>(r.data(), r.get_shape().elements());
    return result;
}

void test1()
{
    auto x = cpu();
    auto y = gpu();
    // TODO: Use expect
    if(x == y)
        std::cout << "FAILED" << std::endl;
}

int main() { test1(); }
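
A follow-up note on test1 (not part of the commit): the cpu_ops tests above compare results against a tolerance rather than exact equality, and the same idea could be applied to the cpu()/gpu() vectors once the "TODO: Use expect" is addressed. A hedged sketch; the helper name is hypothetical:

// Sketch only: elementwise tolerance comparison in the style of the cpu_ops
// tests, usable for the cpu()/gpu() results above.
#include <cmath>
#include <cstddef>
#include <vector>

bool approx_equal(const std::vector<float>& x, const std::vector<float>& y, float tol = 1e-6f)
{
    if(x.size() != y.size())
        return false;
    for(std::size_t i = 0; i < x.size(); i++)
    {
        if(std::abs(x[i] - y[i]) >= tol)
            return false;
    }
    return true;
}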