"src/vscode:/vscode.git/clone" did not exist on "62a8d7f0b1bc041fb90521e2a63a41dd5063e281"
Commit 9b3eff99 authored by wsttiger

Merge branch 'master' into resnet18_demo

parents 256326cc f0604d78
@@ -19,13 +19,13 @@ def rocmtestnode(variant, name, body) {
     }
     stage("image ${variant}") {
         try {
-            docker.build("${image}", ".")
+            docker.build("${image}", '.')
         } catch(Exception ex) {
-            docker.build("${image}", "--no-cache .")
+            docker.build("${image}", '--no-cache .')
         }
     }
-    withDockerContainer(image: image, args: '--device=/dev/kfd --device=/dev/dri --group-add video --cap-add SYS_PTRACE --add-host="bzip2.org:46.235.226.80" --add-host="www.bzip2.org:46.235.226.80"') {
+    withDockerContainer(image: image, args: '--device=/dev/kfd --device=/dev/dri --group-add video --cap-add SYS_PTRACE') {
         timeout(time: 1, unit: 'HOURS') {
             body(cmake_build)
         }
@@ -39,3 +39,17 @@ add_sphinx_doc(src
     DEPENDS doxygen
 )
+
+find_package(LATEX)
+if(LATEX_FOUND)
+    add_sphinx_doc(src
+        BUILDER latex
+        OUTPUT_DIR pdf
+        VARS
+            breathe_projects.proj=${DOXYGEN_OUTPUT}/xml
+            breathe_default_project=proj
+        DEPENDS doxygen
+    )
+else()
+    message("Latex builder not found. The latex builder is required only for building the PDF documentation for MIGraph and is not necessary for building the library or any other components. To build the PDF documentation, run make in ${CMAKE_CURRENT_SOURCE_DIR}/pdf once a latex builder is installed.")
+endif()
@@ -5,6 +5,7 @@ add_library(migraph
     generate.cpp
     program.cpp
     shape.cpp
+    simplify_reshapes.cpp
 )
 rocm_clang_tidy_check(migraph)
 target_include_directories(migraph PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
@@ -13,8 +13,8 @@ void auto_contiguous::apply(program& p) const
         shape s = ins->result;
         if(not s.standard())
         {
-            auto prev = p.insert_instruction(ins, ins->op, ins->arguments);
-            p.replace_instruction(ins, contiguous{}, prev);
+            auto c = p.insert_instruction(std::next(ins), contiguous{}, ins);
+            p.replace_instruction(ins, c);
         }
     }
 }
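For context (not part of the commit): the rewritten pass now inserts a `contiguous` copy *after* any instruction whose result is not in standard layout and points that instruction's consumers at the copy, instead of cloning the instruction itself. A minimal sketch of the effect, mirroring the tests added later in this diff:

```cpp
// Hedged sketch: a transpose produces a non-standard result, so the pass
// should leave the stream as x -> transpose -> contiguous.
migraph::program p;
auto x = p.add_parameter("x", {migraph::shape::float_type, {2, 2}});
p.add_instruction(migraph::transpose{{1, 0}}, x); // non-standard shape
migraph::auto_contiguous{}.apply(p);
// The final instruction's shape should now be standard:
// p.get_shape().standard() == true
```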
@@ -15,8 +15,12 @@ void dead_code_elimination::apply(program& p) const
         // instruction
         if(ins == p.begin())
             continue;
+        const auto i = std::prev(ins);
+        // Skip instruction with empty shape as output
+        if(i->result.elements() == 0)
+            continue;
         // Skip the last instruction
-        if(std::prev(ins) == last)
+        if(i == last)
             break;
         fix([&](auto self, auto leaf) {
             assert(p.has_instruction(leaf));
@@ -28,7 +32,7 @@ void dead_code_elimination::apply(program& p) const
                 for(auto arg : args)
                     self(arg);
             }
-        })(std::prev(ins));
+        })(i);
     }
     p.remove_instructions(std::next(last), p.end());
 }
@@ -70,7 +70,35 @@ struct check_shapes
     const check_shapes& same_ndims() const
     {
         if(!this->same([](const shape& s) { return s.lens().size(); }))
-            MIGRAPH_THROW(prefix() + "Dimensions do not match");
+            MIGRAPH_THROW(prefix() + "Number of dimensions do not match");
+        return *this;
+    }
+
+    const check_shapes& standard() const
+    {
+        if(!this->all_of([](const shape& s) { return s.standard(); }))
+            MIGRAPH_THROW(prefix() + "Shapes are not in standard layout");
+        return *this;
+    }
+
+    const check_shapes& packed() const
+    {
+        if(!this->all_of([](const shape& s) { return s.packed(); }))
+            MIGRAPH_THROW(prefix() + "Shapes are not packed");
+        return *this;
+    }
+
+    const check_shapes& not_transposed() const
+    {
+        if(!this->all_of([](const shape& s) { return not s.transposed(); }))
+            MIGRAPH_THROW(prefix() + "Shapes are transposed");
+        return *this;
+    }
+
+    const check_shapes& not_broadcasted() const
+    {
+        // if(!this->all_of([](const shape& s) { return not s.broadcasted(); }))
+        //     MIGRAPH_THROW(prefix() + "Shapes are broadcasted");
         return *this;
     }
@@ -38,22 +38,31 @@ struct instruction
             result = r;
             for(auto&& ins : output)
             {
-                ins->replace(compute_shape(ins->op, ins->arguments));
+                assert(ins->op.name().front() != '@');
+                ins->recompute_shape();
             }
         }
     }

+    void recompute_shape() { replace(compute_shape(op, arguments)); }
+
     void replace(std::vector<instruction_ref> args)
     {
         clear_arguments();
         arguments = std::move(args);
     }

+    void replace_argument(instruction_ref old, instruction_ref new_ins)
+    {
+        std::replace(arguments.begin(), arguments.end(), old, new_ins);
+        old->remove_output(*this);
+    }
+
     void clear_arguments()
     {
         for(auto&& arg : arguments)
         {
-            migraph::erase(arg->output, *this);
+            arg->remove_output(*this);
         }
         arguments.clear();
     }
@@ -64,6 +73,15 @@ struct instruction
     }

     bool valid(instruction_ref start) const
+    {
+        return valid() && std::all_of(arguments.begin(), arguments.end(), [&](instruction_ref i) {
+                   auto self = std::find(i->output.begin(), i->output.end(), *this);
+                   return self != i->output.end() &&
+                          std::distance(start, i) < std::distance(start, *self);
+               });
+    }
+
+    bool valid() const
     {
         shape computed;
         if(op.name() == "@literal")
@@ -86,16 +104,9 @@ struct instruction
             }
         }
         return result == computed &&
-               std::all_of(output.begin(),
-                           output.end(),
-                           [&](instruction_ref i) {
-                               return std::find(i->arguments.begin(), i->arguments.end(), *this) !=
-                                      i->arguments.end();
-                           }) &&
-               std::all_of(arguments.begin(), arguments.end(), [&](instruction_ref i) {
-                   auto self = std::find(i->output.begin(), i->output.end(), *this);
-                   return self != i->output.end() &&
-                          std::distance(start, i) < std::distance(start, *self);
-               });
+               std::all_of(output.begin(), output.end(), [&](instruction_ref i) {
+                   return std::find(i->arguments.begin(), i->arguments.end(), *this) !=
+                          i->arguments.end();
+               });
     }
@@ -107,6 +118,18 @@ struct instruction
     friend bool operator!=(instruction_ref ref, const instruction& i) { return !(i == ref); }

+    void add_output(instruction_ref ins)
+    {
+        if(std::find(output.begin(), output.end(), ins) == output.end())
+            output.push_back(ins);
+    }
+
+    template <class T>
+    void remove_output(const T& ins)
+    {
+        migraph::erase(output, ins);
+    }
+
     operation op;
     shape result;
     std::vector<instruction_ref> output;
@@ -117,7 +140,14 @@ struct instruction
 inline void backreference(instruction_ref ref)
 {
     for(auto&& arg : ref->arguments)
-        arg->output.push_back(ref);
+        arg->add_output(ref);
+}
+
+inline void replace_argument(instruction_ref ins, instruction_ref old, instruction_ref new_ins)
+{
+    ins->replace_argument(old, new_ins);
+    backreference(ins);
+    ins->recompute_shape();
 }
 // TODO: Move to a cpp file
@@ -52,6 +52,8 @@ struct program
     instruction_ref
     replace_instruction(instruction_ref ins, operation op, std::vector<instruction_ref> args);

+    instruction_ref replace_instruction(instruction_ref ins, instruction_ref rep);
+
     instruction_ref remove_instruction(instruction_ref ins);
     instruction_ref remove_instructions(instruction_ref first, instruction_ref last);
@@ -69,7 +71,7 @@ struct program
     instruction_ref add_parameter(std::string name, shape s);

-    shape get_parameter_shape(std::string name);
+    shape get_parameter_shape(std::string name) const;

     std::unordered_map<std::string, shape> get_parameter_shapes() const;
@@ -77,8 +79,8 @@ struct program
     bool has_instruction(instruction_ref ins) const;

-    instruction_ref begin();
-    instruction_ref end();
+    instruction_ref begin() const;
+    instruction_ref end() const;

     shape get_shape() const;
@@ -17,6 +17,23 @@ void copy(Range&& r, Iterator it)
     std::copy(r.begin(), r.end(), it);
 }

+template <class Iterator>
+struct iterator_range
+{
+    Iterator start;
+    Iterator last;
+
+    Iterator begin() const { return start; }
+    Iterator end() const { return last; }
+};
+
+template <class Iterator>
+iterator_range<Iterator> range(Iterator start, Iterator last)
+{
+    return {start, last};
+}
+
 } // namespace migraph
 #endif
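An illustrative use of the new helper (not from the commit), assuming the header is installed as `<migraph/ranges.hpp>`:

```cpp
#include <iostream>
#include <vector>
#include <migraph/ranges.hpp>

int main()
{
    std::vector<int> v = {1, 2, 3, 4};
    // Iterate an iterator pair directly in a range-based for loop.
    for(int x : migraph::range(v.begin() + 1, v.end()))
        std::cout << x << '\n'; // prints 2, 3, 4
}
```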
#ifndef MIGRAPH_GUARD_RTGLIB_SIMPLIFY_RESHAPES_HPP
#define MIGRAPH_GUARD_RTGLIB_SIMPLIFY_RESHAPES_HPP
#include <string>
#include <migraph/instruction_ref.hpp>
namespace migraph {
struct program;
struct simplify_reshapes
{
std::string name() const { return "simplify_reshapes"; }
void apply(program& p) const;
};
} // namespace migraph
#endif
@@ -32,11 +32,13 @@ program::insert_instruction(instruction_ref ins, operation op, std::vector<instr
     assert(std::all_of(
                args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
            "Argument is not an exisiting instruction");
+    assert(not starts_with(op.name(), "@"));
     // TODO: Use move
     shape r = compute_shape(op, args);
     auto result = impl->instructions.insert(ins, {op, r, args});
     backreference(result);
     assert(result->arguments == args);
+    assert(result->valid(begin()));
     return result;
 }
@@ -46,13 +48,41 @@ program::replace_instruction(instruction_ref ins, operation op, std::vector<inst
     assert(std::all_of(
                args.begin(), args.end(), [&](instruction_ref x) { return has_instruction(x); }) &&
            "Argument is not an exisiting instruction");
+    assert(not starts_with(op.name(), "@"));

     shape r = compute_shape(op, args);
     ins->replace(op, r, args);
     backreference(ins);
+    assert(ins->valid(begin()));
     return ins;
 }

+instruction_ref program::replace_instruction(instruction_ref ins, instruction_ref rep)
+{
+    assert(has_instruction(ins));
+    assert(has_instruction(rep));
+    assert(ins != rep);
+    // TODO: Should it be an error if the output is empty?
+    if(ins->output.empty())
+    {
+        return rep;
+    }
+    for(auto&& out : ins->output)
+    {
+        // TODO: Check for possible cycles
+        if(out != rep)
+        {
+            replace_argument(out, ins, rep);
+        }
+        assert(out->valid(begin()));
+    }
+    // Replacement should not be dead code unless its the last instruction
+    assert(!rep->output.empty() or rep == std::prev(end()));
+    assert(ins->valid(begin()));
+    assert(rep->valid(begin()));
+    return rep;
+}
+
 instruction_ref program::remove_instruction(instruction_ref ins)
 {
     assert(has_instruction(ins));
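A sketch of how the new overload is meant to be used (hypothetical names, not from the diff): it redirects every consumer of `ins` to read `rep` instead, recomputing downstream shapes through `replace_argument`; the `simplify_reshapes` pass added in this commit relies on exactly this to collapse redundant reshape chains.

```cpp
// Hedged sketch: assume t2 and x are instruction_refs in program p with
// identical result shapes, where t2 is a chain of reshapes applied to x.
p.replace_instruction(t2, x);
// Consumers of t2 now read x directly; the unused reshapes become dead
// code for a later dead_code_elimination pass to remove.
```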
@@ -96,7 +126,7 @@ instruction_ref program::add_parameter(std::string name, shape s)
     return impl->instructions.begin();
 }

-shape program::get_parameter_shape(std::string name)
+shape program::get_parameter_shape(std::string name) const
 {
     auto ins = std::find_if(
         impl->instructions.begin(), impl->instructions.end(), [&](const instruction& x) {
@@ -137,8 +167,8 @@ bool program::has_instruction(instruction_ref ins) const
         }) != impl->instructions.end();
 }

-instruction_ref program::begin() { return impl->instructions.begin(); }
-instruction_ref program::end() { return impl->instructions.end(); }
+instruction_ref program::begin() const { return impl->instructions.begin(); }
+instruction_ref program::end() const { return impl->instructions.end(); }

 shape program::get_shape() const { return impl->instructions.back().result; }
@@ -162,7 +192,7 @@ void program::compile(const target& t)
     {
         auto index = std::distance(impl->instructions.begin(), invalid);
         MIGRAPH_THROW(p.name() + " pass produces invalid program at instruction " +
-                      std::to_string(index));
+                      std::to_string(index) + ": " + invalid->op.name());
     }
 #endif
 }
@@ -43,6 +43,8 @@ const std::vector<std::size_t>& shape::strides() const { return this->m_strides;
 std::size_t shape::elements() const
 {
     assert(this->lens().size() == this->strides().size());
+    if(this->lens().empty())
+        return 0;
     return std::accumulate(
         this->lens().begin(), this->lens().end(), std::size_t{1}, std::multiplies<std::size_t>());
 }
@@ -101,6 +103,8 @@ bool shape::standard() const { return this->m_standard; }
 std::size_t shape::element_space() const
 {
     assert(this->lens().size() == this->strides().size());
+    if(this->lens().empty())
+        return 0;
     return std::inner_product(this->lens().begin(),
                               this->lens().end(),
                               this->strides().begin(),
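A note on the intent (illustration only): without the guard, `std::accumulate` over an empty `lens()` with an initial value of 1 would report one element for a dimensionless shape. The new dead-code-elimination check above (`i->result.elements() == 0`) depends on empty shapes reporting zero.

```cpp
// Hedged illustration of the guarded behavior for a default-constructed shape.
migraph::shape s{};             // no dimensions: lens() is empty
assert(s.elements() == 0);      // the empty product would otherwise yield 1
assert(s.element_space() == 0); // likewise explicitly zero now
```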
#include <migraph/simplify_reshapes.hpp>
#include <migraph/program.hpp>
#include <migraph/instruction.hpp>
#include <migraph/operators.hpp>
#include <migraph/iterator_for.hpp>
#include <migraph/ranges.hpp>
#include <unordered_set>
namespace migraph {
bool is_reshaper(const std::string& name)
{
// clang-format off
static const std::unordered_set<std::string> names = {
"reshape",
"transpose",
// "broadcast",
"contiguous"
};
// clang-format on
return contains(names, name);
}
void simplify_reshapes::apply(program& p) const
{
for(auto ins : iterator_for(p))
{
if(not is_reshaper(ins->op.name()))
continue;
if(ins->output.size() != 1)
continue;
if(is_reshaper(ins->output.front()->op.name()))
continue;
// Gather reshapes
std::vector<instruction_ref> reshapes{ins};
while(is_reshaper(reshapes.back()->op.name()))
{
assert(!reshapes.back()->arguments.empty());
assert(p.has_instruction(reshapes.back()->arguments.front()));
reshapes.push_back(reshapes.back()->arguments.front());
}
std::pair<instruction_ref, instruction_ref> r{p.end(), p.end()};
for(auto start : iterator_for(reshapes))
{
auto last = std::find_if(reshapes.rbegin(), reshapes.rend(), [&](auto&& i) {
return i->result == (*start)->result and i != (*start);
});
if(last != reshapes.rend())
{
r = std::make_pair(*start, *last);
break;
}
}
if(r.first != r.second)
{
p.replace_instruction(r.first, r.second);
}
}
}
} // namespace migraph
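A worked example of what the pass collapses (illustrative, not from the commit; `pass_op` is the dummy consumer used by the tests in this repo): starting from the head of a reshaper chain, it gathers consecutive reshapers, looks for two entries with identical result shapes, and replaces one with the other.

```cpp
// Hedged sketch: two back-to-back transposes cancel out.
migraph::program p;
auto x  = p.add_parameter("x", {migraph::shape::float_type, {2, 3}});
auto t1 = p.add_instruction(migraph::transpose{{1, 0}}, x);  // 3x2
auto t2 = p.add_instruction(migraph::transpose{{1, 0}}, t1); // 2x3, same as x
p.add_instruction(pass_op{}, t2); // non-reshaper consumer so the chain has one output
migraph::simplify_reshapes{}.apply(p);
// The gathered chain is {t2, t1, x}; t2 and x have equal result shapes, so
// p.replace_instruction(t2, x) rewires the consumer to read x directly.
```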
@@ -11,11 +11,14 @@ namespace gpu {
 using hip_ptr = MIGRAPH_MANAGE_PTR(void, hipFree);

+std::string hip_error(int error) { return hipGetErrorString(static_cast<hipError_t>(error)); }
+
 hip_ptr allocate_gpu(std::size_t sz)
 {
     void* result;
-    // TODO: Check status
-    hipMalloc(&result, sz);
+    auto status = hipMalloc(&result, sz);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Gpu allocation failed: " + hip_error(status));
     return hip_ptr{result};
 }
@@ -31,34 +34,36 @@ template <class T>
 std::vector<T> read_from_gpu(const void* x, std::size_t sz)
 {
     std::vector<T> result(sz);
-    // TODO: Check status
-    hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
+    auto status = hipMemcpy(result.data(), x, sz * sizeof(T), hipMemcpyDeviceToHost);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Copy from gpu failed: " + hip_error(status));
     return result;
 }

 hip_ptr write_to_gpu(const void* x, std::size_t sz)
 {
     auto result = allocate_gpu(sz);
-    // TODO: Check status
-    hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
+    auto status = hipMemcpy(result.get(), x, sz, hipMemcpyHostToDevice);
+    if(status != hipSuccess)
+        MIGRAPH_THROW("Copy to gpu failed: " + hip_error(status));
     return result;
 }

-migraph::argument allocate_gpu(migraph::shape s)
+argument allocate_gpu(shape s)
 {
-    auto p = share(allocate_gpu(s.bytes()));
+    auto p = share(allocate_gpu(s.bytes() + 1));
     return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }

-migraph::argument to_gpu(migraph::argument arg)
+argument to_gpu(argument arg)
 {
     auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes()));
     return {arg.get_shape(), [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
 }

-migraph::argument from_gpu(migraph::argument arg)
+argument from_gpu(argument arg)
 {
-    migraph::argument result;
+    argument result;
     arg.visit([&](auto x) {
         using type = typename decltype(x)::value_type;
         auto v = read_from_gpu<type>(arg.data(), x.get_shape().bytes() / sizeof(type));
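The status check now repeats for every HIP call; a common way to factor that out (a sketch under the same conventions, not part of this commit) is a small helper:

```cpp
// Hypothetical helper reusing hip_error and MIGRAPH_THROW from above.
inline void check_hip(hipError_t status, const std::string& what)
{
    if(status != hipSuccess)
        MIGRAPH_THROW(what + " failed: " + hip_error(status));
}

// Usage (illustrative):
//   check_hip(hipMalloc(&result, sz), "Gpu allocation");
//   check_hip(hipMemcpy(dst, src, n, hipMemcpyDeviceToHost), "Copy from gpu");
```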
@@ -5,16 +5,37 @@
 namespace migraph {
 namespace gpu {

+struct index
+{
+    std::size_t global;
+    std::size_t local;
+    std::size_t group;
+};
+
+template <class F>
+__global__ void launcher(F f)
+{
+    index idx{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x};
+    f(idx);
+}
+
+auto launch(std::size_t global, std::size_t local)
+{
+    return [=](auto f) {
+        assert(local > 0);
+        assert(global > 0);
+        using f_type = decltype(f);
+        dim3 nblocks(global / local);
+        dim3 nthreads(local);
+        hipLaunchKernelGGL((launcher<f_type>), nblocks, nthreads, 0, nullptr, f);
+    };
+}
+
 template <class F>
 void visit_tensor_size(std::size_t n, F f)
 {
     switch(n)
     {
+    case 0:
+    {
+        f(std::integral_constant<std::size_t, 0>{});
+        break;
+    }
     case 1:
     {
         f(std::integral_constant<std::size_t, 1>{});
@@ -86,48 +107,27 @@ struct hip_tensor_descriptor
     size_t strides[NDim] = {};
 };

-template <typename T, size_t NDim>
-__global__ void contiguous_gpu(const T* a,
-                               hip_tensor_descriptor<NDim> a_desc,
-                               T* at,
-                               hip_tensor_descriptor<NDim> at_desc,
-                               size_t nelements)
-{
-    for(size_t i = blockIdx.x * blockDim.x + threadIdx.x; i < nelements;
-        i += blockDim.x * gridDim.x)
-    {
-        hip_index<NDim> s = at_desc.multi(i);
-        size_t lidx = a_desc.linear(s);
-        at[i] = a[lidx];
-    }
-}
-
 void hip_contiguous(migraph::shape output_shape, migraph::argument arg, migraph::argument result)
 {
-    size_t ndim = output_shape.lens().size();
     visit_all(result, arg)([&](auto output, auto input) {
-        if(ndim == 4)
-        {
+        visit_tensor_size(output_shape.lens().size(), [&](auto ndim) {
             const auto& s = arg.get_shape();
-            hip_tensor_descriptor<4> a_desc(s.lens(), s.strides());
-            hip_tensor_descriptor<4> at_desc(output_shape.lens(), output_shape.strides());
-            dim3 nblocks(512);
-            dim3 nthreads(512);
-            hipLaunchKernelGGL((contiguous_gpu<int, 4>),
-                               nblocks,
-                               nthreads,
-                               0,
-                               nullptr,
-                               input.data(),
-                               a_desc,
-                               output.data(),
-                               at_desc,
-                               s.elements());
-        }
-        else
-        {
-            MIGRAPH_THROW("contiguous is only valid for 4D tensors");
-        }
+            hip_tensor_descriptor<ndim> a_desc(s.lens(), s.strides());
+            hip_tensor_descriptor<ndim> at_desc(output_shape.lens(), output_shape.strides());
+            auto* a             = input.data();
+            auto* at            = output.data();
+            auto nelements      = s.elements();
+            std::size_t nlocal  = 512;
+            std::size_t nglobal = 512 * nlocal;
+
+            launch(nglobal, nlocal)([=](auto idx) mutable {
+                for(size_t i = idx.global; i < nelements; i += nglobal)
+                {
+                    size_t lidx = a_desc.linear(at_desc.multi(i));
+                    at[i] = a[lidx];
+                }
+            });
+        });
     });
 }
 } // namespace gpu
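For orientation (illustration only): the new `launch`/`index` pair turns `hipLaunchKernelGGL` boilerplate into a lambda-driven grid-stride loop, as the rewritten `hip_contiguous` above shows. A minimal fill kernel in the same style:

```cpp
// Hedged sketch using the launch/index helpers introduced above.
// 'data' is assumed to be a device pointer holding n floats.
void fill_ones(float* data, std::size_t n)
{
    std::size_t nlocal  = 512;
    std::size_t nglobal = 512 * nlocal;
    launch(nglobal, nlocal)([=](auto idx) {
        for(std::size_t i = idx.global; i < n; i += nglobal)
            data[i] = 1.0f;
    });
}
```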
@@ -2,12 +2,14 @@
 #define MIGRAPH_GUARD_RTGLIB_MIOPEN_LOWERING_HPP

 #include <migraph/program.hpp>
+#include <migraph/gpu/context.hpp>

 namespace migraph {
 namespace gpu {

 struct lowering
 {
+    context ctx;
     std::string name() const { return "gpu::lowering"; }
     void apply(program& p) const;
 };
@@ -9,7 +9,7 @@ namespace gpu {
 struct target
 {
     std::string name() const;
-    std::vector<pass> get_passes(migraph::context& ctx) const;
+    std::vector<pass> get_passes(migraph::context& gctx) const;
     migraph::context get_context() const;
 };
@@ -3,6 +3,7 @@
 #include <migraph/manage_ptr.hpp>
 #include <migraph/instruction.hpp>
 #include <migraph/operators.hpp>
+#include <migraph/generate.hpp>
 #include <migraph/shape_for_each.hpp>
 #include <migraph/gpu/miopen.hpp>
 #include <migraph/gpu/hip.hpp>
@@ -36,9 +37,6 @@ struct miopen_batch_norm_inference
         float alpha = 1.0, beta = 0.0f;

-        // TODO: adityaatluri
-        // create bn-scale-bias-mean-variance descriptor for
-        // miopen call
         miopenBatchNormalizationForwardInference(ctx.handle.get(),
                                                  miopenBatchNormMode_t(op.bn_mode),
                                                  &alpha,
@@ -62,11 +60,12 @@ struct miopen_convolution
 {
     convolution op;
     shared<convolution_descriptor> cd;
+    miopenConvFwdAlgorithm_t algo{};

     std::string name() const { return "gpu::convolution"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(3);
+        check_shapes{inputs, *this}.has(4).standard();
         return op.compute_shape({inputs.at(0), inputs.at(1)});
     }
     argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
@@ -76,36 +75,57 @@ struct miopen_convolution
         auto y_desc = make_tensor(output_shape);
         float alpha = 1, beta = 0;

+        miopenConvolutionForward(ctx.handle.get(),
+                                 &alpha,
+                                 x_desc.get(),
+                                 args[0].implicit(),
+                                 w_desc.get(),
+                                 args[1].implicit(),
+                                 cd.get(),
+                                 algo,
+                                 &beta,
+                                 y_desc.get(),
+                                 args[3].implicit(),
+                                 args[2].implicit(),
+                                 args[2].get_shape().bytes());
+        return args[3];
+    }
+
+    shape compile(context& ctx, shape output_shape, std::vector<instruction_ref> inputs)
+    {
+        shape workspace_shape{};
+
+        auto x_desc = make_tensor(inputs[0]->get_shape());
+        auto w_desc = make_tensor(inputs[1]->get_shape());
+        auto y_desc = make_tensor(output_shape);
+
+        std::size_t workspace_size = 0;
+        miopenConvolutionForwardGetWorkSpaceSize(
+            ctx.handle.get(), x_desc.get(), w_desc.get(), cd.get(), y_desc.get(), &workspace_size);
+        workspace_shape = shape{shape::int8_type, {workspace_size}};
+
+        auto x         = to_gpu(generate_argument(inputs[0]->get_shape()));
+        auto w         = to_gpu(generate_argument(inputs[1]->get_shape()));
+        auto y         = to_gpu(generate_argument(output_shape));
+        auto workspace = allocate_gpu(workspace_shape);
+
         int algo_count;
         miopenConvAlgoPerf_t perf;
         miopenFindConvolutionForwardAlgorithm(ctx.handle.get(),
                                               x_desc.get(),
-                                              args[0].implicit(),
+                                              x.implicit(),
                                               w_desc.get(),
-                                              args[1].implicit(),
+                                              w.implicit(),
                                               cd.get(),
                                               y_desc.get(),
-                                              args[2].implicit(),
+                                              y.implicit(),
                                               1,
                                               &algo_count,
                                               &perf,
-                                              nullptr,
-                                              0,
+                                              workspace.implicit(),
+                                              workspace_size,
                                               false);
-        miopenConvolutionForward(ctx.handle.get(),
-                                 &alpha,
-                                 x_desc.get(),
-                                 args[0].implicit(),
-                                 w_desc.get(),
-                                 args[1].implicit(),
-                                 cd.get(),
-                                 perf.fwd_algo,
-                                 &beta,
-                                 y_desc.get(),
-                                 args[2].implicit(),
-                                 nullptr,
-                                 0);
-        return args[2];
+        algo = perf.fwd_algo;
+        return workspace_shape;
     }
 };
@@ -117,7 +137,7 @@ struct miopen_pooling
     std::string name() const { return "gpu::pooling"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(2);
+        check_shapes{inputs, *this}.has(2).standard();
         return op.compute_shape({inputs.at(1)});
     }
     argument compute(context& ctx, shape output_shape, std::vector<argument> args) const
@@ -148,7 +168,7 @@ struct miopen_add
     std::string name() const { return "gpu::add"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(3);
+        check_shapes{inputs, *this}.has(3).not_broadcasted();
        return inputs.at(0);
     }
@@ -250,7 +270,7 @@ struct miopen_relu
     std::string name() const { return "gpu::relu"; }
     shape compute_shape(std::vector<shape> inputs) const
     {
-        check_shapes{inputs, *this}.has(2);
+        check_shapes{inputs, *this}.has(2).not_broadcasted();
         return inputs.at(1);
     }
@@ -275,6 +295,7 @@ struct miopen_relu
 struct miopen_apply
 {
     program* prog = nullptr;
+    context ctx{};

     void apply()
     {
@@ -304,8 +325,6 @@ struct miopen_apply
         {
             apply_contiguous(it);
         }
-        // TODO: adityaatluri
-        // tagging to easily find where code changed
         else if(it->op.name() == "batch_norm_inference")
         {
             apply_batch_norm_inference(it);
@@ -329,15 +348,16 @@ struct miopen_apply
     void apply_convolution(instruction_ref ins)
     {
         auto&& op = any_cast<convolution>(ins->op);
-        auto cd = make_conv(op);
-        auto output = insert_allocation(ins, ins->result);

-        prog->replace_instruction(ins,
-                                  miopen_convolution{op, std::move(cd)},
-                                  ins->arguments.at(0),
-                                  ins->arguments.at(1),
-                                  output);
+        auto conv = miopen_convolution{op, make_conv(op)};
+        auto ws   = conv.compile(ctx, ins->result, ins->arguments);
+
+        auto workspace = insert_allocation(ins, ws);
+        auto output    = insert_allocation(ins, ins->result);
+
+        prog->replace_instruction(
+            ins, conv, ins->arguments.at(0), ins->arguments.at(1), workspace, output);
     }

     void apply_pooling(instruction_ref ins)
@@ -384,34 +404,30 @@ struct miopen_apply
         prog->replace_instruction(ins, miopen_contiguous{op}, ins->arguments.at(0), output);
     }

-    // TODO: adityaatluri
-    // Not sure how to write this. Review and fix required
     void apply_batch_norm_inference(instruction_ref ins)
     {
         auto&& op       = any_cast<batch_norm_inference>(ins->op);
         auto output     = insert_allocation(ins, ins->result);
         shape old_shape = ins->arguments.at(1)->get_shape();
         std::vector<int64_t> new_shape{1, static_cast<int64_t>(old_shape.elements()), 1, 1};
-        auto arg1 =
-            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(1));
-        auto arg2 =
-            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(2));
-        auto arg3 =
-            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(3));
-        auto arg4 =
-            prog->insert_instruction(ins, migraph::reshape{new_shape}, ins->arguments.at(4));
+        auto reshape_op = reshape{new_shape};
+        std::vector<instruction_ref> reshapes;
+        std::transform(ins->arguments.begin() + 1,
+                       ins->arguments.end(),
+                       std::back_inserter(reshapes),
+                       [&](auto i) { return prog->insert_instruction(ins, reshape_op, i); });
         prog->replace_instruction(ins,
                                   miopen_batch_norm_inference{op},
                                   ins->arguments.at(0),
-                                  arg1,
-                                  arg2,
-                                  arg3,
-                                  arg4,
+                                  reshapes[0],
+                                  reshapes[1],
+                                  reshapes[2],
+                                  reshapes[3],
                                   output);
     }
 };

-void lowering::apply(program& p) const { miopen_apply{&p}.apply(); }
+void lowering::apply(program& p) const { miopen_apply{&p, ctx}.apply(); }
 } // namespace gpu
@@ -3,13 +3,27 @@
 #include <migraph/gpu/write_literals.hpp>
 #include <migraph/gpu/context.hpp>
 #include <migraph/check_context.hpp>
+#include <migraph/auto_contiguous.hpp>
+#include <migraph/dead_code_elimination.hpp>
+#include <migraph/simplify_reshapes.hpp>

 namespace migraph {
 namespace gpu {

-std::vector<pass> target::get_passes(migraph::context&) const
+std::vector<pass> target::get_passes(migraph::context& gctx) const
 {
-    return {lowering{}, write_literals{}, check_context<context>{}};
+    auto& ctx = any_cast<context>(gctx);
+    // clang-format off
+    return
+    {
+        auto_contiguous{},
+        simplify_reshapes{},
+        lowering{ctx},
+        write_literals{},
+        check_context<context>{},
+        dead_code_elimination{}
+    };
+    // clang-format on
 }

 std::string target::name() const { return "miopen"; }
@@ -18,8 +18,40 @@ migraph::literal get_2x2()
     return migraph::literal{{migraph::shape::float_type, {2, 2}}, {1, 2, 3, 4}};
 }

+migraph::literal get_2x2_transposed()
+{
+    return migraph::literal{{migraph::shape::float_type, {2, 2}, {1, 2}}, {1, 2, 3, 4}};
+}
+
 migraph::literal get_2() { return migraph::literal{{migraph::shape::float_type, {2}}, {1, 2}}; }

+migraph::literal get_2_broadcasted()
+{
+    return migraph::literal{{migraph::shape::float_type, {2, 1}, {1, 0}}, {1, 2}};
+}
+
+void literal_broadcast()
+{
+    migraph::program p;
+    p.add_literal(get_2_broadcasted());
+    EXPECT(not p.get_shape().standard());
+    EXPECT(p.get_shape().broadcasted());
+    p.compile(contiguous_target{});
+    EXPECT(p.get_shape().standard());
+    EXPECT(not p.get_shape().broadcasted());
+}
+
+void literal_transpose()
+{
+    migraph::program p;
+    p.add_literal(get_2x2_transposed());
+    EXPECT(not p.get_shape().standard());
+    EXPECT(p.get_shape().transposed());
+    p.compile(contiguous_target{});
+    EXPECT(p.get_shape().standard());
+    EXPECT(not p.get_shape().transposed());
+}
+
 void after_literal_transpose()
 {
     migraph::program p;
@@ -51,8 +83,43 @@ void after_literal_broadcast()
     EXPECT(not p.get_shape().broadcasted());
 }

+void after_param_transpose()
+{
+    migraph::program p;
+    auto l = p.add_parameter("2x2", {migraph::shape::float_type, {2, 2}});
+    EXPECT(p.get_shape().standard());
+    EXPECT(not p.get_shape().transposed());
+    auto t = p.add_instruction(migraph::transpose{{1, 0}}, l);
+    p.add_instruction(pass_op{}, t);
+    EXPECT(not p.get_shape().standard());
+    EXPECT(p.get_shape().transposed());
+    p.compile(contiguous_target{});
+    EXPECT(p.get_shape().standard());
+    EXPECT(not p.get_shape().transposed());
+}
+
+void after_param_broadcast()
+{
+    migraph::program p;
+    auto l1 = p.add_parameter("2x2", {migraph::shape::float_type, {2, 2}});
+    auto l2 = p.add_parameter("2", {migraph::shape::float_type, {2}});
+    EXPECT(p.get_shape().standard());
+    EXPECT(not p.get_shape().broadcasted());
+    auto b = p.add_instruction(migraph::broadcast{}, l1, l2);
+    p.add_instruction(pass_op{}, b);
+    EXPECT(not p.get_shape().standard());
+    EXPECT(p.get_shape().broadcasted());
+    p.compile(contiguous_target{});
+    EXPECT(p.get_shape().standard());
+    EXPECT(not p.get_shape().broadcasted());
+}
+
 int main()
 {
+    // literal_broadcast();
+    literal_transpose();
     after_literal_transpose();
     after_literal_broadcast();
+    after_param_transpose();
+    after_param_broadcast();
 }